| /* |
| * Copyright (c) 2017-2020 ARM Limited. |
| * |
| * SPDX-License-Identifier: MIT |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #include "mali_profiler_magma.h" |
| |
| #include "hwcpipe_log.h" |
| |
| #include "magma.h" |
| #include "magma_arm_mali_types.h" |
| #include "magma_vendor_queries.h" |
| #include <algorithm> |
| #include <stdexcept> |
| |
| #include <filesystem> |
| #include <lib/fdio/directory.h> |
| #include <lib/zx/channel.h> |
| |
| using mali_userspace::MALI_NAME_BLOCK_JM; |
| using mali_userspace::MALI_NAME_BLOCK_MMU; |
| using mali_userspace::MALI_NAME_BLOCK_SHADER; |
| using mali_userspace::MALI_NAME_BLOCK_TILER; |
| |
| namespace hwcpipe |
| { |
| namespace |
| { |
// Hardware description filled in by MaliProfilerMagma::init() from the
// values the Magma driver reports. Plain aggregate; field order is kept
// stable so positional initialisation keeps working.
struct MaliHWInfo
{
    unsigned int mp_count;         // Number of bits set in core_mask.
    unsigned int gpu_id;           // Bits [31:16] of the queried device ID.
    unsigned int r_value;          // Bits [15:12] of the queried device ID.
    unsigned int p_value;          // Bits [11:4] of the queried device ID.
    unsigned int core_mask;        // Shader-present mask reported by the driver.
    unsigned int l2_slices;        // Derived from the memory-features query (field value + 1).
};
| |
// Returns the `width`-bit field of `input` that starts at bit `shift`
// (bit 0 is the least significant bit).
//
// The mask is built in 64-bit arithmetic: the previous `1 << width` was an
// `int` shift, which is undefined for width >= 32 and overflows for
// width == 31. A zero width, or a shift past the top of the input, yields 0.
// Fields wider than 32 bits are truncated to the low 32 bits by the return
// type.
static uint32_t extract_bits(uint64_t input, uint32_t shift, uint32_t width)
{
    if (width == 0 || shift >= 64)
    {
        return 0;
    }

    const uint64_t mask = (width >= 64) ? ~uint64_t{0} : ((uint64_t{1} << width) - 1);
    return static_cast<uint32_t>((input >> shift) & mask);
}
| } // namespace |
| |
// Callable that produces the current value of one derived GPU counter.
using MaliValueGetter = std::function<uint64_t()>;
| |
| MaliProfilerMagma::MaliProfilerMagma(const GpuCounterSet &enabled_counters) : |
| enabled_counters_(enabled_counters) |
| { |
| // Throws if setup fails |
| init(); |
| |
| const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_mappings = { |
| {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }}, |
| {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }}, |
| {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }}, |
| {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }}, |
| |
| {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }}, |
| {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }}, |
| {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }}, |
| |
| {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }}, |
| {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }}, |
| {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }}, |
| {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }}, |
| {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }}, |
| {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }}, |
| |
| {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }}, |
| {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }}, |
| |
| {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }}, |
| // The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp. |
| {GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }}, |
| {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }}, |
| {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }}, |
| |
| {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }}, |
| {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }}, |
| {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }}, |
| {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }}, |
| {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }}, |
| {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }}, |
| {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }}, |
| {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }}, |
| }; |
| |
| const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> bifrost_mappings = { |
| {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }}, |
| {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }}, |
| {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }}, |
| {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }}, |
| |
| {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }}, |
| {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }}, |
| {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }}, |
| |
| {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }}, |
| {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }}, |
| {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }}, |
| {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }}, |
| {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }}, |
| {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }}, |
| |
| {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }}, |
| {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }}, |
| |
| {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }}, |
| {GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }}, |
| {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }}, |
| {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }}, |
| |
| {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }}, |
| {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }}, |
| {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }}, |
| {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }}, |
| {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }}, |
| {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }}, |
| {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }}, |
| {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }}, |
| }; |
| |
| const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> midgard_mappings = { |
| {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }}, |
| {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }}, |
| {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }}, |
| |
| {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }}, |
| {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }}, |
| {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }}, |
| |
| {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }}, |
| {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }}, |
| {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }}, |
| {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }}, |
| {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }}, |
| {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }}, |
| |
| {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }}, |
| {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }}, |
| {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }}, |
| {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }}, |
| {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }}, |
| {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }}, |
| {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }}, |
| {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }}, |
| }; |
| |
| auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) { |
| return (cm.product_mask & gpu_id_) == cm.product_id; |
| }); |
| |
| if (product != std::end(mali_userspace::products)) |
| { |
| switch (product->product_id) |
| { |
| case mali_userspace::PRODUCT_ID_T60X: |
| case mali_userspace::PRODUCT_ID_T62X: |
| case mali_userspace::PRODUCT_ID_T72X: |
| mappings_ = midgard_mappings; |
| mappings_[GpuCounter::Pixels] = [this]() { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 256; }; |
| break; |
| case mali_userspace::PRODUCT_ID_T76X: |
| case mali_userspace::PRODUCT_ID_T82X: |
| case mali_userspace::PRODUCT_ID_T83X: |
| case mali_userspace::PRODUCT_ID_T86X: |
| case mali_userspace::PRODUCT_ID_TFRX: |
| mappings_ = midgard_mappings; |
| break; |
| case mali_userspace::PRODUCT_ID_TMIX: |
| case mali_userspace::PRODUCT_ID_THEX: |
| mappings_ = bifrost_mappings; |
| mappings_[GpuCounter::ShaderTextureCycles] = [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_COORD_ISSUE"); }; |
| case mali_userspace::PRODUCT_ID_TSIX: |
| case mali_userspace::PRODUCT_ID_TNOX: |
| case mali_userspace::PRODUCT_ID_TGOX: |
| case mali_userspace::PRODUCT_ID_TDVX: |
| mappings_ = bifrost_mappings; |
| case mali_userspace::PRODUCT_ID_TNAXa: |
| case mali_userspace::PRODUCT_ID_TNAXb: |
| case mali_userspace::PRODUCT_ID_TTRX: |
| default: |
| mappings_ = valhall_mappings; |
| break; |
| } |
| } |
| else |
| { |
| HWCPIPE_LOG("Mali counters initialization failed: Failed to identify GPU"); |
| } |
| } |
| |
| MaliProfilerMagma::~MaliProfilerMagma() |
| { |
| if (buffer_) |
| magma_release_buffer(connection_, buffer_); |
| if (pool_) |
| magma_connection_release_performance_counter_buffer_pool(connection_, pool_); |
| if (connection_) |
| magma_release_connection(connection_); |
| if (device_) |
| magma_device_release(device_); |
| } |
| |
| void MaliProfilerMagma::init() |
| { |
| MaliHWInfo hw_info; |
| for (auto &p : std::filesystem::directory_iterator("/dev/class/gpu")) |
| { |
| zx::channel server_end, client_end; |
| zx_status_t zx_status = zx::channel::create(0, &server_end, &client_end); |
| if (zx_status != ZX_OK) |
| { |
| throw std::runtime_error("Failed to create zx channel"); |
| } |
| zx_status = fdio_service_connect(p.path().c_str(), server_end.release()); |
| if (zx_status != ZX_OK) |
| { |
| throw std::runtime_error("Failed to connect to device"); |
| } |
| |
| magma_device_t device; |
| magma_status_t status = magma_device_import(client_end.release(), &device); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Failed to find magma device."); |
| } |
| uint64_t vendor_id = 0; |
| status = magma_query2(device, MAGMA_QUERY_VENDOR_ID, &vendor_id); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Failed to query vendor id"); |
| } |
| if (vendor_id != MAGMA_VENDOR_ID_MALI) |
| { |
| magma_device_release(device); |
| continue; |
| } |
| device_ = device; |
| break; |
| } |
| |
| if (!device_) |
| { |
| throw std::runtime_error("Didn't find valid mali device."); |
| } |
| |
| memset(&hw_info, 0, sizeof(hw_info)); |
| uint64_t device_id = 0; |
| magma_status_t status = magma_query2(device_, MAGMA_QUERY_DEVICE_ID, &device_id); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Querying device ID failed."); |
| } |
| hw_info.gpu_id = extract_bits(device_id, 16, 16); |
| hw_info.r_value = extract_bits(device_id, 12, 4); |
| hw_info.p_value = extract_bits(device_id, 4, 8); |
| uint64_t shader_mask; |
| status = magma_query2(device_, kMsdArmVendorQueryShaderPresent, &shader_mask); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Querying shader present failed."); |
| } |
| hw_info.core_mask = shader_mask; |
| hw_info.mp_count = __builtin_popcountll(hw_info.core_mask); |
| uint64_t mem_features; |
| status = magma_query2(device_, kMsdArmVendorQueryMemoryFeatures, &mem_features); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Querying memory features failed."); |
| } |
| hw_info.l2_slices = extract_bits(mem_features, 8, 5) + 1; |
| |
| num_cores_ = hw_info.mp_count; |
| num_l2_slices_ = hw_info.l2_slices; |
| gpu_id_ = hw_info.gpu_id; |
| |
| status = magma_create_connection2(device_, &connection_); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Creatng magma connection failed."); |
| } |
| |
| bool success = false; |
| for (auto &p : std::filesystem::directory_iterator("/dev/class/gpu-performance-counters")) |
| { |
| zx::channel server_end, client_end; |
| zx::channel::create(0, &server_end, &client_end); |
| |
| zx_status_t zx_status = fdio_service_connect(p.path().c_str(), server_end.release()); |
| if (zx_status != ZX_OK) |
| { |
| throw std::runtime_error("Failed to connect to GPU perf count access service\n"); |
| } |
| magma_status_t status = |
| magma_connection_access_performance_counters(connection_, client_end.release()); |
| if (status == MAGMA_STATUS_OK) |
| { |
| success = true; |
| } |
| } |
| if (!success) |
| { |
| throw std::runtime_error("Failed to enable perf count access."); |
| } |
| |
| size_t buffer_size; |
| |
| // At the moment we only ever should have 1 read outstanding, so only create one buffer. |
| constexpr uint32_t kBufferSize = 4096; |
| status = magma_create_buffer(connection_, kBufferSize, &buffer_size, &buffer_); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Create buffer failed."); |
| } |
| buffer_size_ = buffer_size; |
| |
| status = magma_connection_create_performance_counter_buffer_pool(connection_, &pool_, ¬ification_handle_); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Create performance counter buffer pool failed."); |
| } |
| magma_buffer_offset offset; |
| offset.buffer_id = magma_get_buffer_id(buffer_); |
| offset.offset = 0; |
| offset.length = 4096; |
| status = magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &offset, 1); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Add performance counters failed."); |
| } |
| |
| uint64_t vector = 1; |
| status = magma_connection_enable_performance_counters(connection_, &vector, 1); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Enable performance counters failed."); |
| } |
| |
| auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) { |
| return (cm.product_mask & hw_info.gpu_id) == cm.product_id; |
| }); |
| |
| if (product != std::end(mali_userspace::products)) |
| { |
| names_lut_ = product->names_lut; |
| } |
| else |
| { |
| throw std::runtime_error("Could not identify GPU."); |
| } |
| |
| raw_counter_buffer_.resize(buffer_size_ / sizeof(uint32_t)); |
| |
| // Build core remap table. |
| core_index_remap_.clear(); |
| core_index_remap_.reserve(hw_info.mp_count); |
| |
| unsigned int mask = hw_info.core_mask; |
| |
| while (mask != 0) |
| { |
| unsigned int bit = __builtin_ctz(mask); |
| core_index_remap_.push_back(bit); |
| mask &= ~(1u << bit); |
| } |
| } |
| |
| void MaliProfilerMagma::run() |
| { |
| sample_counters(); |
| wait_next_event(); |
| } |
| |
| void MaliProfilerMagma::stop() |
| { |
| // We don't need to do anything on stop() |
| } |
| |
| const GpuMeasurements &MaliProfilerMagma::sample() |
| { |
| sample_counters(); |
| wait_next_event(); |
| |
| for (const auto &counter : enabled_counters_) |
| { |
| auto mapping = mappings_.find(counter); |
| if (mapping == mappings_.end()) |
| { |
| continue; |
| } |
| |
| measurements_[mapping->first] = mapping->second(); |
| } |
| |
| return measurements_; |
| } |
| |
| void MaliProfilerMagma::sample_counters() |
| { |
| magma_status_t status = magma_connection_dump_performance_counters(connection_, pool_, 1); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Dump performance counters failed."); |
| } |
| } |
| |
| void MaliProfilerMagma::wait_next_event() |
| { |
| magma_poll_item_t poll_item{}; |
| poll_item.type = MAGMA_POLL_TYPE_HANDLE; |
| poll_item.condition = MAGMA_POLL_CONDITION_READABLE; |
| poll_item.handle = notification_handle_; |
| magma_status_t status = magma_poll(&poll_item, 1, INT64_MAX); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Poll for performance counters failed."); |
| } |
| uint32_t trigger_id; |
| uint64_t buffer_id; |
| uint32_t buffer_offset; |
| uint64_t time; |
| uint32_t result_flags; |
| status = magma_connection_read_performance_counter_completion( |
| connection_, pool_, &trigger_id, &buffer_id, &buffer_offset, |
| &time, &result_flags); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Read performance counters failed."); |
| } |
| void *data; |
| status = magma_map(connection_, buffer_, &data); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Mapping performance counters failed."); |
| } |
| memcpy(raw_counter_buffer_.data(), data, 4096); |
| timestamp_ = time; |
| magma_buffer_offset offset; |
| offset.buffer_id = magma_get_buffer_id(buffer_); |
| offset.offset = 0; |
| offset.length = 4096; |
| status = magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &offset, 1); |
| if (status != MAGMA_STATUS_OK) |
| { |
| throw std::runtime_error("Add performance counters failed."); |
| } |
| } |
| |
| uint64_t MaliProfilerMagma::get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const |
| { |
| uint64_t sum = 0; |
| switch (block) |
| { |
| case mali_userspace::MALI_NAME_BLOCK_MMU: |
| // If an MMU counter is selected, sum the values over MMU slices |
| for (int i = 0; i < num_l2_slices_; i++) |
| { |
| sum += get_counters(block, i)[find_counter_index_by_name(block, name)]; |
| } |
| return sum; |
| |
| case mali_userspace::MALI_NAME_BLOCK_SHADER: |
| // If a shader core counter is selected, sum the values over shader cores |
| for (int i = 0; i < num_cores_; i++) |
| { |
| sum += get_counters(block, i)[find_counter_index_by_name(block, name)]; |
| } |
| return sum; |
| |
| case mali_userspace::MALI_NAME_BLOCK_JM: |
| case mali_userspace::MALI_NAME_BLOCK_TILER: |
| default: |
| return static_cast<uint64_t>(get_counters(block)[find_counter_index_by_name(block, name)]); |
| } |
| } |
| |
| const uint32_t *MaliProfilerMagma::get_counters(mali_userspace::MaliCounterBlockName block, int index) const |
| { |
| switch (block) |
| { |
| case mali_userspace::MALI_NAME_BLOCK_JM: |
| return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 0; |
| case mali_userspace::MALI_NAME_BLOCK_MMU: |
| if (index < 0 || index >= num_l2_slices_) |
| { |
| throw std::runtime_error("Invalid slice number."); |
| } |
| |
| // If an MMU counter is selected, index refers to the MMU slice |
| return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + index); |
| case mali_userspace::MALI_NAME_BLOCK_TILER: |
| return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 1; |
| default: |
| if (index < 0 || index >= num_cores_) |
| { |
| throw std::runtime_error("Invalid core number."); |
| } |
| |
| // If a shader core counter is selected, index refers to the core index |
| return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + num_l2_slices_ + core_index_remap_[index]); |
| } |
| } |
| |
| int MaliProfilerMagma::find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const |
| { |
| const char *const *names = &names_lut_[mali_userspace::MALI_NAME_BLOCK_SIZE * block]; |
| |
| for (int i = 0; i < mali_userspace::MALI_NAME_BLOCK_SIZE; ++i) |
| { |
| if (strstr(names[i], name) != nullptr) |
| { |
| return i; |
| } |
| } |
| |
| return -1; |
| } |
| |
| } // namespace hwcpipe |