// blob: ab2dc580f87261981ed980942ea96af4653d8f4e [file] [log] [blame]
/*
* Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mali_profiler_magma.h"
#include "hwcpipe_log.h"
#include "magma.h"
#include "magma_arm_mali_types.h"
#include "magma_vendor_queries.h"
#include <algorithm>
#include <stdexcept>
#include <filesystem>
#include <lib/fdio/directory.h>
#include <lib/zx/channel.h>
#include <lib/zx/vmar.h>
using mali_userspace::MALI_NAME_BLOCK_JM;
using mali_userspace::MALI_NAME_BLOCK_MMU;
using mali_userspace::MALI_NAME_BLOCK_SHADER;
using mali_userspace::MALI_NAME_BLOCK_TILER;
namespace hwcpipe
{
namespace
{
/// Hardware properties of the Mali GPU gathered while initialising the profiler.
struct MaliHWInfo
{
    /// Number of bits set in core_mask.
    unsigned int mp_count;
    /// Bits [31:16] of the device ID query result.
    unsigned int gpu_id;
    /// Bits [15:12] of the device ID query result.
    unsigned int r_value;
    /// Bits [11:4] of the device ID query result.
    unsigned int p_value;
    /// Result of the "shader present" vendor query (one bit per core, presumably).
    unsigned int core_mask;
    /// Bits [12:8] of the memory features query result, plus one.
    unsigned int l2_slices;
};
/**
 * Extract an unsigned bit field from a 64-bit value.
 *
 * @param input Value to read the field from.
 * @param shift Position of the field's least significant bit.
 * @param width Number of bits in the field.
 * @return The field, right-aligned. Only the low 32 bits are returned if the
 *         field is wider than 32 bits; 0 if the field is empty or lies
 *         entirely outside the 64-bit input.
 */
static uint32_t extract_bits(uint64_t input, uint32_t shift, uint32_t width)
{
    // Shifting a 64-bit value by 64 or more is undefined; no bits are left anyway.
    if (shift >= 64 || width == 0)
    {
        return 0;
    }
    // Build the mask in 64 bits: `1 << width` on a plain int overflows for width >= 31.
    const uint64_t mask = (width >= 64) ? ~uint64_t{0} : ((uint64_t{1} << width) - 1);
    return static_cast<uint32_t>((input >> shift) & mask);
}
} // namespace
/// Callable that computes the current value of one GpuCounter from the raw hardware counters.
using MaliValueGetter = std::function<uint64_t()>;
/**
 * Create a profiler for the given set of counters.
 *
 * Runs init() to connect to the GPU, then selects the table that maps each
 * GpuCounter onto raw hardware counter names according to the product ID
 * matched against gpu_id_.
 *
 * @param enabled_counters Counters that sample() should report.
 * @throws std::runtime_error if init() fails.
 */
MaliProfilerMagma::MaliProfilerMagma(const GpuCounterSet &enabled_counters) :
    enabled_counters_(enabled_counters)
{
    // Throws if setup fails
    init();

    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_mappings = {
        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
        {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
        {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }},
        {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
        {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
        // The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp.
        {GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }},
        {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
        {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
    };

    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> bifrost_mappings = {
        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
        {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
        {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
        {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
        {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
        {GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
        {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
        {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
    };

    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> midgard_mappings = {
        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }},
        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }},
        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }},
        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
    };

    // Find the product entry whose masked ID matches the GPU detected by init().
    auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
        return (cm.product_mask & gpu_id_) == cm.product_id;
    });

    if (product != std::end(mali_userspace::products))
    {
        switch (product->product_id)
        {
            case mali_userspace::PRODUCT_ID_T60X:
            case mali_userspace::PRODUCT_ID_T62X:
            case mali_userspace::PRODUCT_ID_T72X:
                mappings_ = midgard_mappings;
                // These products use a smaller per-task pixel multiplier than the shared table.
                mappings_[GpuCounter::Pixels] = [this]() { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 256; };
                break;
            case mali_userspace::PRODUCT_ID_T76X:
            case mali_userspace::PRODUCT_ID_T82X:
            case mali_userspace::PRODUCT_ID_T83X:
            case mali_userspace::PRODUCT_ID_T86X:
            case mali_userspace::PRODUCT_ID_TFRX:
                mappings_ = midgard_mappings;
                break;
            case mali_userspace::PRODUCT_ID_TMIX:
            case mali_userspace::PRODUCT_ID_THEX:
                mappings_ = bifrost_mappings;
                mappings_[GpuCounter::ShaderTextureCycles] = [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_COORD_ISSUE"); };
                // Must not fall through: the following cases would overwrite the override above.
                break;
            case mali_userspace::PRODUCT_ID_TSIX:
            case mali_userspace::PRODUCT_ID_TNOX:
            case mali_userspace::PRODUCT_ID_TGOX:
            case mali_userspace::PRODUCT_ID_TDVX:
                mappings_ = bifrost_mappings;
                // Must not fall through: the default case would replace this with the Valhall table.
                break;
            case mali_userspace::PRODUCT_ID_TNAXa:
            case mali_userspace::PRODUCT_ID_TNAXb:
            case mali_userspace::PRODUCT_ID_TTRX:
            default:
                // Products not listed above are treated like the Valhall ones.
                mappings_ = valhall_mappings;
                break;
        }
    }
    else
    {
        HWCPIPE_LOG("Mali counters initialization failed: Failed to identify GPU");
    }
}
/**
 * Release the Magma objects held by the profiler.
 *
 * Each handle is released only if it is non-zero, in the order
 * buffer, buffer pool, connection, device.
 */
MaliProfilerMagma::~MaliProfilerMagma()
{
    // The buffer and the pool are released through the connection, so they go first.
    if (buffer_)
    {
        magma_release_buffer(connection_, buffer_);
    }
    if (pool_)
    {
        magma_connection_release_performance_counter_buffer_pool(connection_, pool_);
    }
    if (connection_)
    {
        magma_release_connection(connection_);
    }
    if (device_)
    {
        magma_device_release(device_);
    }
}
/**
 * Connect to the Mali GPU through Magma and prepare performance counter capture.
 *
 * Steps, in order:
 *  1. Scan /dev/class/gpu and keep the first device whose vendor ID is Mali.
 *  2. Query device ID, shader-present mask and memory features to derive the
 *     GPU ID, core count and L2 slice count.
 *  3. Create a connection and obtain performance counter access via
 *     /dev/class/gpu-performance-counters.
 *  4. Create the counter buffer and buffer pool, add the buffer to the pool
 *     and enable performance counters.
 *  5. Select the counter name table for the product and build the core
 *     index remap table from the shader-present mask.
 *
 * @throws std::runtime_error if any step fails.
 *
 * NOTE(review): this is called from the constructor; when it throws, the
 * destructor does not run, so handles already stored in members are not
 * released on that path.
 */
void MaliProfilerMagma::init()
{
    for (const auto &p : std::filesystem::directory_iterator("/dev/class/gpu"))
    {
        zx::channel server_end, client_end;
        zx_status_t zx_status = zx::channel::create(0, &server_end, &client_end);
        if (zx_status != ZX_OK)
        {
            throw std::runtime_error("Failed to create zx channel");
        }
        zx_status = fdio_service_connect(p.path().c_str(), server_end.release());
        if (zx_status != ZX_OK)
        {
            throw std::runtime_error("Failed to connect to device");
        }
        magma_device_t device;
        magma_status_t status = magma_device_import(client_end.release(), &device);
        if (status != MAGMA_STATUS_OK)
        {
            throw std::runtime_error("Failed to find magma device.");
        }
        uint64_t vendor_id = 0;
        status = magma_query2(device, MAGMA_QUERY_VENDOR_ID, &vendor_id);
        if (status != MAGMA_STATUS_OK)
        {
            // The device is not stored anywhere yet, so release it before bailing out.
            magma_device_release(device);
            throw std::runtime_error("Failed to query vendor id");
        }
        if (vendor_id != MAGMA_VENDOR_ID_MALI)
        {
            magma_device_release(device);
            continue;
        }
        device_ = device;
        break;
    }
    if (!device_)
    {
        throw std::runtime_error("Didn't find valid mali device.");
    }

    // Value-initialise so every field starts at zero.
    MaliHWInfo hw_info{};

    uint64_t device_id = 0;
    magma_status_t status = magma_query2(device_, MAGMA_QUERY_DEVICE_ID, &device_id);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Querying device ID failed.");
    }
    hw_info.gpu_id = extract_bits(device_id, 16, 16);
    hw_info.r_value = extract_bits(device_id, 12, 4);
    hw_info.p_value = extract_bits(device_id, 4, 8);

    uint64_t shader_mask = 0;
    status = magma_query2(device_, kMsdArmVendorQueryShaderPresent, &shader_mask);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Querying shader present failed.");
    }
    // Only the low bits that fit in `unsigned` are kept, as before.
    hw_info.core_mask = static_cast<unsigned>(shader_mask);
    hw_info.mp_count = __builtin_popcountll(hw_info.core_mask);

    uint64_t mem_features = 0;
    status = magma_query2(device_, kMsdArmVendorQueryMemoryFeatures, &mem_features);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Querying memory features failed.");
    }
    hw_info.l2_slices = extract_bits(mem_features, 8, 5) + 1;

    num_cores_ = hw_info.mp_count;
    num_l2_slices_ = hw_info.l2_slices;
    gpu_id_ = hw_info.gpu_id;

    status = magma_create_connection2(device_, &connection_);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Creatng magma connection failed.");
    }

    // Try every performance counter access service; one successful grant is enough.
    bool success = false;
    for (const auto &p : std::filesystem::directory_iterator("/dev/class/gpu-performance-counters"))
    {
        zx::channel server_end, client_end;
        // Check channel creation here too, matching the device scan above.
        zx_status_t zx_status = zx::channel::create(0, &server_end, &client_end);
        if (zx_status != ZX_OK)
        {
            throw std::runtime_error("Failed to create zx channel");
        }
        zx_status = fdio_service_connect(p.path().c_str(), server_end.release());
        if (zx_status != ZX_OK)
        {
            throw std::runtime_error("Failed to connect to GPU perf count access service\n");
        }
        const magma_status_t access_status =
            magma_connection_access_performance_counters(connection_, client_end.release());
        if (access_status == MAGMA_STATUS_OK)
        {
            success = true;
        }
    }
    if (!success)
    {
        throw std::runtime_error("Failed to enable perf count access.");
    }

    // At the moment we only ever should have 1 read outstanding, so only create one buffer.
    constexpr uint32_t kBufferSize = 4096;
    size_t buffer_size = 0;
    status = magma_create_buffer(connection_, kBufferSize, &buffer_size, &buffer_);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Create buffer failed.");
    }
    buffer_size_ = buffer_size;

    status = magma_connection_create_performance_counter_buffer_pool(connection_, &pool_, &notification_handle_);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Create performance counter buffer pool failed.");
    }

    // Hand the requested region of the buffer to the pool so a dump has somewhere to land.
    magma_buffer_offset offset;
    offset.buffer_id = magma_get_buffer_id(buffer_);
    offset.offset = 0;
    offset.length = kBufferSize;
    status = magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &offset, 1);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Add performance counters failed.");
    }

    uint64_t vector = 1;
    status = magma_connection_enable_performance_counters(connection_, &vector, 1);
    if (status != MAGMA_STATUS_OK)
    {
        throw std::runtime_error("Enable performance counters failed.");
    }

    auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
        return (cm.product_mask & hw_info.gpu_id) == cm.product_id;
    });
    if (product != std::end(mali_userspace::products))
    {
        names_lut_ = product->names_lut;
    }
    else
    {
        throw std::runtime_error("Could not identify GPU.");
    }

    raw_counter_buffer_.resize(buffer_size_ / sizeof(uint32_t));

    // Build core remap table.
    core_index_remap_.clear();
    core_index_remap_.reserve(hw_info.mp_count);
    unsigned int mask = hw_info.core_mask;
    while (mask != 0)
    {
        // Record the position of each set bit, lowest first, then clear it.
        unsigned int bit = __builtin_ctz(mask);
        core_index_remap_.push_back(bit);
        mask &= ~(1u << bit);
    }
}
/**
 * Start profiling.
 *
 * Requests one counter dump and waits for it to be read back. The values are
 * not converted into measurements here.
 * NOTE(review): presumably this first dump serves as a baseline for the next
 * sample() - confirm against the driver's counter semantics.
 *
 * @throws std::runtime_error if the dump or the read-back fails.
 */
void MaliProfilerMagma::run()
{
sample_counters();
wait_next_event();
}
/**
 * Stop profiling. Intentionally a no-op in this implementation.
 */
void MaliProfilerMagma::stop()
{
// We don't need to do anything on stop()
}
/**
 * Take one sample of every enabled counter.
 *
 * Triggers a hardware counter dump, waits for it to be read back, then
 * evaluates the value getter of each enabled counter that has a mapping.
 * Enabled counters without a mapping are skipped.
 *
 * @return Reference to the profiler's measurement table, updated in place.
 * @throws std::runtime_error if the dump or the read-back fails.
 */
const GpuMeasurements &MaliProfilerMagma::sample()
{
    sample_counters();
    wait_next_event();

    for (const auto &requested : enabled_counters_)
    {
        const auto entry = mappings_.find(requested);
        if (entry != mappings_.end())
        {
            measurements_[entry->first] = entry->second();
        }
    }

    return measurements_;
}
void MaliProfilerMagma::sample_counters()
{
magma_status_t status = magma_connection_dump_performance_counters(connection_, pool_, 1);
if (status != MAGMA_STATUS_OK)
{
throw std::runtime_error("Dump performance counters failed.");
}
}
void MaliProfilerMagma::wait_next_event()
{
magma_poll_item_t poll_item{};
poll_item.type = MAGMA_POLL_TYPE_HANDLE;
poll_item.condition = MAGMA_POLL_CONDITION_READABLE;
poll_item.handle = notification_handle_;
magma_status_t status = magma_poll(&poll_item, 1, INT64_MAX);
if (status != MAGMA_STATUS_OK)
{
throw std::runtime_error("Poll for performance counters failed.");
}
uint32_t trigger_id;
uint64_t buffer_id;
uint32_t buffer_offset;
uint64_t time;
uint32_t result_flags;
status = magma_connection_read_performance_counter_completion(
connection_, pool_, &trigger_id, &buffer_id, &buffer_offset,
&time, &result_flags);
if (status != MAGMA_STATUS_OK)
{
throw std::runtime_error("Read performance counters failed.");
}
void* data {};
{
magma_handle_t handle;
magma_status_t status = magma_get_buffer_handle(connection_, buffer_, &handle);
if (status != MAGMA_STATUS_OK)
throw std::runtime_error("Failed to get buffer handle");
zx::vmo vmo(handle);
zx_vaddr_t zx_vaddr;
zx_status_t zx_status = zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE,
0, //vmar_offset,
vmo,
0, //offset
buffer_size_, &zx_vaddr);
if (zx_status != ZX_OK)
throw std::runtime_error("Failed to map buffer");
data = reinterpret_cast<void *>(zx_vaddr);
}
memcpy(raw_counter_buffer_.data(), data, 4096);
zx::vmar::root_self()->unmap(reinterpret_cast<zx_vaddr_t>(data), buffer_size_);
timestamp_ = time;
magma_buffer_offset offset;
offset.buffer_id = magma_get_buffer_id(buffer_);
offset.offset = 0;
offset.length = 4096;
status = magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &offset, 1);
if (status != MAGMA_STATUS_OK)
{
throw std::runtime_error("Add performance counters failed.");
}
}
/**
 * Read one named hardware counter from the most recent dump.
 *
 * MMU counters are summed over all L2 slices and shader counters over all
 * shader cores; job manager and tiler counters are read from their single block.
 *
 * @param block Counter block the name belongs to.
 * @param name  Counter name, matched by find_counter_index_by_name().
 * @return The counter value (summed where applicable), or 0 if no counter
 *         with that name exists in the block.
 * @throws std::runtime_error if get_counters() rejects a slice or core index.
 */
uint64_t MaliProfilerMagma::get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const
{
    // The lookup does not depend on the slice/core, so do it once up front.
    const int counter_index = find_counter_index_by_name(block, name);
    if (counter_index < 0)
    {
        // Unknown name: indexing with -1 would read outside the counter block.
        // Report 0 instead, in line with sample() skipping counters it cannot map.
        return 0;
    }

    uint64_t sum = 0;
    switch (block)
    {
        case mali_userspace::MALI_NAME_BLOCK_MMU:
            // If an MMU counter is selected, sum the values over MMU slices
            for (int i = 0; i < num_l2_slices_; i++)
            {
                sum += get_counters(block, i)[counter_index];
            }
            return sum;
        case mali_userspace::MALI_NAME_BLOCK_SHADER:
            // If a shader core counter is selected, sum the values over shader cores
            for (int i = 0; i < num_cores_; i++)
            {
                sum += get_counters(block, i)[counter_index];
            }
            return sum;
        case mali_userspace::MALI_NAME_BLOCK_JM:
        case mali_userspace::MALI_NAME_BLOCK_TILER:
        default:
            return static_cast<uint64_t>(get_counters(block)[counter_index]);
    }
}
/**
 * Locate the start of one counter block inside raw_counter_buffer_.
 *
 * Block order in the buffer, in units of MALI_NAME_BLOCK_SIZE entries:
 * job manager (0), tiler (1), one block per L2 slice (from 2), then the
 * shader core blocks, addressed through core_index_remap_.
 *
 * @param block Which kind of block to locate.
 * @param index L2 slice number for MMU blocks, core number for shader blocks;
 *              ignored for job manager and tiler blocks.
 * @return Pointer to the first counter of the block.
 * @throws std::runtime_error if index is out of range for an MMU or shader block.
 */
const uint32_t *MaliProfilerMagma::get_counters(mali_userspace::MaliCounterBlockName block, int index) const
{
    if (block == mali_userspace::MALI_NAME_BLOCK_JM)
    {
        return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 0;
    }

    if (block == mali_userspace::MALI_NAME_BLOCK_TILER)
    {
        return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 1;
    }

    if (block == mali_userspace::MALI_NAME_BLOCK_MMU)
    {
        const bool slice_in_range = (index >= 0) && (index < num_l2_slices_);
        if (!slice_in_range)
        {
            throw std::runtime_error("Invalid slice number.");
        }
        // For MMU counters, index selects the L2 slice.
        return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + index);
    }

    // Every remaining block kind is handled as a shader core block.
    const bool core_in_range = (index >= 0) && (index < num_cores_);
    if (!core_in_range)
    {
        throw std::runtime_error("Invalid core number.");
    }
    // For shader counters, index selects the core, translated through the remap table.
    return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + num_l2_slices_ + core_index_remap_[index]);
}
/**
 * Find the position of a counter inside its block by name.
 *
 * Scans the block's MALI_NAME_BLOCK_SIZE entries of names_lut_ in order and
 * picks the first entry that contains `name` as a substring.
 *
 * @param block Counter block whose name table is searched.
 * @param name  Text to look for within the table entries.
 * @return Index of the first matching entry within the block, or -1 if none matches.
 */
int MaliProfilerMagma::find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const
{
    const char *const *const first = &names_lut_[mali_userspace::MALI_NAME_BLOCK_SIZE * block];
    const char *const *const last = first + mali_userspace::MALI_NAME_BLOCK_SIZE;

    const auto hit = std::find_if(first, last, [name](const char *candidate) {
        return strstr(candidate, name) != nullptr;
    });

    return (hit == last) ? -1 : static_cast<int>(hit - first);
}
} // namespace hwcpipe