Add Magma support
Copy mali_profiler.h/cpp into new files and convert them to use the
magma performance counter API.
Change-Id: If8b8556468d063e89a1fa00d79c94f49ddd899ab
Reviewed-on: https://fuchsia-review.googlesource.com/c/third_party/github.com/ARM-software/HWCPipe/+/413156
Reviewed-by: Craig Stout <cstout@google.com>
diff --git a/BUILD.gn b/BUILD.gn
new file mode 100644
index 0000000..8a099cf
--- /dev/null
+++ b/BUILD.gn
@@ -0,0 +1,34 @@
+# Copyright 2020 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+config("hwcpipe_config") {  # Exported to dependents via public_configs of the mali_profiler target.
+ include_dirs = [ "." ]  # Makes this directory an include root for HWCPipe headers.
+ defines = [ "HWCPIPE_NO_JSON" ]  # hwcpipe.cpp only includes <json.hpp> when this macro is not defined.
+}
+
+source_set("mali_profiler") {  # HWCPipe built with the Magma-based Mali GPU profiler.
+ sources = [
+ "cpu_profiler.h",
+ "gpu_profiler.h",
+ "hwcpipe.cpp",
+ "hwcpipe.h",
+ "vendor/arm/mali/hwc.hpp",
+ "vendor/arm/mali/hwc_names.hpp",
+ "vendor/arm/mali/mali_profiler_magma.cpp",
+ "vendor/arm/mali/mali_profiler_magma.h",
+ ]
+ public_configs = [ ":hwcpipe_config" ]  # Dependents inherit the include dir and HWCPIPE_NO_JSON.
+ configs -= [ "//build/config:no_exceptions" ]  # The profiler throws std::runtime_error and hwcpipe.cpp catches it, so exceptions must stay enabled.
+
+ # Disable ShadowCallStack, since there seem to be some issues with the call stack after exceptions
+ # are caught.
+ # TODO(fxb/41627): Re-enable.
+ cflags = [ "-fno-sanitize=shadow-call-stack" ]
+ deps = [
+ "//sdk/lib/fdio",
+ "//src/graphics/drivers/msd-arm-mali/include",
+ "//src/graphics/lib/magma/src/libmagma",
+ "//zircon/public/lib/zx",
+ ]
+}
diff --git a/hwcpipe.cpp b/hwcpipe.cpp
index 2d3ba19..5e22372 100644
--- a/hwcpipe.cpp
+++ b/hwcpipe.cpp
@@ -30,8 +30,12 @@
# include "vendor/arm/mali/mali_profiler.h"
#endif
+#ifdef __Fuchsia__
+# include "vendor/arm/mali/mali_profiler_magma.h"
+#endif
+
#ifndef HWCPIPE_NO_JSON
-#include <json.hpp>
+# include <json.hpp>
using json = nlohmann::json;
#endif
@@ -189,6 +193,15 @@
{
HWCPIPE_LOG("Mali profiler initialization failed: %s", e.what());
}
+#elif defined(__Fuchsia__)
+ try
+ {
+ gpu_profiler_ = std::unique_ptr<MaliProfilerMagma>(new MaliProfilerMagma(enabled_gpu_counters));
+ }
+ catch (const std::runtime_error &e)
+ {
+ HWCPIPE_LOG("Mali profiler initialization failed: %s", e.what());
+ }
#else
HWCPIPE_LOG("No counters available for this platform.");
#endif
diff --git a/vendor/arm/mali/hwc_names.hpp b/vendor/arm/mali/hwc_names.hpp
index 84a6e3f..348ecd8 100644
--- a/vendor/arm/mali/hwc_names.hpp
+++ b/vendor/arm/mali/hwc_names.hpp
@@ -4411,6 +4411,11 @@
PRODUCT_ID_TTRX,
hardware_counters_mali_tTRx,
},
+ {
+ PRODUCT_ID_MASK_NEW,
+ PRODUCT_ID_TGOX,
+ hardware_counters_mali_tGOx,
+ },
};
enum
diff --git a/vendor/arm/mali/mali_profiler_magma.cpp b/vendor/arm/mali/mali_profiler_magma.cpp
new file mode 100644
index 0000000..4839e64
--- /dev/null
+++ b/vendor/arm/mali/mali_profiler_magma.cpp
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "mali_profiler_magma.h"
+
+#include "hwcpipe_log.h"
+
+#include "magma.h"
+#include "magma_arm_mali_types.h"
+#include "magma_vendor_queries.h"
+#include <algorithm>
+#include <stdexcept>
+
+#include <filesystem>
+#include <lib/fdio/directory.h>
+#include <lib/zx/channel.h>
+
+using mali_userspace::MALI_NAME_BLOCK_JM;
+using mali_userspace::MALI_NAME_BLOCK_MMU;
+using mali_userspace::MALI_NAME_BLOCK_SHADER;
+using mali_userspace::MALI_NAME_BLOCK_TILER;
+
+namespace hwcpipe
+{
+namespace
+{
+/** Hardware description that MaliProfilerMagma::init() assembles from magma queries. */
+struct MaliHWInfo
+{
+    unsigned int mp_count;         // Number of bits set in core_mask.
+    unsigned int gpu_id;           // Bits [31:16] of the magma device ID.
+    unsigned int r_value;          // Bits [15:12] of the magma device ID.
+    unsigned int p_value;          // Bits [11:4] of the magma device ID.
+    unsigned int core_mask;        // Result of the shader-present vendor query.
+    unsigned int l2_slices;        // Bits [12:8] of the memory-features vendor query.
+};
+
+/**
+ * Returns the bit field of @p input that is @p width bits wide and starts at bit @p shift.
+ *
+ * The mask is built in 64 bits. The previous `(1 << width) - 1` shifted a plain int, which
+ * overflows for width == 31 and is undefined for width >= 32. The result is still truncated
+ * to 32 bits by the return type.
+ */
+static uint32_t extract_bits(uint64_t input, uint32_t shift, uint32_t width)
+{
+    // A shift by the full width of the type is undefined, so handle width >= 64 explicitly.
+    const uint64_t mask = (width >= 64) ? ~uint64_t{0} : ((uint64_t{1} << width) - 1);
+    return static_cast<uint32_t>((input >> shift) & mask);
+}
+} // namespace
+
+typedef std::function<uint64_t(void)> MaliValueGetter; // NOTE(review): inside MaliProfilerMagma member functions the class-scope MaliValueGetter (std::function<double(void)>) is found first, so this alias appears unused — confirm before relying on it.
+
+/**
+ * Sets up magma performance-counter access and selects the counter mappings for the detected GPU.
+ *
+ * @param enabled_counters Counters that sample() should report.
+ * @throws std::runtime_error (from init()) if the device or its performance counters cannot be set up.
+ */
+MaliProfilerMagma::MaliProfilerMagma(const GpuCounterSet &enabled_counters) :
+    enabled_counters_(enabled_counters)
+{
+    // init() throws if setup fails. The destructor does not run for an object whose constructor
+    // throws, so release whatever init() already acquired before letting the exception continue.
+    try
+    {
+        init();
+    }
+    catch (...)
+    {
+        if (buffer_)
+            magma_release_buffer(connection_, buffer_);
+        if (pool_)
+            magma_connection_release_performance_counter_buffer_pool(connection_, pool_);
+        if (connection_)
+            magma_release_connection(connection_);
+        if (device_)
+            magma_device_release(device_);
+        throw;
+    }
+
+    // Each table maps a generic GpuCounter to a getter that derives it from raw Mali counters.
+    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_mappings = {
+        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+        {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
+
+        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
+        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
+        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
+
+        {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }},
+        {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
+
+        {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
+        // The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp.
+        {GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }},
+        {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
+        {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
+
+        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+    };
+
+    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> bifrost_mappings = {
+        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+        {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
+
+        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
+        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
+        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
+
+        {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
+        {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
+
+        {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
+        {GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
+        {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
+        {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
+
+        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+    };
+
+    const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> midgard_mappings = {
+        {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+        {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+        {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+
+        {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+        {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+        {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+        {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+        {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+        {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+        {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }},
+        {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }},
+        {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }},
+
+        {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+        {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+        {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+        {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+        {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+        {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+        {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+        {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+    };
+
+    // Find the product entry whose masked ID matches the GPU ID that init() stored in gpu_id_.
+    auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
+        return (cm.product_mask & gpu_id_) == cm.product_id;
+    });
+
+    if (product != std::end(mali_userspace::products))
+    {
+        switch (product->product_id)
+        {
+            case mali_userspace::PRODUCT_ID_T60X:
+            case mali_userspace::PRODUCT_ID_T62X:
+            case mali_userspace::PRODUCT_ID_T72X:
+                mappings_ = midgard_mappings;
+                // These products use a different JS0_TASKS-to-pixels factor than the table default.
+                mappings_[GpuCounter::Pixels] = [this]() { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 256; };
+                break;
+            case mali_userspace::PRODUCT_ID_T76X:
+            case mali_userspace::PRODUCT_ID_T82X:
+            case mali_userspace::PRODUCT_ID_T83X:
+            case mali_userspace::PRODUCT_ID_T86X:
+            case mali_userspace::PRODUCT_ID_TFRX:
+                mappings_ = midgard_mappings;
+                break;
+            case mali_userspace::PRODUCT_ID_TMIX:
+            case mali_userspace::PRODUCT_ID_THEX:
+                mappings_ = bifrost_mappings;
+                mappings_[GpuCounter::ShaderTextureCycles] = [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_COORD_ISSUE"); };
+                // Without this break the Bifrost selection fell through and was overwritten by the Valhall table.
+                break;
+            case mali_userspace::PRODUCT_ID_TSIX:
+            case mali_userspace::PRODUCT_ID_TNOX:
+            case mali_userspace::PRODUCT_ID_TGOX:
+            case mali_userspace::PRODUCT_ID_TDVX:
+                mappings_ = bifrost_mappings;
+                // Same as above: stop here so the Valhall table does not replace the Bifrost one.
+                break;
+            case mali_userspace::PRODUCT_ID_TNAXa:
+            case mali_userspace::PRODUCT_ID_TNAXb:
+            case mali_userspace::PRODUCT_ID_TTRX:
+            default:
+                mappings_ = valhall_mappings;
+                break;
+        }
+    }
+    else
+    {
+        HWCPIPE_LOG("Mali counters initialization failed: Failed to identify GPU");
+    }
+}
+
+/**
+ * Releases the magma objects held by the profiler: the counter buffer, the buffer pool,
+ * the connection and finally the device. Members that are still zero are skipped.
+ */
+MaliProfilerMagma::~MaliProfilerMagma()
+{
+    if (buffer_)
+    {
+        magma_release_buffer(connection_, buffer_);
+    }
+
+    if (pool_)
+    {
+        magma_connection_release_performance_counter_buffer_pool(connection_, pool_);
+    }
+
+    // The buffer and pool releases above take the connection, so it is released after them.
+    if (connection_)
+    {
+        magma_release_connection(connection_);
+    }
+
+    if (device_)
+    {
+        magma_device_release(device_);
+    }
+}
+
+/**
+ * Finds the Mali GPU, reads its hardware description and prepares performance-counter sampling.
+ *
+ * In order: imports the first device under /dev/class/gpu whose vendor ID is Mali, queries the
+ * device ID, shader-present mask and memory features, creates a magma connection, gains
+ * performance-counter access through /dev/class/gpu-performance-counters, creates the counter
+ * buffer and buffer pool, enables the counters, selects the counter-name table for the product
+ * and builds the shader-core remap table.
+ *
+ * @throws std::runtime_error if any step fails.
+ */
+void MaliProfilerMagma::init()
+{
+    for (auto &p : std::filesystem::directory_iterator("/dev/class/gpu"))
+    {
+        zx::channel server_end, client_end;
+        zx_status_t zx_status = zx::channel::create(0, &server_end, &client_end);
+        if (zx_status != ZX_OK)
+        {
+            throw std::runtime_error("Failed to create zx channel");
+        }
+        zx_status = fdio_service_connect(p.path().c_str(), server_end.release());
+        if (zx_status != ZX_OK)
+        {
+            throw std::runtime_error("Failed to connect to device");
+        }
+
+        magma_device_t device;
+        magma_status_t import_status = magma_device_import(client_end.release(), &device);
+        if (import_status != MAGMA_STATUS_OK)
+        {
+            throw std::runtime_error("Failed to find magma device.");
+        }
+        uint64_t vendor_id = 0;
+        magma_status_t query_status = magma_query2(device, MAGMA_QUERY_VENDOR_ID, &vendor_id);
+        if (query_status != MAGMA_STATUS_OK)
+        {
+            // The device is not stored in device_ yet, so nothing else would release it.
+            magma_device_release(device);
+            throw std::runtime_error("Failed to query vendor id");
+        }
+        if (vendor_id != MAGMA_VENDOR_ID_MALI)
+        {
+            magma_device_release(device);
+            continue;
+        }
+        device_ = device;
+        break;
+    }
+
+    if (!device_)
+    {
+        throw std::runtime_error("Didn't find valid mali device.");
+    }
+
+    // Value-initialization zeroes every field (replaces the earlier memset).
+    MaliHWInfo hw_info{};
+    uint64_t device_id = 0;
+    magma_status_t status = magma_query2(device_, MAGMA_QUERY_DEVICE_ID, &device_id);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Querying device ID failed.");
+    }
+    hw_info.gpu_id = extract_bits(device_id, 16, 16);
+    hw_info.r_value = extract_bits(device_id, 12, 4);
+    hw_info.p_value = extract_bits(device_id, 4, 8);
+
+    uint64_t shader_mask = 0;
+    status = magma_query2(device_, kMsdArmVendorQueryShaderPresent, &shader_mask);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Querying shader present failed.");
+    }
+    // core_mask is 32 bits wide; the narrowing from the 64-bit query result is made explicit.
+    hw_info.core_mask = static_cast<unsigned>(shader_mask);
+    hw_info.mp_count = __builtin_popcountll(hw_info.core_mask);
+
+    uint64_t mem_features = 0;
+    status = magma_query2(device_, kMsdArmVendorQueryMemoryFeatures, &mem_features);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Querying memory features failed.");
+    }
+    hw_info.l2_slices = extract_bits(mem_features, 8, 5);
+
+    num_cores_ = hw_info.mp_count;
+    num_l2_slices_ = hw_info.l2_slices;
+    gpu_id_ = hw_info.gpu_id;
+
+    status = magma_create_connection2(device_, &connection_);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Creatng magma connection failed.");
+    }
+
+    // Offer every performance-counter access service to the connection; one acceptance is enough.
+    bool success = false;
+    for (auto &p : std::filesystem::directory_iterator("/dev/class/gpu-performance-counters"))
+    {
+        zx::channel server_end, client_end;
+        // Previously the result was ignored, which would pass invalid handles on failure.
+        if (zx::channel::create(0, &server_end, &client_end) != ZX_OK)
+        {
+            throw std::runtime_error("Failed to create zx channel");
+        }
+
+        zx_status_t zx_status = fdio_service_connect(p.path().c_str(), server_end.release());
+        if (zx_status != ZX_OK)
+        {
+            throw std::runtime_error("Failed to connect to GPU perf count access service\n");
+        }
+        magma_status_t access_status =
+            magma_connection_access_performance_counters(connection_, client_end.release());
+        if (access_status == MAGMA_STATUS_OK)
+        {
+            success = true;
+        }
+    }
+    if (!success)
+    {
+        throw std::runtime_error("Failed to enable perf count access.");
+    }
+
+    size_t buffer_size;
+
+    // At the moment we only ever should have 1 read outstanding, so only create one buffer.
+    constexpr uint32_t kBufferSize = 4096;
+    status = magma_create_buffer(connection_, kBufferSize, &buffer_size, &buffer_);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Create buffer failed.");
+    }
+    buffer_size_ = buffer_size;
+
+    status = magma_connection_create_performance_counter_buffer_pool(connection_, &pool_, &notification_handle_);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Create performance counter buffer pool failed.");
+    }
+    magma_buffer_offset offset;
+    offset.buffer_id = magma_get_buffer_id(buffer_);
+    offset.offset = 0;
+    offset.length = kBufferSize;
+    status = magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &offset, 1);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Add performance counters failed.");
+    }
+
+    uint64_t vector = 1;
+    status = magma_connection_enable_performance_counters(connection_, &vector, 1);
+    if (status != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Enable performance counters failed.");
+    }
+
+    // Pick the counter-name table of the product whose masked ID matches this GPU.
+    auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
+        return (cm.product_mask & hw_info.gpu_id) == cm.product_id;
+    });
+
+    if (product != std::end(mali_userspace::products))
+    {
+        names_lut_ = product->names_lut;
+    }
+    else
+    {
+        throw std::runtime_error("Could not identify GPU.");
+    }
+
+    raw_counter_buffer_.resize(buffer_size_ / sizeof(uint32_t));
+
+    // Build core remap table: entry i holds the bit position of the i-th set bit in core_mask.
+    core_index_remap_.clear();
+    core_index_remap_.reserve(hw_info.mp_count);
+
+    unsigned int mask = hw_info.core_mask;
+
+    while (mask != 0)
+    {
+        unsigned int bit = __builtin_ctz(mask);
+        core_index_remap_.push_back(bit);
+        mask &= ~(1u << bit);
+    }
+}
+
+/**
+ * Starts profiling by requesting one counter dump and waiting until it has been read back.
+ * The dumped values are not converted into measurements here.
+ */
+void MaliProfilerMagma::run()
+{
+    sample_counters();        // request the dump
+    wait_next_event();        // block until the dump has been copied out
+}
+
+/** Stops profiling. Intentionally empty: this backend has nothing to do on stop(). */
+void MaliProfilerMagma::stop()
+{
+    // We don't need to do anything on stop()
+}
+
+/**
+ * Takes a new counter dump and evaluates every enabled counter that has a mapping.
+ *
+ * @return The measurements updated by this call. Enabled counters without a mapping are
+ *         not written.
+ * @throws std::runtime_error if dumping or reading the counters fails.
+ */
+const GpuMeasurements &MaliProfilerMagma::sample()
+{
+    // Request a dump and block until its data is available in raw_counter_buffer_.
+    sample_counters();
+    wait_next_event();
+
+    for (const auto &counter : enabled_counters_)
+    {
+        const auto entry = mappings_.find(counter);
+        if (entry != mappings_.end())
+        {
+            measurements_[entry->first] = entry->second();
+        }
+    }
+
+    return measurements_;
+}
+
+/**
+ * Asks magma to dump the performance counters into the buffer pool.
+ *
+ * @throws std::runtime_error if the dump request is rejected.
+ */
+void MaliProfilerMagma::sample_counters()
+{
+    // The last argument is presumably the trigger ID reported back with the completion — confirm against magma.h.
+    if (magma_connection_dump_performance_counters(connection_, pool_, 1) != MAGMA_STATUS_OK)
+    {
+        throw std::runtime_error("Dump performance counters failed.");
+    }
+}
+
+/**
+ * Blocks until a counter dump has completed, copies it into raw_counter_buffer_, records its
+ * timestamp and hands the buffer back to the pool for the next dump.
+ *
+ * @throws std::runtime_error if polling, reading the completion, mapping the buffer or
+ *         re-adding the buffer to the pool fails.
+ */
+void MaliProfilerMagma::wait_next_event()
+{
+    // Shared failure check: every magma call below must return MAGMA_STATUS_OK.
+    const auto require_ok = [](magma_status_t result, const char *failure) {
+        if (result != MAGMA_STATUS_OK)
+        {
+            throw std::runtime_error(failure);
+        }
+    };
+
+    // Wait, without a practical timeout, for the notification handle to become readable.
+    magma_poll_item_t wait_item{};
+    wait_item.type = MAGMA_POLL_TYPE_HANDLE;
+    wait_item.condition = MAGMA_POLL_CONDITION_READABLE;
+    wait_item.handle = notification_handle_;
+    require_ok(magma_poll(&wait_item, 1, INT64_MAX), "Poll for performance counters failed.");
+
+    // Only the completion time is used afterwards; the other outputs are required by the call.
+    uint32_t trigger_id;
+    uint64_t buffer_id;
+    uint32_t buffer_offset;
+    uint64_t time;
+    uint32_t result_flags;
+    require_ok(magma_connection_read_performance_counter_completion(
+                   connection_, pool_, &trigger_id, &buffer_id, &buffer_offset,
+                   &time, &result_flags),
+               "Read performance counters failed.");
+
+    void *mapped;
+    require_ok(magma_map(connection_, buffer_, &mapped), "Mapping performance counters failed.");
+    memcpy(raw_counter_buffer_.data(), mapped, 4096);
+    timestamp_ = time;
+
+    // Return the single buffer to the pool so the next dump has somewhere to land.
+    magma_buffer_offset requeue;
+    requeue.buffer_id = magma_get_buffer_id(buffer_);
+    requeue.offset = 0;
+    requeue.length = 4096;
+    require_ok(magma_connection_add_performance_counter_buffer_offsets_to_pool(connection_, pool_, &requeue, 1),
+               "Add performance counters failed.");
+}
+
+/**
+ * Returns the value of the named counter from the most recent dump.
+ *
+ * MMU counters are summed over all L2 slices and shader counters over all shader cores;
+ * job-manager and tiler counters are read from their single block.
+ *
+ * @param block Counter block the name belongs to.
+ * @param name  Counter name, matched by find_counter_index_by_name().
+ * @return The (summed) counter value, or 0 if the name is not present in the block's name
+ *         table. Previously the -1 "not found" result was used as an array index and read
+ *         memory in front of the counter block.
+ * @throws std::runtime_error propagated from get_counters().
+ */
+uint64_t MaliProfilerMagma::get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const
+{
+    // The index depends only on (block, name), so look it up once instead of once per slice/core.
+    const int counter_index = find_counter_index_by_name(block, name);
+    if (counter_index < 0)
+    {
+        return 0;
+    }
+
+    uint64_t sum = 0;
+    switch (block)
+    {
+        case mali_userspace::MALI_NAME_BLOCK_MMU:
+            // If an MMU counter is selected, sum the values over MMU slices
+            for (int i = 0; i < num_l2_slices_; i++)
+            {
+                sum += get_counters(block, i)[counter_index];
+            }
+            return sum;
+
+        case mali_userspace::MALI_NAME_BLOCK_SHADER:
+            // If a shader core counter is selected, sum the values over shader cores
+            for (int i = 0; i < num_cores_; i++)
+            {
+                sum += get_counters(block, i)[counter_index];
+            }
+            return sum;
+
+        case mali_userspace::MALI_NAME_BLOCK_JM:
+        case mali_userspace::MALI_NAME_BLOCK_TILER:
+        default:
+            return static_cast<uint64_t>(get_counters(block)[counter_index]);
+    }
+}
+
+/**
+ * Returns a pointer to the first counter of one block inside raw_counter_buffer_.
+ *
+ * Blocks are MALI_NAME_BLOCK_SIZE entries long and are addressed as: job manager at slot 0,
+ * tiler at slot 1, MMU slices from slot 2, then shader cores after the MMU slices.
+ *
+ * @param block Counter block to locate.
+ * @param index MMU slice number or shader core index; ignored for job manager and tiler.
+ * @throws std::runtime_error if @p index is out of range for an MMU or shader block.
+ */
+const uint32_t *MaliProfilerMagma::get_counters(mali_userspace::MaliCounterBlockName block, int index) const
+{
+    unsigned int slot = 0;
+
+    switch (block)
+    {
+        case mali_userspace::MALI_NAME_BLOCK_JM:
+            slot = 0;
+            break;
+
+        case mali_userspace::MALI_NAME_BLOCK_TILER:
+            slot = 1;
+            break;
+
+        case mali_userspace::MALI_NAME_BLOCK_MMU:
+            // If an MMU counter is selected, index refers to the MMU slice
+            if (index < 0 || index >= num_l2_slices_)
+            {
+                throw std::runtime_error("Invalid slice number.");
+            }
+            slot = static_cast<unsigned int>(2 + index);
+            break;
+
+        default:
+            // If a shader core counter is selected, index refers to the core index
+            if (index < 0 || index >= num_cores_)
+            {
+                throw std::runtime_error("Invalid core number.");
+            }
+            // The remap table turns the dense core index into the core's bit position in the mask.
+            slot = 2 + num_l2_slices_ + core_index_remap_[index];
+            break;
+    }
+
+    return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * slot;
+}
+
+/**
+ * Finds the position of a counter within its block's section of the name table.
+ *
+ * The match is a substring match: the first table entry that contains @p name wins.
+ *
+ * @param block Counter block whose MALI_NAME_BLOCK_SIZE names are searched.
+ * @param name  Text to look for inside the table entries.
+ * @return Index of the first matching entry within the block, or -1 if none matches.
+ */
+int MaliProfilerMagma::find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const
+{
+    const char *const *block_begin = names_lut_ + mali_userspace::MALI_NAME_BLOCK_SIZE * block;
+    const char *const *block_end = block_begin + mali_userspace::MALI_NAME_BLOCK_SIZE;
+
+    const auto hit = std::find_if(block_begin, block_end, [name](const char *candidate) {
+        return strstr(candidate, name) != nullptr;
+    });
+
+    return (hit == block_end) ? -1 : static_cast<int>(hit - block_begin);
+}
+
+} // namespace hwcpipe
diff --git a/vendor/arm/mali/mali_profiler_magma.h b/vendor/arm/mali/mali_profiler_magma.h
new file mode 100644
index 0000000..0a604e5
--- /dev/null
+++ b/vendor/arm/mali/mali_profiler_magma.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef THIRD_PARTY_GITHUB_COM_ARM_SOFTWARE_HWCPIPE_VENDOR_ARM_MALI_MALI_PROFILER_MAGMA_H_
+#define THIRD_PARTY_GITHUB_COM_ARM_SOFTWARE_HWCPIPE_VENDOR_ARM_MALI_MALI_PROFILER_MAGMA_H_
+
+#include "gpu_profiler.h"
+
+#include "hwc.hpp"
+
+#include <functional>
+#include <vector>
+
+#include "magma.h"
+
+namespace hwcpipe
+{
+/** A Gpu profiler that uses Mali counter data. */
+/**
+ * A Gpu profiler that uses Mali counter data, read through the magma performance counter API.
+ *
+ * The object holds magma handles (device, connection, buffer, buffer pool) that its destructor
+ * releases. It is therefore non-copyable: a copy would release the same handles a second time.
+ */
+class MaliProfilerMagma : public GpuProfiler
+{
+  public:
+    /** Sets up counter access; throws std::runtime_error if the GPU cannot be set up. */
+    explicit MaliProfilerMagma(const GpuCounterSet &enabled_counters);
+    virtual ~MaliProfilerMagma();
+
+    MaliProfilerMagma(const MaliProfilerMagma &) = delete;
+    MaliProfilerMagma &operator=(const MaliProfilerMagma &) = delete;
+
+    /** Counters that sample() currently reports. */
+    virtual const GpuCounterSet &enabled_counters() const override
+    {
+        return enabled_counters_;
+    }
+
+    /** Counters listed as supported by this profiler. */
+    virtual const GpuCounterSet &supported_counters() const override
+    {
+        return supported_counters_;
+    }
+
+    /** Replaces the set of counters that sample() reports. */
+    virtual void set_enabled_counters(GpuCounterSet counters) override
+    {
+        enabled_counters_ = std::move(counters);
+    }
+
+    virtual void run() override;
+    virtual const GpuMeasurements &sample() override;
+    virtual void stop() override;
+
+  private:
+    GpuCounterSet enabled_counters_{};
+
+    const GpuCounterSet supported_counters_{
+        GpuCounter::GpuCycles,
+        GpuCounter::VertexComputeCycles,
+        GpuCounter::FragmentCycles,
+        GpuCounter::TilerCycles,
+        GpuCounter::VertexComputeJobs,
+        GpuCounter::Tiles,
+        GpuCounter::TransactionEliminations,
+        GpuCounter::FragmentJobs,
+        GpuCounter::Pixels,
+        GpuCounter::EarlyZTests,
+        GpuCounter::EarlyZKilled,
+        GpuCounter::LateZTests,
+        GpuCounter::LateZKilled,
+        GpuCounter::Instructions,
+        GpuCounter::DivergedInstructions,
+        GpuCounter::ShaderCycles,
+        GpuCounter::ShaderArithmeticCycles,
+        GpuCounter::ShaderLoadStoreCycles,
+        GpuCounter::ShaderTextureCycles,
+        GpuCounter::CacheReadLookups,
+        GpuCounter::CacheWriteLookups,
+        GpuCounter::ExternalMemoryReadAccesses,
+        GpuCounter::ExternalMemoryWriteAccesses,
+        GpuCounter::ExternalMemoryReadStalls,
+        GpuCounter::ExternalMemoryWriteStalls,
+        GpuCounter::ExternalMemoryReadBytes,
+        GpuCounter::ExternalMemoryWriteBytes,
+    };
+
+    // Getter that computes one GpuCounter from the raw counters of the latest dump.
+    using MaliValueGetter = std::function<double(void)>;
+    // Counter mappings selected in the constructor for the detected product.
+    std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> mappings_{};
+
+    int num_cores_{0};            // Number of shader cores present.
+    int num_l2_slices_{0};        // Number of L2 slices.
+    int gpu_id_{0};               // Product ID bits of the magma device ID.
+    size_t buffer_size_{0};       // Size reported by magma for the counter buffer.
+    magma_buffer_t buffer_ = 0;   // Buffer that receives counter dumps.
+    uint64_t timestamp_{0};       // Time reported with the last completed dump.
+    const char *const *names_lut_{
+        nullptr};                 // Counter-name table of the detected product.
+    std::vector<uint32_t> raw_counter_buffer_{};        // Copy of the last counter dump.
+    std::vector<unsigned int> core_index_remap_{};      // Dense core index -> bit position in the core mask.
+    magma_device_t device_ = 0;
+    magma_connection_t connection_{};
+    magma_perf_count_pool_t pool_{};
+    magma_handle_t notification_handle_ = 0;            // Polled to learn that a dump completed.
+
+    GpuMeasurements measurements_{};
+
+    void init();
+    void sample_counters();
+    void wait_next_event();
+    const uint32_t *get_counters(mali_userspace::MaliCounterBlockName block, int index = 0) const;
+    uint64_t get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const;
+    int find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const;
+};
+
+} // namespace hwcpipe
+
+#endif // THIRD_PARTY_GITHUB_COM_ARM_SOFTWARE_HWCPIPE_VENDOR_ARM_MALI_MALI_PROFILER_MAGMA_H_