| // Copyright 2016-2021 The Khronos Group, Inc. |
| // |
| // SPDX-License-Identifier: CC-BY-4.0 |
| |
| include::{generated}/meta/{refprefix}VK_KHR_performance_query.txt[] |
| |
| === Other Extension Metadata |
| |
| *Last Modified Date*:: |
| 2019-10-08 |
| *IP Status*:: |
| No known IP claims. |
| *Contributors*:: |
| - Jesse Barker, Unity Technologies |
| - Kenneth Benzie, Codeplay |
| - Jan-Harald Fredriksen, ARM |
| - Jeff Leger, Qualcomm |
| - Jesse Hall, Google |
| - Tobias Hector, AMD |
| - Neil Henning, Codeplay |
| - Baldur Karlsson |
| - Lionel Landwerlin, Intel |
| - Peter Lohrmann, AMD |
| - Alon Or-bach, Samsung |
| - Daniel Rakos, AMD |
| - Niklas Smedberg, Unity Technologies |
| - Igor Ostrowski, Intel |
| |
| === Description |
| |
| The `VK_KHR_performance_query` extension adds a mechanism to allow querying |
| of performance counters for use in applications and by profiling tools. |
| |
| Each queue family may: expose counters that can: be enabled on a queue of |
| that family. |
| We extend elink:VkQueryType to add a new query type for performance queries, |
| and chain a structure on slink:VkQueryPoolCreateInfo to specify the |
| performance queries to enable. |
| |
| include::{generated}/interfaces/VK_KHR_performance_query.txt[] |
| |
| === Issues |
| |
| 1) Should this extension include a mechanism to begin a query in command |
| buffer _A_ and end the query in command buffer _B_? |
| |
| *RESOLVED* No - queries are tied to command buffer creation and thus have to |
| be encapsulated within a single command buffer. |
| |
| 2) Should this extension include a mechanism to begin and end queries |
| globally on the queue, not using the existing command buffer commands? |
| |
| *RESOLVED* No - for the same reasoning as the resolution of 1). |
| |
| 3) Should this extension expose counters that require multiple passes? |
| |
| *RESOLVED* Yes - users should re-submit a command buffer with the same |
| commands in it multiple times, specifying the pass to count as the query |
| parameter in VkPerformanceQuerySubmitInfoKHR. |
| |
| 4) How to handle counters across parallel workloads? |
| |
| *RESOLVED* In the spirit of Vulkan, a counter description flag |
| ename:VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR |
| denotes that the accuracy of a counter result is affected by parallel |
| workloads. |
| |
| 5) How to handle secondary command buffers? |
| |
| *RESOLVED* Secondary command buffers inherit any counter pass index |
| specified in the parent primary command buffer. |
| Note: this is no longer an issue after the change made by the resolution of |
| issue 10. |
| |
| 6) What commands does the profiling lock have to be held for? |
| |
| *RESOLVED* For any command buffer that is being queried with a performance |
| query pool, the profiling lock must: be held while that command buffer is in |
| the _recording_, _executable_, or _pending state_. |
| |
| 7) Should we support flink:vkCmdCopyQueryPoolResults? |
| |
| *RESOLVED* Yes. |
| |
| 8) Should we allow performance queries to interact with multiview? |
| |
| *RESOLVED* Yes, but the performance queries must be performed once for each |
| pass per view. |
| |
| 9) Should a `queryCount > 1` be usable for performance queries? |
| |
| *RESOLVED* Yes. |
| Some vendors will have costly performance counter query pool creation, and |
| would rather that, if a certain set of counters is to be used multiple times, |
| a `queryCount > 1` be used to amortize the instantiation cost. |
| |
| 10) Should we introduce an indirect mechanism to set the counter pass index? |
| |
| *RESOLVED* Specify the counter pass index at submit time instead, to avoid |
| requiring re-recording of command buffers when multiple counter passes are |
| needed. |
| |
| |
| === Examples |
| |
| The following example shows how to find what performance counters a queue |
| family supports, how to set up a query pool to record these performance |
| counters, how to add the query pool to the command buffer to record |
| information, and how to get the results from the query pool. |
| |
| [source,c++] |
| -------------------------------------- |
| // A previously created physical device |
| VkPhysicalDevice physicalDevice; |
| |
| // One of the queue families our device supports |
| uint32_t queueFamilyIndex; |
| |
| uint32_t counterCount; |
| |
| // Get the count of counters supported (both output arrays are NULL, so only |
| // counterCount is written) |
| vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( |
| physicalDevice, |
| queueFamilyIndex, |
| &counterCount, |
| NULL, |
| NULL); |
| |
| VkPerformanceCounterKHR* counters = |
| malloc(sizeof(VkPerformanceCounterKHR) * counterCount); |
| VkPerformanceCounterDescriptionKHR* counterDescriptions = |
| malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount); |
| |
| // malloc leaves the arrays uninitialized; sType and pNext of every element |
| // must be set by the application before the structures are passed in |
| for (uint32_t i = 0; i < counterCount; i++) { |
| counters[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR; |
| counters[i].pNext = NULL; |
| counterDescriptions[i].sType = |
| VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR; |
| counterDescriptions[i].pNext = NULL; |
| } |
| |
| // Get the counters supported |
| vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( |
| physicalDevice, |
| queueFamilyIndex, |
| &counterCount, |
| counters, |
| counterDescriptions); |
| |
| // Try to enable the first 8 counters |
| uint32_t enabledCounters[8]; |
| |
| // Clamp to the array size; fewer than 8 counters may be available |
| const uint32_t enabledCounterCount = counterCount < 8 ? counterCount : 8; |
| |
| // Counter indices refer to positions in the enumerated counters array |
| for (uint32_t i = 0; i < enabledCounterCount; i++) { |
| enabledCounters[i] = i; |
| } |
| |
| // A previously created device that had the performanceCounterQueryPools feature |
| // set to VK_TRUE |
| VkDevice device; |
| |
| // Describes which counters the performance query pool enables; this structure |
| // is chained onto the query pool create info further down |
| VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = { |
| VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, |
| NULL, |
| |
| // Specify the queue family that this performance query is performed on |
| queueFamilyIndex, |
| |
| // The number of counters to enable |
| enabledCounterCount, |
| |
| // The array of indices of counters to enable |
| enabledCounters |
| }; |
| |
| |
| // Get the number of passes our counters will require. |
| // The command buffer is submitted once per pass later in this example. |
| uint32_t numPasses; |
| |
| vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( |
| physicalDevice, |
| &performanceQueryCreateInfo, |
| &numPasses); |
| |
| VkQueryPoolCreateInfo queryPoolCreateInfo = { |
| VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, |
| // Chain the performance create info to select the counters to enable |
| &performanceQueryCreateInfo, |
| 0, |
| |
| // Using our new query type here |
| VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, |
| |
| // A single query; the rest of this example only ever uses query index 0 |
| 1, |
| |
| 0 |
| }; |
| |
| VkQueryPool queryPool; |
| |
| VkResult result = vkCreateQueryPool( |
| device, |
| &queryPoolCreateInfo, |
| NULL, |
| &queryPool); |
| |
| assert(VK_SUCCESS == result); |
| |
| // A queue from queueFamilyIndex |
| VkQueue queue; |
| |
| // A command buffer we want to record counters on |
| VkCommandBuffer commandBuffer; |
| |
| VkCommandBufferBeginInfo commandBufferBeginInfo = { |
| VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, |
| NULL, |
| 0, |
| NULL |
| }; |
| |
| VkAcquireProfilingLockInfoKHR lockInfo = { |
| VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR, |
| NULL, |
| 0, |
| UINT64_MAX // Wait forever for the lock |
| }; |
| |
| // Acquire the profiling lock before we record command buffers |
| // that will use performance queries |
| |
| result = vkAcquireProfilingLockKHR(device, &lockInfo); |
| |
| assert(VK_SUCCESS == result); |
| |
| result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo); |
| |
| assert(VK_SUCCESS == result); |
| |
| // Reset the single query in the pool before it is begun. |
| // NOTE(review): this command buffer is re-submitted once per counter pass |
| // below, so this reset is re-executed on every pass - confirm that is allowed |
| // for performance query pools, or move the reset into its own command buffer. |
| vkCmdResetQueryPool( |
| commandBuffer, |
| queryPool, |
| 0, |
| 1); |
| |
| // Begin the query; everything recorded until vkCmdEndQuery is measured |
| vkCmdBeginQuery( |
| commandBuffer, |
| queryPool, |
| 0, |
| 0); |
| |
| // Perform the commands you want to get performance information on |
| // ... |
| |
| // Perform a barrier to ensure all previous commands were complete before |
| // ending the query |
| vkCmdPipelineBarrier(commandBuffer, |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, |
| 0, |
| 0, |
| NULL, |
| 0, |
| NULL, |
| 0, |
| NULL); |
| |
| // End the query that was begun above |
| vkCmdEndQuery( |
| commandBuffer, |
| queryPool, |
| 0); |
| |
| result = vkEndCommandBuffer(commandBuffer); |
| |
| assert(VK_SUCCESS == result); |
| |
| // Submit the same command buffer once for every counter pass |
| for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) { |
| |
| // Selects which counter pass this submission records; the pass index is |
| // specified at submit time so the command buffer need not be re-recorded |
| VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = { |
| VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, |
| NULL, |
| counterPass |
| }; |
| |
| |
| // Submit the command buffer and wait for its completion |
| // ... |
| } |
| |
| // Reset the command buffer while the profiling lock is still held: the lock |
| // must be held for as long as the command buffer is in the recording, |
| // executable, or pending state, and a completed command buffer is still |
| // executable until it is reset. |
| result = vkResetCommandBuffer(commandBuffer, 0); |
| |
| assert(VK_SUCCESS == result); |
| |
| // Release the profiling lock now that no command buffer using the |
| // performance query pool is recording, executable, or pending. |
| vkReleaseProfilingLockKHR(device); |
| |
| // Create an array to hold the results of all counters |
| VkPerformanceCounterResultKHR* recordedCounters = malloc( |
| sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount); |
| |
| // Read back query 0: one VkPerformanceCounterResultKHR per enabled counter. |
| // The flags argument is a bitmask, so "no flags" is 0 rather than NULL. |
| result = vkGetQueryPoolResults( |
| device, |
| queryPool, |
| 0, |
| 1, |
| sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount, |
| recordedCounters, |
| sizeof(VkPerformanceCounterResultKHR), |
| 0); |
| |
| assert(VK_SUCCESS == result); |
| |
| // recordedCounters is filled with our counters; we will look at the first one |
| // as an example. enabledCounters[0] is 0, so recordedCounters[0] holds the |
| // result for counters[0]. |
| switch (counters[0].storage) { |
| case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR: |
| // use recordedCounters[0].int32 to get at the counter result! |
| break; |
| case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR: |
| // use recordedCounters[0].int64 to get at the counter result! |
| break; |
| case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR: |
| // use recordedCounters[0].uint32 to get at the counter result! |
| break; |
| case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR: |
| // use recordedCounters[0].uint64 to get at the counter result! |
| break; |
| case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR: |
| // use recordedCounters[0].float32 to get at the counter result! |
| break; |
| case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR: |
| // use recordedCounters[0].float64 to get at the counter result! |
| break; |
| default: |
| // storage type not known to this example; leave the result untouched |
| break; |
| } |
| -------------------------------------- |
| |
| === Version History |
| |
| * Revision 1, 2019-10-08 |