blob: f632dd8ec2f0660456603d06e8777475e5c0289b [file] [log] [blame]
// Copyright 2016-2021 The Khronos Group, Inc.
//
// SPDX-License-Identifier: CC-BY-4.0
include::{generated}/meta/{refprefix}VK_KHR_performance_query.txt[]
=== Other Extension Metadata
*Last Modified Date*::
2019-10-08
*IP Status*::
No known IP claims.
*Contributors*::
- Jesse Barker, Unity Technologies
- Kenneth Benzie, Codeplay
- Jan-Harald Fredriksen, ARM
- Jeff Leger, Qualcomm
- Jesse Hall, Google
- Tobias Hector, AMD
- Neil Henning, Codeplay
- Baldur Karlsson
- Lionel Landwerlin, Intel
- Peter Lohrmann, AMD
- Alon Or-bach, Samsung
- Daniel Rakos, AMD
- Niklas Smedberg, Unity Technologies
- Igor Ostrowski, Intel
=== Description
The `VK_KHR_performance_query` extension adds a mechanism to allow querying
of performance counters for use in applications and by profiling tools.
Each queue family may: expose counters that can: be enabled on a queue of
that family.
We extend elink:VkQueryType to add a new query type for performance queries,
and chain a structure on slink:VkQueryPoolCreateInfo to specify the
performance queries to enable.
include::{generated}/interfaces/VK_KHR_performance_query.txt[]
=== Issues
1) Should this extension include a mechanism to begin a query in command
buffer _A_ and end the query in command buffer _B_?
*RESOLVED* No - queries are tied to command buffer creation and thus have to
be encapsulated within a single command buffer.
2) Should this extension include a mechanism to begin and end queries
globally on the queue, not using the existing command buffer commands?
*RESOLVED* No - for the same reasoning as the resolution of 1).
3) Should this extension expose counters that require multiple passes?
*RESOLVED* Yes - users should re-submit a command buffer with the same
commands in it multiple times, specifying the pass to count as the query
parameter in VkPerformanceQuerySubmitInfoKHR.
4) How to handle counters across parallel workloads?
*RESOLVED* In the spirit of Vulkan, a counter description flag
ename:VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR
denotes that the accuracy of a counter result is affected by parallel
workloads.
5) How to handle secondary command buffers?
*RESOLVED* Secondary command buffers inherit any counter pass index
specified in the parent primary command buffer.
Note: this is no longer an issue after change from issue 10 resolution
6) What commands does the profiling lock have to be held for?
*RESOLVED* For any command buffer that is being queried with a performance
query pool, the profiling lock must: be held while that command buffer is in
the _recording_, _executable_, or _pending state_.
7) Should we support flink:vkCmdCopyQueryPoolResults?
*RESOLVED* Yes.
8) Should we allow performance queries to interact with multiview?
*RESOLVED* Yes, but the performance queries must be performed once for each
pass per view.
9) Should a `queryCount > 1` be usable for performance queries?
*RESOLVED* Yes.
Some vendors will have costly performance counter query pool creation, and
would rather if a certain set of counters were to be used multiple times
that a `queryCount > 1` can be used to amortize the instantiation cost.
10) Should we introduce an indirect mechanism to set the counter pass index?
*RESOLVED* Specify the counter pass index at submit time instead, to avoid
requiring re-recording of command buffers when multiple counter passes are
needed.
=== Examples
The following example shows how to find what performance counters a queue
family supports, setup a query pool to record these performance counters,
how to add the query pool to the command buffer to record information, and
how to get the results from the query pool.
[source,c++]
--------------------------------------
// A previously created physical device
VkPhysicalDevice physicalDevice;
// One of the queue families our device supports
uint32_t queueFamilyIndex;
uint32_t counterCount;
// Get the count of counters supported
vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
physicalDevice,
queueFamilyIndex,
&counterCount,
NULL,
NULL);
VkPerformanceCounterKHR* counters =
malloc(sizeof(VkPerformanceCounterKHR) * counterCount);
VkPerformanceCounterDescriptionKHR* counterDescriptions =
malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount);
// Get the counters supported
vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
physicalDevice,
queueFamilyIndex,
&counterCount,
counters,
counterDescriptions);
// Try to enable the first 8 counters
uint32_t enabledCounters[8];
const uint32_t enabledCounterCount = min(counterCount, 8));
for (uint32_t i = 0; i < enabledCounterCount; i++) {
enabledCounters[i] = i;
}
// A previously created device that had the performanceCounterQueryPools feature
// set to VK_TRUE
VkDevice device;
VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
NULL,
// Specify the queue family that this performance query is performed on
queueFamilyIndex,
// The number of counters to enable
enabledCounterCount,
// The array of indices of counters to enable
enabledCounters
};
// Get the number of passes our counters will require.
uint32_t numPasses;
vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
physicalDevice,
&performanceQueryCreateInfo,
&numPasses);
VkQueryPoolCreateInfo queryPoolCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
&performanceQueryCreateInfo,
0,
// Using our new query type here
VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,
1,
0
};
VkQueryPool queryPool;
VkResult result = vkCreateQueryPool(
device,
&queryPoolCreateInfo,
NULL,
&queryPool);
assert(VK_SUCCESS == result);
// A queue from queueFamilyIndex
VkQueue queue;
// A command buffer we want to record counters on
VkCommandBuffer commandBuffer;
VkCommandBufferBeginInfo commandBufferBeginInfo = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
NULL,
0,
NULL
};
VkAcquireProfilingLockInfoKHR lockInfo = {
VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
NULL,
0,
UINT64_MAX // Wait forever for the lock
};
// Acquire the profiling lock before we record command buffers
// that will use performance queries
result = vkAcquireProfilingLockKHR(device, &lockInfo);
assert(VK_SUCCESS == result);
result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
assert(VK_SUCCESS == result);
vkCmdResetQueryPool(
commandBuffer,
queryPool,
0,
1);
vkCmdBeginQuery(
commandBuffer,
queryPool,
0,
0);
// Perform the commands you want to get performance information on
// ...
// Perform a barrier to ensure all previous commands were complete before
// ending the query
vkCmdPipelineBarrier(commandBuffer,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
0,
0,
NULL,
0,
NULL,
0,
NULL);
vkCmdEndQuery(
commandBuffer,
queryPool,
0);
result = vkEndCommandBuffer(commandBuffer);
assert(VK_SUCCESS == result);
for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) {
VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
NULL,
counterPass
};
// Submit the command buffer and wait for its completion
// ...
}
// Release the profiling lock after the command buffer is no longer in the
// pending state.
vkReleaseProfilingLockKHR(device);
result = vkResetCommandBuffer(commandBuffer, 0);
assert(VK_SUCCESS == result);
// Create an array to hold the results of all counters
VkPerformanceCounterResultKHR* recordedCounters = malloc(
sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount);
result = vkGetQueryPoolResults(
device,
queryPool,
0,
1,
sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
recordedCounters,
sizeof(VkPerformanceCounterResultKHR),
NULL);
// recordedCounters is filled with our counters, we will look at one for posterity
switch (counters[0].storage) {
case VK_PERFORMANCE_COUNTER_STORAGE_INT32:
// use recordCounters[0].int32 to get at the counter result!
break;
case VK_PERFORMANCE_COUNTER_STORAGE_INT64:
// use recordCounters[0].int64 to get at the counter result!
break;
case VK_PERFORMANCE_COUNTER_STORAGE_UINT32:
// use recordCounters[0].uint32 to get at the counter result!
break;
case VK_PERFORMANCE_COUNTER_STORAGE_UINT64:
// use recordCounters[0].uint64 to get at the counter result!
break;
case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32:
// use recordCounters[0].float32 to get at the counter result!
break;
case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64:
// use recordCounters[0].float64 to get at the counter result!
break;
}
--------------------------------------
=== Version History
* Revision 1, 2019-10-08