/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* 'pvr_write_query_to_buffer()' and 'pvr_wait_for_available()' based on anv:
* Copyright © 2015 Intel Corporation
 */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_device_info.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "vk_log.h"
#include "vk_object.h"
VkResult pvr_CreateQueryPool(VkDevice _device,
const VkQueryPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkQueryPool *pQueryPool)
{
PVR_FROM_HANDLE(pvr_device, device, _device);
const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
const uint32_t query_size = pCreateInfo->queryCount * sizeof(uint32_t);
struct pvr_query_pool *pool;
uint64_t alloc_size;
VkResult result;

   /* Vulkan 1.0 supports only occlusion, timestamp, and pipeline statistics
    * queries.
    *
    * We don't currently support timestamp queries:
    * VkQueueFamilyProperties->timestampValidBits = 0.
    *
    * We don't currently support pipeline statistics queries:
    * VkPhysicalDeviceFeatures->pipelineStatisticsQuery = false.
    */
assert(!device->vk.enabled_features.pipelineStatisticsQuery);
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
pool = vk_object_alloc(&device->vk,
pAllocator,
sizeof(*pool),
VK_OBJECT_TYPE_QUERY_POOL);
if (!pool)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
pool->result_stride =
ALIGN_POT(query_size, ROGUE_CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT);
pool->query_count = pCreateInfo->queryCount;
   /* Each Phantom writes to a separate offset within the vis test heap, so
    * allocate space for the total number of Phantoms.
    */
alloc_size = (uint64_t)pool->result_stride * core_count;
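
   /* Resulting layout, as consumed by pvr_GetQueryPoolResults() below (one
    * slot of result_stride bytes per Phantom, holding one 32-bit counter per
    * query):
    *
    *    +----------------+----------------+-- ... --+--------------------+
    *    | Phantom 0 slot | Phantom 1 slot |         | Phantom N - 1 slot |
    *    +----------------+----------------+-- ... --+--------------------+
    *
    * The final result for a query is the sum of its per-Phantom counters.
    */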
result = pvr_bo_suballoc(&device->suballoc_vis_test,
alloc_size,
ROGUE_CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT,
false,
&pool->result_buffer);
if (result != VK_SUCCESS)
goto err_free_pool;
result = pvr_bo_suballoc(&device->suballoc_general,
query_size,
sizeof(uint32_t),
false,
&pool->availability_buffer);
if (result != VK_SUCCESS)
goto err_free_result_buffer;
*pQueryPool = pvr_query_pool_to_handle(pool);
return VK_SUCCESS;
err_free_result_buffer:
pvr_bo_suballoc_free(pool->result_buffer);
err_free_pool:
vk_object_free(&device->vk, pAllocator, pool);
return result;
}

void pvr_DestroyQueryPool(VkDevice _device,
VkQueryPool queryPool,
const VkAllocationCallbacks *pAllocator)
{
PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
PVR_FROM_HANDLE(pvr_device, device, _device);
if (!pool)
return;
pvr_bo_suballoc_free(pool->availability_buffer);
pvr_bo_suballoc_free(pool->result_buffer);
vk_object_free(&device->vk, pAllocator, pool);
}

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usage of this function while the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
static inline bool pvr_query_is_available(const struct pvr_query_pool *pool,
uint32_t query_idx)
{
volatile uint32_t *available =
pvr_bo_suballoc_get_map_addr(pool->availability_buffer);
return !!available[query_idx];
}
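
/* For example (a sketch mirroring pvr_GetQueryPoolResults() below, with idx a
 * hypothetical query index), a caller that knows the device has written the
 * availability word would first make it defined:
 *
 *    volatile uint32_t *available =
 *       pvr_bo_suballoc_get_map_addr(pool->availability_buffer);
 *
 *    VG(VALGRIND_MAKE_MEM_DEFINED(&available[idx], sizeof(uint32_t)));
 *    if (pvr_query_is_available(pool, idx))
 *       ...
 */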
#define NSEC_PER_SEC UINT64_C(1000000000)
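/* Maximum time to wait for a query to become available, in seconds. */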
#define PVR_WAIT_TIMEOUT UINT64_C(5)

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usage of this function while the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
/* TODO: Handle device loss scenario properly. */
static VkResult pvr_wait_for_available(struct pvr_device *device,
const struct pvr_query_pool *pool,
uint32_t query_idx)
{
const uint64_t abs_timeout =
os_time_get_absolute_timeout(PVR_WAIT_TIMEOUT * NSEC_PER_SEC);
/* From the Vulkan 1.0 spec:
*
* Commands that wait indefinitely for device execution (namely
* vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences or
* vkAcquireNextImageKHR with a maximum timeout, and
* vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT bit set in
* flags) must return in finite time even in the case of a lost device,
* and return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
*/
while (os_time_get_nano() < abs_timeout) {
      if (pvr_query_is_available(pool, query_idx))
return VK_SUCCESS;
}
return vk_error(device, VK_ERROR_DEVICE_LOST);
}

#undef NSEC_PER_SEC
#undef PVR_WAIT_TIMEOUT

static inline void pvr_write_query_to_buffer(uint8_t *buffer,
VkQueryResultFlags flags,
uint32_t idx,
uint64_t value)
{
if (flags & VK_QUERY_RESULT_64_BIT) {
uint64_t *query_data = (uint64_t *)buffer;
query_data[idx] = value;
} else {
uint32_t *query_data = (uint32_t *)buffer;
query_data[idx] = value;
}
}
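
/* Per the Vulkan spec, each query's data in the destination buffer is an
 * array of elements of the requested size (32 or 64 bits): element 0 holds
 * the result value and, when VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set,
 * the availability status is written in the element directly following it.
 * pvr_GetQueryPoolResults() passes idx 0 and 1 accordingly.
 */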
VkResult pvr_GetQueryPoolResults(VkDevice _device,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount,
size_t dataSize,
void *pData,
VkDeviceSize stride,
VkQueryResultFlags flags)
{
PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
PVR_FROM_HANDLE(pvr_device, device, _device);
VG(volatile uint32_t *available =
pvr_bo_suballoc_get_map_addr(pool->availability_buffer));
volatile uint32_t *query_results =
pvr_bo_suballoc_get_map_addr(pool->result_buffer);
const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
uint8_t *data = (uint8_t *)pData;
VkResult result = VK_SUCCESS;
   /* TODO: Instead of making the memory defined here for valgrind, to better
    * catch out of bounds accesses and other memory errors we should make it
    * defined where the query buffers are changed by the driver or device
    * (e.g. "vkCmdResetQueryPool()", "vkGetQueryPoolResults()", etc.).
    */
VG(VALGRIND_MAKE_MEM_DEFINED(&available[firstQuery],
queryCount * sizeof(uint32_t)));
for (uint32_t i = 0; i < core_count; i++) {
VG(VALGRIND_MAKE_MEM_DEFINED(
&query_results[firstQuery + i * pool->result_stride],
queryCount * sizeof(uint32_t)));
}
for (uint32_t i = 0; i < queryCount; i++) {
bool is_available = pvr_query_is_available(pool, firstQuery + i);
uint64_t count = 0;
if (flags & VK_QUERY_RESULT_WAIT_BIT && !is_available) {
result = pvr_wait_for_available(device, pool, firstQuery + i);
if (result != VK_SUCCESS)
return result;
is_available = true;
}
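      /* Sum this query's per-Phantom counters (see the layout note in
       * pvr_CreateQueryPool()).
       */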
for (uint32_t j = 0; j < core_count; j++)
count += query_results[pool->result_stride * j + firstQuery + i];
if (is_available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
pvr_write_query_to_buffer(data, flags, 0, count);
else
result = VK_NOT_READY;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
pvr_write_query_to_buffer(data, flags, 1, is_available);
data += stride;
}
VG(VALGRIND_MAKE_MEM_UNDEFINED(&available[firstQuery],
queryCount * sizeof(uint32_t)));
for (uint32_t i = 0; i < core_count; i++) {
VG(VALGRIND_MAKE_MEM_UNDEFINED(
&query_results[firstQuery + i * pool->result_stride],
queryCount * sizeof(uint32_t)));
}
return result;
}

void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_query_info query_info;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
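
   /* The reset itself runs on the device: record it as a query program. */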
query_info.type = PVR_QUERY_TYPE_RESET_QUERY_POOL;
query_info.reset_query_pool.query_pool = queryPool;
query_info.reset_query_pool.first_query = firstQuery;
query_info.reset_query_pool.query_count = queryCount;
pvr_add_query_program(cmd_buffer, &query_info);
}

void pvr_ResetQueryPool(VkDevice _device,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount)
{
PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
uint32_t *availability =
pvr_bo_suballoc_get_map_addr(pool->availability_buffer);
memset(availability + firstQuery, 0, sizeof(uint32_t) * queryCount);
}

void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize stride,
VkQueryResultFlags flags)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_query_info query_info;
VkResult result;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
query_info.type = PVR_QUERY_TYPE_COPY_QUERY_RESULTS;
query_info.copy_query_results.query_pool = queryPool;
query_info.copy_query_results.first_query = firstQuery;
query_info.copy_query_results.query_count = queryCount;
query_info.copy_query_results.dst_buffer = dstBuffer;
query_info.copy_query_results.dst_offset = dstOffset;
query_info.copy_query_results.stride = stride;
query_info.copy_query_results.flags = flags;
result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
if (result != VK_SUCCESS)
return;
   /* The Vulkan 1.3.231 spec says:
    *
    *    "vkCmdCopyQueryPoolResults is considered to be a transfer operation,
    *    and its writes to buffer memory must be synchronized using
    *    VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT
    *    before using the results."
    */
   /* We record barrier event sub commands to synchronize the compute job used
    * for the copy query results program with transfer jobs, preventing a
    * transfer job from overlapping with the compute job.
    */
cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
.type = PVR_EVENT_TYPE_BARRIER,
.barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
},
};
result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
if (result != VK_SUCCESS)
return;
pvr_add_query_program(cmd_buffer, &query_info);
result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
if (result != VK_SUCCESS)
return;
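
   /* Make subsequent transfer operations wait for the copy query results
    * program to finish writing the destination buffer.
    */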
cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
.type = PVR_EVENT_TYPE_BARRIER,
.barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
},
};
}

void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query,
VkQueryControlFlags flags)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
/* Occlusion queries can't be nested. */
assert(!state->vis_test_enabled);
if (state->current_sub_cmd) {
assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
if (!state->current_sub_cmd->gfx.query_pool) {
state->current_sub_cmd->gfx.query_pool = pool;
} else if (state->current_sub_cmd->gfx.query_pool != pool) {
VkResult result;
/* Kick render. */
state->current_sub_cmd->gfx.barrier_store = true;
result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
if (result != VK_SUCCESS)
return;
result =
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
if (result != VK_SUCCESS)
return;
/* Use existing render setup, but load color attachments from HW
* BGOBJ.
*/
state->current_sub_cmd->gfx.barrier_load = true;
state->current_sub_cmd->gfx.barrier_store = false;
state->current_sub_cmd->gfx.query_pool = pool;
}
}
state->query_pool = pool;
state->vis_test_enabled = true;
state->vis_reg = query;
state->dirty.vis_test = true;
/* Add the index to the list for this render. */
util_dynarray_append(&state->query_indices, __typeof__(query), query);
}

void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query)
{
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
state->vis_test_enabled = false;
state->dirty.vis_test = true;
}