/* Copyright (c) 2018-2024 The Khronos Group Inc.
* Copyright (c) 2018-2024 Valve Corporation
* Copyright (c) 2018-2024 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "gpu/resources/gpuav_state_trackers.h"
#include "gpu/resources/gpuav_shader_resources.h"
#include "gpu/core/gpuav.h"
#include "gpu/core/gpuav_constants.h"
#include "gpu/descriptor_validation/gpuav_image_layout.h"
#include "gpu/descriptor_validation/gpuav_descriptor_validation.h"
#include "gpu/shaders/gpuav_error_header.h"
#include "gpu/debug_printf/debug_printf.h"
namespace gpuav {
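// All of the state trackers below follow the same pattern: on creation the
// object registers its handle with the DescriptorHeap and receives a unique id,
// and on Destroy()/NotifyInvalidate() it removes that id again. The heap backs
// a GPU-visible table of live ids, so instrumented shaders can check whether
// the resource behind a descriptor still exists.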
Buffer::Buffer(ValidationStateTracker &dev_data, VkBuffer buff, const VkBufferCreateInfo *pCreateInfo, DescriptorHeap &desc_heap_)
: vvl::Buffer(dev_data, buff, pCreateInfo),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(buff, kVulkanObjectTypeBuffer))) {}
void Buffer::Destroy() {
desc_heap.DeleteId(id);
vvl::Buffer::Destroy();
}
void Buffer::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::Buffer::NotifyInvalidate(invalid_nodes, unlink);
}
BufferView::BufferView(const std::shared_ptr<vvl::Buffer> &bf, VkBufferView bv, const VkBufferViewCreateInfo *ci,
VkFormatFeatureFlags2KHR buf_ff, DescriptorHeap &desc_heap_)
: vvl::BufferView(bf, bv, ci, buf_ff),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(bv, kVulkanObjectTypeBufferView))) {}
void BufferView::Destroy() {
desc_heap.DeleteId(id);
vvl::BufferView::Destroy();
}
void BufferView::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::BufferView::NotifyInvalidate(invalid_nodes, unlink);
}
ImageView::ImageView(const std::shared_ptr<vvl::Image> &image_state, VkImageView iv, const VkImageViewCreateInfo *ci,
VkFormatFeatureFlags2KHR ff, const VkFilterCubicImageViewImageFormatPropertiesEXT &cubic_props,
DescriptorHeap &desc_heap_)
: vvl::ImageView(image_state, iv, ci, ff, cubic_props),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(iv, kVulkanObjectTypeImageView))) {}
void ImageView::Destroy() {
desc_heap.DeleteId(id);
vvl::ImageView::Destroy();
}
void ImageView::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::ImageView::NotifyInvalidate(invalid_nodes, unlink);
}
Sampler::Sampler(const VkSampler s, const VkSamplerCreateInfo *pci, DescriptorHeap &desc_heap_)
: vvl::Sampler(s, pci), desc_heap(desc_heap_), id(desc_heap.NextId(VulkanTypedHandle(s, kVulkanObjectTypeSampler))) {}
void Sampler::Destroy() {
desc_heap.DeleteId(id);
vvl::Sampler::Destroy();
}
void Sampler::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::Sampler::NotifyInvalidate(invalid_nodes, unlink);
}
AccelerationStructureKHR::AccelerationStructureKHR(VkAccelerationStructureKHR as, const VkAccelerationStructureCreateInfoKHR *ci,
std::shared_ptr<vvl::Buffer> &&buf_state, DescriptorHeap &desc_heap_)
: vvl::AccelerationStructureKHR(as, ci, std::move(buf_state)),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(as, kVulkanObjectTypeAccelerationStructureKHR))) {}
void AccelerationStructureKHR::Destroy() {
desc_heap.DeleteId(id);
vvl::AccelerationStructureKHR::Destroy();
}
void AccelerationStructureKHR::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::AccelerationStructureKHR::NotifyInvalidate(invalid_nodes, unlink);
}
AccelerationStructureNV::AccelerationStructureNV(VkDevice device, VkAccelerationStructureNV as,
const VkAccelerationStructureCreateInfoNV *ci, DescriptorHeap &desc_heap_)
: vvl::AccelerationStructureNV(device, as, ci),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(as, kVulkanObjectTypeAccelerationStructureNV))) {}
void AccelerationStructureNV::Destroy() {
desc_heap.DeleteId(id);
vvl::AccelerationStructureNV::Destroy();
}
void AccelerationStructureNV::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::AccelerationStructureNV::NotifyInvalidate(invalid_nodes, unlink);
}
CommandBuffer::CommandBuffer(Validator &gpuav, VkCommandBuffer handle, const VkCommandBufferAllocateInfo *pCreateInfo,
const vvl::CommandPool *pool)
: vvl::CommandBuffer(gpuav, handle, pCreateInfo, pool),
gpu_resources_manager(*gpuav.desc_set_manager_),
state_(gpuav),
error_output_buffer_(gpuav),
cmd_errors_counts_buffer_(gpuav),
bda_ranges_snapshot_(gpuav) {
Location loc(vvl::Func::vkAllocateCommandBuffers);
AllocateResources(loc);
}
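// Per command buffer GPU-AV resources are created at allocation time and
// re-created on every reset (see Reset() below).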
static bool AllocateErrorLogsBuffer(Validator &gpuav, VkCommandBuffer command_buffer, vko::Buffer &error_output_buffer,
const Location &loc) {
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.size = glsl::kErrorBufferByteSize;
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
alloc_info.pool = gpuav.output_buffer_pool_;
const bool success = error_output_buffer.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return false;
}
auto output_buffer_ptr = (uint32_t *)error_output_buffer.MapMemory(loc);
memset(output_buffer_ptr, 0, glsl::kErrorBufferByteSize);
if (gpuav.gpuav_settings.shader_instrumentation.descriptor_checks) {
output_buffer_ptr[cst::stream_output_flags_offset] = cst::inst_buffer_oob_enabled;
}
error_output_buffer.UnmapMemory();
return true;
}
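// Rough layout of the error output buffer, as written above and consumed in
// PostProcess() (offsets are the cst:: constants; a sketch, not normative):
//   word [stream_output_flags_offset] : flags, e.g. inst_buffer_oob_enabled
//   word [stream_output_size_offset]  : number of words the instrumentation
//                                       attempted to write
//   word [stream_output_data_offset]..: start of the error records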
void CommandBuffer::AllocateResources(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
VkResult result = VK_SUCCESS;
// Instrumentation descriptor set layout
if (instrumentation_desc_set_layout_ == VK_NULL_HANDLE) {
assert(!gpuav->instrumentation_bindings_.empty());
VkDescriptorSetLayoutCreateInfo instrumentation_desc_set_layout_ci = vku::InitStructHelper();
instrumentation_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(gpuav->instrumentation_bindings_.size());
instrumentation_desc_set_layout_ci.pBindings = gpuav->instrumentation_bindings_.data();
result = DispatchCreateDescriptorSetLayout(gpuav->device, &instrumentation_desc_set_layout_ci, nullptr,
&instrumentation_desc_set_layout_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create instrumentation descriptor set layout.");
return;
}
}
// Error output buffer
if (!AllocateErrorLogsBuffer(*gpuav, VkHandle(), error_output_buffer_, loc)) {
return;
}
// Commands errors counts buffer
{
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.size = GetCmdErrorsCountsBufferByteSize();
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
alloc_info.pool = gpuav->output_buffer_pool_;
const bool success = cmd_errors_counts_buffer_.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return;
}
ClearCmdErrorsCountsBuffer(loc);
if (gpuav->aborted_) return;
}
// BDA snapshot
if (gpuav->gpuav_settings.shader_instrumentation.buffer_device_address) {
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
buffer_info.size = GetBdaRangesBufferByteSize();
// This buffer could be very large if an application uses many buffers. Allocating it as HOST_CACHED
// and manually flushing it at the end of the state updates is faster than using HOST_COHERENT.
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
bool success = bda_ranges_snapshot_.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return;
}
}
// Update validation commands common descriptor set
{
const std::vector<VkDescriptorSetLayoutBinding> validation_cmd_bindings = {
// Error output buffer
{glsl::kBindingDiagErrorBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
// Buffer holding action command index in command buffer
{glsl::kBindingDiagActionIndex, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_ALL, nullptr},
// Buffer holding a resource index from the per command buffer command resources list
{glsl::kBindingDiagCmdResourceIndex, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_ALL, nullptr},
// Commands errors counts buffer
{glsl::kBindingDiagCmdErrorsCount, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
};
if (error_logging_desc_set_layout_ == VK_NULL_HANDLE) {
VkDescriptorSetLayoutCreateInfo validation_cmd_desc_set_layout_ci = vku::InitStructHelper();
validation_cmd_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(validation_cmd_bindings.size());
validation_cmd_desc_set_layout_ci.pBindings = validation_cmd_bindings.data();
result = DispatchCreateDescriptorSetLayout(gpuav->device, &validation_cmd_desc_set_layout_ci, nullptr,
&error_logging_desc_set_layout_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create descriptor set layout used for validation commands.");
return;
}
}
assert(validation_cmd_desc_pool_ == VK_NULL_HANDLE);
assert(error_logging_desc_set_ == VK_NULL_HANDLE);
result = gpuav->desc_set_manager_->GetDescriptorSet(&validation_cmd_desc_pool_, error_logging_desc_set_layout_,
&error_logging_desc_set_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create descriptor set used for validation commands.");
return;
}
std::array<VkWriteDescriptorSet, 4> validation_cmd_descriptor_writes = {};
assert(validation_cmd_bindings.size() == validation_cmd_descriptor_writes.size());
VkDescriptorBufferInfo error_output_buffer_desc_info = {};
assert(!error_output_buffer_.IsDestroyed());
error_output_buffer_desc_info.buffer = error_output_buffer_.VkHandle();
error_output_buffer_desc_info.offset = 0;
error_output_buffer_desc_info.range = VK_WHOLE_SIZE;
validation_cmd_descriptor_writes[0] = vku::InitStructHelper();
validation_cmd_descriptor_writes[0].dstBinding = glsl::kBindingDiagErrorBuffer;
validation_cmd_descriptor_writes[0].descriptorCount = 1;
validation_cmd_descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
validation_cmd_descriptor_writes[0].pBufferInfo = &error_output_buffer_desc_info;
validation_cmd_descriptor_writes[0].dstSet = GetErrorLoggingDescSet();
VkDescriptorBufferInfo cmd_indices_buffer_desc_info = {};
assert(!gpuav->indices_buffer_.IsDestroyed());
cmd_indices_buffer_desc_info.buffer = gpuav->indices_buffer_.VkHandle();
cmd_indices_buffer_desc_info.offset = 0;
cmd_indices_buffer_desc_info.range = sizeof(uint32_t);
validation_cmd_descriptor_writes[1] = vku::InitStructHelper();
validation_cmd_descriptor_writes[1].dstBinding = glsl::kBindingDiagActionIndex;
validation_cmd_descriptor_writes[1].descriptorCount = 1;
validation_cmd_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
validation_cmd_descriptor_writes[1].pBufferInfo = &cmd_indices_buffer_desc_info;
validation_cmd_descriptor_writes[1].dstSet = GetErrorLoggingDescSet();
validation_cmd_descriptor_writes[2] = validation_cmd_descriptor_writes[1];
validation_cmd_descriptor_writes[2].dstBinding = glsl::kBindingDiagCmdResourceIndex;
VkDescriptorBufferInfo cmd_errors_count_buffer_desc_info = {};
cmd_errors_count_buffer_desc_info.buffer = GetCmdErrorsCountsBuffer();
cmd_errors_count_buffer_desc_info.offset = 0;
cmd_errors_count_buffer_desc_info.range = VK_WHOLE_SIZE;
validation_cmd_descriptor_writes[3] = vku::InitStructHelper();
validation_cmd_descriptor_writes[3].dstBinding = glsl::kBindingDiagCmdErrorsCount;
validation_cmd_descriptor_writes[3].descriptorCount = 1;
validation_cmd_descriptor_writes[3].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
validation_cmd_descriptor_writes[3].pBufferInfo = &cmd_errors_count_buffer_desc_info;
validation_cmd_descriptor_writes[3].dstSet = GetErrorLoggingDescSet();
DispatchUpdateDescriptorSets(gpuav->device, static_cast<uint32_t>(validation_cmd_descriptor_writes.size()),
validation_cmd_descriptor_writes.data(), 0, NULL);
}
}
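// Note: the two descriptor set layouts created above serve different consumers.
// instrumentation_desc_set_layout_ describes the set bound alongside the
// application's own sets when instrumented shaders run, while
// error_logging_desc_set_ feeds the validation layer's own commands.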
bool CommandBuffer::UpdateBdaRangesBuffer(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
// The ranges version acts as a "date": if our snapshot was built from the
// current version of the BDA ranges, there is nothing to update.
if (!gpuav->gpuav_settings.shader_instrumentation.buffer_device_address ||
bda_ranges_snapshot_version_ == gpuav->buffer_device_address_ranges_version) {
return true;
}
// Update buffer device address table
// ---
auto bda_table_ptr = (VkDeviceAddress *)bda_ranges_snapshot_.MapMemory(loc);
// Buffer device address table layout
// Ranges are sorted from low to high, and do not overlap
// QWord 0 | Number of *ranges* (1 range occupies 2 QWords)
// QWord 1 | Range 1 begin
// QWord 2 | Range 1 end
// QWord 3 | Range 2 begin
// QWord 4 | Range 2 end
// QWord 5 | ...
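// A consumer is expected to binary search these sorted, non-overlapping ranges
// to decide whether a 64-bit address is backed by a live buffer (a sketch of
// the intended use; the actual lookup lives in the instrumented shaders).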
const size_t max_recordable_ranges =
static_cast<size_t>((GetBdaRangesBufferByteSize() - sizeof(uint64_t)) / (2 * sizeof(VkDeviceAddress)));
auto bda_ranges = reinterpret_cast<ValidationStateTracker::BufferAddressRange *>(bda_table_ptr + 1);
const auto [ranges_to_update_count, total_address_ranges_count] =
gpuav->GetBufferAddressRanges(bda_ranges, max_recordable_ranges);
bda_table_ptr[0] = ranges_to_update_count;
if (total_address_ranges_count > size_t(gpuav->gpuav_settings.max_bda_in_use)) {
std::ostringstream problem_string;
problem_string << "Number of buffer device addresses ranges in use (" << total_address_ranges_count
<< ") is greater than khronos_validation.gpuav_max_buffer_device_addresses ("
<< gpuav->gpuav_settings.max_bda_in_use
<< "). Truncating buffer device address table could result in invalid validation.";
gpuav->InternalError(gpuav->device, loc, problem_string.str().c_str());
return false;
}
// Post update cleanups
// ---
// Flush the BDA buffer before un-mapping so that the new state is visible to the GPU
bda_ranges_snapshot_.FlushAllocation(loc);
bda_ranges_snapshot_.UnmapMemory();
bda_ranges_snapshot_version_ = gpuav->buffer_device_address_ranges_version;
return true;
}
VkDeviceSize CommandBuffer::GetBdaRangesBufferByteSize() const {
auto gpuav = static_cast<Validator *>(&dev_data);
// 1 QWORD for the number of address ranges, then 2 QWORDs per address range
return (1 + 2 * gpuav->gpuav_settings.max_bda_in_use) * 8;
}
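// Example: with khronos_validation.gpuav_max_buffer_device_addresses = 1024,
// this returns (1 + 2 * 1024) * 8 = 16392 bytes.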
CommandBuffer::~CommandBuffer() { Destroy(); }
void CommandBuffer::Destroy() {
{
auto guard = WriteLock();
ResetCBState();
}
vvl::CommandBuffer::Destroy();
}
void CommandBuffer::Reset(const Location &loc) {
vvl::CommandBuffer::Reset(loc);
ResetCBState();
// TODO: Calling AllocateResources in Reset like this is a bit of a hack,
// relying on CommandBuffer internal logic to work.
// Tried to call it in ResetCBState, but it hangs on the command buffer mutex :/
AllocateResources(loc);
}
void CommandBuffer::ResetCBState() {
auto gpuav = static_cast<Validator *>(&dev_data);
// Free the output buffers used by Debug Printf in this command buffer.
for (DebugPrintfBufferInfo &printf_buffer_info : debug_printf_buffer_infos) {
printf_buffer_info.output_mem_buffer.Destroy();
}
debug_printf_buffer_infos.clear();
// Free the device memory and descriptor set(s) associated with a command buffer.
gpu_resources_manager.DestroyResources();
per_command_error_loggers.clear();
for (DescriptorCommandBinding &descriptor_command_binding : descriptor_command_bindings) {
descriptor_command_binding.descritpor_state_ssbo_buffer.Destroy();
descriptor_command_binding.post_process_ssbo_buffer.Destroy();
}
descriptor_command_bindings.clear();
action_command_snapshots.clear();
descriptor_indexing_buffer = VK_NULL_HANDLE;
post_process_buffer_lut = VK_NULL_HANDLE;
error_output_buffer_.Destroy();
cmd_errors_counts_buffer_.Destroy();
bda_ranges_snapshot_.Destroy();
bda_ranges_snapshot_version_ = 0;
if (validation_cmd_desc_pool_ != VK_NULL_HANDLE && error_logging_desc_set_ != VK_NULL_HANDLE) {
gpuav->desc_set_manager_->PutBackDescriptorSet(validation_cmd_desc_pool_, error_logging_desc_set_);
validation_cmd_desc_pool_ = VK_NULL_HANDLE;
error_logging_desc_set_ = VK_NULL_HANDLE;
}
if (instrumentation_desc_set_layout_ != VK_NULL_HANDLE) {
DispatchDestroyDescriptorSetLayout(gpuav->device, instrumentation_desc_set_layout_, nullptr);
instrumentation_desc_set_layout_ = VK_NULL_HANDLE;
}
if (error_logging_desc_set_layout_ != VK_NULL_HANDLE) {
DispatchDestroyDescriptorSetLayout(gpuav->device, error_logging_desc_set_layout_, nullptr);
error_logging_desc_set_layout_ = VK_NULL_HANDLE;
}
draw_index = 0;
compute_index = 0;
trace_rays_index = 0;
action_command_count = 0;
}
void CommandBuffer::ClearCmdErrorsCountsBuffer(const Location &loc) const {
auto cmd_errors_counts_buffer_ptr = (uint32_t *)cmd_errors_counts_buffer_.MapMemory(loc);
std::memset(cmd_errors_counts_buffer_ptr, 0, static_cast<size_t>(GetCmdErrorsCountsBufferByteSize()));
cmd_errors_counts_buffer_.UnmapMemory();
}
void CommandBuffer::IncrementCommandCount(VkPipelineBindPoint bind_point) {
action_command_count++;
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
draw_index++;
} else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
compute_index++;
} else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
trace_rays_index++;
}
}
std::string CommandBuffer::GetDebugLabelRegion(uint32_t label_command_i,
const std::vector<std::string> &initial_label_stack) const {
std::string debug_region_name;
if (label_command_i != vvl::kU32Max) {
debug_region_name = GetDebugRegionName(GetLabelCommands(), label_command_i, initial_label_stack);
} else {
// label_command_i == vvl::kU32Max means that when the instrumented command was
// recorded, no debug label region had yet been opened in this command buffer,
// but a region might still have been started in a previously submitted command
// buffer. So compute the region name from initial_label_stack alone.
for (const std::string &label_name : initial_label_stack) {
if (!debug_region_name.empty()) {
debug_region_name += "::";
}
debug_region_name += label_name;
}
}
return debug_region_name;
}
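// Returns true when this command buffer has work worth post-processing after
// submission: either a command registered an error logger, or an acceleration
// structure build was recorded.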
bool CommandBuffer::PreProcess(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
bool succeeded = descriptor::UpdateDescriptorStateSSBO(*gpuav, *this, loc);
if (!succeeded) {
return false;
}
succeeded = UpdateBdaRangesBuffer(loc);
if (!succeeded) {
return false;
}
return !per_command_error_loggers.empty() || has_build_as_cmd;
}
bool CommandBuffer::NeedsPostProcess() { return !error_output_buffer_.IsDestroyed(); }
// For the given command buffer, map its debug data buffers and read their contents for analysis.
void CommandBuffer::PostProcess(VkQueue queue, const std::vector<std::string> &initial_label_stack, const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
// Read back and analyze any Debug Printf output first.
for (DebugPrintfBufferInfo &printf_buffer_info : debug_printf_buffer_infos) {
auto printf_output_ptr = (char *)printf_buffer_info.output_mem_buffer.MapMemory(loc);
debug_printf::AnalyzeAndGenerateMessage(*gpuav, VkHandle(), queue, printf_buffer_info, (uint32_t *)printf_output_ptr, loc);
printf_buffer_info.output_mem_buffer.UnmapMemory();
}
// CommandBuffer::Destroy can happen on another thread, so after acquiring the
// command buffer's lock here, make sure there is still something to process.
if (!NeedsPostProcess()) {
return;
}
bool skip = false;
{
auto error_output_buffer_ptr = (uint32_t *)error_output_buffer_.MapMemory(loc);
// The second word in the debug output buffer is the number of words that would have
// been written by the shader instrumentation, if there was enough room in the buffer we provided.
// The number of words actually written by the shaders is determined by the size of the buffer
// we provide via the descriptor. So, we process only the number of words that can fit in the
// buffer.
const uint32_t total_words = error_output_buffer_ptr[cst::stream_output_size_offset];
// A zero here means that the shader instrumentation didn't write anything.
if (total_words != 0) {
uint32_t *const error_records_start = &error_output_buffer_ptr[cst::stream_output_data_offset];
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
uint32_t *const error_records_end =
error_output_buffer_ptr + (glsl::kErrorBufferByteSize - cst::stream_output_data_offset);
uint32_t *error_record_ptr = error_records_start;
uint32_t record_size = error_record_ptr[glsl::kHeaderErrorRecordSizeOffset];
assert(record_size == glsl::kErrorRecordSize);
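// Error records are laid out back to back. Each record starts with its size in
// words (kHeaderErrorRecordSizeOffset) and carries the index of the command
// that produced it (kHeaderCommandResourceIdOffset), which selects the error
// logger registered by that command. A record size of zero ends the stream.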
while (record_size > 0 && (error_record_ptr + record_size) <= error_records_end) {
const uint32_t error_logger_i = error_record_ptr[glsl::kHeaderCommandResourceIdOffset];
assert(error_logger_i < per_command_error_loggers.size());
auto &error_logger = per_command_error_loggers[error_logger_i];
const LogObjectList objlist(queue, VkHandle());
skip |= error_logger(*gpuav, *this, error_record_ptr, objlist, initial_label_stack);
// Next record
error_record_ptr += record_size;
record_size = error_record_ptr[glsl::kHeaderErrorRecordSizeOffset];
}
// Clear the written size and any error messages. Note that this preserves the first word, which contains flags.
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
memset(&error_output_buffer_ptr[cst::stream_output_data_offset], 0,
glsl::kErrorBufferByteSize - cst::stream_output_data_offset * sizeof(uint32_t));
}
error_output_buffer_ptr[cst::stream_output_size_offset] = 0;
error_output_buffer_.UnmapMemory();
}
ClearCmdErrorsCountsBuffer(loc);
if (gpuav->aborted_) return;
// If instrumentation found an error, skip post processing. Errors detected by instrumentation are usually
// very serious, such as a prematurely destroyed resource, and the state needed below is likely invalid.
bool gpuav_success = false;
if (!skip && gpuav->gpuav_settings.shader_instrumentation.post_process_descriptor_index) {
gpuav_success = ValidateBindlessDescriptorSets(loc);
}
if (gpuav_success) {
UpdateCmdBufImageLayouts(state_, *this);
}
}
Queue::Queue(Validator &gpuav, VkQueue q, uint32_t family_index, uint32_t queue_index, VkDeviceQueueCreateFlags flags,
const VkQueueFamilyProperties &queueFamilyProperties, bool timeline_khr)
: vvl::Queue(gpuav, q, family_index, queue_index, flags, queueFamilyProperties), state_(gpuav), timeline_khr_(timeline_khr) {}
Queue::~Queue() {
if (barrier_command_buffer_) {
DispatchFreeCommandBuffers(state_.device, barrier_command_pool_, 1, &barrier_command_buffer_);
barrier_command_buffer_ = VK_NULL_HANDLE;
}
if (barrier_command_pool_) {
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
}
if (barrier_sem_) {
DispatchDestroySemaphore(state_.device, barrier_sem_, nullptr);
barrier_sem_ = VK_NULL_HANDLE;
}
}
// Submit a memory barrier on this queue to make GPU-AV buffer writes visible to the host.
// Lazily create and record the needed command buffer on first use.
void Queue::SubmitBarrier(const Location &loc, uint64_t seq) {
if (barrier_command_pool_ == VK_NULL_HANDLE) {
VkResult result = VK_SUCCESS;
VkCommandPoolCreateInfo pool_create_info = vku::InitStructHelper();
pool_create_info.queueFamilyIndex = queue_family_index;
result = DispatchCreateCommandPool(state_.device, &pool_create_info, nullptr, &barrier_command_pool_);
if (result != VK_SUCCESS) {
state_.InternalError(vvl::Queue::VkHandle(), loc, "Unable to create command pool for barrier CB.");
barrier_command_pool_ = VK_NULL_HANDLE;
return;
}
VkCommandBufferAllocateInfo buffer_alloc_info = vku::InitStructHelper();
buffer_alloc_info.commandPool = barrier_command_pool_;
buffer_alloc_info.commandBufferCount = 1;
buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
result = DispatchAllocateCommandBuffers(state_.device, &buffer_alloc_info, &barrier_command_buffer_);
if (result != VK_SUCCESS) {
state_.InternalError(vvl::Queue::VkHandle(), loc, "Unable to create barrier command buffer.");
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
VkSemaphoreTypeCreateInfo semaphore_type_create_info = vku::InitStructHelper();
semaphore_type_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
semaphore_type_create_info.initialValue = 0;
VkSemaphoreCreateInfo semaphore_create_info = vku::InitStructHelper(&semaphore_type_create_info);
result = DispatchCreateSemaphore(state_.device, &semaphore_create_info, nullptr, &barrier_sem_);
if (result != VK_SUCCESS) {
state_.InternalError(state_.device, loc, "Unable to create barrier semaphore.");
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
// Hook up command buffer dispatch
state_.vk_set_device_loader_data_(state_.device, barrier_command_buffer_);
// Record a global memory barrier to force availability of device memory operations to the host domain.
VkCommandBufferBeginInfo barrier_cmd_buffer_begin_info = vku::InitStructHelper();
barrier_cmd_buffer_begin_info.flags |= VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
result = DispatchBeginCommandBuffer(barrier_command_buffer_, &barrier_cmd_buffer_begin_info);
if (result == VK_SUCCESS) {
VkMemoryBarrier memory_barrier = vku::InitStructHelper();
memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
memory_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
DispatchCmdPipelineBarrier(barrier_command_buffer_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
1, &memory_barrier, 0, nullptr, 0, nullptr);
DispatchEndCommandBuffer(barrier_command_buffer_);
}
}
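// Submit the barrier command buffer and signal the timeline semaphore with this
// submission's sequence number. Retire() waits on that value before mapping and
// reading the error buffers on the host.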
if (barrier_command_buffer_ != VK_NULL_HANDLE) {
VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = vku::InitStructHelper();
timeline_semaphore_submit_info.signalSemaphoreValueCount = 1;
timeline_semaphore_submit_info.pSignalSemaphoreValues = &seq;
VkSubmitInfo submit_info = vku::InitStructHelper(&timeline_semaphore_submit_info);
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &barrier_command_buffer_;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &barrier_sem_;
DispatchQueueSubmit(vvl::Queue::VkHandle(), 1, &submit_info, VK_NULL_HANDLE);
}
}
vvl::PreSubmitResult Queue::PreSubmit(std::vector<vvl::QueueSubmission> &&submissions) {
for (const auto &submission : submissions) {
auto loc = submission.loc.Get();
for (auto &cb_submission : submission.cb_submissions) {
auto gpu_cb = std::static_pointer_cast<CommandBuffer>(cb_submission.cb);
auto guard = gpu_cb->ReadLock();
gpu_cb->PreProcess(loc);
for (auto *secondary_cb : gpu_cb->linkedCommandBuffers) {
auto secondary_guard = secondary_cb->ReadLock();
auto *secondary_gpu_cb = static_cast<CommandBuffer *>(secondary_cb);
secondary_gpu_cb->PreProcess(loc);
}
}
}
return vvl::Queue::PreSubmit(std::move(submissions));
}
void Queue::PostSubmit(vvl::QueueSubmission &submission) {
vvl::Queue::PostSubmit(submission);
if (submission.end_batch) {
auto loc = submission.loc.Get();
SubmitBarrier(loc, submission.seq);
}
}
void Queue::Retire(vvl::QueueSubmission &submission) {
vvl::Queue::Retire(submission);
if (submission.loc.Get().function == vvl::Func::vkQueuePresentKHR) {
// A present batch has no GPU-AV work to post process, so skip it.
// This is also needed for correctness: QueuePresent does not have a PostSubmit
// call that signals barrier_sem_, so the timeline wait below must not run.
return;
}
retiring_.emplace_back(submission.cb_submissions);
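// Command buffers are accumulated until the batch's last submission retires.
// Only then is host read-back safe: SubmitBarrier() signaled barrier_sem_ with
// the batch's seq after a device-to-host memory barrier.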
if (submission.end_batch) {
VkSemaphoreWaitInfo wait_info = vku::InitStructHelper();
wait_info.semaphoreCount = 1;
wait_info.pSemaphores = &barrier_sem_;
wait_info.pValues = &submission.seq;
if (timeline_khr_) {
DispatchWaitSemaphoresKHR(state_.device, &wait_info, 1'000'000'000);
} else {
DispatchWaitSemaphores(state_.device, &wait_info, 1'000'000'000);
}
for (std::vector<vvl::CommandBufferSubmission> &cb_submissions : retiring_) {
for (vvl::CommandBufferSubmission &cb_submission : cb_submissions) {
auto gpu_cb = std::static_pointer_cast<CommandBuffer>(cb_submission.cb);
auto guard = gpu_cb->WriteLock();
auto loc = submission.loc.Get();
gpu_cb->PostProcess(VkHandle(), cb_submission.initial_label_stack, loc);
for (vvl::CommandBuffer *secondary_cb : gpu_cb->linkedCommandBuffers) {
auto secondary_gpu_cb = static_cast<CommandBuffer *>(secondary_cb);
auto secondary_guard = secondary_gpu_cb->WriteLock();
secondary_gpu_cb->PostProcess(VkHandle(), cb_submission.initial_label_stack, loc);
}
}
}
retiring_.clear();
}
}
} // namespace gpuav