/* Copyright (c) 2018-2024 The Khronos Group Inc.
* Copyright (c) 2018-2024 Valve Corporation
* Copyright (c) 2018-2024 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "gpu/resources/gpuav_state_trackers.h"
#include "gpu/resources/gpuav_shader_resources.h"
#include "gpu/core/gpuav.h"
#include "gpu/core/gpuav_constants.h"
#include "gpu/descriptor_validation/gpuav_image_layout.h"
#include "gpu/descriptor_validation/gpuav_descriptor_validation.h"
#include "gpu/shaders/gpuav_error_header.h"
#include "gpu/debug_printf/debug_printf.h"
namespace gpuav {
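// All of the state trackers below follow the same pattern: on creation the
// object registers its handle with the DescriptorHeap and receives a unique id,
// and on Destroy()/NotifyInvalidate() it removes that id again. The heap backs
// a GPU-visible table of live ids, so instrumented shaders can check whether
// the resource behind a descriptor still exists.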
Buffer::Buffer(ValidationStateTracker &dev_data, VkBuffer buff, const VkBufferCreateInfo *pCreateInfo, DescriptorHeap &desc_heap_)
: vvl::Buffer(dev_data, buff, pCreateInfo),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(buff, kVulkanObjectTypeBuffer))) {}
void Buffer::Destroy() {
desc_heap.DeleteId(id);
vvl::Buffer::Destroy();
}
void Buffer::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::Buffer::NotifyInvalidate(invalid_nodes, unlink);
}
BufferView::BufferView(const std::shared_ptr<vvl::Buffer> &bf, VkBufferView bv, const VkBufferViewCreateInfo *ci,
VkFormatFeatureFlags2KHR buf_ff, DescriptorHeap &desc_heap_)
: vvl::BufferView(bf, bv, ci, buf_ff),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(bv, kVulkanObjectTypeBufferView))) {}
void BufferView::Destroy() {
desc_heap.DeleteId(id);
vvl::BufferView::Destroy();
}
void BufferView::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::BufferView::NotifyInvalidate(invalid_nodes, unlink);
}
ImageView::ImageView(const std::shared_ptr<vvl::Image> &image_state, VkImageView iv, const VkImageViewCreateInfo *ci,
VkFormatFeatureFlags2KHR ff, const VkFilterCubicImageViewImageFormatPropertiesEXT &cubic_props,
DescriptorHeap &desc_heap_)
: vvl::ImageView(image_state, iv, ci, ff, cubic_props),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(iv, kVulkanObjectTypeImageView))) {}
void ImageView::Destroy() {
desc_heap.DeleteId(id);
vvl::ImageView::Destroy();
}
void ImageView::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::ImageView::NotifyInvalidate(invalid_nodes, unlink);
}
Sampler::Sampler(const VkSampler s, const VkSamplerCreateInfo *pci, DescriptorHeap &desc_heap_)
: vvl::Sampler(s, pci), desc_heap(desc_heap_), id(desc_heap.NextId(VulkanTypedHandle(s, kVulkanObjectTypeSampler))) {}
void Sampler::Destroy() {
desc_heap.DeleteId(id);
vvl::Sampler::Destroy();
}
void Sampler::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::Sampler::NotifyInvalidate(invalid_nodes, unlink);
}
AccelerationStructureKHR::AccelerationStructureKHR(VkAccelerationStructureKHR as, const VkAccelerationStructureCreateInfoKHR *ci,
std::shared_ptr<vvl::Buffer> &&buf_state, DescriptorHeap &desc_heap_)
: vvl::AccelerationStructureKHR(as, ci, std::move(buf_state)),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(as, kVulkanObjectTypeAccelerationStructureKHR))) {}
void AccelerationStructureKHR::Destroy() {
desc_heap.DeleteId(id);
vvl::AccelerationStructureKHR::Destroy();
}
void AccelerationStructureKHR::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::AccelerationStructureKHR::NotifyInvalidate(invalid_nodes, unlink);
}
AccelerationStructureNV::AccelerationStructureNV(VkDevice device, VkAccelerationStructureNV as,
const VkAccelerationStructureCreateInfoNV *ci, DescriptorHeap &desc_heap_)
: vvl::AccelerationStructureNV(device, as, ci),
desc_heap(desc_heap_),
id(desc_heap.NextId(VulkanTypedHandle(as, kVulkanObjectTypeAccelerationStructureNV))) {}
void AccelerationStructureNV::Destroy() {
desc_heap.DeleteId(id);
vvl::AccelerationStructureNV::Destroy();
}
void AccelerationStructureNV::NotifyInvalidate(const NodeList &invalid_nodes, bool unlink) {
desc_heap.DeleteId(id);
vvl::AccelerationStructureNV::NotifyInvalidate(invalid_nodes, unlink);
}
CommandBuffer::CommandBuffer(Validator &gpuav, VkCommandBuffer handle, const VkCommandBufferAllocateInfo *pCreateInfo,
const vvl::CommandPool *pool)
: vvl::CommandBuffer(gpuav, handle, pCreateInfo, pool),
gpu_resources_manager(*gpuav.desc_set_manager_),
state_(gpuav),
error_output_buffer_(gpuav),
cmd_errors_counts_buffer_(gpuav),
bda_ranges_snapshot_(gpuav) {
Location loc(vvl::Func::vkAllocateCommandBuffers);
AllocateResources(loc);
}
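// Per command buffer GPU-AV resources are created at allocation time and
// re-created on every reset (see Reset() below).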
static bool AllocateErrorLogsBuffer(Validator &gpuav, VkCommandBuffer command_buffer, vko::Buffer &error_output_buffer,
const Location &loc) {
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.size = glsl::kErrorBufferByteSize;
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
alloc_info.pool = gpuav.output_buffer_pool_;
const bool success = error_output_buffer.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return false;
}
auto output_buffer_ptr = (uint32_t *)error_output_buffer.MapMemory(loc);
memset(output_buffer_ptr, 0, glsl::kErrorBufferByteSize);
if (gpuav.gpuav_settings.shader_instrumentation.descriptor_checks) {
output_buffer_ptr[cst::stream_output_flags_offset] = cst::inst_buffer_oob_enabled;
}
error_output_buffer.UnmapMemory();
return true;
}
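// Rough layout of the error output buffer, as written above and consumed in
// PostProcess() (offsets are the cst:: constants; a sketch, not normative):
//   word [stream_output_flags_offset] : flags, e.g. inst_buffer_oob_enabled
//   word [stream_output_size_offset]  : number of words the instrumentation
//                                       attempted to write
//   word [stream_output_data_offset]..: start of the error records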
void CommandBuffer::AllocateResources(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
VkResult result = VK_SUCCESS;
// Instrumentation descriptor set layout
if (instrumentation_desc_set_layout_ == VK_NULL_HANDLE) {
assert(!gpuav->instrumentation_bindings_.empty());
VkDescriptorSetLayoutCreateInfo instrumentation_desc_set_layout_ci = vku::InitStructHelper();
instrumentation_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(gpuav->instrumentation_bindings_.size());
instrumentation_desc_set_layout_ci.pBindings = gpuav->instrumentation_bindings_.data();
result = DispatchCreateDescriptorSetLayout(gpuav->device, &instrumentation_desc_set_layout_ci, nullptr,
&instrumentation_desc_set_layout_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create instrumentation descriptor set layout.");
return;
}
}
// Error output buffer
if (!AllocateErrorLogsBuffer(*gpuav, VkHandle(), error_output_buffer_, loc)) {
return;
}
// Commands errors counts buffer
{
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.size = GetCmdErrorsCountsBufferByteSize();
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
alloc_info.pool = gpuav->output_buffer_pool_;
const bool success = cmd_errors_counts_buffer_.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return;
}
ClearCmdErrorsCountsBuffer(loc);
if (gpuav->aborted_) return;
}
// BDA snapshot
if (gpuav->gpuav_settings.shader_instrumentation.buffer_device_address) {
VkBufferCreateInfo buffer_info = vku::InitStructHelper();
buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo alloc_info = {};
buffer_info.size = GetBdaRangesBufferByteSize();
// This buffer could be very large if an application uses many buffers. Allocating it as HOST_CACHED
// and manually flushing it at the end of the state updates is faster than using HOST_COHERENT.
alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
bool success = bda_ranges_snapshot_.Create(loc, &buffer_info, &alloc_info);
if (!success) {
return;
}
}
// Update validation commands common descriptor set
{
const std::vector<VkDescriptorSetLayoutBinding> validation_cmd_bindings = {
// Error output buffer
{glsl::kBindingDiagErrorBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
// Buffer holding action command index in command buffer
{glsl::kBindingDiagActionIndex, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_ALL, nullptr},
// Buffer holding a resource index from the per command buffer command resources list
{glsl::kBindingDiagCmdResourceIndex, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_ALL, nullptr},
// Commands errors counts buffer
{glsl::kBindingDiagCmdErrorsCount, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL, nullptr},
};
if (error_logging_desc_set_layout_ == VK_NULL_HANDLE) {
VkDescriptorSetLayoutCreateInfo validation_cmd_desc_set_layout_ci = vku::InitStructHelper();
validation_cmd_desc_set_layout_ci.bindingCount = static_cast<uint32_t>(validation_cmd_bindings.size());
validation_cmd_desc_set_layout_ci.pBindings = validation_cmd_bindings.data();
result = DispatchCreateDescriptorSetLayout(gpuav->device, &validation_cmd_desc_set_layout_ci, nullptr,
&error_logging_desc_set_layout_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create descriptor set layout used for validation commands.");
return;
}
}
assert(validation_cmd_desc_pool_ == VK_NULL_HANDLE);
assert(error_logging_desc_set_ == VK_NULL_HANDLE);
result = gpuav->desc_set_manager_->GetDescriptorSet(&validation_cmd_desc_pool_, error_logging_desc_set_layout_,
&error_logging_desc_set_);
if (result != VK_SUCCESS) {
gpuav->InternalError(gpuav->device, loc, "Unable to create descriptor set used for validation commands.");
return;
}
std::array<VkWriteDescriptorSet, 4> validation_cmd_descriptor_writes = {};
assert(validation_cmd_bindings.size() == validation_cmd_descriptor_writes.size());
VkDescriptorBufferInfo error_output_buffer_desc_info = {};
assert(!error_output_buffer_.IsDestroyed());
error_output_buffer_desc_info.buffer = error_output_buffer_.VkHandle();
error_output_buffer_desc_info.offset = 0;
error_output_buffer_desc_info.range = VK_WHOLE_SIZE;
validation_cmd_descriptor_writes[0] = vku::InitStructHelper();
validation_cmd_descriptor_writes[0].dstBinding = glsl::kBindingDiagErrorBuffer;
validation_cmd_descriptor_writes[0].descriptorCount = 1;
validation_cmd_descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
validation_cmd_descriptor_writes[0].pBufferInfo = &error_output_buffer_desc_info;
validation_cmd_descriptor_writes[0].dstSet = GetErrorLoggingDescSet();
VkDescriptorBufferInfo cmd_indices_buffer_desc_info = {};
assert(!gpuav->indices_buffer_.IsDestroyed());
cmd_indices_buffer_desc_info.buffer = gpuav->indices_buffer_.VkHandle();
cmd_indices_buffer_desc_info.offset = 0;
cmd_indices_buffer_desc_info.range = sizeof(uint32_t);
validation_cmd_descriptor_writes[1] = vku::InitStructHelper();
validation_cmd_descriptor_writes[1].dstBinding = glsl::kBindingDiagActionIndex;
validation_cmd_descriptor_writes[1].descriptorCount = 1;
validation_cmd_descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
validation_cmd_descriptor_writes[1].pBufferInfo = &cmd_indices_buffer_desc_info;
validation_cmd_descriptor_writes[1].dstSet = GetErrorLoggingDescSet();
validation_cmd_descriptor_writes[2] = validation_cmd_descriptor_writes[1];
validation_cmd_descriptor_writes[2].dstBinding = glsl::kBindingDiagCmdResourceIndex;
VkDescriptorBufferInfo cmd_errors_count_buffer_desc_info = {};
cmd_errors_count_buffer_desc_info.buffer = GetCmdErrorsCountsBuffer();
cmd_errors_count_buffer_desc_info.offset = 0;
cmd_errors_count_buffer_desc_info.range = VK_WHOLE_SIZE;
validation_cmd_descriptor_writes[3] = vku::InitStructHelper();
validation_cmd_descriptor_writes[3].dstBinding = glsl::kBindingDiagCmdErrorsCount;
validation_cmd_descriptor_writes[3].descriptorCount = 1;
validation_cmd_descriptor_writes[3].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
validation_cmd_descriptor_writes[3].pBufferInfo = &cmd_errors_count_buffer_desc_info;
validation_cmd_descriptor_writes[3].dstSet = GetErrorLoggingDescSet();
DispatchUpdateDescriptorSets(gpuav->device, static_cast<uint32_t>(validation_cmd_descriptor_writes.size()),
validation_cmd_descriptor_writes.data(), 0, NULL);
}
}
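// Note: the two descriptor set layouts created above serve different consumers.
// instrumentation_desc_set_layout_ describes the set bound alongside the
// application's own sets when instrumented shaders run, while
// error_logging_desc_set_ feeds the validation layer's own commands.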
bool CommandBuffer::UpdateBdaRangesBuffer(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
// The ranges version acts as a "date": if our snapshot was built from the
// current version of the BDA ranges, there is nothing to update.
if (!gpuav->gpuav_settings.shader_instrumentation.buffer_device_address ||
bda_ranges_snapshot_version_ == gpuav->buffer_device_address_ranges_version) {
return true;
}
// Update buffer device address table
// ---
auto bda_table_ptr = (VkDeviceAddress *)bda_ranges_snapshot_.MapMemory(loc);
// Buffer device address table layout
// Ranges are sorted from low to high, and do not overlap
// QWord 0 | Number of *ranges* (1 range occupies 2 QWords)
// QWord 1 | Range 1 begin
// QWord 2 | Range 1 end
// QWord 3 | Range 2 begin
// QWord 4 | Range 2 end
// QWord 5 | ...
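// A consumer is expected to binary search these sorted, non-overlapping ranges
// to decide whether a 64-bit address is backed by a live buffer (a sketch of
// the intended use; the actual lookup lives in the instrumented shaders).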
const size_t max_recordable_ranges =
static_cast<size_t>((GetBdaRangesBufferByteSize() - sizeof(uint64_t)) / (2 * sizeof(VkDeviceAddress)));
auto bda_ranges = reinterpret_cast<ValidationStateTracker::BufferAddressRange *>(bda_table_ptr + 1);
const auto [ranges_to_update_count, total_address_ranges_count] =
gpuav->GetBufferAddressRanges(bda_ranges, max_recordable_ranges);
bda_table_ptr[0] = ranges_to_update_count;
if (total_address_ranges_count > size_t(gpuav->gpuav_settings.max_bda_in_use)) {
std::ostringstream problem_string;
problem_string << "Number of buffer device addresses ranges in use (" << total_address_ranges_count
<< ") is greater than khronos_validation.gpuav_max_buffer_device_addresses ("
<< gpuav->gpuav_settings.max_bda_in_use
<< "). Truncating buffer device address table could result in invalid validation.";
gpuav->InternalError(gpuav->device, loc, problem_string.str().c_str());
return false;
}
// Post update cleanups
// ---
// Flush the BDA buffer before un-mapping so that the new state is visible to the GPU
bda_ranges_snapshot_.FlushAllocation(loc);
bda_ranges_snapshot_.UnmapMemory();
bda_ranges_snapshot_version_ = gpuav->buffer_device_address_ranges_version;
return true;
}
VkDeviceSize CommandBuffer::GetBdaRangesBufferByteSize() const {
auto gpuav = static_cast<Validator *>(&dev_data);
// 1 QWORD for the number of address ranges, then 2 QWORDs per address range
return (1 + 2 * gpuav->gpuav_settings.max_bda_in_use) * 8;
}
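// Example: with khronos_validation.gpuav_max_buffer_device_addresses = 1024,
// this returns (1 + 2 * 1024) * 8 = 16392 bytes.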
CommandBuffer::~CommandBuffer() { Destroy(); }
void CommandBuffer::Destroy() {
{
auto guard = WriteLock();
ResetCBState();
}
vvl::CommandBuffer::Destroy();
}
void CommandBuffer::Reset(const Location &loc) {
vvl::CommandBuffer::Reset(loc);
ResetCBState();
// TODO: Calling AllocateResources in Reset like this is a bit of a hack,
// relying on CommandBuffer internal logic to work.
// Tried to call it in ResetCBState, but it hangs on the command buffer mutex :/
AllocateResources(loc);
}
void CommandBuffer::ResetCBState() {
auto gpuav = static_cast<Validator *>(&dev_data);
// Free the output buffers used by Debug Printf in this command buffer.
for (DebugPrintfBufferInfo &printf_buffer_info : debug_printf_buffer_infos) {
printf_buffer_info.output_mem_buffer.Destroy();
}
debug_printf_buffer_infos.clear();
// Free the device memory and descriptor set(s) associated with a command buffer.
gpu_resources_manager.DestroyResources();
per_command_error_loggers.clear();
for (DescriptorCommandBinding &descriptor_command_binding : descriptor_command_bindings) {
descriptor_command_binding.descritpor_state_ssbo_buffer.Destroy();
descriptor_command_binding.post_process_ssbo_buffer.Destroy();
}
descriptor_command_bindings.clear();
action_command_snapshots.clear();
descriptor_indexing_buffer = VK_NULL_HANDLE;
post_process_buffer_lut = VK_NULL_HANDLE;
error_output_buffer_.Destroy();
cmd_errors_counts_buffer_.Destroy();
bda_ranges_snapshot_.Destroy();
bda_ranges_snapshot_version_ = 0;
if (validation_cmd_desc_pool_ != VK_NULL_HANDLE && error_logging_desc_set_ != VK_NULL_HANDLE) {
gpuav->desc_set_manager_->PutBackDescriptorSet(validation_cmd_desc_pool_, error_logging_desc_set_);
validation_cmd_desc_pool_ = VK_NULL_HANDLE;
error_logging_desc_set_ = VK_NULL_HANDLE;
}
if (instrumentation_desc_set_layout_ != VK_NULL_HANDLE) {
DispatchDestroyDescriptorSetLayout(gpuav->device, instrumentation_desc_set_layout_, nullptr);
instrumentation_desc_set_layout_ = VK_NULL_HANDLE;
}
if (error_logging_desc_set_layout_ != VK_NULL_HANDLE) {
DispatchDestroyDescriptorSetLayout(gpuav->device, error_logging_desc_set_layout_, nullptr);
error_logging_desc_set_layout_ = VK_NULL_HANDLE;
}
draw_index = 0;
compute_index = 0;
trace_rays_index = 0;
action_command_count = 0;
}
void CommandBuffer::ClearCmdErrorsCountsBuffer(const Location &loc) const {
auto cmd_errors_counts_buffer_ptr = (uint32_t *)cmd_errors_counts_buffer_.MapMemory(loc);
std::memset(cmd_errors_counts_buffer_ptr, 0, static_cast<size_t>(GetCmdErrorsCountsBufferByteSize()));
cmd_errors_counts_buffer_.UnmapMemory();
}
void CommandBuffer::IncrementCommandCount(VkPipelineBindPoint bind_point) {
action_command_count++;
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
draw_index++;
} else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) {
compute_index++;
} else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) {
trace_rays_index++;
}
}
std::string CommandBuffer::GetDebugLabelRegion(uint32_t label_command_i,
const std::vector<std::string> &initial_label_stack) const {
std::string debug_region_name;
if (label_command_i != vvl::kU32Max) {
debug_region_name = GetDebugRegionName(GetLabelCommands(), label_command_i, initial_label_stack);
} else {
// label_command_i == vvl::kU32Max means that when the instrumented command was
// recorded, no debug label region had yet been opened in this command buffer,
// but a region might still have been started in a previously submitted command
// buffer. So compute the region name from initial_label_stack alone.
for (const std::string &label_name : initial_label_stack) {
if (!debug_region_name.empty()) {
debug_region_name += "::";
}
debug_region_name += label_name;
}
}
return debug_region_name;
}
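// Returns true when this command buffer has work worth post-processing after
// submission: either a command registered an error logger, or an acceleration
// structure build was recorded.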
bool CommandBuffer::PreProcess(const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
bool succeeded = descriptor::UpdateDescriptorStateSSBO(*gpuav, *this, loc);
if (!succeeded) {
return false;
}
succeeded = UpdateBdaRangesBuffer(loc);
if (!succeeded) {
return false;
}
return !per_command_error_loggers.empty() || has_build_as_cmd;
}
bool CommandBuffer::NeedsPostProcess() { return !error_output_buffer_.IsDestroyed(); }
// For the given command buffer, map its debug data buffers and read their contents for analysis.
void CommandBuffer::PostProcess(VkQueue queue, const std::vector<std::string> &initial_label_stack, const Location &loc) {
auto gpuav = static_cast<Validator *>(&dev_data);
// Read back and analyze any Debug Printf output first.
for (DebugPrintfBufferInfo &printf_buffer_info : debug_printf_buffer_infos) {
auto printf_output_ptr = (char *)printf_buffer_info.output_mem_buffer.MapMemory(loc);
debug_printf::AnalyzeAndGenerateMessage(*gpuav, VkHandle(), queue, printf_buffer_info, (uint32_t *)printf_output_ptr, loc);
printf_buffer_info.output_mem_buffer.UnmapMemory();
}
// CommandBuffer::Destroy can happen on another thread, so after acquiring the
// command buffer's lock here, make sure there is still something to process.
if (!NeedsPostProcess()) {
return;
}
bool skip = false;
{
auto error_output_buffer_ptr = (uint32_t *)error_output_buffer_.MapMemory(loc);
// The second word in the debug output buffer is the number of words that would have
// been written by the shader instrumentation, if there was enough room in the buffer we provided.
// The number of words actually written by the shaders is determined by the size of the buffer
// we provide via the descriptor. So, we process only the number of words that can fit in the
// buffer.
const uint32_t total_words = error_output_buffer_ptr[cst::stream_output_size_offset];
// A zero here means that the shader instrumentation didn't write anything.
if (total_words != 0) {
uint32_t *const error_records_start = &error_output_buffer_ptr[cst::stream_output_data_offset];
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
uint32_t *const error_records_end =
error_output_buffer_ptr + (glsl::kErrorBufferByteSize - cst::stream_output_data_offset);
uint32_t *error_record_ptr = error_records_start;
uint32_t record_size = error_record_ptr[glsl::kHeaderErrorRecordSizeOffset];
assert(record_size == glsl::kErrorRecordSize);
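// Error records are laid out back to back. Each record starts with its size in
// words (kHeaderErrorRecordSizeOffset) and carries the index of the command
// that produced it (kHeaderCommandResourceIdOffset), which selects the error
// logger registered by that command. A record size of zero ends the stream.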
while (record_size > 0 && (error_record_ptr + record_size) <= error_records_end) {
const uint32_t error_logger_i = error_record_ptr[glsl::kHeaderCommandResourceIdOffset];
assert(error_logger_i < per_command_error_loggers.size());
auto &error_logger = per_command_error_loggers[error_logger_i];
const LogObjectList objlist(queue, VkHandle());
skip |= error_logger(*gpuav, *this, error_record_ptr, objlist, initial_label_stack);
// Next record
error_record_ptr += record_size;
record_size = error_record_ptr[glsl::kHeaderErrorRecordSizeOffset];
}
// Clear the written size and any error messages. Note that this preserves the first word, which contains flags.
assert(glsl::kErrorBufferByteSize > cst::stream_output_data_offset);
memset(&error_output_buffer_ptr[cst::stream_output_data_offset], 0,
glsl::kErrorBufferByteSize - cst::stream_output_data_offset * sizeof(uint32_t));
}
error_output_buffer_ptr[cst::stream_output_size_offset] = 0;
error_output_buffer_.UnmapMemory();
}
ClearCmdErrorsCountsBuffer(loc);
if (gpuav->aborted_) return;
// If instrumentation found an error, skip post processing. Errors detected by instrumentation are usually
// very serious, such as a prematurely destroyed resource, and the state needed below is likely invalid.
bool gpuav_success = false;
if (!skip && gpuav->gpuav_settings.shader_instrumentation.post_process_descriptor_index) {
gpuav_success = ValidateBindlessDescriptorSets(loc);
}
if (gpuav_success) {
UpdateCmdBufImageLayouts(state_, *this);
}
}
Queue::Queue(Validator &gpuav, VkQueue q, uint32_t family_index, uint32_t queue_index, VkDeviceQueueCreateFlags flags,
const VkQueueFamilyProperties &queueFamilyProperties, bool timeline_khr)
: vvl::Queue(gpuav, q, family_index, queue_index, flags, queueFamilyProperties), state_(gpuav), timeline_khr_(timeline_khr) {}
Queue::~Queue() {
if (barrier_command_buffer_) {
DispatchFreeCommandBuffers(state_.device, barrier_command_pool_, 1, &barrier_command_buffer_);
barrier_command_buffer_ = VK_NULL_HANDLE;
}
if (barrier_command_pool_) {
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
}
if (barrier_sem_) {
DispatchDestroySemaphore(state_.device, barrier_sem_, nullptr);
barrier_sem_ = VK_NULL_HANDLE;
}
}
// Submit a memory barrier on this queue to make GPU-AV buffer writes visible to the host.
// Lazily create and record the needed command buffer on first use.
void Queue::SubmitBarrier(const Location &loc, uint64_t seq) {
if (barrier_command_pool_ == VK_NULL_HANDLE) {
VkResult result = VK_SUCCESS;
VkCommandPoolCreateInfo pool_create_info = vku::InitStructHelper();
pool_create_info.queueFamilyIndex = queue_family_index;
result = DispatchCreateCommandPool(state_.device, &pool_create_info, nullptr, &barrier_command_pool_);
if (result != VK_SUCCESS) {
state_.InternalError(vvl::Queue::VkHandle(), loc, "Unable to create command pool for barrier CB.");
barrier_command_pool_ = VK_NULL_HANDLE;
return;
}
VkCommandBufferAllocateInfo buffer_alloc_info = vku::InitStructHelper();
buffer_alloc_info.commandPool = barrier_command_pool_;
buffer_alloc_info.commandBufferCount = 1;
buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
result = DispatchAllocateCommandBuffers(state_.device, &buffer_alloc_info, &barrier_command_buffer_);
if (result != VK_SUCCESS) {
state_.InternalError(vvl::Queue::VkHandle(), loc, "Unable to create barrier command buffer.");
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
VkSemaphoreTypeCreateInfo semaphore_type_create_info = vku::InitStructHelper();
semaphore_type_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
semaphore_type_create_info.initialValue = 0;
VkSemaphoreCreateInfo semaphore_create_info = vku::InitStructHelper(&semaphore_type_create_info);
result = DispatchCreateSemaphore(state_.device, &semaphore_create_info, nullptr, &barrier_sem_);
if (result != VK_SUCCESS) {
state_.InternalError(state_.device, loc, "Unable to create barrier semaphore.");
DispatchDestroyCommandPool(state_.device, barrier_command_pool_, nullptr);
barrier_command_pool_ = VK_NULL_HANDLE;
barrier_command_buffer_ = VK_NULL_HANDLE;
return;
}
// Hook up command buffer dispatch
state_.vk_set_device_loader_data_(state_.device, barrier_command_buffer_);
// Record a global memory barrier to force availability of device memory operations to the host domain.
VkCommandBufferBeginInfo barrier_cmd_buffer_begin_info = vku::InitStructHelper();
barrier_cmd_buffer_begin_info.flags |= VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
result = DispatchBeginCommandBuffer(barrier_command_buffer_, &barrier_cmd_buffer_begin_info);
if (result == VK_SUCCESS) {
VkMemoryBarrier memory_barrier = vku::InitStructHelper();
memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
memory_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
DispatchCmdPipelineBarrier(barrier_command_buffer_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
1, &memory_barrier, 0, nullptr, 0, nullptr);
DispatchEndCommandBuffer(barrier_command_buffer_);
}
}
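// Submit the barrier command buffer and signal the timeline semaphore with this
// submission's sequence number. Retire() waits on that value before mapping and
// reading the error buffers on the host.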
if (barrier_command_buffer_ != VK_NULL_HANDLE) {
VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = vku::InitStructHelper();
timeline_semaphore_submit_info.signalSemaphoreValueCount = 1;
timeline_semaphore_submit_info.pSignalSemaphoreValues = &seq;
VkSubmitInfo submit_info = vku::InitStructHelper(&timeline_semaphore_submit_info);
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &barrier_command_buffer_;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &barrier_sem_;
DispatchQueueSubmit(vvl::Queue::VkHandle(), 1, &submit_info, VK_NULL_HANDLE);
}
}
vvl::PreSubmitResult Queue::PreSubmit(std::vector<vvl::QueueSubmission> &&submissions) {
for (const auto &submission : submissions) {
auto loc = submission.loc.Get();
for (auto &cb_submission : submission.cb_submissions) {
auto gpu_cb = std::static_pointer_cast<CommandBuffer>(cb_submission.cb);
auto guard = gpu_cb->ReadLock();
gpu_cb->PreProcess(loc);
for (auto *secondary_cb : gpu_cb->linkedCommandBuffers) {
auto secondary_guard = secondary_cb->ReadLock();
auto *secondary_gpu_cb = static_cast<CommandBuffer *>(secondary_cb);
secondary_gpu_cb->PreProcess(loc);
}
}
}
return vvl::Queue::PreSubmit(std::move(submissions));
}
void Queue::PostSubmit(vvl::QueueSubmission &submission) {
vvl::Queue::PostSubmit(submission);
if (submission.end_batch) {
auto loc = submission.loc.Get();
SubmitBarrier(loc, submission.seq);
}
}
void Queue::Retire(vvl::QueueSubmission &submission) {
vvl::Queue::Retire(submission);
if (submission.loc.Get().function == vvl::Func::vkQueuePresentKHR) {
// A present batch has no GPU-AV work to post process, so skip it.
// This is also needed for correctness: QueuePresent does not have a PostSubmit
// call that signals barrier_sem_, so the timeline wait below must not run.
return;
}
retiring_.emplace_back(submission.cb_submissions);
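// Command buffers are accumulated until the batch's last submission retires.
// Only then is host read-back safe: SubmitBarrier() signaled barrier_sem_ with
// the batch's seq after a device-to-host memory barrier.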
if (submission.end_batch) {
VkSemaphoreWaitInfo wait_info = vku::InitStructHelper();
wait_info.semaphoreCount = 1;
wait_info.pSemaphores = &barrier_sem_;
wait_info.pValues = &submission.seq;
if (timeline_khr_) {
DispatchWaitSemaphoresKHR(state_.device, &wait_info, 1'000'000'000);
} else {
DispatchWaitSemaphores(state_.device, &wait_info, 1'000'000'000);
}
for (std::vector<vvl::CommandBufferSubmission> &cb_submissions : retiring_) {
for (vvl::CommandBufferSubmission &cb_submission : cb_submissions) {
auto gpu_cb = std::static_pointer_cast<CommandBuffer>(cb_submission.cb);
auto guard = gpu_cb->WriteLock();
auto loc = submission.loc.Get();
gpu_cb->PostProcess(VkHandle(), cb_submission.initial_label_stack, loc);
for (vvl::CommandBuffer *secondary_cb : gpu_cb->linkedCommandBuffers) {
auto secondary_gpu_cb = static_cast<CommandBuffer *>(secondary_cb);
auto secondary_guard = secondary_gpu_cb->WriteLock();
secondary_gpu_cb->PostProcess(VkHandle(), cb_submission.initial_label_stack, loc);
}
}
}
retiring_.clear();
}
}
} // namespace gpuav