blob: 706d3fb780ac938aac892c42dc120c44b82598c7 [file] [log] [blame]
/* Copyright (c) 2018-2020 The Khronos Group Inc.
* Copyright (c) 2018-2020 Valve Corporation
* Copyright (c) 2018-2020 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: Karl Schultz <karl@lunarg.com>
* Author: Tony Barbour <tony@lunarg.com>
*/
#pragma once
#include "chassis.h"
#include "state_tracker.h"
#include "vk_mem_alloc.h"
#include "gpu_utils.h"
class GpuAssisted;
struct GpuAssistedDeviceMemoryBlock {
VkBuffer buffer;
VmaAllocation allocation;
std::unordered_map<uint32_t, const cvdescriptorset::Descriptor*> update_at_submit;
};
struct GpuAssistedBufferInfo {
GpuAssistedDeviceMemoryBlock output_mem_block;
GpuAssistedDeviceMemoryBlock di_input_mem_block; // Descriptor Indexing input
GpuAssistedDeviceMemoryBlock bda_input_mem_block; // Buffer Device Address input
VkDescriptorSet desc_set;
VkDescriptorPool desc_pool;
VkPipelineBindPoint pipeline_bind_point;
GpuAssistedBufferInfo(GpuAssistedDeviceMemoryBlock output_mem_block, GpuAssistedDeviceMemoryBlock di_input_mem_block,
GpuAssistedDeviceMemoryBlock bda_input_mem_block, VkDescriptorSet desc_set, VkDescriptorPool desc_pool,
VkPipelineBindPoint pipeline_bind_point)
: output_mem_block(output_mem_block),
di_input_mem_block(di_input_mem_block),
bda_input_mem_block(bda_input_mem_block),
desc_set(desc_set),
desc_pool(desc_pool),
pipeline_bind_point(pipeline_bind_point){};
};
struct GpuAssistedShaderTracker {
VkPipeline pipeline;
VkShaderModule shader_module;
std::vector<unsigned int> pgm;
};
struct GpuAssistedAccelerationStructureBuildValidationBufferInfo {
// The acceleration structure that is being built.
VkAccelerationStructureNV acceleration_structure = VK_NULL_HANDLE;
// The descriptor pool and descriptor set being used to validate a given build.
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
// The storage buffer used by the validating compute shader whichcontains info about
// the valid handles and which is written to communicate found invalid handles.
VkBuffer validation_buffer = VK_NULL_HANDLE;
VmaAllocation validation_buffer_allocation = VK_NULL_HANDLE;
};
struct GpuAssistedAccelerationStructureBuildValidationState {
bool initialized = false;
VkPipeline pipeline = VK_NULL_HANDLE;
VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
VkAccelerationStructureNV replacement_as = VK_NULL_HANDLE;
VmaAllocation replacement_as_allocation = VK_NULL_HANDLE;
uint64_t replacement_as_handle = 0;
std::unordered_map<VkCommandBuffer, std::vector<GpuAssistedAccelerationStructureBuildValidationBufferInfo>> validation_buffers;
};
class GpuAssisted : public ValidationStateTracker {
VkPhysicalDeviceFeatures supported_features;
VkBool32 shaderInt64;
uint32_t unique_shader_module_id = 0;
std::unordered_map<VkCommandBuffer, std::vector<GpuAssistedBufferInfo>> command_buffer_map; // gpu_buffer_list;
uint32_t output_buffer_size;
bool buffer_oob_enabled;
std::map<VkDeviceAddress, VkDeviceSize> buffer_map;
GpuAssistedAccelerationStructureBuildValidationState acceleration_structure_validation_state;
public:
GpuAssisted() { container_type = LayerObjectTypeGpuAssisted; }
bool aborted = false;
bool descriptor_indexing = false;
VkDevice device;
VkPhysicalDevice physicalDevice;
uint32_t adjusted_max_desc_sets;
uint32_t desc_set_bind_index;
VkDescriptorSetLayout debug_desc_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout dummy_desc_layout = VK_NULL_HANDLE;
std::unique_ptr<UtilDescriptorSetManager> desc_set_manager;
std::unordered_map<uint32_t, GpuAssistedShaderTracker> shader_map;
PFN_vkSetDeviceLoaderData vkSetDeviceLoaderData;
VmaAllocator vmaAllocator = {};
std::map<VkQueue, UtilQueueBarrierCommandInfo> queue_barrier_command_infos;
std::vector<GpuAssistedBufferInfo>& GetBufferInfo(const VkCommandBuffer command_buffer) {
auto buffer_list = command_buffer_map.find(command_buffer);
if (buffer_list == command_buffer_map.end()) {
std::vector<GpuAssistedBufferInfo> new_list{};
command_buffer_map[command_buffer] = new_list;
return command_buffer_map[command_buffer];
}
return buffer_list->second;
}
public:
template <typename T>
void ReportSetupProblem(T object, const char* const specific_message) const;
bool CheckForDescriptorIndexing(DeviceFeatures enabled_features) const;
void PreCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice, void* modified_create_info) override;
void PostCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice, VkResult result) override;
void PostCallRecordGetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo* pInfo,
VkDeviceAddress address) override;
void PostCallRecordGetBufferDeviceAddressKHR(VkDevice device, const VkBufferDeviceAddressInfo* pInfo,
VkDeviceAddress address) override;
void PostCallRecordGetBufferDeviceAddressEXT(VkDevice device, const VkBufferDeviceAddressInfo* pInfo,
VkDeviceAddress address) override;
void PreCallRecordDestroyBuffer(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator) override;
void PreCallRecordDestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator) override;
void PostCallRecordBindAccelerationStructureMemoryNV(VkDevice device, uint32_t bindInfoCount,
const VkBindAccelerationStructureMemoryInfoNV* pBindInfos,
VkResult result) override;
void PreCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout,
void* cpl_state_data) override;
void PostCallRecordCreatePipelineLayout(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout,
VkResult result) override;
void ResetCommandBuffer(VkCommandBuffer commandBuffer);
bool PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) const override;
void PreCallRecordCreateBuffer(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator,
VkBuffer* pBuffer, void* cb_state_data) override;
void CreateAccelerationStructureBuildValidationState(GpuAssisted* device_GpuAssisted);
void DestroyAccelerationStructureBuildValidationState();
void PreCallRecordCmdBuildAccelerationStructureNV(VkCommandBuffer commandBuffer, const VkAccelerationStructureInfoNV* pInfo,
VkBuffer instanceData, VkDeviceSize instanceOffset, VkBool32 update,
VkAccelerationStructureNV dst, VkAccelerationStructureNV src,
VkBuffer scratch, VkDeviceSize scratchOffset) override;
void ProcessAccelerationStructureBuildValidationBuffer(VkQueue queue, CMD_BUFFER_STATE* cb_node);
void PreCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkGraphicsPipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* cgpl_state_data) override;
void PreCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkComputePipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* ccpl_state_data) override;
void PreCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkRayTracingPipelineCreateInfoNV* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* crtpl_state_data) override;
void PreCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
VkPipelineCache pipelineCache, uint32_t count,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* crtpl_state_data) override;
void PostCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkGraphicsPipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines, VkResult result,
void* cgpl_state_data) override;
void PostCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkComputePipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines, VkResult result,
void* ccpl_state_data) override;
void PostCallRecordCreateRayTracingPipelinesNV(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkRayTracingPipelineCreateInfoNV* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines, VkResult result,
void* crtpl_state_data) override;
void PostCallRecordCreateRayTracingPipelinesKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
VkPipelineCache pipelineCache, uint32_t count,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
VkResult result, void* crtpl_state_data) override;
void PreCallRecordDestroyPipeline(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks* pAllocator) override;
bool InstrumentShader(const VkShaderModuleCreateInfo* pCreateInfo, std::vector<unsigned int>& new_pgm,
uint32_t* unique_shader_id);
void PreCallRecordCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule,
void* csm_state_data) override;
void AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQueue queue, VkPipelineBindPoint pipeline_bind_point,
uint32_t operation_index, uint32_t* const debug_output_buffer);
void SetDescriptorInitialized(uint32_t* pData, uint32_t index, const cvdescriptorset::Descriptor* descriptor);
void UpdateInstrumentationBuffer(CMD_BUFFER_STATE* cb_node);
void PreCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) override;
void PostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence,
VkResult result) override;
void PreCallRecordCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
uint32_t firstInstance) override;
void PreCallRecordCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) override;
void PreCallRecordCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count,
uint32_t stride) override;
void PreCallRecordCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count,
uint32_t stride) override;
void PreCallRecordCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride) override;
void PreCallRecordCmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride) override;
void PreCallRecordCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance,
VkBuffer counterBuffer, VkDeviceSize counterBufferOffset, uint32_t counterOffset,
uint32_t vertexStride) override;
void PreCallRecordCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride) override;
void PreCallRecordCmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride) override;
void PreCallRecordCmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask) override;
void PreCallRecordCmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
uint32_t drawCount, uint32_t stride) override;
void PreCallRecordCmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride) override;
void PreCallRecordCmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) override;
void PreCallRecordCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) override;
void PreCallRecordCmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override;
void PreCallRecordCmdDispatchBaseKHR(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY,
uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY,
uint32_t groupCountZ) override;
void PreCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
uint32_t width, uint32_t height, uint32_t depth) override;
void PostCallRecordCmdTraceRaysNV(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer,
VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer,
VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride,
VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset,
VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer,
VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride,
uint32_t width, uint32_t height, uint32_t depth) override;
void PreCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, uint32_t width,
uint32_t height, uint32_t depth) override;
void PostCallRecordCmdTraceRaysKHR(VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, uint32_t width,
uint32_t height, uint32_t depth) override;
void PreCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
VkDeviceAddress indirectDeviceAddress) override;
void PostCallRecordCmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable,
const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable,
VkDeviceAddress indirectDeviceAddress) override;
void AllocateValidationResources(const VkCommandBuffer cmd_buffer, const VkPipelineBindPoint bind_point);
void PostCallRecordGetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pPhysicalDeviceProperties) override;
void PostCallRecordGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2* pPhysicalDeviceProperties2) override;
};