| /* Copyright (c) 2015-2023 The Khronos Group Inc. |
| * Copyright (c) 2015-2023 Valve Corporation |
| * Copyright (c) 2015-2023 LunarG, Inc. |
| * Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved. |
| * Modifications Copyright (C) 2022 RasterGrid Kft. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "best_practices/best_practices_validation.h" |
| #include "best_practices/best_practices_error_enums.h" |
| |
| struct VendorSpecificInfo { |
| EnableFlags vendor_id; |
| std::string name; |
| }; |
| |
| const std::map<BPVendorFlagBits, VendorSpecificInfo> kVendorInfo = {{kBPVendorArm, {vendor_specific_arm, "Arm"}}, |
| {kBPVendorAMD, {vendor_specific_amd, "AMD"}}, |
| {kBPVendorIMG, {vendor_specific_img, "IMG"}}, |
| {kBPVendorNVIDIA, {vendor_specific_nvidia, "NVIDIA"}}}; |
| |
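// Color formats that NVIDIA can keep compressed even when cleared to a custom
// (non-0.0f/1.0f) color; see RecordClearColor() and ValidateClearColor() below.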
| static constexpr std::array<VkFormat, 12> kCustomClearColorCompressedFormatsNVIDIA = { |
| VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| VK_FORMAT_A2R10G10B10_UNORM_PACK32, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_R16G16B16A16_UNORM, |
| VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SINT, |
| VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, |
| }; |
| |
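// When fine-grained locking is enabled, the guard is constructed with std::defer_lock and
// therefore does not hold validation_object_mutex; locking is then expected to happen at a
// finer granularity inside the individual validation functions.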
| ReadLockGuard BestPractices::ReadLock() const { |
| if (fine_grained_locking) { |
| return ReadLockGuard(validation_object_mutex, std::defer_lock); |
| } else { |
| return ReadLockGuard(validation_object_mutex); |
| } |
| } |
| |
| WriteLockGuard BestPractices::WriteLock() { |
| if (fine_grained_locking) { |
| return WriteLockGuard(validation_object_mutex, std::defer_lock); |
| } else { |
| return WriteLockGuard(validation_object_mutex); |
| } |
| } |
| |
| std::shared_ptr<CMD_BUFFER_STATE> BestPractices::CreateCmdBufferState(VkCommandBuffer cb, |
| const VkCommandBufferAllocateInfo* pCreateInfo, |
| const COMMAND_POOL_STATE* pool) { |
| return std::static_pointer_cast<CMD_BUFFER_STATE>(std::make_shared<bp_state::CommandBuffer>(this, cb, pCreateInfo, pool)); |
| } |
| |
| bp_state::CommandBuffer::CommandBuffer(BestPractices* bp, VkCommandBuffer cb, const VkCommandBufferAllocateInfo* pCreateInfo, |
| const COMMAND_POOL_STATE* pool) |
| : CMD_BUFFER_STATE(bp, cb, pCreateInfo, pool) {} |
| |
| bool BestPractices::VendorCheckEnabled(BPVendorFlags vendors) const { |
| for (const auto& vendor : kVendorInfo) { |
| if (vendors & vendor.first && enabled[vendor.second.vendor_id]) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| const char* BestPractices::VendorSpecificTag(BPVendorFlags vendors) const { |
| // Cache built vendor tags in a map |
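    // e.g. (kBPVendorArm | kBPVendorAMD) yields the tag "[Arm, AMD]"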
| static vvl::unordered_map<BPVendorFlags, std::string> tag_map; |
| |
| auto res = tag_map.find(vendors); |
| if (res == tag_map.end()) { |
| // Build the vendor tag string |
| std::stringstream vendor_tag; |
| |
| vendor_tag << "["; |
| bool first_vendor = true; |
| for (const auto& vendor : kVendorInfo) { |
| if (vendors & vendor.first) { |
| if (!first_vendor) { |
| vendor_tag << ", "; |
| } |
| vendor_tag << vendor.second.name; |
| first_vendor = false; |
| } |
| } |
| vendor_tag << "]"; |
| |
| tag_map[vendors] = vendor_tag.str(); |
| res = tag_map.find(vendors); |
| } |
| |
| return res->second.c_str(); |
| } |
| |
// Despite the return code being successful, logging it can still be a useful utility for some developers in niche debugging situations.
| void BestPractices::LogPositiveSuccessCode(const RecordObject& record_obj) const { |
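    // Positive, non-VK_SUCCESS codes include e.g. VK_SUBOPTIMAL_KHR, VK_NOT_READY, and VK_INCOMPLETE.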
| assert(record_obj.result > VK_SUCCESS); |
| |
| LogVerbose(kVUID_BestPractices_Verbose_Success_Logging, instance, record_obj.location, "Returned %s.", |
| string_VkResult(record_obj.result)); |
| } |
| |
| void BestPractices::LogErrorCode(const RecordObject& record_obj) const { |
| assert(record_obj.result < VK_SUCCESS); // Anything less than VK_SUCCESS is an error. |
| |
    // Despite being error codes, log these results as informational:
    // they are returned frequently during window resizing and are expected
    // to occur during the normal application lifecycle.
| constexpr std::array common_failure_codes = {VK_ERROR_OUT_OF_DATE_KHR, VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT}; |
| const auto result_string = string_VkResult(record_obj.result); |
| |
| if (IsValueIn(record_obj.result, common_failure_codes)) { |
| LogInfo(kVUID_BestPractices_Failure_Result, instance, record_obj.location, "Returned error %s.", result_string); |
| } else { |
| LogWarning(kVUID_BestPractices_Error_Result, instance, record_obj.location, "Returned error %s.", result_string); |
| } |
| } |
| |
| void BestPractices::RecordSetDepthTestState(bp_state::CommandBuffer& cmd_state, VkCompareOp new_depth_compare_op, |
| bool new_depth_test_enable) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
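    // Z-cull culls depth-tested work in a single direction at a time; the tracking below is
    // used by ValidateZcull() to flag surfaces whose compare op flips between LESS- and
    // GREATER-style directions.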
| |
| if (cmd_state.nv.depth_compare_op != new_depth_compare_op) { |
| switch (new_depth_compare_op) { |
| case VK_COMPARE_OP_LESS: |
| case VK_COMPARE_OP_LESS_OR_EQUAL: |
| cmd_state.nv.zcull_direction = bp_state::CommandBufferStateNV::ZcullDirection::Less; |
| break; |
| case VK_COMPARE_OP_GREATER: |
| case VK_COMPARE_OP_GREATER_OR_EQUAL: |
| cmd_state.nv.zcull_direction = bp_state::CommandBufferStateNV::ZcullDirection::Greater; |
| break; |
| default: |
| // The other ops carry over the previous state. |
| break; |
| } |
| } |
| cmd_state.nv.depth_compare_op = new_depth_compare_op; |
| cmd_state.nv.depth_test_enable = new_depth_test_enable; |
| } |
| |
| void BestPractices::RecordBindZcullScope(bp_state::CommandBuffer& cmd_state, VkImage depth_attachment, |
| const VkImageSubresourceRange& subresource_range) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| if (depth_attachment == VK_NULL_HANDLE) { |
| cmd_state.nv.zcull_scope = {}; |
| return; |
| } |
| |
| assert((subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0U); |
| |
| auto image_state = Get<IMAGE_STATE>(depth_attachment); |
| assert(image_state); |
| |
| const uint32_t mip_levels = image_state->createInfo.mipLevels; |
| const uint32_t array_layers = image_state->createInfo.arrayLayers; |
| |
| auto& tree = cmd_state.nv.zcull_per_image[depth_attachment]; |
| if (tree.states.empty()) { |
| tree.mip_levels = mip_levels; |
| tree.array_layers = array_layers; |
| tree.states.resize(array_layers * mip_levels); |
| } |
| |
| cmd_state.nv.zcull_scope.image = depth_attachment; |
| cmd_state.nv.zcull_scope.range = subresource_range; |
| cmd_state.nv.zcull_scope.tree = &tree; |
| } |
| |
| void BestPractices::RecordUnbindZcullScope(bp_state::CommandBuffer& cmd_state) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| RecordBindZcullScope(cmd_state, VK_NULL_HANDLE, VkImageSubresourceRange{}); |
| } |
| |
| void BestPractices::RecordResetScopeZcullDirection(bp_state::CommandBuffer& cmd_state) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| auto& scope = cmd_state.nv.zcull_scope; |
| RecordResetZcullDirection(cmd_state, scope.image, scope.range); |
| } |
| |
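// Invokes func(layer, level) for every subresource selected by range, resolving
// VK_REMAINING_ARRAY_LAYERS and VK_REMAINING_MIP_LEVELS against the image's full range.
// Usage sketch:
//   ForEachSubresource(*image, range, [](uint32_t layer, uint32_t level) { /* ... */ });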
| template <typename Func> |
| static void ForEachSubresource(const IMAGE_STATE& image, const VkImageSubresourceRange& range, Func&& func) { |
| const uint32_t layerCount = |
| (range.layerCount == VK_REMAINING_ARRAY_LAYERS) ? (image.full_range.layerCount - range.baseArrayLayer) : range.layerCount; |
| const uint32_t levelCount = |
| (range.levelCount == VK_REMAINING_MIP_LEVELS) ? (image.full_range.levelCount - range.baseMipLevel) : range.levelCount; |
| |
| for (uint32_t i = 0; i < layerCount; ++i) { |
| const uint32_t layer = range.baseArrayLayer + i; |
| for (uint32_t j = 0; j < levelCount; ++j) { |
| const uint32_t level = range.baseMipLevel + j; |
| func(layer, level); |
| } |
| } |
| } |
| |
| void BestPractices::RecordResetZcullDirection(bp_state::CommandBuffer& cmd_state, VkImage depth_image, |
| const VkImageSubresourceRange& subresource_range) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| RecordSetZcullDirection(cmd_state, depth_image, subresource_range, bp_state::CommandBufferStateNV::ZcullDirection::Unknown); |
| |
| const auto image_it = cmd_state.nv.zcull_per_image.find(depth_image); |
| if (image_it == cmd_state.nv.zcull_per_image.end()) { |
| return; |
| } |
| auto& tree = image_it->second; |
| |
| auto image = Get<IMAGE_STATE>(depth_image); |
| if (!image) return; |
| |
| ForEachSubresource(*image, subresource_range, [&tree](uint32_t layer, uint32_t level) { |
| auto& subresource = tree.GetState(layer, level); |
| subresource.num_less_draws = 0; |
| subresource.num_greater_draws = 0; |
| }); |
| } |
| |
| void BestPractices::RecordSetScopeZcullDirection(bp_state::CommandBuffer& cmd_state, |
| bp_state::CommandBufferStateNV::ZcullDirection mode) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| auto& scope = cmd_state.nv.zcull_scope; |
| RecordSetZcullDirection(cmd_state, scope.image, scope.range, mode); |
| } |
| |
| void BestPractices::RecordSetZcullDirection(bp_state::CommandBuffer& cmd_state, VkImage depth_image, |
| const VkImageSubresourceRange& subresource_range, |
| bp_state::CommandBufferStateNV::ZcullDirection mode) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| const auto image_it = cmd_state.nv.zcull_per_image.find(depth_image); |
| if (image_it == cmd_state.nv.zcull_per_image.end()) { |
| return; |
| } |
| auto& tree = image_it->second; |
| |
| auto image = Get<IMAGE_STATE>(depth_image); |
| if (!image) return; |
| |
    ForEachSubresource(*image, subresource_range, [&tree, mode](uint32_t layer, uint32_t level) {
        tree.GetState(layer, level).direction = mode;
    });
| } |
| |
| void BestPractices::RecordZcullDraw(bp_state::CommandBuffer& cmd_state) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| // Add one draw to each subresource depending on the current Z-cull direction |
| auto& scope = cmd_state.nv.zcull_scope; |
| |
| auto image = Get<IMAGE_STATE>(scope.image); |
| if (!image) return; |
| |
| ForEachSubresource(*image, scope.range, [&scope](uint32_t layer, uint32_t level) { |
| auto& subresource = scope.tree->GetState(layer, level); |
| |
| switch (subresource.direction) { |
| case bp_state::CommandBufferStateNV::ZcullDirection::Unknown: |
| // Unreachable |
| assert(0); |
| break; |
| case bp_state::CommandBufferStateNV::ZcullDirection::Less: |
| ++subresource.num_less_draws; |
| break; |
| case bp_state::CommandBufferStateNV::ZcullDirection::Greater: |
| ++subresource.num_greater_draws; |
| break; |
| } |
| }); |
| } |
| |
| bool BestPractices::ValidateZcullScope(const bp_state::CommandBuffer& cmd_state, const Location& loc) const { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| bool skip = false; |
| |
| if (cmd_state.nv.depth_test_enable) { |
| auto& scope = cmd_state.nv.zcull_scope; |
| skip |= ValidateZcull(cmd_state, scope.image, scope.range, loc); |
| } |
| |
| return skip; |
| } |
| |
| bool BestPractices::ValidateZcull(const bp_state::CommandBuffer& cmd_state, VkImage image, |
| const VkImageSubresourceRange& subresource_range, const Location& loc) const { |
| bool skip = false; |
| |
| const char* good_mode = nullptr; |
| const char* bad_mode = nullptr; |
| bool is_balanced = false; |
| |
| const auto image_it = cmd_state.nv.zcull_per_image.find(image); |
| if (image_it == cmd_state.nv.zcull_per_image.end()) { |
| return skip; |
| } |
| const auto& tree = image_it->second; |
| |
| auto image_state = Get<IMAGE_STATE>(image); |
| if (!image_state) { |
| return skip; |
| } |
| |
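    // "Balanced" here means both directions exceed the ratio threshold, i.e. draws are split
    // between LESS- and GREATER-style compare ops instead of sticking to one direction.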
| ForEachSubresource(*image_state, subresource_range, [&](uint32_t layer, uint32_t level) { |
| if (is_balanced) { |
| return; |
| } |
| const auto& resource = tree.GetState(layer, level); |
| const uint64_t num_draws = resource.num_less_draws + resource.num_greater_draws; |
| |
| if (num_draws == 0) { |
| return; |
| } |
| const uint64_t less_ratio = (resource.num_less_draws * 100) / num_draws; |
| const uint64_t greater_ratio = (resource.num_greater_draws * 100) / num_draws; |
| |
| if ((less_ratio > kZcullDirectionBalanceRatioNVIDIA) && (greater_ratio > kZcullDirectionBalanceRatioNVIDIA)) { |
| is_balanced = true; |
| |
| if (greater_ratio > less_ratio) { |
| good_mode = "GREATER"; |
| bad_mode = "LESS"; |
| } else { |
| good_mode = "LESS"; |
| bad_mode = "GREATER"; |
| } |
| } |
| }); |
| |
| if (is_balanced) { |
| skip |= LogPerformanceWarning( |
| kVUID_BestPractices_Zcull_LessGreaterRatio, cmd_state.commandBuffer(), loc, |
| "%s Depth attachment %s is primarily rendered with depth compare op %s, but some draws use %s. " |
| "Z-cull is disabled for the least used direction, which harms depth testing performance. " |
| "The Z-cull direction can be reset by clearing the depth attachment, transitioning from VK_IMAGE_LAYOUT_UNDEFINED, " |
| "using VK_ATTACHMENT_LOAD_OP_DONT_CARE, or using VK_ATTACHMENT_STORE_OP_DONT_CARE.", |
| VendorSpecificTag(kBPVendorNVIDIA), FormatHandle(cmd_state.nv.zcull_scope.image).c_str(), good_mode, bad_mode); |
| } |
| |
| return skip; |
| } |
| |
| static std::array<uint32_t, 4> GetRawClearColor(VkFormat format, const VkClearColorValue& clear_value) { |
| std::array<uint32_t, 4> raw_color{}; |
| std::copy_n(clear_value.uint32, raw_color.size(), raw_color.data()); |
| |
| // Zero out unused components to avoid polluting the cache with garbage |
| if (!vkuFormatHasRed(format)) raw_color[0] = 0; |
| if (!vkuFormatHasGreen(format)) raw_color[1] = 0; |
| if (!vkuFormatHasBlue(format)) raw_color[2] = 0; |
| if (!vkuFormatHasAlpha(format)) raw_color[3] = 0; |
| |
| return raw_color; |
| } |
| |
static bool IsClearColorZeroOrOne(VkFormat format, const std::array<uint32_t, 4>& clear_color) {
| static_assert(sizeof(float) == sizeof(uint32_t), "Mismatching float <-> uint32 sizes"); |
| const float one = 1.0f; |
| const float zero = 0.0f; |
| uint32_t raw_one{}; |
| uint32_t raw_zero{}; |
| memcpy(&raw_one, &one, sizeof(one)); |
| memcpy(&raw_zero, &zero, sizeof(zero)); |
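    // raw_one == 0x3F800000 and raw_zero == 0x00000000, the IEEE-754 bit patterns of 1.0f and 0.0f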
| |
| const bool is_one = |
| (!vkuFormatHasRed(format) || (clear_color[0] == raw_one)) && (!vkuFormatHasGreen(format) || (clear_color[1] == raw_one)) && |
| (!vkuFormatHasBlue(format) || (clear_color[2] == raw_one)) && (!vkuFormatHasAlpha(format) || (clear_color[3] == raw_one)); |
| const bool is_zero = |
| (!vkuFormatHasRed(format) || (clear_color[0] == raw_zero)) && (!vkuFormatHasGreen(format) || (clear_color[1] == raw_zero)) && |
| (!vkuFormatHasBlue(format) || (clear_color[2] == raw_zero)) && (!vkuFormatHasAlpha(format) || (clear_color[3] == raw_zero)); |
| return is_one || is_zero; |
| } |
| |
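// Builds a human-readable list of the compressible formats, e.g.
// "VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, ..., or VK_FORMAT_B10G11R11_UFLOAT_PACK32"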
| static std::string MakeCompressedFormatListNVIDIA() { |
| std::string format_list; |
| for (VkFormat compressed_format : kCustomClearColorCompressedFormatsNVIDIA) { |
| if (compressed_format == kCustomClearColorCompressedFormatsNVIDIA.back()) { |
| format_list += "or "; |
| } |
| format_list += string_VkFormat(compressed_format); |
| if (compressed_format != kCustomClearColorCompressedFormatsNVIDIA.back()) { |
| format_list += ", "; |
| } |
| } |
| return format_list; |
| } |
| |
| void BestPractices::RecordClearColor(VkFormat format, const VkClearColorValue& clear_value) { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value); |
| if (IsClearColorZeroOrOne(format, raw_color)) { |
| // These colors are always compressed |
| return; |
| } |
| |
| const auto it = |
| std::find(kCustomClearColorCompressedFormatsNVIDIA.begin(), kCustomClearColorCompressedFormatsNVIDIA.end(), format); |
| if (it == kCustomClearColorCompressedFormatsNVIDIA.end()) { |
| // The format cannot be compressed with a custom color |
| return; |
| } |
| |
| // Record custom clear color |
| WriteLockGuard guard{clear_colors_lock_}; |
| if (clear_colors_.size() < kMaxRecommendedNumberOfClearColorsNVIDIA) { |
| clear_colors_.insert(raw_color); |
| } |
| } |
| |
| bool BestPractices::ValidateClearColor(VkCommandBuffer commandBuffer, VkFormat format, const VkClearColorValue& clear_value, |
| const Location& loc) const { |
| assert(VendorCheckEnabled(kBPVendorNVIDIA)); |
| |
| bool skip = false; |
| |
| const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value); |
| if (IsClearColorZeroOrOne(format, raw_color)) { |
| return skip; |
| } |
| |
| const auto it = |
| std::find(kCustomClearColorCompressedFormatsNVIDIA.begin(), kCustomClearColorCompressedFormatsNVIDIA.end(), format); |
| if (it == kCustomClearColorCompressedFormatsNVIDIA.end()) { |
| // The format is not compressible |
| static const std::string format_list = MakeCompressedFormatListNVIDIA(); |
| |
| skip |= LogPerformanceWarning(kVUID_BestPractices_ClearColor_NotCompressed, commandBuffer, loc, |
| "%s Clearing image with format %s without a 1.0f or 0.0f clear color. " |
| "The clear will not get compressed in the GPU, harming performance. " |
| "This can be fixed using a clear color of VkClearColorValue{0.0f, 0.0f, 0.0f, 0.0f}, or " |
| "VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f}. Alternatively, use %s.", |
| VendorSpecificTag(kBPVendorNVIDIA), string_VkFormat(format), format_list.c_str()); |
| } else { |
| // The format is compressible |
| bool registered = false; |
| { |
| ReadLockGuard guard{clear_colors_lock_}; |
| registered = clear_colors_.find(raw_color) != clear_colors_.end(); |
| |
| if (!registered) { |
| // If it's not in the list, it might be new. Check if there's still space for new entries. |
| registered = clear_colors_.size() < kMaxRecommendedNumberOfClearColorsNVIDIA; |
| } |
| } |
| if (!registered) { |
| std::string clear_color_str; |
| |
| if (vkuFormatIsUINT(format)) { |
| clear_color_str = std::to_string(clear_value.uint32[0]) + ", " + std::to_string(clear_value.uint32[1]) + ", " + |
| std::to_string(clear_value.uint32[2]) + ", " + std::to_string(clear_value.uint32[3]); |
| } else if (vkuFormatIsSINT(format)) { |
| clear_color_str = std::to_string(clear_value.int32[0]) + ", " + std::to_string(clear_value.int32[1]) + ", " + |
| std::to_string(clear_value.int32[2]) + ", " + std::to_string(clear_value.int32[3]); |
| } else { |
| clear_color_str = std::to_string(clear_value.float32[0]) + ", " + std::to_string(clear_value.float32[1]) + ", " + |
| std::to_string(clear_value.float32[2]) + ", " + std::to_string(clear_value.float32[3]); |
| } |
| |
| skip |= LogPerformanceWarning( |
| kVUID_BestPractices_ClearColor_NotCompressed, commandBuffer, loc, |
| "%s Clearing image with unregistered VkClearColorValue{%s}. " |
| "This clear will not get compressed in the GPU, harming performance. " |
| "The clear color is not registered because too many unique colors have been used. " |
| "Select a discrete set of clear colors and stick to those. " |
| "VkClearColorValue{0, 0, 0, 0} and VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f} are always registered.", |
| VendorSpecificTag(kBPVendorNVIDIA), clear_color_str.c_str()); |
| } |
| } |
| |
| return skip; |
| } |
| |
| void BestPractices::QueueValidateImageView(QueueCallbacks& funcs, Func command, IMAGE_VIEW_STATE* view, |
| IMAGE_SUBRESOURCE_USAGE_BP usage) { |
| if (view) { |
| auto image_state = std::static_pointer_cast<bp_state::Image>(view->image_state); |
| QueueValidateImage(funcs, command, image_state, usage, view->normalized_subresource_range); |
| } |
| } |
| |
| void BestPractices::QueueValidateImage(QueueCallbacks& funcs, Func command, std::shared_ptr<bp_state::Image>& state, |
| IMAGE_SUBRESOURCE_USAGE_BP usage, const VkImageSubresourceRange& subresource_range) { |
| // If we're viewing a 3D slice, ignore base array layer. |
| // The entire 3D subresource is accessed as one atomic unit. |
| const uint32_t base_array_layer = state->createInfo.imageType == VK_IMAGE_TYPE_3D ? 0 : subresource_range.baseArrayLayer; |
| |
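    // layerCount and levelCount may be VK_REMAINING_ARRAY_LAYERS / VK_REMAINING_MIP_LEVELS (~0u),
    // so clamp both against what the image actually provides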
| const uint32_t max_layers = state->createInfo.arrayLayers - base_array_layer; |
| const uint32_t array_layers = std::min(subresource_range.layerCount, max_layers); |
| const uint32_t max_levels = state->createInfo.mipLevels - subresource_range.baseMipLevel; |
    const uint32_t mip_levels = std::min(subresource_range.levelCount, max_levels);
| |
| for (uint32_t layer = 0; layer < array_layers; layer++) { |
| for (uint32_t level = 0; level < mip_levels; level++) { |
| QueueValidateImage(funcs, command, state, usage, layer + base_array_layer, level + subresource_range.baseMipLevel); |
| } |
| } |
| } |
| |
| void BestPractices::QueueValidateImage(QueueCallbacks& funcs, Func command, std::shared_ptr<bp_state::Image>& state, |
| IMAGE_SUBRESOURCE_USAGE_BP usage, const VkImageSubresourceLayers& subresource_layers) { |
| const uint32_t max_layers = state->createInfo.arrayLayers - subresource_layers.baseArrayLayer; |
| const uint32_t array_layers = std::min(subresource_layers.layerCount, max_layers); |
| |
| for (uint32_t layer = 0; layer < array_layers; layer++) { |
| QueueValidateImage(funcs, command, state, usage, layer + subresource_layers.baseArrayLayer, subresource_layers.mipLevel); |
| } |
| } |
| |
| void BestPractices::QueueValidateImage(QueueCallbacks& funcs, Func command, std::shared_ptr<bp_state::Image>& state, |
| IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer, uint32_t mip_level) { |
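    // Defer the actual check to queue submit time, when usage recorded by previously
    // submitted command buffers is also visible to ValidateImageInQueue().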
| funcs.push_back([this, command, state, usage, array_layer, mip_level](const ValidationStateTracker& vst, const QUEUE_STATE& qs, |
| const CMD_BUFFER_STATE& cbs) -> bool { |
| ValidateImageInQueue(qs, cbs, command, *state, usage, array_layer, mip_level); |
| return false; |
| }); |
| } |
| |
| void BestPractices::ValidateImageInQueueArmImg(Func command, const bp_state::Image& image, IMAGE_SUBRESOURCE_USAGE_BP last_usage, |
| IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer, uint32_t mip_level) { |
    // Swapchain images are implicitly read when presented, so a clear after a store is expected and not redundant.
| const Location loc(command); |
| if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_STORED && |
| !image.IsSwapchainImage()) { |
| LogPerformanceWarning( |
| kVUID_BestPractices_RenderPass_RedundantStore, device, loc, |
| "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time " |
| "image was used, it was written to with STORE_OP_STORE. " |
| "Storing to the image is probably redundant in this case, and wastes bandwidth on tile-based " |
| "architectures.", |
| VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level); |
| } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_CLEARED && last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED) { |
| LogPerformanceWarning( |
| kVUID_BestPractices_RenderPass_RedundantClear, device, loc, |
| "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was cleared as part of LOAD_OP_CLEAR, but last time " |
| "image was used, it was written to with vkCmdClear*Image(). " |
| "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on " |
| "tile-based architectures.", |
| VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level); |
| } else if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE && |
| (last_usage == IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE || last_usage == IMAGE_SUBRESOURCE_USAGE_BP::CLEARED || |
| last_usage == IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE || last_usage == IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE)) { |
| const char* last_cmd = nullptr; |
| const char* vuid = nullptr; |
| const char* suggestion = nullptr; |
| |
| switch (last_usage) { |
| case IMAGE_SUBRESOURCE_USAGE_BP::BLIT_WRITE: |
| vuid = kVUID_BestPractices_RenderPass_BlitImage_LoadOpLoad; |
| last_cmd = "vkCmdBlitImage"; |
| suggestion = |
| "The blit is probably redundant in this case, and wastes bandwidth on tile-based architectures. " |
| "Rather than blitting, just render the source image in a fragment shader in this render pass, " |
| "which avoids the memory roundtrip."; |
| break; |
| case IMAGE_SUBRESOURCE_USAGE_BP::CLEARED: |
| vuid = kVUID_BestPractices_RenderPass_InefficientClear; |
| last_cmd = "vkCmdClear*Image"; |
| suggestion = |
| "Clearing the image with vkCmdClear*Image() is probably redundant in this case, and wastes bandwidth on " |
| "tile-based architectures. " |
| "Use LOAD_OP_CLEAR instead to clear the image for free."; |
| break; |
| case IMAGE_SUBRESOURCE_USAGE_BP::COPY_WRITE: |
| vuid = kVUID_BestPractices_RenderPass_CopyImage_LoadOpLoad; |
| last_cmd = "vkCmdCopy*Image"; |
| suggestion = |
| "The copy is probably redundant in this case, and wastes bandwidth on tile-based architectures. " |
| "Rather than copying, just render the source image in a fragment shader in this render pass, " |
| "which avoids the memory roundtrip."; |
| break; |
| case IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_WRITE: |
| vuid = kVUID_BestPractices_RenderPass_ResolveImage_LoadOpLoad; |
| last_cmd = "vkCmdResolveImage"; |
| suggestion = |
| "The resolve is probably redundant in this case, and wastes a lot of bandwidth on tile-based architectures. " |
| "Rather than resolving, and then loading, try to keep rendering in the same render pass, " |
| "which avoids the memory roundtrip."; |
| break; |
| default: |
| break; |
| } |
| |
| LogPerformanceWarning( |
| vuid, device, loc, |
| "%s %s Subresource (arrayLayer: %u, mipLevel: %u) of image was loaded to tile as part of LOAD_OP_LOAD, but last " |
| "time image was used, it was written to with %s. %s", |
| VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG), array_layer, mip_level, last_cmd, suggestion); |
| } |
| } |
| |
| void BestPractices::ValidateImageInQueue(const QUEUE_STATE& qs, const CMD_BUFFER_STATE& cbs, Func command, bp_state::Image& state, |
| IMAGE_SUBRESOURCE_USAGE_BP usage, uint32_t array_layer, uint32_t mip_level) { |
| auto queue_family = qs.queueFamilyIndex; |
| auto last_usage = state.UpdateUsage(array_layer, mip_level, usage, queue_family); |
| |
    // Concurrent usage across queue families of an image created with exclusive sharing mode
    if (state.createInfo.sharingMode == VK_SHARING_MODE_EXCLUSIVE && last_usage.queue_family_index != queue_family) {
        // If the last usage was UNDEFINED, this is the first use/acquisition of the subresource
        if (last_usage.type != IMAGE_SUBRESOURCE_USAGE_BP::UNDEFINED) {
            // Only warn if the new usage might read from the subresource; on first use the
            // contents are undefined, so write-only access is fine
| if (usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE || usage == IMAGE_SUBRESOURCE_USAGE_BP::BLIT_READ || |
| usage == IMAGE_SUBRESOURCE_USAGE_BP::COPY_READ || usage == IMAGE_SUBRESOURCE_USAGE_BP::DESCRIPTOR_ACCESS || |
| usage == IMAGE_SUBRESOURCE_USAGE_BP::RESOLVE_READ) { |
| Location loc(command); |
| LogWarning( |
| kVUID_BestPractices_ConcurrentUsageOfExclusiveImage, state.image(), loc, |
| "Subresource (arrayLayer: %" PRIu32 ", mipLevel: %" PRIu32 ") of image is used on queue family index %" PRIu32 |
| " after being used on " |
| "queue family index %" PRIu32 |
| ", " |
| "but has VK_SHARING_MODE_EXCLUSIVE, and has not been acquired and released with a ownership transfer operation", |
| array_layer, mip_level, queue_family, last_usage.queue_family_index); |
| } |
| } |
| } |
| |
    // Warn when an image that was discarded with StoreOpDontCare is now being read with LoadOpLoad
| if (last_usage.type == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_DISCARDED && |
| usage == IMAGE_SUBRESOURCE_USAGE_BP::RENDER_PASS_READ_TO_TILE) { |
| Location loc(command); |
| LogWarning(kVUID_BestPractices_StoreOpDontCareThenLoadOpLoad, device, loc, |
| "Trying to load an attachment with LOAD_OP_LOAD that was previously stored with STORE_OP_DONT_CARE. This may " |
| "result in undefined behaviour."); |
| } |
| |
| if (VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorIMG)) { |
| ValidateImageInQueueArmImg(command, state, last_usage.type, usage, array_layer, mip_level); |
| } |
| } |
| |
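// Moves checks that had to wait for the end of the render pass into the regular
// queue-submit-time callback list.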
| void BestPractices::AddDeferredQueueOperations(bp_state::CommandBuffer& cb) { |
| cb.queue_submit_functions.insert(cb.queue_submit_functions.end(), cb.queue_submit_functions_after_render_pass.begin(), |
| cb.queue_submit_functions_after_render_pass.end()); |
| cb.queue_submit_functions_after_render_pass.clear(); |
| } |
| |
| void BestPractices::RecordAttachmentAccess(bp_state::CommandBuffer& cb_state, uint32_t fb_attachment, VkImageAspectFlags aspects) { |
| auto& state = cb_state.render_pass_state; |
    // Called when we have a partial clear of an attachment, or a normal draw call that accesses an attachment.
| auto itr = |
| std::find_if(state.touchesAttachments.begin(), state.touchesAttachments.end(), |
| [fb_attachment](const bp_state::AttachmentInfo& info) { return info.framebufferAttachment == fb_attachment; }); |
| |
| if (itr != state.touchesAttachments.end()) { |
| itr->aspects |= aspects; |
| } else { |
| state.touchesAttachments.push_back({fb_attachment, aspects}); |
| } |
| } |
| |
| void BestPractices::RecordAttachmentClearAttachments(bp_state::CommandBuffer& cmd_state, uint32_t fb_attachment, |
| uint32_t color_attachment, VkImageAspectFlags aspects, uint32_t rectCount, |
| const VkClearRect* pRects) { |
| auto& state = cmd_state.render_pass_state; |
    // If we observe a full clear before any other access to a framebuffer attachment,
    // we have a candidate for redundant clear attachments.
| auto itr = |
| std::find_if(state.touchesAttachments.begin(), state.touchesAttachments.end(), |
| [fb_attachment](const bp_state::AttachmentInfo& info) { return info.framebufferAttachment == fb_attachment; }); |
| |
| uint32_t new_aspects = aspects; |
| if (itr != state.touchesAttachments.end()) { |
| new_aspects = aspects & ~itr->aspects; |
| itr->aspects |= aspects; |
| } else { |
| state.touchesAttachments.push_back({fb_attachment, aspects}); |
| } |
| |
| if (new_aspects == 0) { |
| return; |
| } |
| |
| if (cmd_state.createInfo.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { |
        // The first command might be a clear, but it might not be the first command in the
        // render pass; defer any checks until CmdExecuteCommands.
| state.earlyClearAttachments.push_back( |
| {fb_attachment, color_attachment, new_aspects, std::vector<VkClearRect>{pRects, pRects + rectCount}}); |
| } |
| } |