/* Copyright (c) 2015-2022 The Khronos Group Inc.
* Copyright (c) 2015-2022 Valve Corporation
* Copyright (c) 2015-2022 LunarG, Inc.
* Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author: Camden Stocker <camden@lunarg.com>
* Author: Nadav Geva <nadav.geva@amd.com>
*/
#include "best_practices_validation.h"
#include "layer_chassis_dispatch.h"
#include "best_practices_error_enums.h"
#include "shader_validation.h"
#include "sync_utils.h"
#include "cmd_buffer_state.h"
#include "device_state.h"
#include "render_pass_state.h"
#include <string>
#include <bitset>
#include <memory>
struct VendorSpecificInfo {
EnableFlags vendor_id;
std::string name;
};
const std::map<BPVendorFlagBits, VendorSpecificInfo> kVendorInfo = {{kBPVendorArm, {vendor_specific_arm, "Arm"}},
{kBPVendorAMD, {vendor_specific_amd, "AMD"}},
{kBPVendorIMG, {vendor_specific_img, "IMG"}},
{kBPVendorNVIDIA, {vendor_specific_nvidia, "NVIDIA"}}};
const SpecialUseVUIDs kSpecialUseInstanceVUIDs {
kVUID_BestPractices_CreateInstance_SpecialUseExtension_CADSupport,
kVUID_BestPractices_CreateInstance_SpecialUseExtension_D3DEmulation,
kVUID_BestPractices_CreateInstance_SpecialUseExtension_DevTools,
kVUID_BestPractices_CreateInstance_SpecialUseExtension_Debugging,
kVUID_BestPractices_CreateInstance_SpecialUseExtension_GLEmulation,
};
const SpecialUseVUIDs kSpecialUseDeviceVUIDs {
kVUID_BestPractices_CreateDevice_SpecialUseExtension_CADSupport,
kVUID_BestPractices_CreateDevice_SpecialUseExtension_D3DEmulation,
kVUID_BestPractices_CreateDevice_SpecialUseExtension_DevTools,
kVUID_BestPractices_CreateDevice_SpecialUseExtension_Debugging,
kVUID_BestPractices_CreateDevice_SpecialUseExtension_GLEmulation,
};
static constexpr std::array<VkFormat, 12> kCustomClearColorCompressedFormatsNVIDIA = {
VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A2R10G10B10_UNORM_PACK32, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_B10G11R11_UFLOAT_PACK32,
};
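// When fine_grained_locking is enabled, these guards are constructed with std::defer_lock and therefore do
// not take the global validation-object mutex; otherwise they lock it for the duration of the call.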
ReadLockGuard BestPractices::ReadLock() {
if (fine_grained_locking) {
return ReadLockGuard(validation_object_mutex, std::defer_lock);
} else {
return ReadLockGuard(validation_object_mutex);
}
}
WriteLockGuard BestPractices::WriteLock() {
if (fine_grained_locking) {
return WriteLockGuard(validation_object_mutex, std::defer_lock);
} else {
return WriteLockGuard(validation_object_mutex);
}
}
std::shared_ptr<CMD_BUFFER_STATE> BestPractices::CreateCmdBufferState(VkCommandBuffer cb,
const VkCommandBufferAllocateInfo* pCreateInfo,
const COMMAND_POOL_STATE* pool) {
return std::static_pointer_cast<CMD_BUFFER_STATE>(std::make_shared<bp_state::CommandBuffer>(this, cb, pCreateInfo, pool));
}
bp_state::CommandBuffer::CommandBuffer(BestPractices* bp, VkCommandBuffer cb, const VkCommandBufferAllocateInfo* pCreateInfo,
const COMMAND_POOL_STATE* pool)
: CMD_BUFFER_STATE(bp, cb, pCreateInfo, pool) {}
bool BestPractices::VendorCheckEnabled(BPVendorFlags vendors) const {
for (const auto& vendor : kVendorInfo) {
if (vendors & vendor.first && enabled[vendor.second.vendor_id]) {
return true;
}
}
return false;
}
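// Builds a "[Arm, AMD]" style prefix for the given vendor flags and caches it so repeated warnings reuse the same string.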
const char* VendorSpecificTag(BPVendorFlags vendors) {
// Cache built vendor tags in a map
static layer_data::unordered_map<BPVendorFlags, std::string> tag_map;
auto res = tag_map.find(vendors);
if (res == tag_map.end()) {
// Build the vendor tag string
std::stringstream vendor_tag;
vendor_tag << "[";
bool first_vendor = true;
for (const auto& vendor : kVendorInfo) {
if (vendors & vendor.first) {
if (!first_vendor) {
vendor_tag << ", ";
}
vendor_tag << vendor.second.name;
first_vendor = false;
}
}
vendor_tag << "]";
tag_map[vendors] = vendor_tag.str();
res = tag_map.find(vendors);
}
return res->second.c_str();
}
const char* DepReasonToString(ExtDeprecationReason reason) {
switch (reason) {
case kExtPromoted:
return "promoted to";
case kExtObsoleted:
return "obsoleted by";
case kExtDeprecated:
return "deprecated by";
default:
return "";
}
}
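// Warns when an enabled extension has been promoted to a core version that is already targeted, or has been
// deprecated/obsoleted by another extension (or deprecated with no replacement at all).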
bool BestPractices::ValidateDeprecatedExtensions(const char* api_name, const char* extension_name, uint32_t version,
const char* vuid) const {
bool skip = false;
auto dep_info_it = deprecated_extensions.find(extension_name);
if (dep_info_it != deprecated_extensions.end()) {
auto dep_info = dep_info_it->second;
if (((dep_info.target.compare("VK_VERSION_1_1") == 0) && (version >= VK_API_VERSION_1_1)) ||
((dep_info.target.compare("VK_VERSION_1_2") == 0) && (version >= VK_API_VERSION_1_2)) ||
((dep_info.target.compare("VK_VERSION_1_3") == 0) && (version >= VK_API_VERSION_1_3))) {
skip |=
LogWarning(instance, vuid, "%s(): Attempting to enable deprecated extension %s, but this extension has been %s %s.",
api_name, extension_name, DepReasonToString(dep_info.reason), (dep_info.target).c_str());
} else if (dep_info.target.find("VK_VERSION") == std::string::npos) {
if (dep_info.target.length() == 0) {
skip |= LogWarning(instance, vuid,
"%s(): Attempting to enable deprecated extension %s, but this extension has been deprecated "
"without replacement.",
api_name, extension_name);
} else {
skip |= LogWarning(instance, vuid,
"%s(): Attempting to enable deprecated extension %s, but this extension has been %s %s.",
api_name, extension_name, DepReasonToString(dep_info.reason), (dep_info.target).c_str());
}
}
}
return skip;
}
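// Warns when an enabled extension is marked in the registry as "special use" (CAD support, D3D or GL emulation,
// developer tools, debugging) and is therefore not intended for general application use.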
bool BestPractices::ValidateSpecialUseExtensions(const char* api_name, const char* extension_name, const SpecialUseVUIDs& special_use_vuids) const
{
bool skip = false;
auto dep_info_it = special_use_extensions.find(extension_name);
if (dep_info_it != special_use_extensions.end()) {
const char* const format = "%s(): Attempting to enable extension %s, but this extension is intended to support %s "
"and it is strongly recommended that it be otherwise avoided.";
auto& special_uses = dep_info_it->second;
if (special_uses.find("cadsupport") != std::string::npos) {
skip |= LogWarning(instance, special_use_vuids.cadsupport, format, api_name, extension_name,
"specialized functionality used by CAD/CAM applications");
}
if (special_uses.find("d3demulation") != std::string::npos) {
skip |= LogWarning(instance, special_use_vuids.d3demulation, format, api_name, extension_name,
"D3D emulation layers, and applications ported from D3D, by adding functionality specific to D3D");
}
if (special_uses.find("devtools") != std::string::npos) {
skip |= LogWarning(instance, special_use_vuids.devtools, format, api_name, extension_name,
"developer tools such as capture-replay libraries");
}
if (special_uses.find("debugging") != std::string::npos) {
skip |= LogWarning(instance, special_use_vuids.debugging, format, api_name, extension_name,
"use by applications when debugging");
}
if (special_uses.find("glemulation") != std::string::npos) {
skip |= LogWarning(instance, special_use_vuids.glemulation, format, api_name, extension_name,
"OpenGL and/or OpenGL ES emulation layers, and applications ported from those APIs, by adding functionality "
"specific to those APIs");
}
}
return skip;
}
bool BestPractices::PreCallValidateCreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator,
VkInstance* pInstance) const {
bool skip = false;
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
if (white_list(pCreateInfo->ppEnabledExtensionNames[i], kDeviceExtensionNames)) {
skip |= LogWarning(instance, kVUID_BestPractices_CreateInstance_ExtensionMismatch,
"vkCreateInstance(): Attempting to enable Device Extension %s at CreateInstance time.",
pCreateInfo->ppEnabledExtensionNames[i]);
}
uint32_t specified_version =
(pCreateInfo->pApplicationInfo ? pCreateInfo->pApplicationInfo->apiVersion : VK_API_VERSION_1_0);
skip |= ValidateDeprecatedExtensions("CreateInstance", pCreateInfo->ppEnabledExtensionNames[i], specified_version,
kVUID_BestPractices_CreateInstance_DeprecatedExtension);
skip |= ValidateSpecialUseExtensions("CreateInstance", pCreateInfo->ppEnabledExtensionNames[i], kSpecialUseInstanceVUIDs);
}
return skip;
}
bool BestPractices::PreCallValidateCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) const {
bool skip = false;
// get API version of physical device passed when creating device.
VkPhysicalDeviceProperties physical_device_properties{};
DispatchGetPhysicalDeviceProperties(physicalDevice, &physical_device_properties);
auto device_api_version = physical_device_properties.apiVersion;
// Check API versions and log an info message when the instance API version is higher than the device's API version.
if (api_version > device_api_version) {
std::string inst_api_name = StringAPIVersion(api_version);
std::string dev_api_name = StringAPIVersion(device_api_version);
skip |= LogInfo(device, kVUID_BestPractices_CreateDevice_API_Mismatch,
"vkCreateDevice(): API Version of current instance, %s is higher than API Version on device, %s",
inst_api_name.c_str(), dev_api_name.c_str());
}
std::vector<std::string> extensions;
{
uint32_t property_count = 0;
if (DispatchEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &property_count, nullptr) == VK_SUCCESS) {
std::vector<VkExtensionProperties> property_list(property_count);
if (DispatchEnumerateDeviceExtensionProperties(physicalDevice, nullptr, &property_count, property_list.data()) == VK_SUCCESS) {
extensions.reserve(property_list.size());
for (const VkExtensionProperties& properties : property_list) {
extensions.push_back(properties.extensionName);
}
}
}
}
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
const char *extension_name = pCreateInfo->ppEnabledExtensionNames[i];
uint32_t extension_api_version = std::min(api_version, device_api_version);
if (white_list(extension_name, kInstanceExtensionNames)) {
skip |= LogWarning(instance, kVUID_BestPractices_CreateDevice_ExtensionMismatch,
"vkCreateDevice(): Attempting to enable Instance Extension %s at CreateDevice time.",
extension_name);
extension_api_version = api_version;
}
skip |= ValidateDeprecatedExtensions("CreateDevice", extension_name, extension_api_version,
kVUID_BestPractices_CreateDevice_DeprecatedExtension);
skip |= ValidateSpecialUseExtensions("CreateDevice", extension_name, kSpecialUseDeviceVUIDs);
}
const auto bp_pd_state = Get<bp_state::PhysicalDevice>(physicalDevice);
if ((bp_pd_state->vkGetPhysicalDeviceFeaturesState == UNCALLED) && (pCreateInfo->pEnabledFeatures != NULL)) {
skip |= LogWarning(device, kVUID_BestPractices_CreateDevice_PDFeaturesNotCalled,
"vkCreateDevice() called before getting physical device features from vkGetPhysicalDeviceFeatures().");
}
if ((VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorAMD) || VendorCheckEnabled(kBPVendorIMG)) &&
(pCreateInfo->pEnabledFeatures != nullptr) && (pCreateInfo->pEnabledFeatures->robustBufferAccess == VK_TRUE)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateDevice_RobustBufferAccess,
"%s %s %s: vkCreateDevice() called with enabled robustBufferAccess. Use robustBufferAccess as a debugging tool during "
"development. Enabling it causes loss in performance for accesses to uniform buffers and shader storage "
"buffers. Disable robustBufferAccess in release builds. Only leave it enabled if the application use-case "
"requires the additional level of reliability due to the use of unverified user-supplied draw parameters.",
VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorAMD), VendorSpecificTag(kBPVendorIMG));
}
const bool enabled_pageable_device_local_memory = IsExtEnabled(device_extensions.vk_ext_pageable_device_local_memory);
if (VendorCheckEnabled(kBPVendorNVIDIA) && !enabled_pageable_device_local_memory &&
std::find(extensions.begin(), extensions.end(), VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME) != extensions.end()) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateDevice_PageableDeviceLocalMemory,
"%s vkCreateDevice() called without pageable device local memory. "
"Use pageableDeviceLocalMemory from VK_EXT_pageable_device_local_memory when it is available.",
VendorSpecificTag(kBPVendorNVIDIA));
}
return skip;
}
bool BestPractices::PreCallValidateCreateBuffer(VkDevice device, const VkBufferCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer) const {
bool skip = false;
if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
std::stringstream buffer_hex;
buffer_hex << "0x" << std::hex << HandleToUint64(pBuffer);
skip |= LogWarning(
device, kVUID_BestPractices_SharingModeExclusive,
"Warning: Buffer (%s) specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple queues "
"(queueFamilyIndexCount of %" PRIu32 ").",
buffer_hex.str().c_str(), pCreateInfo->queueFamilyIndexCount);
}
return skip;
}
bool BestPractices::PreCallValidateCreateImage(VkDevice device, const VkImageCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkImage* pImage) const {
bool skip = false;
if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
std::stringstream image_hex;
image_hex << "0x" << std::hex << HandleToUint64(pImage);
skip |=
LogWarning(device, kVUID_BestPractices_SharingModeExclusive,
"Warning: Image (%s) specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple queues "
"(queueFamilyIndexCount of %" PRIu32 ").",
image_hex.str().c_str(), pCreateInfo->queueFamilyIndexCount);
}
if ((pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) && !(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
skip |= LogWarning(device, kVUID_BestPractices_ImageCreateFlags,
"vkCreateImage(): pCreateInfo->flags has VK_IMAGE_CREATE_EXTENDED_USAGE_BIT set, but not "
"VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, therefore image views created from this image will have to use the "
"same format and VK_IMAGE_CREATE_EXTENDED_USAGE_BIT will not have any effect.");
}
if (VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorIMG)) {
if (pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT && !(pCreateInfo->usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateImage_NonTransientMSImage,
"%s %s vkCreateImage(): Trying to create a multisampled image, but createInfo.usage did not have "
"VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. Multisampled images may be resolved on-chip, "
"and do not need to be backed by physical storage. "
"TRANSIENT_ATTACHMENT allows tiled GPUs to not back the multisampled image with physical memory.",
VendorSpecificTag(kBPVendorArm), VendorSpecificTag(kBPVendorIMG));
}
}
if (VendorCheckEnabled(kBPVendorArm) && pCreateInfo->samples > kMaxEfficientSamplesArm) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateImage_TooLargeSampleCount,
"%s vkCreateImage(): Trying to create an image with %u samples. "
"The hardware revision may not have full throughput for framebuffers with more than %u samples.",
VendorSpecificTag(kBPVendorArm), static_cast<uint32_t>(pCreateInfo->samples), kMaxEfficientSamplesArm);
}
if (VendorCheckEnabled(kBPVendorIMG) && pCreateInfo->samples > kMaxEfficientSamplesImg) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateImage_TooLargeSampleCount,
"%s vkCreateImage(): Trying to create an image with %u samples. "
"The device may not have full support for true multisampling for images with more than %u samples. "
"XT devices support up to 8 samples, XE up to 4 samples.",
VendorSpecificTag(kBPVendorIMG), static_cast<uint32_t>(pCreateInfo->samples), kMaxEfficientSamplesImg);
}
if (VendorCheckEnabled(kBPVendorIMG) && (pCreateInfo->format == VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG ||
pCreateInfo->format == VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_Texture_Format_PVRTC_Outdated,
"%s vkCreateImage(): Trying to create an image with a PVRTC format. Both PVRTC1 and PVRTC2 "
"are slower than standard image formats on PowerVR GPUs, prefer ETC, BC, ASTC, etc.",
VendorSpecificTag(kBPVendorIMG));
}
if (VendorCheckEnabled(kBPVendorAMD)) {
std::stringstream image_hex;
image_hex << "0x" << std::hex << HandleToUint64(pImage);
if ((pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
(pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT)) {
skip |= LogPerformanceWarning(device,
kVUID_BestPractices_vkImage_AvoidConcurrentRenderTargets,
"%s Performance warning: image (%s) is created as a render target with VK_SHARING_MODE_CONCURRENT. "
"Using a SHARING_MODE_CONCURRENT "
"is not recommended with color and depth targets",
VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
}
if ((pCreateInfo->usage &
(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_DontUseMutableRenderTargets,
"%s Performance warning: image (%s) is created as a render target with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT. "
"Using a MUTABLE_FORMAT is not recommended with color, depth, and storage targets",
VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
}
if ((pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_DontUseStorageRenderTargets,
"%s Performance warning: image (%s) is created as a render target with VK_IMAGE_USAGE_STORAGE_BIT. Using a "
"VK_IMAGE_USAGE_STORAGE_BIT is not recommended with color and depth targets",
VendorSpecificTag(kBPVendorAMD), image_hex.str().c_str());
}
}
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
std::stringstream image_hex;
image_hex << "0x" << std::hex << HandleToUint64(pImage);
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreateImage_TilingLinear,
"%s Performance warning: image (%s) is created with tiling VK_IMAGE_TILING_LINEAR. "
"Use VK_IMAGE_TILING_OPTIMAL instead.",
VendorSpecificTag(kBPVendorNVIDIA), image_hex.str().c_str());
}
if (pCreateInfo->format == VK_FORMAT_D32_SFLOAT || pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateImage_Depth32Format,
"%s Performance warning: image (%s) is created with a 32-bit depth format. Use VK_FORMAT_D24_UNORM_S8_UINT or "
"VK_FORMAT_D16_UNORM instead, unless the extra precision is needed.",
VendorSpecificTag(kBPVendorNVIDIA), image_hex.str().c_str());
}
}
return skip;
}
bool BestPractices::PreCallValidateCreateSwapchainKHR(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) const {
bool skip = false;
const auto* bp_pd_state = GetPhysicalDeviceState();
if (bp_pd_state) {
if (bp_pd_state->vkGetPhysicalDeviceSurfaceCapabilitiesKHRState == UNCALLED) {
skip |= LogWarning(device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
"vkCreateSwapchainKHR() called before getting surface capabilities from "
"vkGetPhysicalDeviceSurfaceCapabilitiesKHR().");
}
if ((pCreateInfo->presentMode != VK_PRESENT_MODE_FIFO_KHR) &&
(bp_pd_state->vkGetPhysicalDeviceSurfacePresentModesKHRState != QUERY_DETAILS)) {
skip |= LogWarning(device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
"vkCreateSwapchainKHR() called before getting surface present mode(s) from "
"vkGetPhysicalDeviceSurfacePresentModesKHR().");
}
if (bp_pd_state->vkGetPhysicalDeviceSurfaceFormatsKHRState != QUERY_DETAILS) {
skip |= LogWarning(
device, kVUID_BestPractices_Swapchain_GetSurfaceNotCalled,
"vkCreateSwapchainKHR() called before getting surface format(s) from vkGetPhysicalDeviceSurfaceFormatsKHR().");
}
}
if ((pCreateInfo->queueFamilyIndexCount > 1) && (pCreateInfo->imageSharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
skip |=
LogWarning(device, kVUID_BestPractices_SharingModeExclusive,
"Warning: A Swapchain is being created which specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while "
"specifying multiple queues (queueFamilyIndexCount of %" PRIu32 ").",
pCreateInfo->queueFamilyIndexCount);
}
const auto present_mode = pCreateInfo->presentMode;
if (((present_mode == VK_PRESENT_MODE_MAILBOX_KHR) || (present_mode == VK_PRESENT_MODE_FIFO_KHR)) &&
(pCreateInfo->minImageCount == 2)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_SuboptimalSwapchainImageCount,
"Warning: A Swapchain is being created with minImageCount set to %" PRIu32
", which means double buffering is going "
"to be used. Using double buffering and vsync locks rendering to an integer fraction of the vsync rate. In turn, "
"reducing the performance of the application if rendering is slower than vsync. Consider setting minImageCount to "
"3 to use triple buffering to maximize performance in such cases.",
pCreateInfo->minImageCount);
}
if (VendorCheckEnabled(kBPVendorArm) && (pCreateInfo->presentMode != VK_PRESENT_MODE_FIFO_KHR)) {
skip |= LogWarning(device, kVUID_BestPractices_CreateSwapchain_PresentMode,
"%s Warning: Swapchain is not being created with presentation mode \"VK_PRESENT_MODE_FIFO_KHR\". "
"Prefer using \"VK_PRESENT_MODE_FIFO_KHR\" to avoid unnecessary CPU and GPU load and save power. "
"Presentation modes which are not FIFO will present the latest available frame and discard other "
"frame(s) if any.",
VendorSpecificTag(kBPVendorArm));
}
return skip;
}
bool BestPractices::PreCallValidateCreateSharedSwapchainsKHR(VkDevice device, uint32_t swapchainCount,
const VkSwapchainCreateInfoKHR* pCreateInfos,
const VkAllocationCallbacks* pAllocator,
VkSwapchainKHR* pSwapchains) const {
bool skip = false;
for (uint32_t i = 0; i < swapchainCount; i++) {
if ((pCreateInfos[i].queueFamilyIndexCount > 1) && (pCreateInfos[i].imageSharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
skip |= LogWarning(
device, kVUID_BestPractices_SharingModeExclusive,
"Warning: A shared swapchain (index %" PRIu32
") is being created which specifies a sharing mode of VK_SHARING_MODE_EXCLUSIVE while specifying multiple "
"queues (queueFamilyIndexCount of %" PRIu32 ").",
i, pCreateInfos[i].queueFamilyIndexCount);
}
}
return skip;
}
bool BestPractices::PreCallValidateCreateRenderPass(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass) const {
bool skip = false;
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; ++i) {
VkFormat format = pCreateInfo->pAttachments[i].format;
if (pCreateInfo->pAttachments[i].initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
if ((FormatIsColor(format) || FormatHasDepth(format)) &&
pCreateInfo->pAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
skip |= LogWarning(device, kVUID_BestPractices_RenderPass_Attatchment,
"Render pass has an attachment with loadOp == VK_ATTACHMENT_LOAD_OP_LOAD and "
"initialLayout == VK_IMAGE_LAYOUT_UNDEFINED. This is probably not what you "
"intended. Consider using VK_ATTACHMENT_LOAD_OP_DONT_CARE instead if the "
"image truely is undefined at the start of the render pass.");
}
if (FormatHasStencil(format) && pCreateInfo->pAttachments[i].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
skip |= LogWarning(device, kVUID_BestPractices_RenderPass_Attatchment,
"Render pass has an attachment with stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD "
"and initialLayout == VK_IMAGE_LAYOUT_UNDEFINED. This is probably not what you "
"intended. Consider using VK_ATTACHMENT_LOAD_OP_DONT_CARE instead if the "
"image truely is undefined at the start of the render pass.");
}
}
const auto& attachment = pCreateInfo->pAttachments[i];
if (attachment.samples > VK_SAMPLE_COUNT_1_BIT) {
bool access_requires_memory =
attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || attachment.storeOp == VK_ATTACHMENT_STORE_OP_STORE;
if (FormatHasStencil(format)) {
access_requires_memory |= attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD ||
attachment.stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE;
}
if (access_requires_memory) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateRenderPass_ImageRequiresMemory,
"Attachment %u in the VkRenderPass is a multisampled image with %u samples, but it uses loadOp/storeOp "
"which requires accessing data from memory. Multisampled images should always be loadOp = CLEAR or DONT_CARE, "
"storeOp = DONT_CARE. This allows the implementation to use lazily allocated memory effectively.",
i, static_cast<uint32_t>(attachment.samples));
}
}
}
for (uint32_t dependency = 0; dependency < pCreateInfo->dependencyCount; dependency++) {
skip |= CheckPipelineStageFlags("vkCreateRenderPass", pCreateInfo->pDependencies[dependency].srcStageMask);
skip |= CheckPipelineStageFlags("vkCreateRenderPass", pCreateInfo->pDependencies[dependency].dstStageMask);
}
return skip;
}
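// Called from PreCallValidateCreateFramebuffer: flags attachments whose load/store ops require memory but are
// backed by TRANSIENT images, and (on GPUs that expose lazily allocated memory) attachments that could be
// transient but are not.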
bool BestPractices::ValidateAttachments(const VkRenderPassCreateInfo2* rpci, uint32_t attachmentCount,
const VkImageView* image_views) const {
bool skip = false;
// Check for non-transient attachments that should be transient and vice versa
for (uint32_t i = 0; i < attachmentCount; ++i) {
const auto& attachment = rpci->pAttachments[i];
bool attachment_should_be_transient =
(attachment.loadOp != VK_ATTACHMENT_LOAD_OP_LOAD && attachment.storeOp != VK_ATTACHMENT_STORE_OP_STORE);
if (FormatHasStencil(attachment.format)) {
attachment_should_be_transient &= (attachment.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_LOAD &&
attachment.stencilStoreOp != VK_ATTACHMENT_STORE_OP_STORE);
}
auto view_state = Get<IMAGE_VIEW_STATE>(image_views[i]);
if (view_state) {
const auto& ici = view_state->image_state->createInfo;
bool image_is_transient = (ici.usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) != 0;
// The check for an image that should not be transient applies to all GPUs
if (!attachment_should_be_transient && image_is_transient) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateFramebuffer_AttachmentShouldNotBeTransient,
"Attachment %u in VkFramebuffer uses loadOp/storeOps which need to access physical memory, "
"but the image backing the image view has VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. "
"Physical memory will need to be backed lazily to this image, potentially causing stalls.",
i);
}
bool supports_lazy = false;
for (uint32_t j = 0; j < phys_dev_mem_props.memoryTypeCount; j++) {
if (phys_dev_mem_props.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
supports_lazy = true;
}
}
// The check for an image that should be transient only applies to GPUs supporting
// lazily allocated memory
if (supports_lazy && attachment_should_be_transient && !image_is_transient) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateFramebuffer_AttachmentShouldBeTransient,
"Attachment %u in VkFramebuffer uses loadOp/storeOps which never have to be backed by physical memory, "
"but the image backing the image view does not have VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT set. "
"You can save physical memory by using transient attachment backed by lazily allocated memory here.",
i);
}
}
}
return skip;
}
bool BestPractices::PreCallValidateCreateFramebuffer(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer) const {
bool skip = false;
auto rp_state = Get<RENDER_PASS_STATE>(pCreateInfo->renderPass);
if (rp_state && !(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) {
skip = ValidateAttachments(rp_state->createInfo.ptr(), pCreateInfo->attachmentCount, pCreateInfo->pAttachments);
}
return skip;
}
bool BestPractices::PreCallValidateAllocateDescriptorSets(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo,
VkDescriptorSet* pDescriptorSets, void* ads_state_data) const {
bool skip = false;
skip |= ValidationStateTracker::PreCallValidateAllocateDescriptorSets(device, pAllocateInfo, pDescriptorSets, ads_state_data);
if (!skip) {
const auto pool_state = Get<bp_state::DescriptorPool>(pAllocateInfo->descriptorPool);
// if the number of freed sets > 0, it implies they could be recycled instead if desirable
// this warning is specific to Arm
if (VendorCheckEnabled(kBPVendorArm) && pool_state && (pool_state->freed_count > 0)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_AllocateDescriptorSets_SuboptimalReuse,
"%s Descriptor set memory was allocated via vkAllocateDescriptorSets() for sets which were previously freed in the "
"same logical device. On some drivers or architectures it may be most optimal to re-use existing descriptor sets.",
VendorSpecificTag(kBPVendorArm));
}
if (IsExtEnabled(device_extensions.vk_khr_maintenance1)) {
// Track number of descriptorSets allowable in this pool
if (pool_state && pool_state->GetAvailableSets() < pAllocateInfo->descriptorSetCount) {
skip |= LogWarning(pool_state->Handle(), kVUID_BestPractices_EmptyDescriptorPool,
"vkAllocateDescriptorSets(): Unable to allocate %" PRIu32 " descriptorSets from %s"
". This pool only has %" PRIu32 " descriptorSets remaining.",
pAllocateInfo->descriptorSetCount, report_data->FormatHandle(pool_state->Handle()).c_str(),
pool_state->GetAvailableSets());
}
}
}
return skip;
}
void BestPractices::ManualPostCallRecordAllocateDescriptorSets(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo,
VkDescriptorSet* pDescriptorSets, VkResult result, void* ads_state) {
if (result == VK_SUCCESS) {
auto pool_state = Get<bp_state::DescriptorPool>(pAllocateInfo->descriptorPool);
if (pool_state) {
// we record successful allocations by subtracting the allocation count from the last recorded free count
const auto alloc_count = pAllocateInfo->descriptorSetCount;
// clamp the unsigned subtraction to the range [0, last_free_count]
if (pool_state->freed_count > alloc_count) {
pool_state->freed_count -= alloc_count;
} else {
pool_state->freed_count = 0;
}
}
}
}
void BestPractices::PostCallRecordFreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount,
const VkDescriptorSet* pDescriptorSets, VkResult result) {
ValidationStateTracker::PostCallRecordFreeDescriptorSets(device, descriptorPool, descriptorSetCount, pDescriptorSets, result);
if (result == VK_SUCCESS) {
auto pool_state = Get<bp_state::DescriptorPool>(descriptorPool);
// we want to track frees because we're interested in suggesting re-use
if (pool_state) {
pool_state->freed_count += descriptorSetCount;
}
}
}
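// NVIDIA-specific bookkeeping for the allocation-reuse heuristic: before recording a new allocation, drop
// free events older than kAllocateMemoryReuseTimeThresholdNVIDIA so the container does not grow unbounded.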
void BestPractices::PreCallRecordAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) {
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
WriteLockGuard guard{memory_free_events_lock_};
// Release old allocations to avoid overpopulating the container
const auto now = std::chrono::high_resolution_clock::now();
const auto last_old = std::find_if(memory_free_events_.rbegin(), memory_free_events_.rend(), [now](const MemoryFreeEvent& event) {
return now - event.time > kAllocateMemoryReuseTimeThresholdNVIDIA;
});
memory_free_events_.erase(memory_free_events_.begin(), last_old.base());
}
}
bool BestPractices::PreCallValidateAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory) const {
bool skip = false;
if ((Count<DEVICE_MEMORY_STATE>() + 1) > kMemoryObjectWarningLimit) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_AllocateMemory_TooManyObjects,
"Performance Warning: This app has > %" PRIu32 " memory objects.", kMemoryObjectWarningLimit);
}
if (pAllocateInfo->allocationSize < kMinDeviceAllocationSize) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_AllocateMemory_SmallAllocation,
"vkAllocateMemory(): Allocating a VkDeviceMemory of size %" PRIu64 ". This is a very small allocation (current "
"threshold is %" PRIu64 " bytes). "
"You should make large allocations and sub-allocate from one large VkDeviceMemory.",
pAllocateInfo->allocationSize, kMinDeviceAllocationSize);
}
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
if (!IsExtEnabled(device_extensions.vk_ext_pageable_device_local_memory) &&
!LvlFindInChain<VkMemoryPriorityAllocateInfoEXT>(pAllocateInfo->pNext)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_AllocateMemory_SetPriority,
"%s Use VkMemoryPriorityAllocateInfoEXT to provide the operating system information on the allocations that "
"should stay in video memory and which should be demoted first when video memory is limited. "
"The highest priority should be given to GPU-written resources like color attachments, depth attachments, "
"storage images, and buffers written from the GPU.",
VendorSpecificTag(kBPVendorNVIDIA));
}
{
// Size in bytes for an allocation to be considered "compatible"
static constexpr VkDeviceSize size_threshold = VkDeviceSize{1} << 20;
ReadLockGuard guard{memory_free_events_lock_};
const auto now = std::chrono::high_resolution_clock::now();
const VkDeviceSize alloc_size = pAllocateInfo->allocationSize;
const uint32_t memory_type_index = pAllocateInfo->memoryTypeIndex;
const auto latest_event = std::find_if(memory_free_events_.rbegin(), memory_free_events_.rend(), [&](const MemoryFreeEvent& event) {
return (memory_type_index == event.memory_type_index) && (alloc_size <= event.allocation_size) &&
(alloc_size - event.allocation_size <= size_threshold) && (now - event.time < kAllocateMemoryReuseTimeThresholdNVIDIA);
});
if (latest_event != memory_free_events_.rend()) {
const auto time_delta = std::chrono::duration_cast<std::chrono::milliseconds>(now - latest_event->time);
if (time_delta < std::chrono::milliseconds{5}) {
skip |=
LogPerformanceWarning(device, kVUID_BestPractices_AllocateMemory_ReuseAllocations,
"%s Reuse memory allocations instead of releasing and reallocating. A memory allocation "
"has just been released, and it could have been reused in place of this allocation.",
VendorSpecificTag(kBPVendorNVIDIA));
} else {
const uint32_t seconds = static_cast<uint32_t>(time_delta.count() / 1000);
const uint32_t milliseconds = static_cast<uint32_t>(time_delta.count() % 1000);
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_AllocateMemory_ReuseAllocations,
"%s Reuse memory allocations instead of releasing and reallocating. A memory allocation has been released "
"%" PRIu32 ".%03" PRIu32 " seconds ago, and it could have been reused in place of this allocation.",
VendorSpecificTag(kBPVendorNVIDIA), seconds, milliseconds);
}
}
}
}
// TODO: Insert get check for GetPhysicalDeviceMemoryProperties once the state is tracked in the StateTracker
return skip;
}
void BestPractices::ManualPostCallRecordAllocateMemory(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo,
const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory,
VkResult result) {
if (result != VK_SUCCESS) {
static std::vector<VkResult> error_codes = {VK_ERROR_OUT_OF_HOST_MEMORY, VK_ERROR_OUT_OF_DEVICE_MEMORY,
VK_ERROR_TOO_MANY_OBJECTS, VK_ERROR_INVALID_EXTERNAL_HANDLE,
VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS};
static std::vector<VkResult> success_codes = {};
ValidateReturnCodes("vkAllocateMemory", result, error_codes, success_codes);
return;
}
}
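// Reports non-success VkResult values: expected/common failures (e.g. VK_ERROR_OUT_OF_DATE_KHR) are logged as
// info, other listed error codes as warnings, and non-error success codes as info.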
void BestPractices::ValidateReturnCodes(const char* api_name, VkResult result, const std::vector<VkResult>& error_codes,
const std::vector<VkResult>& success_codes) const {
auto error = std::find(error_codes.begin(), error_codes.end(), result);
if (error != error_codes.end()) {
static const std::vector<VkResult> common_failure_codes = {VK_ERROR_OUT_OF_DATE_KHR,
VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT};
auto common_failure = std::find(common_failure_codes.begin(), common_failure_codes.end(), result);
if (common_failure != common_failure_codes.end()) {
LogInfo(instance, kVUID_BestPractices_Failure_Result, "%s(): Returned error %s.", api_name, string_VkResult(result));
} else {
LogWarning(instance, kVUID_BestPractices_Error_Result, "%s(): Returned error %s.", api_name, string_VkResult(result));
}
return;
}
auto success = std::find(success_codes.begin(), success_codes.end(), result);
if (success != success_codes.end()) {
LogInfo(instance, kVUID_BestPractices_NonSuccess_Result, "%s(): Returned non-success return code %s.", api_name,
string_VkResult(result));
}
}
void BestPractices::PreCallRecordFreeMemory(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks* pAllocator) {
if (memory != VK_NULL_HANDLE && VendorCheckEnabled(kBPVendorNVIDIA)) {
auto mem_info = Get<DEVICE_MEMORY_STATE>(memory);
// Exclude memory free events on dedicated allocations, or imported/exported allocations.
if (!mem_info->IsDedicatedBuffer() && !mem_info->IsDedicatedImage() && !mem_info->IsExport() && !mem_info->IsImport()) {
MemoryFreeEvent event;
event.time = std::chrono::high_resolution_clock::now();
event.memory_type_index = mem_info->alloc_info.memoryTypeIndex;
event.allocation_size = mem_info->alloc_info.allocationSize;
WriteLockGuard guard{memory_free_events_lock_};
memory_free_events_.push_back(event);
}
}
ValidationStateTracker::PreCallRecordFreeMemory(device, memory, pAllocator);
}
bool BestPractices::PreCallValidateFreeMemory(VkDevice device, VkDeviceMemory memory,
const VkAllocationCallbacks* pAllocator) const {
if (memory == VK_NULL_HANDLE) return false;
bool skip = false;
auto mem_info = Get<DEVICE_MEMORY_STATE>(memory);
for (const auto& item : mem_info->ObjectBindings()) {
const auto& obj = item.first;
LogObjectList objlist(device);
objlist.add(obj);
objlist.add(mem_info->mem());
skip |= LogWarning(objlist, layer_name.c_str(), "VK Object %s still has a reference to mem obj %s.",
report_data->FormatHandle(obj).c_str(), report_data->FormatHandle(mem_info->mem()).c_str());
}
return skip;
}
bool BestPractices::ValidateBindBufferMemory(VkBuffer buffer, VkDeviceMemory memory, const char* api_name) const {
bool skip = false;
auto buffer_state = Get<BUFFER_STATE>(buffer);
if (!buffer_state->memory_requirements_checked && !buffer_state->external_memory_handle) {
skip |= LogWarning(device, kVUID_BestPractices_BufferMemReqNotCalled,
"%s: Binding memory to %s but vkGetBufferMemoryRequirements() has not been called on that buffer.",
api_name, report_data->FormatHandle(buffer).c_str());
}
auto mem_state = Get<DEVICE_MEMORY_STATE>(memory);
if (mem_state && mem_state->alloc_info.allocationSize == buffer_state->createInfo.size &&
mem_state->alloc_info.allocationSize < kMinDedicatedAllocationSize) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_SmallDedicatedAllocation,
"%s: Trying to bind %s to a memory block which is fully consumed by the buffer. "
"The required size of the allocation is %" PRIu64 ", but smaller buffers like this should be sub-allocated from "
"larger memory blocks. (Current threshold is %" PRIu64 " bytes.)",
api_name, report_data->FormatHandle(buffer).c_str(), mem_state->alloc_info.allocationSize, kMinDedicatedAllocationSize);
}
skip |= ValidateBindMemory(device, memory);
return skip;
}
bool BestPractices::PreCallValidateBindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
VkDeviceSize memoryOffset) const {
bool skip = false;
const char* api_name = "BindBufferMemory()";
skip |= ValidateBindBufferMemory(buffer, memory, api_name);
return skip;
}
bool BestPractices::PreCallValidateBindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
const VkBindBufferMemoryInfo* pBindInfos) const {
char api_name[64];
bool skip = false;
for (uint32_t i = 0; i < bindInfoCount; i++) {
snprintf(api_name, sizeof(api_name), "vkBindBufferMemory2() pBindInfos[%u]", i);
skip |= ValidateBindBufferMemory(pBindInfos[i].buffer, pBindInfos[i].memory, api_name);
}
return skip;
}
bool BestPractices::PreCallValidateBindBufferMemory2KHR(VkDevice device, uint32_t bindInfoCount,
const VkBindBufferMemoryInfo* pBindInfos) const {
char api_name[64];
bool skip = false;
for (uint32_t i = 0; i < bindInfoCount; i++) {
snprintf(api_name, sizeof(api_name), "vkBindBufferMemory2KHR() pBindInfos[%u]", i);
skip |= ValidateBindBufferMemory(pBindInfos[i].buffer, pBindInfos[i].memory, api_name);
}
return skip;
}
bool BestPractices::ValidateBindImageMemory(VkImage image, VkDeviceMemory memory, const char* api_name) const {
bool skip = false;
auto image_state = Get<IMAGE_STATE>(image);
if (image_state->disjoint == false) {
if (!image_state->memory_requirements_checked[0] && !image_state->external_memory_handle) {
skip |= LogWarning(device, kVUID_BestPractices_ImageMemReqNotCalled,
"%s: Binding memory to %s but vkGetImageMemoryRequirements() has not been called on that image.",
api_name, report_data->FormatHandle(image).c_str());
}
} else {
// TODO If binding disjoint image then this needs to check that VkImagePlaneMemoryRequirementsInfo was called for each
// plane.
}
auto mem_state = Get<DEVICE_MEMORY_STATE>(memory);
if (mem_state->alloc_info.allocationSize == image_state->requirements[0].size &&
mem_state->alloc_info.allocationSize < kMinDedicatedAllocationSize) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_SmallDedicatedAllocation,
"%s: Trying to bind %s to a memory block which is fully consumed by the image. "
"The required size of the allocation is %" PRIu64 ", but smaller images like this should be sub-allocated from "
"larger memory blocks. (Current threshold is %" PRIu64 " bytes.)",
api_name, report_data->FormatHandle(image).c_str(), mem_state->alloc_info.allocationSize, kMinDedicatedAllocationSize);
}
// If we're binding memory to an image which was created as TRANSIENT and the image supports LAZY allocation,
// make sure this type is actually used.
// This warning will only trigger if this layer is run on a platform that supports LAZILY_ALLOCATED_BIT
// (i.e. most tile-based renderers).
if (image_state->createInfo.usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) {
bool supports_lazy = false;
uint32_t suggested_type = 0;
for (uint32_t i = 0; i < phys_dev_mem_props.memoryTypeCount; i++) {
if ((1u << i) & image_state->requirements[0].memoryTypeBits) {
if (phys_dev_mem_props.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
supports_lazy = true;
suggested_type = i;
break;
}
}
}
uint32_t allocated_properties = phys_dev_mem_props.memoryTypes[mem_state->alloc_info.memoryTypeIndex].propertyFlags;
if (supports_lazy && (allocated_properties & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) == 0) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_NonLazyTransientImage,
"%s: Attempting to bind memory type %u to VkImage which was created with TRANSIENT_ATTACHMENT_BIT,"
"but this memory type is not LAZILY_ALLOCATED_BIT. You should use memory type %u here instead to save "
"%" PRIu64 " bytes of physical memory.",
api_name, mem_state->alloc_info.memoryTypeIndex, suggested_type, image_state->requirements[0].size);
}
}
skip |= ValidateBindMemory(device, memory);
return skip;
}
bool BestPractices::PreCallValidateBindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,
VkDeviceSize memoryOffset) const {
bool skip = false;
const char* api_name = "vkBindImageMemory()";
skip |= ValidateBindImageMemory(image, memory, api_name);
return skip;
}
bool BestPractices::PreCallValidateBindImageMemory2(VkDevice device, uint32_t bindInfoCount,
const VkBindImageMemoryInfo* pBindInfos) const {
char api_name[64];
bool skip = false;
for (uint32_t i = 0; i < bindInfoCount; i++) {
snprintf(api_name, sizeof(api_name), "vkBindImageMemory2() pBindInfos[%u]", i);
if (!LvlFindInChain<VkBindImageMemorySwapchainInfoKHR>(pBindInfos[i].pNext)) {
skip |= ValidateBindImageMemory(pBindInfos[i].image, pBindInfos[i].memory, api_name);
}
}
return skip;
}
bool BestPractices::PreCallValidateBindImageMemory2KHR(VkDevice device, uint32_t bindInfoCount,
const VkBindImageMemoryInfo* pBindInfos) const {
char api_name[64];
bool skip = false;
for (uint32_t i = 0; i < bindInfoCount; i++) {
snprintf(api_name, sizeof(api_name), "vkBindImageMemory2KHR() pBindInfos[%u]", i);
skip |= ValidateBindImageMemory(pBindInfos[i].image, pBindInfos[i].memory, api_name);
}
return skip;
}
void BestPractices::PreCallRecordSetDeviceMemoryPriorityEXT(VkDevice device, VkDeviceMemory memory, float priority) {
auto mem_info = std::static_pointer_cast<bp_state::DeviceMemory>(Get<DEVICE_MEMORY_STATE>(memory));
mem_info->dynamic_priority.emplace(priority);
}
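// Returns false for floating-point color formats, which ValidateMultisampledBlendingArm treats as formats
// that cannot be blended at full throughput on Arm GPUs when MSAA is enabled.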
static inline bool FormatHasFullThroughputBlendingArm(VkFormat format) {
switch (format) {
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
return false;
default:
return true;
}
}
bool BestPractices::ValidateMultisampledBlendingArm(uint32_t createInfoCount,
const VkGraphicsPipelineCreateInfo* pCreateInfos) const {
bool skip = false;
for (uint32_t i = 0; i < createInfoCount; i++) {
auto create_info = &pCreateInfos[i];
if (!create_info->pColorBlendState || !create_info->pMultisampleState ||
create_info->pMultisampleState->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
create_info->pMultisampleState->sampleShadingEnable) {
continue;
}
auto rp_state = Get<RENDER_PASS_STATE>(create_info->renderPass);
const auto& subpass = rp_state->createInfo.pSubpasses[create_info->subpass];
// According to spec, pColorBlendState must be ignored if subpass does not have color attachments.
uint32_t num_color_attachments = std::min(subpass.colorAttachmentCount, create_info->pColorBlendState->attachmentCount);
for (uint32_t j = 0; j < num_color_attachments; j++) {
const auto& blend_att = create_info->pColorBlendState->pAttachments[j];
uint32_t att = subpass.pColorAttachments[j].attachment;
if (att != VK_ATTACHMENT_UNUSED && blend_att.blendEnable && blend_att.colorWriteMask) {
if (!FormatHasFullThroughputBlendingArm(rp_state->createInfo.pAttachments[att].format)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MultisampledBlending,
"%s vkCreateGraphicsPipelines() - createInfo #%u: Pipeline is multisampled and "
"color attachment #%u makes use "
"of a format which cannot be blended at full throughput when using MSAA.",
VendorSpecificTag(kBPVendorArm), i, j);
}
}
}
}
return skip;
}
void BestPractices::ManualPostCallRecordCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
const VkComputePipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
VkResult result, void* pipe_state) {
// AMD best practice
pipeline_cache_ = pipelineCache;
}
bool BestPractices::PreCallValidateCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
const VkGraphicsPipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* cgpl_state_data) const {
bool skip = StateTracker::PreCallValidateCreateGraphicsPipelines(device, pipelineCache, createInfoCount, pCreateInfos,
pAllocator, pPipelines, cgpl_state_data);
if (skip) {
return skip;
}
create_graphics_pipeline_api_state* cgpl_state = reinterpret_cast<create_graphics_pipeline_api_state*>(cgpl_state_data);
if ((createInfoCount > 1) && (!pipelineCache)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreatePipelines_MultiplePipelines,
"Performance Warning: This vkCreateGraphicsPipelines call is creating multiple pipelines but is not using a "
"pipeline cache, which may help with performance");
}
for (uint32_t i = 0; i < createInfoCount; i++) {
const auto& create_info = pCreateInfos[i];
if (!(cgpl_state->pipe_state[i]->active_shaders & VK_SHADER_STAGE_MESH_BIT_NV) && create_info.pVertexInputState) {
const auto& vertex_input = *create_info.pVertexInputState;
uint32_t count = 0;
for (uint32_t j = 0; j < vertex_input.vertexBindingDescriptionCount; j++) {
if (vertex_input.pVertexBindingDescriptions[j].inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
count++;
}
}
if (count > kMaxInstancedVertexBuffers) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreatePipelines_TooManyInstancedVertexBuffers,
"The pipeline is using %u instanced vertex buffers (current limit: %u), but this can be inefficient on the "
"GPU. If using instanced vertex attributes prefer interleaving them in a single buffer.",
count, kMaxInstancedVertexBuffers);
}
}
if ((pCreateInfos[i].pRasterizationState) && (pCreateInfos[i].pRasterizationState->depthBiasEnable) &&
(pCreateInfos[i].pRasterizationState->depthBiasConstantFactor == 0.0f) &&
(pCreateInfos[i].pRasterizationState->depthBiasSlopeFactor == 0.0f) && VendorCheckEnabled(kBPVendorArm)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreatePipelines_DepthBias_Zero,
"%s Performance Warning: This vkCreateGraphicsPipelines call is created with depthBiasEnable set to true "
"and both depthBiasConstantFactor and depthBiasSlopeFactor are set to 0. This can cause reduced "
"efficiency during rasterization. Consider disabling depthBias or increasing either "
"depthBiasConstantFactor or depthBiasSlopeFactor.",
VendorSpecificTag(kBPVendorArm));
}
skip |= VendorCheckEnabled(kBPVendorArm) && ValidateMultisampledBlendingArm(createInfoCount, pCreateInfos);
}
if (VendorCheckEnabled(kBPVendorAMD) || VendorCheckEnabled(kBPVendorNVIDIA)) {
auto prev_pipeline = pipeline_cache_.load();
if (pipelineCache && prev_pipeline && pipelineCache != prev_pipeline) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MultiplePipelineCaches,
"%s %s Performance Warning: A second pipeline cache is in use. "
"Consider using only one pipeline cache to improve cache hit rate.",
VendorSpecificTag(kBPVendorAMD), VendorSpecificTag(kBPVendorNVIDIA));
}
}
if (VendorCheckEnabled(kBPVendorAMD)) {
if (num_pso_ > kMaxRecommendedNumberOfPSOAMD) {
skip |=
LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_TooManyPipelines,
"%s Performance warning: Too many pipelines created, consider consolidation",
VendorSpecificTag(kBPVendorAMD));
}
if (pCreateInfos->pInputAssemblyState && pCreateInfos->pInputAssemblyState->primitiveRestartEnable) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_AvoidPrimitiveRestart,
"%s Performance warning: Use of primitive restart is not recommended",
VendorSpecificTag(kBPVendorAMD));
}
// TODO: this might be too aggressive of a check
if (pCreateInfos->pDynamicState && pCreateInfos->pDynamicState->dynamicStateCount > kDynamicStatesWarningLimitAMD) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreatePipelines_MinimizeNumDynamicStates,
"%s Performance warning: Dynamic States usage incurs a performance cost. Ensure that they are truly needed",
VendorSpecificTag(kBPVendorAMD));
}
}
return skip;
}
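// Collects the framebuffer attachments (and aspects) a graphics pipeline can write or test, based on its
// color blend and depth/stencil state; returns an empty list when there is no render pass state or when
// dynamic rendering is used.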
static std::vector<bp_state::AttachmentInfo> GetAttachmentAccess(const safe_VkGraphicsPipelineCreateInfo& create_info,
std::shared_ptr<const RENDER_PASS_STATE>& rp) {
std::vector<bp_state::AttachmentInfo> result;
if (!rp || rp->UsesDynamicRendering()) {
return result;
}
const auto& subpass = rp->createInfo.pSubpasses[create_info.subpass];
// NOTE: see PIPELINE_LAYOUT and safe_VkGraphicsPipelineCreateInfo constructors. pColorBlendState and pDepthStencilState
// are only non-null if they are enabled.
if (create_info.pColorBlendState) {
// According to spec, pColorBlendState must be ignored if subpass does not have color attachments.
uint32_t num_color_attachments = std::min(subpass.colorAttachmentCount, create_info.pColorBlendState->attachmentCount);
for (uint32_t j = 0; j < num_color_attachments; j++) {
if (create_info.pColorBlendState->pAttachments[j].colorWriteMask != 0) {
uint32_t attachment = subpass.pColorAttachments[j].attachment;
if (attachment != VK_ATTACHMENT_UNUSED) {
result.push_back({attachment, VK_IMAGE_ASPECT_COLOR_BIT});
}
}
}
}
if (create_info.pDepthStencilState &&
(create_info.pDepthStencilState->depthTestEnable || create_info.pDepthStencilState->depthBoundsTestEnable ||
create_info.pDepthStencilState->stencilTestEnable)) {
uint32_t attachment = subpass.pDepthStencilAttachment ? subpass.pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
if (attachment != VK_ATTACHMENT_UNUSED) {
VkImageAspectFlags aspects = 0;
if (create_info.pDepthStencilState->depthTestEnable || create_info.pDepthStencilState->depthBoundsTestEnable) {
aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (create_info.pDepthStencilState->stencilTestEnable) {
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
result.push_back({attachment, aspects});
}
}
return result;
}
bp_state::Pipeline::Pipeline(const ValidationStateTracker* state_data, const VkGraphicsPipelineCreateInfo* pCreateInfo,
std::shared_ptr<const RENDER_PASS_STATE>&& rpstate,
std::shared_ptr<const PIPELINE_LAYOUT_STATE>&& layout)
: PIPELINE_STATE(state_data, pCreateInfo, std::move(rpstate), std::move(layout)),
access_framebuffer_attachments(GetAttachmentAccess(create_info.graphics, rp_state)) {}
std::shared_ptr<PIPELINE_STATE> BestPractices::CreateGraphicsPipelineState(
const VkGraphicsPipelineCreateInfo* pCreateInfo, std::shared_ptr<const RENDER_PASS_STATE>&& render_pass,
std::shared_ptr<const PIPELINE_LAYOUT_STATE>&& layout) const {
return std::static_pointer_cast<PIPELINE_STATE>(
std::make_shared<bp_state::Pipeline>(this, pCreateInfo, std::move(render_pass), std::move(layout)));
}
void BestPractices::ManualPostCallRecordCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
const VkGraphicsPipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
VkResult result, void* cgpl_state_data) {
// AMD best practice
pipeline_cache_ = pipelineCache;
}
bool BestPractices::PreCallValidateCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount,
const VkComputePipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines,
void* ccpl_state_data) const {
bool skip = StateTracker::PreCallValidateCreateComputePipelines(device, pipelineCache, createInfoCount, pCreateInfos,
pAllocator, pPipelines, ccpl_state_data);
if ((createInfoCount > 1) && (!pipelineCache)) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreatePipelines_MultiplePipelines,
"Performance Warning: This vkCreateComputePipelines call is creating multiple pipelines but is not using a "
"pipeline cache, which may help with performance");
}
if (VendorCheckEnabled(kBPVendorAMD)) {
auto prev_pipeline = pipeline_cache_.load();
if (pipelineCache && prev_pipeline && pipelineCache != prev_pipeline) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreatePipelines_MultiplePipelines,
"%s Performance Warning: A second pipeline cache is in use. Consider using only one pipeline cache to "
"improve cache hit rate",
VendorSpecificTag(kBPVendorAMD));
}
}
for (uint32_t i = 0; i < createInfoCount; i++) {
const VkComputePipelineCreateInfo& createInfo = pCreateInfos[i];
if (VendorCheckEnabled(kBPVendorArm)) {
skip |= ValidateCreateComputePipelineArm(createInfo);
}
if (IsExtEnabled(device_extensions.vk_khr_maintenance4)) {
auto module_state = Get<SHADER_MODULE_STATE>(createInfo.stage.module);
for (const Instruction* inst : module_state->GetBuiltinDecorationList()) {
if (inst->GetBuiltIn() == spv::BuiltInWorkgroupSize) {
skip |= LogWarning(device, kVUID_BestPractices_SpirvDeprecated_WorkgroupSize,
"vkCreateComputePipelines(): pCreateInfos[ %" PRIu32
"] is using the Workgroup built-in which SPIR-V 1.6 deprecated. The VK_KHR_maintenance4 "
"extension exposes a new LocalSizeId execution mode that should be used instead.",
i);
}
}
}
}
return skip;
}
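// Arm-specific compute pipeline checks: warns on work group sizes above kMaxEfficientWorkGroupThreadCountArm,
// on dimensions not aligned to kThreadGroupDispatchCountAlignmentArm, and on 1D work groups that access
// 2D/3D images (poor spatial locality).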
bool BestPractices::ValidateCreateComputePipelineArm(const VkComputePipelineCreateInfo& createInfo) const {
bool skip = false;
auto module_state = Get<SHADER_MODULE_STATE>(createInfo.stage.module);
// Generate warnings about work group sizes based on active resources.
auto entrypoint_optional = module_state->FindEntrypoint(createInfo.stage.pName, createInfo.stage.stage);
if (!entrypoint_optional) return false;
const Instruction& entrypoint = *entrypoint_optional;
uint32_t x = 1, y = 1, z = 1;
module_state->FindLocalSize(entrypoint, x, y, z);
uint32_t thread_count = x * y * z;
// Generate a priori warnings about work group sizes.
if (thread_count > kMaxEfficientWorkGroupThreadCountArm) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateComputePipelines_ComputeWorkGroupSize,
"%s vkCreateComputePipelines(): compute shader with work group dimensions (%u, %u, "
"%u) (%u threads total), has more threads than advised in a single work group. It is advised to use work "
"groups with less than %u threads, especially when using barrier() or shared memory.",
VendorSpecificTag(kBPVendorArm), x, y, z, thread_count, kMaxEfficientWorkGroupThreadCountArm);
}
if (thread_count == 1 || ((x > 1) && (x & (kThreadGroupDispatchCountAlignmentArm - 1))) ||
((y > 1) && (y & (kThreadGroupDispatchCountAlignmentArm - 1))) ||
((z > 1) && (z & (kThreadGroupDispatchCountAlignmentArm - 1)))) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreateComputePipelines_ComputeThreadGroupAlignment,
"%s vkCreateComputePipelines(): compute shader with work group dimensions (%u, "
"%u, %u) is not aligned to %u "
"threads. On Arm Mali architectures, not aligning work group sizes to %u may "
"leave threads idle on the shader "
"core.",
VendorSpecificTag(kBPVendorArm), x, y, z, kThreadGroupDispatchCountAlignmentArm,
kThreadGroupDispatchCountAlignmentArm);
}
auto accessible_ids = module_state->MarkAccessibleIds(entrypoint_optional);
auto descriptor_uses = module_state->CollectInterfaceByDescriptorSlot(accessible_ids);
unsigned dimensions = 0;
if (x > 1) dimensions++;
if (y > 1) dimensions++;
if (z > 1) dimensions++;
// Here the dimension will really depend on the dispatch grid, but assume it's 1D.
dimensions = std::max(dimensions, 1u);
// If we're accessing images, we almost certainly want to have a 2D workgroup for cache reasons.
// There are some false positives here. We could simply have a shader that does this within a 1D grid,
// or we may have a linearly tiled image, but these cases are quite unlikely in practice.
bool accesses_2d = false;
for (const auto& usage : descriptor_uses) {
auto dim = module_state->GetShaderResourceDimensionality(usage.second);
if (dim < 0) continue;
auto spvdim = spv::Dim(dim);
if (spvdim != spv::Dim1D && spvdim != spv::DimBuffer) accesses_2d = true;
}
if (accesses_2d && dimensions < 2) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CreateComputePipelines_ComputeSpatialLocality,
"%s vkCreateComputePipelines(): compute shader has work group dimensions (%u, %u, %u), which "
"suggests a 1D dispatch, but the shader is accessing 2D or 3D images. The shader may be "
"exhibiting poor spatial locality with respect to one or more shader resources.",
VendorSpecificTag(kBPVendorArm), x, y, z);
}
return skip;
}
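// Warn when a synchronization call uses the catch-all VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT or
// VK_PIPELINE_STAGE_ALL_COMMANDS_BIT stage masks (and their synchronization2 equivalents below).
// Prefer the narrowest stages that cover the dependency, e.g. COLOR_ATTACHMENT_OUTPUT -> FRAGMENT_SHADER
// for a render-to-texture hazard, rather than ALL_COMMANDS -> ALL_COMMANDS.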
bool BestPractices::CheckPipelineStageFlags(const std::string& api_name, VkPipelineStageFlags flags) const {
bool skip = false;
if (flags & VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT) {
skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
"You are using VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT when %s is called\n", api_name.c_str());
} else if (flags & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) {
skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
"You are using VK_PIPELINE_STAGE_ALL_COMMANDS_BIT when %s is called\n", api_name.c_str());
}
return skip;
}
bool BestPractices::CheckPipelineStageFlags(const std::string& api_name, VkPipelineStageFlags2KHR flags) const {
bool skip = false;
if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR) {
skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
"You are using VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR when %s is called\n", api_name.c_str());
} else if (flags & VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR) {
skip |= LogWarning(device, kVUID_BestPractices_PipelineStageFlags,
"You are using VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR when %s is called\n", api_name.c_str());
}
return skip;
}
bool BestPractices::CheckDependencyInfo(const std::string& api_name, const VkDependencyInfoKHR& dep_info) const {
bool skip = false;
auto stage_masks = sync_utils::GetGlobalStageMasks(dep_info);
skip |= CheckPipelineStageFlags(api_name, stage_masks.src);
skip |= CheckPipelineStageFlags(api_name, stage_masks.dst);
for (uint32_t i = 0; i < dep_info.imageMemoryBarrierCount; ++i) {
skip |= ValidateImageMemoryBarrier(
api_name, dep_info.pImageMemoryBarriers[i].oldLayout, dep_info.pImageMemoryBarriers[i].newLayout,
dep_info.pImageMemoryBarriers[i].srcAccessMask, dep_info.pImageMemoryBarriers[i].dstAccessMask,
dep_info.pImageMemoryBarriers[i].subresourceRange.aspectMask);
}
return skip;
}
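// Warns for every swapchain whose present result is VK_SUBOPTIMAL_KHR and, as end-of-frame cleanup for
// the AMD checks, resets the per-frame submission and barrier counters and the set of pipelines used
// this frame.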
void BestPractices::ManualPostCallRecordQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo, VkResult result) {
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; ++i) {
auto swapchains_result = pPresentInfo->pResults ? pPresentInfo->pResults[i] : result;
if (swapchains_result == VK_SUBOPTIMAL_KHR) {
LogPerformanceWarning(
pPresentInfo->pSwapchains[i], kVUID_BestPractices_SuboptimalSwapchain,
"vkQueuePresentKHR: %s :VK_SUBOPTIMAL_KHR was returned. VK_SUBOPTIMAL_KHR - Presentation will still succeed, "
"subject to the window resize behavior, but the swapchain is no longer configured optimally for the surface it "
"targets. Applications should query updated surface information and recreate their swapchain at the next "
"convenient opportunity.",
report_data->FormatHandle(pPresentInfo->pSwapchains[i]).c_str());
}
}
// AMD best practice
// end-of-frame cleanup
num_queue_submissions_ = 0;
num_barriers_objects_ = 0;
ClearPipelinesUsedInFrame();
}
bool BestPractices::PreCallValidateQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits,
VkFence fence) const {
bool skip = false;
for (uint32_t submit = 0; submit < submitCount; submit++) {
for (uint32_t semaphore = 0; semaphore < pSubmits[submit].waitSemaphoreCount; semaphore++) {
skip |= CheckPipelineStageFlags("vkQueueSubmit", pSubmits[submit].pWaitDstStageMask[semaphore]);
}
if (pSubmits[submit].signalSemaphoreCount == 0 && pSubmits[submit].pSignalSemaphores != nullptr) {
skip |=
LogWarning(device, kVUID_BestPractices_SemaphoreCount,
"pSubmits[%" PRIu32 "].pSignalSemaphores is set, but pSubmits[%" PRIu32 "].signalSemaphoreCount is 0.",
submit, submit);
}
if (pSubmits[submit].waitSemaphoreCount == 0 && pSubmits[submit].pWaitSemaphores != nullptr) {
skip |= LogWarning(device, kVUID_BestPractices_SemaphoreCount,
"pSubmits[%" PRIu32 "].pWaitSemaphores is set, but pSubmits[%" PRIu32 "].waitSemaphoreCount is 0.",
submit, submit);
}
}
return skip;
}
bool BestPractices::PreCallValidateQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2KHR* pSubmits,
VkFence fence) const {
bool skip = false;
for (uint32_t submit = 0; submit < submitCount; submit++) {
for (uint32_t semaphore = 0; semaphore < pSubmits[submit].waitSemaphoreInfoCount; semaphore++) {
skip |= CheckPipelineStageFlags("vkQueueSubmit2KHR", pSubmits[submit].pWaitSemaphoreInfos[semaphore].stageMask);
}
}
return skip;
}
bool BestPractices::PreCallValidateQueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits,
VkFence fence) const {
bool skip = false;
for (uint32_t submit = 0; submit < submitCount; submit++) {
for (uint32_t semaphore = 0; semaphore < pSubmits[submit].waitSemaphoreInfoCount; semaphore++) {
skip |= CheckPipelineStageFlags("vkQueueSubmit2", pSubmits[submit].pWaitSemaphoreInfos[semaphore].stageMask);
}
}
return skip;
}
bool BestPractices::PreCallValidateCreateCommandPool(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool) const {
bool skip = false;
if (pCreateInfo->flags & VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT) {
skip |= LogPerformanceWarning(
device, kVUID_BestPractices_CreateCommandPool_CommandBufferReset,
"vkCreateCommandPool(): VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT is set. Consider resetting entire "
"pool instead.");
}
return skip;
}
void BestPractices::PreCallRecordBeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo* pBeginInfo) {
StateTracker::PreCallRecordBeginCommandBuffer(commandBuffer, pBeginInfo);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
if (!cb) return;
cb->num_submits = 0;
cb->is_one_time_submit = (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) != 0;
}
bool BestPractices::PreCallValidateBeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo* pBeginInfo) const {
bool skip = false;
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_BeginCommandBuffer_SimultaneousUse,
"vkBeginCommandBuffer(): VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT is set.");
}
if (VendorCheckEnabled(kBPVendorArm)) {
if (!(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_BeginCommandBuffer_OneTimeSubmit,
"%s vkBeginCommandBuffer(): VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT is not set. "
"For best performance on Mali GPUs, consider setting ONE_TIME_SUBMIT by default.",
VendorSpecificTag(kBPVendorArm));
}
}
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
auto cb = GetRead<bp_state::CommandBuffer>(commandBuffer);
if (cb->num_submits == 1 && !cb->is_one_time_submit) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_BeginCommandBuffer_OneTimeSubmit,
"%s vkBeginCommandBuffer(): VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT was not set "
"and the command buffer has only been submitted once. "
"For best performance on NVIDIA GPUs, use ONE_TIME_SUBMIT.",
VendorSpecificTag(kBPVendorNVIDIA));
}
}
return skip;
}
bool BestPractices::PreCallValidateCmdSetEvent(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdSetEvent", stageMask);
return skip;
}
bool BestPractices::PreCallValidateCmdSetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event,
const VkDependencyInfoKHR* pDependencyInfo) const {
return CheckDependencyInfo("vkCmdSetEvent2KHR", *pDependencyInfo);
}
bool BestPractices::PreCallValidateCmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent event,
const VkDependencyInfo* pDependencyInfo) const {
return CheckDependencyInfo("vkCmdSetEvent2", *pDependencyInfo);
}
bool BestPractices::PreCallValidateCmdResetEvent(VkCommandBuffer commandBuffer, VkEvent event,
VkPipelineStageFlags stageMask) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdResetEvent", stageMask);
return skip;
}
bool BestPractices::PreCallValidateCmdResetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event,
VkPipelineStageFlags2KHR stageMask) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdResetEvent2KHR", stageMask);
return skip;
}
bool BestPractices::PreCallValidateCmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent event,
VkPipelineStageFlags2 stageMask) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdResetEvent2", stageMask);
return skip;
}
bool BestPractices::PreCallValidateCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdWaitEvents", srcStageMask);
skip |= CheckPipelineStageFlags("vkCmdWaitEvents", dstStageMask);
return skip;
}
bool BestPractices::PreCallValidateCmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
const VkDependencyInfoKHR* pDependencyInfos) const {
bool skip = false;
for (uint32_t i = 0; i < eventCount; i++) {
skip = CheckDependencyInfo("vkCmdWaitEvents2KHR", pDependencyInfos[i]);
}
return skip;
}
bool BestPractices::PreCallValidateCmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents,
const VkDependencyInfo* pDependencyInfos) const {
bool skip = false;
for (uint32_t i = 0; i < eventCount; i++) {
skip = CheckDependencyInfo("vkCmdWaitEvents2", pDependencyInfos[i]);
}
return skip;
}
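// Checks that an access mask only contains access bits that are meaningful for the given image layout
// and aspect; for some layouts (e.g. UNDEFINED) an empty access mask is also acceptable (none_allowed).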
bool BestPractices::ValidateAccessLayoutCombination(const std::string& api_name, VkAccessFlags2 access, VkImageLayout layout,
VkImageAspectFlags aspect) const {
bool skip = false;
const VkAccessFlags all = VK_ACCESS_FLAG_BITS_MAX_ENUM;
bool none_allowed = false;
VkAccessFlags allowed = 0;
// Combinations taken from https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/2918
switch (layout) {
case VK_IMAGE_LAYOUT_UNDEFINED:
allowed = all;
none_allowed = true;
break;
case VK_IMAGE_LAYOUT_GENERAL:
allowed = all;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
allowed = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
allowed = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
allowed = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
allowed = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
allowed = VK_ACCESS_TRANSFER_READ_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
allowed = VK_ACCESS_TRANSFER_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_PREINITIALIZED:
allowed = VK_ACCESS_HOST_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
allowed |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
}
if (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
allowed |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL:
if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
allowed |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
if (aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
allowed |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
allowed = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
allowed = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
break;
case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
allowed = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL:
allowed = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
break;
case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
allowed = VK_ACCESS_NONE;
break;
case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
allowed = all;
none_allowed = true;
break;
case VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV:
allowed = VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV;
break;
case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT:
allowed = VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT;
break;
default:
// If a new layout is added, will need to manually add it
return false;
}
if (access == 0 && !none_allowed) {
skip |= LogWarning(device, kVUID_BestPractices_ImageBarrierAccessLayout,
"%s: accessMask is VK_ACCESS_NONE, but for layout %s expected accessMask are %s.", api_name.c_str(),
string_VkImageLayout(layout), string_VkAccessFlags2(allowed).c_str());
} else if ((allowed | access) != allowed) {
skip |=
LogWarning(device, kVUID_BestPractices_ImageBarrierAccessLayout,
"%s: accessMask is %s, but for layout %s expected accessMask are %s.", string_VkAccessFlags2(access).c_str(),
api_name.c_str(), string_VkImageLayout(layout), string_VkAccessFlags2(allowed).c_str());
}
return skip;
}
bool BestPractices::ValidateImageMemoryBarrier(const std::string& api_name, VkImageLayout oldLayout, VkImageLayout newLayout,
VkAccessFlags2 srcAccessMask, VkAccessFlags2 dstAccessMask,
VkImageAspectFlags aspectMask) const {
bool skip = false;
if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && IsImageLayoutReadOnly(newLayout)) {
skip |= LogWarning(device, kVUID_BestPractices_TransitionUndefinedToReadOnly,
"VkImageMemoryBarrier is being submitted with oldLayout VK_IMAGE_LAYOUT_UNDEFINED and the contents "
"may be discarded, but the newLayout is %s, which is read only.",
string_VkImageLayout(newLayout));
}
skip |= ValidateAccessLayoutCombination(api_name, srcAccessMask, oldLayout, aspectMask);
skip |= ValidateAccessLayoutCombination(api_name, dstAccessMask, newLayout, aspectMask);
return skip;
}
bool BestPractices::PreCallValidateCmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdPipelineBarrier", srcStageMask);
skip |= CheckPipelineStageFlags("vkCmdPipelineBarrier", dstStageMask);
for (uint32_t i = 0; i < imageMemoryBarrierCount; ++i) {
skip |=
ValidateImageMemoryBarrier("vkCmdPipelineBarrier", pImageMemoryBarriers[i].oldLayout, pImageMemoryBarriers[i].newLayout,
pImageMemoryBarriers[i].srcAccessMask, pImageMemoryBarriers[i].dstAccessMask,
pImageMemoryBarriers[i].subresourceRange.aspectMask);
}
if (VendorCheckEnabled(kBPVendorAMD)) {
auto num = num_barriers_objects_.load();
if (num + imageMemoryBarrierCount + bufferMemoryBarrierCount > kMaxRecommendedBarriersSizeAMD) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_CmdBuffer_highBarrierCount,
"%s Performance warning: In this frame, %" PRIu32
" barriers were already submitted. Barriers have a high cost and can "
"stall the GPU. "
"Consider consolidating and re-organizing the frame to use fewer barriers.",
VendorSpecificTag(kBPVendorAMD), num);
}
}
if (VendorCheckEnabled(kBPVendorAMD) || VendorCheckEnabled(kBPVendorNVIDIA)) {
static constexpr std::array<VkImageLayout, 3> read_layouts = {
VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
};
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
// read to read barriers
const auto &image_barrier = pImageMemoryBarriers[i];
bool old_is_read_layout = std::find(read_layouts.begin(), read_layouts.end(), image_barrier.oldLayout) != read_layouts.end();
bool new_is_read_layout = std::find(read_layouts.begin(), read_layouts.end(), image_barrier.newLayout) != read_layouts.end();
if (old_is_read_layout && new_is_read_layout) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_PipelineBarrier_readToReadBarrier,
"%s %s Performance warning: Don't issue read-to-read barriers. "
"Get the resource in the right state the first time you use it.",
VendorSpecificTag(kBPVendorAMD), VendorSpecificTag(kBPVendorNVIDIA));
}
// general with no storage
if (VendorCheckEnabled(kBPVendorAMD) && image_barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) {
auto image_state = Get<IMAGE_STATE>(pImageMemoryBarriers[i].image);
if (!(image_state->createInfo.usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
skip |= LogPerformanceWarning(device, kVUID_BestPractices_vkImage_AvoidGeneral,
"%s Performance warning: VK_IMAGE_LAYOUT_GENERAL should only be used with "
"VK_IMAGE_USAGE_STORAGE_BIT images.",
VendorSpecificTag(kBPVendorAMD));
}
}
}
}
for (uint32_t i = 0; i < imageMemoryBarrierCount; ++i) {
skip |= ValidateCmdPipelineBarrierImageBarrier(commandBuffer, pImageMemoryBarriers[i]);
}
return skip;
}
bool BestPractices::PreCallValidateCmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
const VkDependencyInfoKHR* pDependencyInfo) const {
bool skip = false;
skip |= CheckDependencyInfo("vkCmdPipelineBarrier2KHR", *pDependencyInfo);
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; ++i) {
skip |= ValidateCmdPipelineBarrierImageBarrier(commandBuffer, pDependencyInfo->pImageMemoryBarriers[i]);
}
return skip;
}
bool BestPractices::PreCallValidateCmdPipelineBarrier2(VkCommandBuffer commandBuffer,
const VkDependencyInfo* pDependencyInfo) const {
bool skip = false;
skip |= CheckDependencyInfo("vkCmdPipelineBarrier2", *pDependencyInfo);
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; ++i) {
skip |= ValidateCmdPipelineBarrierImageBarrier(commandBuffer, pDependencyInfo->pImageMemoryBarriers[i]);
}
return skip;
}
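// Shared by the sync1 and sync2 pipeline barrier entry points. On NVIDIA, a transition out of
// VK_IMAGE_LAYOUT_UNDEFINED discards the image contents, so the Z-cull draw-direction statistics
// accumulated for the affected subresources are validated at that point (see ValidateZcull).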
template <typename ImageMemoryBarrier>
bool BestPractices::ValidateCmdPipelineBarrierImageBarrier(VkCommandBuffer commandBuffer,
const ImageMemoryBarrier& barrier) const {
bool skip = false;
const auto cmd_state = GetRead<bp_state::CommandBuffer>(commandBuffer);
assert(cmd_state);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
if (barrier.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && barrier.newLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
skip |= ValidateZcull(*cmd_state, barrier.image, barrier.subresourceRange);
}
}
return skip;
}
bool BestPractices::PreCallValidateCmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
VkQueryPool queryPool, uint32_t query) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdWriteTimestamp", static_cast<VkPipelineStageFlags>(pipelineStage));
return skip;
}
bool BestPractices::PreCallValidateCmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer, VkPipelineStageFlags2KHR pipelineStage,
VkQueryPool queryPool, uint32_t query) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdWriteTimestamp2KHR", pipelineStage);
return skip;
}
bool BestPractices::PreCallValidateCmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 pipelineStage,
VkQueryPool queryPool, uint32_t query) const {
bool skip = false;
skip |= CheckPipelineStageFlags("vkCmdWriteTimestamp2", pipelineStage);
return skip;
}
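// NVIDIA-specific bookkeeping on graphics pipeline bind: counts switches between pipelines that do and
// do not use tessellation/geometry/task/mesh stages (threshold_signaled is set once
// kNumBindPipelineTessGeometryMeshSwitchesThresholdNVIDIA switches have been seen), and mirrors the
// pipeline's static depthTestEnable/depthCompareOp into the command buffer state unless those states
// are dynamic.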
void BestPractices::PreCallRecordCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline) {
StateTracker::PreCallRecordCmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
auto pipeline_info = Get<PIPELINE_STATE>(pipeline);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(pipeline_info);
assert(cb);
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS && VendorCheckEnabled(kBPVendorNVIDIA)) {
using TessGeometryMeshState = bp_state::CommandBufferStateNV::TessGeometryMesh::State;
auto& tgm = cb->nv.tess_geometry_mesh;
// Make sure the message is only signaled once per command buffer
tgm.threshold_signaled = tgm.num_switches >= kNumBindPipelineTessGeometryMeshSwitchesThresholdNVIDIA;
// Track pipeline switches with tessellation, geometry, and/or mesh shaders enabled, and disabled
auto tgm_stages = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_TASK_BIT_NV | VK_SHADER_STAGE_MESH_BIT_NV;
auto new_tgm_state = (pipeline_info->active_shaders & tgm_stages) != 0
? TessGeometryMeshState::Enabled
: TessGeometryMeshState::Disabled;
if (tgm.state != new_tgm_state && tgm.state != TessGeometryMeshState::Unknown) {
tgm.num_switches++;
}
tgm.state = new_tgm_state;
// Track depthTestEnable and depthCompareOp
auto &pipeline_create_info = pipeline_info->GetCreateInfo<VkGraphicsPipelineCreateInfo>();
auto depth_stencil_state = pipeline_create_info.pDepthStencilState;
auto dynamic_state = pipeline_create_info.pDynamicState;
if (depth_stencil_state && dynamic_state) {
auto dynamic_state_begin = dynamic_state->pDynamicStates;
auto dynamic_state_end = dynamic_state->pDynamicStates + dynamic_state->dynamicStateCount;
bool dynamic_depth_test_enable = std::find(dynamic_state_begin, dynamic_state_end, VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE) != dynamic_state_end;
bool dynamic_depth_func = std::find(dynamic_state_begin, dynamic_state_end, VK_DYNAMIC_STATE_DEPTH_COMPARE_OP) != dynamic_state_end;
if (!dynamic_depth_test_enable) {
RecordSetDepthTestState(*cb, cb->nv.depth_compare_op, depth_stencil_state->depthTestEnable != VK_FALSE);
}
if (!dynamic_depth_func) {
RecordSetDepthTestState(*cb, depth_stencil_state->depthCompareOp, cb->nv.depth_test_enable);
}
}
}
}
void BestPractices::PostCallRecordCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline) {
StateTracker::PostCallRecordCmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
// AMD best practice
PipelineUsedInFrame(pipeline);
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
auto pipeline_state = Get<bp_state::Pipeline>(pipeline);
// check for depth/blend state tracking
if (pipeline_state) {
auto cb_node = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cb_node);
auto& render_pass_state = cb_node->render_pass_state;
render_pass_state.nextDrawTouchesAttachments = pipeline_state->access_framebuffer_attachments;
render_pass_state.drawTouchAttachments = true;
const auto* blend_state = pipeline_state->ColorBlendState();
const auto* stencil_state = pipeline_state->DepthStencilState();
if (blend_state) {
// assume the pipeline is depth-only unless any of the attachments have color writes enabled
render_pass_state.depthOnly = true;
for (size_t i = 0; i < blend_state->attachmentCount; i++) {
if (blend_state->pAttachments[i].colorWriteMask != 0) {
render_pass_state.depthOnly = false;
}
}
}
// check for depth value usage
render_pass_state.depthEqualComparison = false;
if (stencil_state && stencil_state->depthTestEnable) {
switch (stencil_state->depthCompareOp) {
case VK_COMPARE_OP_EQUAL:
case VK_COMPARE_OP_GREATER_OR_EQUAL:
case VK_COMPARE_OP_LESS_OR_EQUAL:
render_pass_state.depthEqualComparison = true;
break;
default:
break;
}
}
}
}
}
void BestPractices::PreCallRecordCmdSetDepthCompareOp(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp) {
StateTracker::PreCallRecordCmdSetDepthCompareOp(commandBuffer, depthCompareOp);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cb);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
RecordSetDepthTestState(*cb, depthCompareOp, cb->nv.depth_test_enable);
}
}
void BestPractices::PreCallRecordCmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp) {
StateTracker::PreCallRecordCmdSetDepthCompareOpEXT(commandBuffer, depthCompareOp);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cb);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
RecordSetDepthTestState(*cb, depthCompareOp, cb->nv.depth_test_enable);
}
}
void BestPractices::PreCallRecordCmdSetDepthTestEnable(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable) {
StateTracker::PreCallRecordCmdSetDepthTestEnable(commandBuffer, depthTestEnable);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cb);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
RecordSetDepthTestState(*cb, cb->nv.depth_compare_op, depthTestEnable != VK_FALSE);
}
}
void BestPractices::PreCallRecordCmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable) {
StateTracker::PreCallRecordCmdSetDepthTestEnableEXT(commandBuffer, depthTestEnable);
auto cb = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cb);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
RecordSetDepthTestState(*cb, cb->nv.depth_compare_op, depthTestEnable != VK_FALSE);
}
}
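// Maps the current depth compare op onto a Z-cull direction (LESS/LESS_OR_EQUAL and
// GREATER/GREATER_OR_EQUAL each select a direction, the remaining ops keep the previous one) and
// records the current depthTestEnable state.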
void BestPractices::RecordSetDepthTestState(bp_state::CommandBuffer& cmd_state, VkCompareOp new_depth_compare_op, bool new_depth_test_enable) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
if (cmd_state.nv.depth_compare_op != new_depth_compare_op) {
switch (new_depth_compare_op) {
case VK_COMPARE_OP_LESS:
case VK_COMPARE_OP_LESS_OR_EQUAL:
cmd_state.nv.zcull_direction = bp_state::CommandBufferStateNV::ZcullDirection::Less;
break;
case VK_COMPARE_OP_GREATER:
case VK_COMPARE_OP_GREATER_OR_EQUAL:
cmd_state.nv.zcull_direction = bp_state::CommandBufferStateNV::ZcullDirection::Greater;
break;
default:
// The other ops carry over the previous state.
break;
}
}
cmd_state.nv.depth_compare_op = new_depth_compare_op;
cmd_state.nv.depth_test_enable = new_depth_test_enable;
}
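// On NVIDIA, binds the Z-cull tracking scope to the depth attachment of the render pass or dynamic
// rendering instance, records any custom clear colors used by color attachments with LOAD_OP_CLEAR,
// and resets the tracked Z-cull direction when the depth attachment is cleared or loaded as DONT_CARE.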
void BestPractices::RecordCmdBeginRenderingCommon(VkCommandBuffer commandBuffer) {
auto cmd_state = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cmd_state);
auto rp = cmd_state->activeRenderPass.get();
assert(rp);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
std::shared_ptr<IMAGE_VIEW_STATE> depth_image_view_shared_ptr;
IMAGE_VIEW_STATE* depth_image_view = nullptr;
layer_data::optional<VkAttachmentLoadOp> load_op;
if (rp->use_dynamic_rendering || rp->use_dynamic_rendering_inherited) {
const auto depth_attachment = rp->dynamic_rendering_begin_rendering_info.pDepthAttachment;
if (depth_attachment) {
load_op.emplace(depth_attachment->loadOp);
depth_image_view_shared_ptr = Get<IMAGE_VIEW_STATE>(depth_attachment->imageView);
depth_image_view = depth_image_view_shared_ptr.get();
}
for (uint32_t i = 0; i < rp->dynamic_rendering_begin_rendering_info.colorAttachmentCount; ++i) {
const auto& color_attachment = rp->dynamic_rendering_begin_rendering_info.pColorAttachments[i];
if (color_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
// With dynamic rendering the imageView may be VK_NULL_HANDLE, so guard the state lookup
const auto color_view_state = Get<IMAGE_VIEW_STATE>(color_attachment.imageView);
if (color_view_state) {
RecordClearColor(color_view_state->create_info.format, color_attachment.clearValue.color);
}
}
}
} else {
if (rp->createInfo.pAttachments) {
if (rp->createInfo.subpassCount > 0) {
const auto depth_attachment = rp->createInfo.pSubpasses[0].pDepthStencilAttachment;
if (depth_attachment) {
const uint32_t attachment_index = depth_attachment->attachment;
if (attachment_index != VK_ATTACHMENT_UNUSED) {
load_op.emplace(rp->createInfo.pAttachments[attachment_index].loadOp);
depth_image_view = (*cmd_state->active_attachments)[attachment_index];
}
}
}
// clearValueCount may legally exceed attachmentCount, so clamp before indexing pAttachments
const uint32_t clear_value_count =
std::min(cmd_state->activeRenderPassBeginInfo.clearValueCount, rp->createInfo.attachmentCount);
for (uint32_t i = 0; i < clear_value_count; ++i) {
const auto& attachment = rp->createInfo.pAttachments[i];
if (attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
const auto& clear_color = cmd_state->activeRenderPassBeginInfo.pClearValues[i].color;
RecordClearColor(attachment.format, clear_color);
}
}
}
}
if (depth_image_view && (depth_image_view->create_info.subresourceRange.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0U) {
const VkImage depth_image = depth_image_view->image_state->image();
const VkImageSubresourceRange& subresource_range = depth_image_view->create_info.subresourceRange;
RecordBindZcullScope(*cmd_state, depth_image, subresource_range);
} else {
RecordUnbindZcullScope(*cmd_state);
}
if (load_op) {
if (*load_op == VK_ATTACHMENT_LOAD_OP_CLEAR || *load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) {
RecordResetScopeZcullDirection(*cmd_state);
}
}
}
}
void BestPractices::RecordCmdEndRenderingCommon(VkCommandBuffer commandBuffer) {
auto cmd_state = GetWrite<bp_state::CommandBuffer>(commandBuffer);
assert(cmd_state);
auto rp = cmd_state->activeRenderPass.get();
assert(rp);
if (VendorCheckEnabled(kBPVendorNVIDIA)) {
layer_data::optional<VkAttachmentStoreOp> store_op;
if (rp->use_dynamic_rendering || rp->use_dynamic_rendering_inherited) {
const auto depth_attachment = rp->dynamic_rendering_begin_rendering_info.pDepthAttachment;
if (depth_attachment) {
store_op.emplace(depth_attachment->storeOp);
}
} else {
if (rp->createInfo.subpassCount > 0) {
const uint32_t last_subpass = rp->createInfo.subpassCount - 1;
const auto depth_attachment = rp->createInfo.pSubpasses[last_subpass].pDepthStencilAttachment;
if (depth_attachment) {
const uint32_t attachment = depth_attachment->attachment;
if (attachment != VK_ATTACHMENT_UNUSED) {
store_op.emplace(rp->createInfo.pAttachments[attachment].storeOp);
}
}
}
}
if (store_op) {
if (*store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE || *store_op == VK_ATTACHMENT_STORE_OP_NONE) {
RecordResetScopeZcullDirection(*cmd_state);
}
}
RecordUnbindZcullScope(*cmd_state);
}
}
void BestPractices::RecordBindZcullScope(bp_state::CommandBuffer& cmd_state, VkImage depth_attachment, const VkImageSubresourceRange& subresource_range) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
if (depth_attachment == VK_NULL_HANDLE) {
cmd_state.nv.zcull_scope = {};
return;
}
assert((subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0U);
auto image_state = Get<IMAGE_STATE>(depth_attachment);
assert(image_state);
const uint32_t mip_levels = image_state->createInfo.mipLevels;
const uint32_t array_layers = image_state->createInfo.arrayLayers;
auto& tree = cmd_state.nv.zcull_per_image[depth_attachment];
if (tree.states.empty()) {
tree.mip_levels = mip_levels;
tree.array_layers = array_layers;
tree.states.resize(array_layers * mip_levels);
}
cmd_state.nv.zcull_scope.image = depth_attachment;
cmd_state.nv.zcull_scope.range = subresource_range;
cmd_state.nv.zcull_scope.tree = &tree;
}
void BestPractices::RecordUnbindZcullScope(bp_state::CommandBuffer& cmd_state) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
RecordBindZcullScope(cmd_state, VK_NULL_HANDLE, VkImageSubresourceRange{});
}
void BestPractices::RecordResetScopeZcullDirection(bp_state::CommandBuffer& cmd_state) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
auto& scope = cmd_state.nv.zcull_scope;
RecordResetZcullDirection(cmd_state, scope.image, scope.range);
}
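// Invokes func(layer, level) for every subresource in the range, resolving VK_REMAINING_ARRAY_LAYERS
// and VK_REMAINING_MIP_LEVELS against the image's full range.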
template <typename Func>
static void ForEachSubresource(const IMAGE_STATE& image, const VkImageSubresourceRange& range, Func&& func)
{
const uint32_t layerCount =
(range.layerCount == VK_REMAINING_ARRAY_LAYERS) ? (image.full_range.layerCount - range.baseArrayLayer) : range.layerCount;
const uint32_t levelCount =
(range.levelCount == VK_REMAINING_MIP_LEVELS) ? (image.full_range.levelCount - range.baseMipLevel) : range.levelCount;
for (uint32_t i = 0; i < layerCount; ++i) {
const uint32_t layer = range.baseArrayLayer + i;
for (uint32_t j = 0; j < levelCount; ++j) {
const uint32_t level = range.baseMipLevel + j;
func(layer, level);
}
}
}
void BestPractices::RecordResetZcullDirection(bp_state::CommandBuffer& cmd_state, VkImage depth_image,
const VkImageSubresourceRange& subresource_range) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
RecordSetZcullDirection(cmd_state, depth_image, subresource_range, bp_state::CommandBufferStateNV::ZcullDirection::Unknown);
const auto image_it = cmd_state.nv.zcull_per_image.find(depth_image);
if (image_it == cmd_state.nv.zcull_per_image.end()) {
return;
}
auto& tree = image_it->second;
auto image = Get<IMAGE_STATE>(depth_image);
if (!image) return;
ForEachSubresource(*image, subresource_range, [&tree](uint32_t layer, uint32_t level) {
auto& subresource = tree.GetState(layer, level);
subresource.num_less_draws = 0;
subresource.num_greater_draws = 0;
});
}
void BestPractices::RecordSetScopeZcullDirection(bp_state::CommandBuffer& cmd_state, bp_state::CommandBufferStateNV::ZcullDirection mode) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
auto& scope = cmd_state.nv.zcull_scope;
RecordSetZcullDirection(cmd_state, scope.image, scope.range, mode);
}
void BestPractices::RecordSetZcullDirection(bp_state::CommandBuffer& cmd_state, VkImage depth_image,
const VkImageSubresourceRange& subresource_range,
bp_state::CommandBufferStateNV::ZcullDirection mode) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
const auto image_it = cmd_state.nv.zcull_per_image.find(depth_image);
if (image_it == cmd_state.nv.zcull_per_image.end()) {
return;
}
auto& tree = image_it->second;
auto image = Get<IMAGE_STATE>(depth_image);
if (!image) return;
ForEachSubresource(*image, subresource_range, [&tree, &cmd_state](uint32_t layer, uint32_t level) {
tree.GetState(layer, level).direction = cmd_state.nv.zcull_direction;
});
}
void BestPractices::RecordZcullDraw(bp_state::CommandBuffer& cmd_state) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
// Add one draw to each subresource depending on the current Z-cull direction
auto& scope = cmd_state.nv.zcull_scope;
auto image = Get<IMAGE_STATE>(scope.image);
if (!image) return;
ForEachSubresource(*image, scope.range, [&scope](uint32_t layer, uint32_t level) {
auto& subresource = scope.tree->GetState(layer, level);
switch (subresource.direction) {
case bp_state::CommandBufferStateNV::ZcullDirection::Unknown:
// Unreachable
assert(0);
break;
case bp_state::CommandBufferStateNV::ZcullDirection::Less:
++subresource.num_less_draws;
break;
case bp_state::CommandBufferStateNV::ZcullDirection::Greater:
++subresource.num_greater_draws;
break;
}
});
}
bool BestPractices::ValidateZcullScope(const bp_state::CommandBuffer& cmd_state) const {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
bool skip = false;
if (cmd_state.nv.depth_test_enable) {
auto& scope = cmd_state.nv.zcull_scope;
skip |= ValidateZcull(cmd_state, scope.image, scope.range);
}
return skip;
}
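// Warns when the draws recorded against a depth image are split between LESS- and GREATER-direction
// compare ops (both above kZcullDirectionBalanceRatioNVIDIA percent of the total), since Z-cull is
// disabled for the least used direction.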
bool BestPractices::ValidateZcull(const bp_state::CommandBuffer& cmd_state, VkImage image,
const VkImageSubresourceRange& subresource_range) const {
bool skip = false;
const char* good_mode = nullptr;
const char* bad_mode = nullptr;
bool is_balanced = false;
const auto image_it = cmd_state.nv.zcull_per_image.find(image);
if (image_it == cmd_state.nv.zcull_per_image.end()) {
return skip;
}
const auto& tree = image_it->second;
auto image_state = Get<IMAGE_STATE>(image);
if (!image_state) {
return skip;
}
ForEachSubresource(*image_state, subresource_range, [&](uint32_t layer, uint32_t level) {
if (is_balanced) {
return;
}
const auto& resource = tree.GetState(layer, level);
const uint64_t num_draws = resource.num_less_draws + resource.num_greater_draws;
if (num_draws == 0) {
return;
}
const uint64_t less_ratio = (resource.num_less_draws * 100) / num_draws;
const uint64_t greater_ratio = (resource.num_greater_draws * 100) / num_draws;
if ((less_ratio > kZcullDirectionBalanceRatioNVIDIA) && (greater_ratio > kZcullDirectionBalanceRatioNVIDIA)) {
is_balanced = true;
if (greater_ratio > less_ratio) {
good_mode = "GREATER";
bad_mode = "LESS";
} else {
good_mode = "LESS";
bad_mode = "GREATER";
}
}
});
if (is_balanced) {
skip |= LogPerformanceWarning(
cmd_state.commandBuffer(), kVUID_BestPractices_Zcull_LessGreaterRatio,
"%s Depth attachment %s is primarily rendered with depth compare op %s, but some draws use %s. "
"Z-cull is disabled for the least used direction, which harms depth testing performance. "
"The Z-cull direction can be reset by clearing the depth attachment, transitioning from VK_IMAGE_LAYOUT_UNDEFINED, "
"using VK_ATTACHMENT_LOAD_OP_DONT_CARE, or using VK_ATTACHMENT_STORE_OP_DONT_CARE.",
VendorSpecificTag(kBPVendorNVIDIA), report_data->FormatHandle(cmd_state.nv.zcull_scope.image).c_str(), good_mode,
bad_mode);
}
return skip;
}
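// Clear colors are tracked as raw 32-bit component bit patterns so that repeated clears can be compared
// exactly; components the format does not contain are zeroed out, and IsClearColorZeroOrOne checks the
// remaining components against the bit patterns of 0.0f and 1.0f.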
static std::array<uint32_t, 4> GetRawClearColor(VkFormat format, const VkClearColorValue& clear_value) {
std::array<uint32_t, 4> raw_color{};
std::copy_n(clear_value.uint32, raw_color.size(), raw_color.data());
// Zero out unused components to avoid polluting the cache with garbage
if (!FormatHasRed(format)) raw_color[0] = 0;
if (!FormatHasGreen(format)) raw_color[1] = 0;
if (!FormatHasBlue(format)) raw_color[2] = 0;
if (!FormatHasAlpha(format)) raw_color[3] = 0;
return raw_color;
}
static bool IsClearColorZeroOrOne(VkFormat format, const std::array<uint32_t, 4> clear_color) {
static_assert(sizeof(float) == sizeof(uint32_t), "Mismatching float <-> uint32 sizes");
const float one = 1.0f;
const float zero = 0.0f;
uint32_t raw_one{};
uint32_t raw_zero{};
memcpy(&raw_one, &one, sizeof(one));
memcpy(&raw_zero, &zero, sizeof(zero));
const bool is_one = (!FormatHasRed(format) || (clear_color[0] == raw_one)) &&
(!FormatHasGreen(format) || (clear_color[1] == raw_one)) &&
(!FormatHasBlue(format) || (clear_color[2] == raw_one)) &&
(!FormatHasAlpha(format) || (clear_color[3] == raw_one));
const bool is_zero = (!FormatHasRed(format) || (clear_color[0] == raw_zero)) &&
(!FormatHasGreen(format) || (clear_color[1] == raw_zero)) &&
(!FormatHasBlue(format) || (clear_color[2] == raw_zero)) &&
(!FormatHasAlpha(format) || (clear_color[3] == raw_zero));
return is_one || is_zero;
}
static std::string MakeCompressedFormatListNVIDIA() {
std::string format_list;
for (VkFormat compressed_format : kCustomClearColorCompressedFormatsNVIDIA) {
if (compressed_format == kCustomClearColorCompressedFormatsNVIDIA.back()) {
format_list += "or ";
}
format_list += string_VkFormat(compressed_format);
if (compressed_format != kCustomClearColorCompressedFormatsNVIDIA.back()) {
format_list += ", ";
}
}
return format_list;
}
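// Remembers custom (non-0.0/1.0) clear colors used with formats that NVIDIA can compress with a custom
// color, up to kMaxRecommendedNumberOfClearColorsNVIDIA distinct values; ValidateClearColor warns when
// a clear uses a color that could not be registered.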
void BestPractices::RecordClearColor(VkFormat format, const VkClearColorValue& clear_value) {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value);
if (IsClearColorZeroOrOne(format, raw_color)) {
// These colors are always compressed
return;
}
const auto it = std::find(kCustomClearColorCompressedFormatsNVIDIA.begin(), kCustomClearColorCompressedFormatsNVIDIA.end(), format);
if (it == kCustomClearColorCompressedFormatsNVIDIA.end()) {
// The format cannot be compressed with a custom color
return;
}
// Record custom clear color
WriteLockGuard guard{clear_colors_lock_};
if (clear_colors_.size() < kMaxRecommendedNumberOfClearColorsNVIDIA) {
clear_colors_.insert(raw_color);
}
}
bool BestPractices::ValidateClearColor(VkCommandBuffer commandBuffer, VkFormat format, const VkClearColorValue& clear_value) const {
assert(VendorCheckEnabled(kBPVendorNVIDIA));
bool skip = false;
const std::array<uint32_t, 4> raw_color = GetRawClearColor(format, clear_value);
if (IsClearColorZeroOrOne(format, raw_color)) {
return skip;
}
const auto it = std::find(kCustomClearColorCompressedFormatsNVIDIA.begin(), kCustomClearColorCompressedFormatsNVIDIA.end(), format);
if (it == kCustomClearColorCompressedFormatsNVIDIA.end()) {
// The format is not compressible
static const std::string format_list = MakeCompressedFormatListNVIDIA();
skip |= LogPerformanceWarning(commandBuffer, kVUID_BestPractices_ClearColor_NotCompressed,
"%s Clearing image with format %s without a 1.0f or 0.0f clear color. "
"The clear will not get compressed in the GPU, harming performance. "
"This can be fixed using a clear color of VkClearColorValue{0.0f, 0.0f, 0.0f, 0.0f}, or "
"VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f}. Alternatively, use %s.",
VendorSpecificTag(kBPVendorNVIDIA), string_VkFormat(format), format_list.c_str());
} else {
// The format is compressible
bool registered = false;
{
ReadLockGuard guard{clear_colors_lock_};
registered = clear_colors_.find(raw_color) != clear_colors_.end();
if (!registered) {
// If it's not in the list, it might be new. Check if there's still space for new entries.
registered = clear_colors_.size() < kMaxRecommendedNumberOfClearColorsNVIDIA;
}
}
if (!registered) {
std::string clear_color_str;
if (FormatIsUINT(format)) {
clear_color_str = std::to_string(clear_value.uint32[0]) + ", " + std::to_string(clear_value.uint32[1]) + ", " +
std::to_string(clear_value.uint32[2]) + ", " + std::to_string(clear_value.uint32[3]);
} else if (FormatIsSINT(format)) {
clear_color_str = std::to_string(clear_value.int32[0]) + ", " + std::to_string(clear_value.int32[1]) + ", " +
std::to_string(clear_value.int32[2]) + ", " + std::to_string(clear_value.int32[3]);
} else {
clear_color_str = std::to_string(clear_value.float32[0]) + ", " + std::to_string(clear_value.float32[1]) + ", " +
std::to_string(clear_value.float32[2]) + ", " + std::to_string(clear_value.float32[3]);
}
skip |= LogPerformanceWarning(
commandBuffer, kVUID_BestPractices_ClearColor_NotCompressed,
"%s Clearing image with unregistered VkClearColorValue{%s}. "
"This clear will not get compressed in the GPU, harming performance. "
"The clear color is not registered because too many unique colors have been used. "
"Select a discrete set of clear colors and stick to those. "
"VkClearColorValue{0, 0, 0, 0} and VkClearColorValue{1.0f, 1.0f, 1.0f, 1.0f} are always registered.",
VendorSpecificTag(kBPVendorNVIDIA), clear_color_str.c_str());
}
}
return skip;
}
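// Helpers that classify how a render pass uses an attachment: as a resolve target, as something that
// must live on-tile (color, resolve or depth/stencil attachment), or purely as an input attachment.
// These drive the tiled-GPU (Arm/IMG) load/store best-practice checks below.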
static inline bool RenderPassUsesAttachmentAsResolve(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
const auto& subpass_info = createInfo.pSubpasses[subpass];
if (subpass_info.pResolveAttachments) {
for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
if (subpass_info.pResolveAttachments[i].attachment == attachment) return true;
}
}
}
return false;
}
static inline bool RenderPassUsesAttachmentOnTile(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
const auto& subpass_info = createInfo.pSubpasses[subpass];
// If an attachment is ever used as a color attachment,
// resolve attachment or depth stencil attachment,
// it needs to exist on tile at some point.
for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
if (subpass_info.pColorAttachments[i].attachment == attachment) return true;
}
if (subpass_info.pResolveAttachments) {
for (uint32_t i = 0; i < subpass_info.colorAttachmentCount; i++) {
if (subpass_info.pResolveAttachments[i].attachment == attachment) return true;
}
}
if (subpass_info.pDepthStencilAttachment && subpass_info.pDepthStencilAttachment->attachment == attachment) return true;
}
return false;
}
static inline bool RenderPassUsesAttachmentAsImageOnly(const safe_VkRenderPassCreateInfo2& createInfo, uint32_t attachment) {
if (RenderPassUsesAttachmentOnTile(createInfo, attachment)) {
return false;
}
for (uint32_t subpass = 0; subpass < createInfo.subpassCount; subpass++) {
const auto& subpassInfo = createInfo.pSubpasses[subpass];
for (uint32_t i = 0; i < subpassInfo.inputAttachmentCount; i++) {
if (subpassInfo.pInputAttachments[i].attachment == attachment) {
return true;
}
}
}
return false;
}
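// Common validation for the vkCmdBeginRenderPass family: warns on zero-sized render areas, validates
// attachments supplied for imageless framebuffers, and flags attachments whose LOAD_OP_LOAD forces a
// readback on tile-based GPUs (Arm/IMG).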
bool BestPractices::ValidateCmdBeginRenderPass(VkCommandBuffer commandBuffer, RenderPassCreateVersion rp_version,
const VkRenderPassBeginInfo* pRenderPassBegin) const {
bool skip = false;
if (!pRenderPassBegin) {
return skip;
}
if (pRenderPassBegin->renderArea.extent.width == 0 || pRenderPassBegin->renderArea.extent.height == 0) {
skip |= LogWarning(device, kVUID_BestPractices_BeginRenderPass_ZeroSizeRenderArea,
"This render pass has a zero-size render area. It cannot write to any attachments, "
"and can only be used for side effects such as layout transitions.");
}
auto rp_state = Get<RENDER_PASS_STATE>(pRenderPassBegin->renderPass);
if (rp_state) {
if (rp_state->createInfo.flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT) {
const VkRenderPassAttachmentBeginInfo* rpabi = LvlFindInChain<VkRenderPassAttachmentBeginInfo>(pRenderPassBegin->pNext);
if (rpabi) {
skip = ValidateAttachments(rp_state->createInfo.ptr(), rpabi->attachmentCount, rpabi->pAttachments);
}
}
// Check if any attachments have LOAD operation on them
for (uint32_t att = 0; att < rp_state->createInfo.attachmentCount; att++) {
const auto& attachment = rp_state->createInfo.pAttachments[att];
bool attachment_has_readback = false;
if (!FormatIsStencilOnly(attachment.format) && attachment.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
attachment_has_readback = true;
}
if (FormatHasStencil(attachment.format) && attachment.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
attachment_has_readback = true;
}
bool attachment_needs_readback = false;
// Check if the attachment is actually used in any subpass on-tile
if (attachment_has_readback && RenderPassUsesAttachmentOnTile(rp_state->createInfo, att)) {
attachment_needs_readback = true;
}
// Using LOAD_OP_LOAD is expensive on tiled GPUs, so flag it as a potential improvement
if (attachment_needs_readback && (VendorCheckEnabled(kBPVendorArm) || VendorCheckEnabled(kBPVendorIMG))) {
skip |=
LogPerformanceWarning(device, kVUID_BestPractices_BeginRenderPass_AttachmentNeedsReadback,
"%s %s: Attachment #%u in render pass has begun with VK_ATTACHMENT_LOAD_OP_LOAD.\n"