| /*------------------------------------------------------------------------ |
| * Vulkan Conformance Tests |
| * ------------------------ |
| * |
| * Copyright (c) 2017-2019 The Khronos Group Inc. |
| * Copyright (c) 2018-2019 NVIDIA Corporation |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| *//*! |
| * \file |
| * \brief Tests for VK_EXT_fragment_shader_interlock. |
| * These tests render a set of overlapping full-screen quads that use image |
| * or buffer reads and writes to accumulate values into a result image/buffer. |
| * They use fragment shader interlock to avoid race conditions on the read/write |
| * and validate that the final result includes all the writes. |
| * Each fragment shader invocation computes a coordinate, and does a read/modify/write |
| * into the image or buffer, inside the interlock. The value in memory accumulates a bitmask |
| * indicating which primitives or samples have already run through the interlock. e.g. |
| * for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive |
| * and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests |
| * that all the previous primitives (less significant bits) are also set, else it clobbers the |
| * value. Sample and shading_rate interlock are variants of this where there is one value per |
| * sample or per coarse fragment location, respectively. When there are multiple samples per |
| * fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish |
| * primitive order between samples on the internal diagonal of the quad (triangle strip). |
| *//*--------------------------------------------------------------------*/ |
| |
| #include "vktFragmentShaderInterlockBasic.hpp" |
| |
| #include "vkBufferWithMemory.hpp" |
| #include "vkImageWithMemory.hpp" |
| #include "vkQueryUtil.hpp" |
| #include "vkBuilderUtil.hpp" |
| #include "vkCmdUtil.hpp" |
| #include "vkTypeUtil.hpp" |
| #include "vkObjUtil.hpp" |
| |
| #include "vktTestGroupUtil.hpp" |
| #include "vktTestCase.hpp" |
| #include "vktCustomInstancesDevices.hpp" |
| |
| #include "deDefs.h" |
| #include "deMath.h" |
| #include "deRandom.h" |
| #include "deSharedPtr.hpp" |
| #include "deString.h" |
| |
| #include "tcuTestCase.hpp" |
| #include "tcuTestLog.hpp" |
| #include "tcuCommandLine.hpp" |
| |
| #include <string> |
| #include <sstream> |
| |
| namespace vkt |
| { |
| namespace FragmentShaderInterlock |
| { |
| namespace |
| { |
| using namespace vk; |
| using namespace std; |
| |
//! Storage resource the fragment shader accumulates its bitmask into.
enum Resource
{
	RES_SSBO	= 0,	//!< Storage buffer (shader binding 1).
	RES_IMAGE	= 1,	//!< Storage image (shader binding 0).
};
| |
//! Interlock layout qualifier exercised by a test case.
//! Each granularity (pixel, sample, shading rate) comes in an ordered and an
//! unordered flavour.
enum Interlock
{
	INT_PIXEL_ORDERED			= 0,
	INT_PIXEL_UNORDERED			= 1,
	INT_SAMPLE_ORDERED			= 2,
	INT_SAMPLE_UNORDERED		= 3,
	INT_SHADING_RATE_ORDERED	= 4,
	INT_SHADING_RATE_UNORDERED	= 5,
};
| |
| de::SharedPtr<Move<vk::VkDevice>> g_singletonDevice; |
| |
| VkDevice getDevice(Context& context, Interlock interlock) |
| { |
| if (interlock == INT_SHADING_RATE_ORDERED || interlock == INT_SHADING_RATE_UNORDERED) |
| { |
| if (!g_singletonDevice) |
| { |
| const float queuePriority = 1.0f; |
| |
| // Create a universal queue that supports graphics and compute |
| const VkDeviceQueueCreateInfo queueParams = |
| { |
| VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| 0u, // VkDeviceQueueCreateFlags flags; |
| context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex; |
| 1u, // deUint32 queueCount; |
| &queuePriority // const float* pQueuePriorities; |
| }; |
| |
| const char * extensions[] = |
| { |
| "VK_EXT_fragment_shader_interlock", |
| "VK_NV_shading_rate_image", |
| }; |
| |
| VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragmentShaderInterlockFeatures = initVulkanStructure(); |
| VkPhysicalDeviceShadingRateImageFeaturesNV shadingRateImageFeatures = initVulkanStructure(&fragmentShaderInterlockFeatures); |
| VkPhysicalDeviceFeatures2 features2 = initVulkanStructure(&shadingRateImageFeatures); |
| |
| context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2); |
| |
| const VkDeviceCreateInfo deviceCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, //sType; |
| &features2, //pNext; |
| (VkDeviceCreateFlags)0u, //flags |
| 1, //queueRecordCount; |
| &queueParams, //pRequestedQueues; |
| 0, //layerCount; |
| DE_NULL, //ppEnabledLayerNames; |
| DE_LENGTH_OF_ARRAY(extensions), // deUint32 enabledExtensionCount; |
| extensions, // const char* const* ppEnabledExtensionNames; |
| DE_NULL, //pEnabledFeatures; |
| }; |
| |
| Move<VkDevice> device = createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(), context.getPlatformInterface(), context.getInstance(), context.getInstanceInterface(), context.getPhysicalDevice(), &deviceCreateInfo); |
| g_singletonDevice = de::SharedPtr<Move<VkDevice>>(new Move<VkDevice>(device)); |
| } |
| |
| return g_singletonDevice->get(); |
| } |
| |
| return context.getDevice(); |
| } |
| |
| struct CaseDef |
| { |
| deUint32 dim; |
| Resource resType; |
| Interlock interlock; |
| VkSampleCountFlagBits samples; |
| bool killOdd; |
| bool sampleShading; |
| |
| bool isSampleInterlock() const |
| { |
| return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED; |
| } |
| bool isOrdered() const |
| { |
| return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED || interlock == INT_SHADING_RATE_ORDERED; |
| } |
| }; |
| |
| class FSITestInstance : public TestInstance |
| { |
| public: |
| FSITestInstance (Context& context, const CaseDef& data); |
| ~FSITestInstance (void); |
| tcu::TestStatus iterate (void); |
| |
| private: |
| CaseDef m_data; |
| }; |
| |
| FSITestInstance::FSITestInstance (Context& context, const CaseDef& data) |
| : vkt::TestInstance (context) |
| , m_data (data) |
| { |
| } |
| |
| FSITestInstance::~FSITestInstance (void) |
| { |
| } |
| |
| class FSITestCase : public TestCase |
| { |
| public: |
| FSITestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data); |
| ~FSITestCase (void); |
| virtual void initPrograms (SourceCollections& programCollection) const; |
| virtual TestInstance* createInstance (Context& context) const; |
| virtual void checkSupport (Context& context) const; |
| |
| private: |
| CaseDef m_data; |
| }; |
| |
| FSITestCase::FSITestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data) |
| : vkt::TestCase (context, name, desc) |
| , m_data (data) |
| { |
| } |
| |
| FSITestCase::~FSITestCase (void) |
| { |
| } |
| |
| void FSITestCase::checkSupport(Context& context) const |
| { |
| context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock"); |
| |
| if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) && |
| !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock) |
| { |
| TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported"); |
| } |
| |
| if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) && |
| !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock) |
| { |
| TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported"); |
| } |
| |
| if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)) |
| { |
| if (!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock) |
| TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported"); |
| |
| context.requireDeviceFunctionality("VK_NV_shading_rate_image"); |
| |
| // We need to query the VK_NV_shading_rate_image features because they might be disabled |
| // in the default context due to a conflict with VK_KHR_fragment_shading_rate. |
| VkPhysicalDeviceShadingRateImageFeaturesNV shadingRateImageFeatures = initVulkanStructure(); |
| VkPhysicalDeviceFeatures2KHR features2 = initVulkanStructure(&shadingRateImageFeatures); |
| |
| context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2); |
| |
| if (!shadingRateImageFeatures.shadingRateImage) |
| TCU_THROW(NotSupportedError, "Shading rate image not supported"); |
| } |
| } |
| |
| static int bitsPerQuad(const CaseDef &c) |
| { |
| deUint32 bpq = c.samples; |
| |
| if (c.isSampleInterlock()) |
| bpq = 1; |
| else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED) |
| bpq *= 4; |
| |
| return bpq; |
| } |
| |
| void FSITestCase::initPrograms (SourceCollections& programCollection) const |
| { |
| std::stringstream vss; |
| |
| vss << |
| "#version 450 core\n" |
| "layout(location = 0) out int primID;\n" |
| "void main()\n" |
| "{\n" |
| " primID = gl_InstanceIndex;\n" |
| // full-viewport quad |
| " gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * float(gl_VertexIndex&1), 1);\n" |
| "}\n"; |
| |
| programCollection.glslSources.add("vert") << glu::VertexSource(vss.str()); |
| |
| std::stringstream fss; |
| |
| fss << |
| "#version 450 core\n" |
| "#extension GL_ARB_fragment_shader_interlock : enable\n" |
| "#extension GL_NV_shading_rate_image : enable\n" |
| "layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n" |
| "layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n" |
| "layout(location = 0) flat in int primID;\n"; |
| |
| switch (m_data.interlock) |
| { |
| default: DE_ASSERT(0); // fallthrough |
| case INT_PIXEL_ORDERED: fss << "layout(pixel_interlock_ordered) in;\n"; break; |
| case INT_PIXEL_UNORDERED: fss << "layout(pixel_interlock_unordered) in;\n"; break; |
| case INT_SAMPLE_ORDERED: fss << "layout(sample_interlock_ordered) in;\n"; break; |
| case INT_SAMPLE_UNORDERED: fss << "layout(sample_interlock_unordered) in;\n"; break; |
| case INT_SHADING_RATE_ORDERED: fss << "layout(shading_rate_interlock_ordered) in;\n"; break; |
| case INT_SHADING_RATE_UNORDERED: fss << "layout(shading_rate_interlock_unordered) in;\n"; break; |
| } |
| |
| // Each fragment shader invocation computes a coordinate, and does a read/modify/write |
| // into the image or buffer, inside the interlock. The value in memory accumulates a bitmask |
| // indicating which primitives or samples have already run through the interlock. e.g. |
| // for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive |
| // and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests |
| // that all the previous primitives (less significant bits) are also set, else it clobbers the |
| // value. Sample and shading_rate interlock are variants of this where there is one value per |
| // sample or per coarse fragment location, respectively. When there are multiple samples per |
| // fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish |
| // primitive order between samples on the internal diagonal of the quad (triangle strip). |
| |
| fss << |
| "void main()\n" |
| "{\n" |
| " ivec2 coordxy = ivec2(gl_FragCoord.xy);\n" |
| " uint stride = " << m_data.dim << ";\n" |
| " uint bitsPerQuad = " << bitsPerQuad(m_data) << ";\n"; |
| |
| // Compute the coordinate |
| if (m_data.isSampleInterlock()) |
| { |
| // Spread samples out in the x dimension |
| fss << " coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n"; |
| fss << " stride *= " << m_data.samples << ";\n"; |
| } |
| else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) |
| { |
| // shading rate is 2x2. Divide xy by 2 |
| fss << " coordxy /= 2;\n"; |
| fss << " stride /= 2;\n"; |
| } |
| |
| if (m_data.isSampleInterlock()) |
| { |
| // sample interlock runs per-sample, and stores one bit per sample |
| fss << " uint mask = 1 << primID;\n"; |
| fss << " uint previousMask = (1 << primID)-1;\n"; |
| } |
| else |
| { |
| // pixel and shading_rate interlock run per-fragment, and store the sample mask |
| fss << " uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n"; |
| fss << " uint previousMask = (1 << (primID * bitsPerQuad))-1;\n"; |
| } |
| |
| // Exercise discard before and during the interlock |
| if (m_data.killOdd) |
| fss << " if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n"; |
| |
| fss << " beginInvocationInterlockARB();\n"; |
| |
| if (m_data.killOdd) |
| fss << " if ((coordxy.x & 1) != 0) discard;\n"; |
| |
| // Read the current value from the image or buffer |
| if (m_data.resType == RES_IMAGE) |
| fss << " uint temp = imageLoad(image0, coordxy).x;\n"; |
| else |
| { |
| fss << " uint coord = coordxy.y * stride + coordxy.x;\n"; |
| fss << " uint temp = buf1.x[coord];\n"; |
| } |
| |
| // Update the value. For "ordered" modes, check that all the previous primitives' |
| // bits are already set |
| if (m_data.isOrdered()) |
| fss << " if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n"; |
| else |
| fss << " temp |= mask;\n"; |
| |
| // Store out the new value |
| if (m_data.resType == RES_IMAGE) |
| fss << " imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n"; |
| else |
| fss << " buf1.x[coord] = temp;\n"; |
| |
| fss << " endInvocationInterlockARB();\n"; |
| |
| if (m_data.killOdd) |
| fss << " discard;\n"; |
| |
| fss << "}\n"; |
| |
| programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str()); |
| } |
| |
| TestInstance* FSITestCase::createInstance (Context& context) const |
| { |
| return new FSITestInstance(context, m_data); |
| } |
| |
| tcu::TestStatus FSITestInstance::iterate (void) |
| { |
| const DeviceInterface& vk = m_context.getDeviceInterface(); |
| const VkDevice device = getDevice(m_context, m_data.interlock); |
| Allocator& allocator = m_context.getDefaultAllocator(); |
| VkFlags allShaderStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; |
| VkFlags allPipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; |
| |
| VkPhysicalDeviceProperties2 properties; |
| deMemset(&properties, 0, sizeof(properties)); |
| properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; |
| |
| m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties); |
| |
| VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; |
| |
| Move<vk::VkDescriptorSetLayout> descriptorSetLayout; |
| Move<vk::VkDescriptorPool> descriptorPool; |
| Move<vk::VkDescriptorSet> descriptorSet; |
| |
| VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; |
| VkDescriptorSetLayoutCreateFlags layoutCreateFlags = 0; |
| |
| const VkDescriptorSetLayoutBinding bindings[2] = |
| { |
| { |
| 0u, // binding |
| VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // descriptorType |
| 1u, // descriptorCount |
| allShaderStages, // stageFlags |
| DE_NULL, // pImmutableSamplers |
| }, |
| { |
| 1u, // binding |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType |
| 1u, // descriptorCount |
| allShaderStages, // stageFlags |
| DE_NULL, // pImmutableSamplers |
| }, |
| }; |
| |
| // Create a layout and allocate a descriptor set for it. |
| const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo = |
| { |
| vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| layoutCreateFlags, // flags |
| 2u, // bindingCount |
| &bindings[0] // pBindings |
| }; |
| |
| descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo); |
| |
| vk::DescriptorPoolBuilder poolBuilder; |
| poolBuilder.addType(bindings[0].descriptorType, 1); |
| poolBuilder.addType(bindings[1].descriptorType, 1); |
| |
| descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u); |
| descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout); |
| |
| // one uint per sample (max of 4 samples) |
| VkDeviceSize bufferSize = m_data.dim*m_data.dim*sizeof(deUint32)*4; |
| |
| de::MovePtr<BufferWithMemory> buffer; |
| buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory( |
| vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any)); |
| |
| flushAlloc(vk, device, buffer->getAllocation()); |
| |
| const VkQueue queue = getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0); |
| Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex()); |
| Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY); |
| |
| beginCommandBuffer(vk, *cmdBuffer, 0u); |
| |
| const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (VkPipelineLayoutCreateFlags)0, |
| 1, // setLayoutCount |
| &descriptorSetLayout.get(), // pSetLayouts |
| 0u, // pushConstantRangeCount |
| DE_NULL, // pPushConstantRanges |
| }; |
| |
| Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL); |
| |
| de::MovePtr<BufferWithMemory> copyBuffer; |
| copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory( |
| vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached)); |
| |
| const VkImageCreateInfo imageCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkImageCreateFlags)0u, // VkImageCreateFlags flags; |
| VK_IMAGE_TYPE_2D, // VkImageType imageType; |
| VK_FORMAT_R32_UINT, // VkFormat format; |
| { |
| m_data.dim * m_data.samples, // deUint32 width; |
| m_data.dim, // deUint32 height; |
| 1u // deUint32 depth; |
| }, // VkExtent3D extent; |
| 1u, // deUint32 mipLevels; |
| 1u, // deUint32 arrayLayers; |
| VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; |
| VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling; |
| VK_IMAGE_USAGE_STORAGE_BIT |
| | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
| | VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage; |
| VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; |
| 0u, // deUint32 queueFamilyIndexCount; |
| DE_NULL, // const deUint32* pQueueFamilyIndices; |
| VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout; |
| }; |
| |
| VkImageViewCreateInfo imageViewCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags; |
| DE_NULL, // VkImage image; |
| VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType; |
| VK_FORMAT_R32_UINT, // VkFormat format; |
| { |
| VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r; |
| VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g; |
| VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b; |
| VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a; |
| }, // VkComponentMapping components; |
| { |
| VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask; |
| 0u, // deUint32 baseMipLevel; |
| 1u, // deUint32 levelCount; |
| 0u, // deUint32 baseArrayLayer; |
| 1u // deUint32 layerCount; |
| } // VkImageSubresourceRange subresourceRange; |
| }; |
| |
| de::MovePtr<ImageWithMemory> image; |
| Move<VkImageView> imageView; |
| |
| image = de::MovePtr<ImageWithMemory>(new ImageWithMemory( |
| vk, device, allocator, imageCreateInfo, MemoryRequirement::Any)); |
| imageViewCreateInfo.image = **image; |
| imageView = createImageView(vk, device, &imageViewCreateInfo, NULL); |
| |
| VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL); |
| VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffer, 0, bufferSize); |
| |
| VkWriteDescriptorSet w = |
| { |
| VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType |
| DE_NULL, // pNext |
| *descriptorSet, // dstSet |
| (deUint32)0, // dstBinding |
| 0, // dstArrayElement |
| 1u, // descriptorCount |
| bindings[0].descriptorType, // descriptorType |
| &imageInfo, // pImageInfo |
| &bufferInfo, // pBufferInfo |
| DE_NULL, // pTexelBufferView |
| }; |
| vk.updateDescriptorSets(device, 1, &w, 0, NULL); |
| |
| w.dstBinding = 1; |
| w.descriptorType = bindings[1].descriptorType; |
| vk.updateDescriptorSets(device, 1, &w, 0, NULL); |
| |
| vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL); |
| |
| VkBool32 shadingRateEnable = m_data.interlock == INT_SHADING_RATE_ORDERED || |
| m_data.interlock == INT_SHADING_RATE_UNORDERED ? VK_TRUE : VK_FALSE; |
| |
| Move<VkPipeline> pipeline; |
| Move<VkRenderPass> renderPass; |
| Move<VkFramebuffer> framebuffer; |
| |
| { |
| const vk::VkSubpassDescription subpassDesc = |
| { |
| (vk::VkSubpassDescriptionFlags)0, |
| vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint |
| 0u, // inputCount |
| DE_NULL, // pInputAttachments |
| 0u, // colorCount |
| DE_NULL, // pColorAttachments |
| DE_NULL, // pResolveAttachments |
| DE_NULL, // depthStencilAttachment |
| 0u, // preserveCount |
| DE_NULL, // pPreserveAttachments |
| }; |
| const vk::VkRenderPassCreateInfo renderPassParams = |
| { |
| vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (vk::VkRenderPassCreateFlags)0, |
| 0u, // attachmentCount |
| DE_NULL, // pAttachments |
| 1u, // subpassCount |
| &subpassDesc, // pSubpasses |
| 0u, // dependencyCount |
| DE_NULL, // pDependencies |
| }; |
| |
| renderPass = createRenderPass(vk, device, &renderPassParams); |
| |
| const vk::VkFramebufferCreateInfo framebufferParams = |
| { |
| vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (vk::VkFramebufferCreateFlags)0, |
| *renderPass, // renderPass |
| 0u, // attachmentCount |
| DE_NULL, // pAttachments |
| m_data.dim, // width |
| m_data.dim, // height |
| 1u, // layers |
| }; |
| |
| framebuffer = createFramebuffer(vk, device, &framebufferParams); |
| |
| const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags; |
| 0u, // deUint32 vertexBindingDescriptionCount; |
| DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions; |
| 0u, // deUint32 vertexAttributeDescriptionCount; |
| DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; |
| }; |
| |
| const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags; |
| VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology; |
| VK_FALSE // VkBool32 primitiveRestartEnable; |
| }; |
| |
| const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags; |
| VK_FALSE, // VkBool32 depthClampEnable; |
| VK_FALSE, // VkBool32 rasterizerDiscardEnable; |
| VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode; |
| VK_CULL_MODE_NONE, // VkCullModeFlags cullMode; |
| VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace; |
| VK_FALSE, // VkBool32 depthBiasEnable; |
| 0.0f, // float depthBiasConstantFactor; |
| 0.0f, // float depthBiasClamp; |
| 0.0f, // float depthBiasSlopeFactor; |
| 1.0f // float lineWidth; |
| }; |
| |
| const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| 0u, // VkPipelineMultisampleStateCreateFlags flags |
| (VkSampleCountFlagBits)m_data.samples, // VkSampleCountFlagBits rasterizationSamples |
| m_data.sampleShading ? VK_TRUE : VK_FALSE, // VkBool32 sampleShadingEnable |
| 1.0f, // float minSampleShading |
| DE_NULL, // const VkSampleMask* pSampleMask |
| VK_FALSE, // VkBool32 alphaToCoverageEnable |
| VK_FALSE // VkBool32 alphaToOneEnable |
| }; |
| |
| VkViewport viewport = makeViewport(m_data.dim, m_data.dim); |
| VkRect2D scissor = makeRect2D(m_data.dim, m_data.dim); |
| |
| VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| { 2, 2 }, // VkExtent2D fragmentSize; |
| { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR }, // VkFragmentShadingRateCombinerOpKHR combinerOps[2]; |
| }; |
| |
| const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags |
| 1u, // deUint32 viewportCount |
| &viewport, // const VkViewport* pViewports |
| 1u, // deUint32 scissorCount |
| &scissor // const VkRect2D* pScissors |
| }; |
| |
| Move<VkShaderModule> fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0); |
| Move<VkShaderModule> vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0); |
| deUint32 numStages = 2u; |
| |
| const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = |
| { |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| DE_NULL, |
| (VkPipelineShaderStageCreateFlags)0, |
| VK_SHADER_STAGE_VERTEX_BIT, // stage |
| *vs, // shader |
| "main", |
| DE_NULL, // pSpecializationInfo |
| }, |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| DE_NULL, |
| (VkPipelineShaderStageCreateFlags)0, |
| VK_SHADER_STAGE_FRAGMENT_BIT, // stage |
| *fs, // shader |
| "main", |
| DE_NULL, // pSpecializationInfo |
| } |
| }; |
| |
| const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType; |
| shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL, // const void* pNext; |
| (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags; |
| numStages, // deUint32 stageCount; |
| &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages; |
| &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState; |
| &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; |
| DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState; |
| &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState; |
| &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState; |
| &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState; |
| DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; |
| DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState; |
| DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState; |
| pipelineLayout.get(), // VkPipelineLayout layout; |
| renderPass.get(), // VkRenderPass renderPass; |
| 0u, // deUint32 subpass; |
| DE_NULL, // VkPipeline basePipelineHandle; |
| 0 // int basePipelineIndex; |
| }; |
| |
| pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo); |
| } |
| |
| const VkImageMemoryBarrier imageBarrier = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| 0u, // VkAccessFlags srcAccessMask |
| VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask |
| VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout |
| VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout |
| VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex |
| VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex |
| **image, // VkImage image |
| { |
| VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask |
| 0u, // uint32_t baseMipLevel |
| 1u, // uint32_t mipLevels, |
| 0u, // uint32_t baseArray |
| 1u, // uint32_t arraySize |
| } |
| }; |
| |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| (VkDependencyFlags)0, |
| 0, (const VkMemoryBarrier*)DE_NULL, |
| 0, (const VkBufferMemoryBarrier*)DE_NULL, |
| 1, &imageBarrier); |
| |
| vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline); |
| |
| VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u); |
| VkClearValue clearColor = makeClearValueColorU32(0,0,0,0); |
| |
| VkMemoryBarrier memBarrier = |
| { |
| VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType |
| DE_NULL, // pNext |
| 0u, // srcAccessMask |
| 0u, // dstAccessMask |
| }; |
| |
| vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range); |
| |
| vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0); |
| |
| memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| |
| beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, |
| makeRect2D(m_data.dim, m_data.dim), |
| 0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE); |
| |
| // Draw N fullscreen "quads", one per instance. |
| deUint32 N = 32 / bitsPerQuad(m_data); |
| deUint32 expectedValue = 0xFFFFFFFF; |
| vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u); |
| |
| endRenderPass(vk, *cmdBuffer); |
| |
| memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| |
| deUint32 copyDimX = m_data.dim; |
| deUint32 copyDimY = m_data.dim; |
| |
| if (m_data.isSampleInterlock()) |
| copyDimX *= m_data.samples; |
| |
| if (shadingRateEnable) |
| { |
| copyDimX /= 2; |
| copyDimY /= 2; |
| } |
| |
| if (m_data.resType == RES_IMAGE) |
| { |
| const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(copyDimX, copyDimY, 1u), |
| makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u)); |
| vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, ©Region); |
| } |
| else |
| { |
| const VkBufferCopy copyRegion = makeBufferCopy(0u, 0u, copyDimX*copyDimY*sizeof(deUint32)); |
| vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, ©Region); |
| } |
| |
| memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| |
| endCommandBuffer(vk, *cmdBuffer); |
| |
| submitCommandsAndWait(vk, device, queue, cmdBuffer.get()); |
| |
| deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr(); |
| invalidateAlloc(vk, device, copyBuffer->getAllocation()); |
| |
| qpTestResult res = QP_TEST_RESULT_PASS; |
| |
| for (deUint32 i = 0; i < copyDimX*copyDimY; ++i) |
| { |
| if (m_data.killOdd && (i & 1)) |
| { |
| if (ptr[i] != 0) |
| res = QP_TEST_RESULT_FAIL; |
| } |
| else if (ptr[i] != expectedValue) |
| res = QP_TEST_RESULT_FAIL; |
| } |
| |
| return tcu::TestStatus(res, qpGetTestResultName(res)); |
| } |
| |
| } // anonymous |
| |
| tcu::TestCaseGroup* createBasicTests (tcu::TestContext& testCtx) |
| { |
| de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic", "Test VK_EXT_fragment_shader_interlock")); |
| |
| typedef struct |
| { |
| deUint32 count; |
| const char* name; |
| const char* description; |
| } TestGroupCase; |
| |
| TestGroupCase dimCases[] = |
| { |
| { 8, "8x8", "8x8" }, |
| { 16, "16x16", "16x16" }, |
| { 32, "32x32", "32x32" }, |
| { 64, "64x64", "64x64" }, |
| { 128, "128x128", "128x128" }, |
| { 256, "256x256", "256x256" }, |
| { 512, "512x512", "512x512" }, |
| { 1024, "1024x1024","1024x1024" }, |
| }; |
| |
| TestGroupCase resCases[] = |
| { |
| { RES_IMAGE, "image", "image" }, |
| { RES_SSBO, "ssbo", "ssbo" }, |
| }; |
| |
| TestGroupCase killCases[] = |
| { |
| { 0, "nodiscard", "no discard" }, |
| { 1, "discard", "discard odd pixels"}, |
| }; |
| |
| TestGroupCase sampCases[] = |
| { |
| { 1, "1xaa", "1 sample" }, |
| { 4, "4xaa", "4 sample" }, |
| }; |
| |
| TestGroupCase ssCases[] = |
| { |
| { 0, "no_sample_shading", "no sample shading" }, |
| { 1, "sample_shading", "sample shading" }, |
| }; |
| |
| TestGroupCase intCases[] = |
| { |
| { INT_PIXEL_ORDERED, "pixel_ordered", "pixel_ordered" }, |
| { INT_PIXEL_UNORDERED, "pixel_unordered", "pixel_unordered" }, |
| { INT_SAMPLE_ORDERED, "sample_ordered", "sample_ordered" }, |
| { INT_SAMPLE_UNORDERED, "sample_unordered", "sample_unordered" }, |
| { INT_SHADING_RATE_ORDERED, "shading_rate_ordered", "shading_rate_ordered" }, |
| { INT_SHADING_RATE_UNORDERED, "shading_rate_unordered", "shading_rate_unordered" }, |
| }; |
| |
| for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name, killCases[killNdx].description)); |
| for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name, resCases[resNdx].description)); |
| for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name, intCases[intNdx].description)); |
| for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name, sampCases[sampNdx].description)); |
| for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name, ssCases[ssNdx].description)); |
| for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++) |
| { |
| CaseDef c = |
| { |
| dimCases[dimNdx].count, // deUint32 set; |
| (Resource)resCases[resNdx].count, // Resource resType; |
| (Interlock)intCases[intNdx].count, // Interlock interlock; |
| (VkSampleCountFlagBits)sampCases[sampNdx].count, // VkSampleCountFlagBits samples; |
| (bool)killCases[killNdx].count, // bool killOdd; |
| (bool)ssCases[ssNdx].count, // bool sampleShading; |
| }; |
| |
| if (c.sampleShading && c.samples == 1) |
| continue; |
| |
| ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, dimCases[dimNdx].description, c)); |
| } |
| sampGroup->addChild(ssGroup.release()); |
| } |
| intGroup->addChild(sampGroup.release()); |
| } |
| resGroup->addChild(intGroup.release()); |
| } |
| killGroup->addChild(resGroup.release()); |
| } |
| group->addChild(killGroup.release()); |
| } |
| return group.release(); |
| } |
| |
| void cleanupDevice() |
| { |
| g_singletonDevice.clear(); |
| } |
| |
| } // FragmentShaderInterlock |
| } // vkt |