| /*------------------------------------------------------------------------ |
| * Vulkan Conformance Tests |
| * ------------------------ |
| * |
| * Copyright (c) 2017-2019 The Khronos Group Inc. |
| * Copyright (c) 2018-2019 NVIDIA Corporation |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| *//*! |
| * \file |
| * \brief Vulkan Memory Model tests |
| *//*--------------------------------------------------------------------*/ |
| |
| #include "vktMemoryModelTests.hpp" |
| #include "vktMemoryModelPadding.hpp" |
| #include "vktMemoryModelSharedLayout.hpp" |
| |
| #include "vkBufferWithMemory.hpp" |
| #include "vkImageWithMemory.hpp" |
| #include "vkQueryUtil.hpp" |
| #include "vkBuilderUtil.hpp" |
| #include "vkCmdUtil.hpp" |
| #include "vkTypeUtil.hpp" |
| #include "vkObjUtil.hpp" |
| |
| #include "vktTestCase.hpp" |
| |
| #include "deDefs.h" |
| #include "deMath.h" |
| #include "deSharedPtr.hpp" |
| #include "deString.h" |
| |
| #include "tcuTestCase.hpp" |
| #include "tcuTestLog.hpp" |
| |
| #include <string> |
| #include <sstream> |
| |
| namespace vkt |
| { |
| namespace MemoryModel |
| { |
| namespace |
| { |
| using namespace vk; |
| using namespace std; |
| |
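| // Each test pairs up invocations that synchronize through a "guard" |
| // variable. For message passing (TT_MP) one side writes a payload and |
| // releases the guard, while the partner acquires the guard and checks the |
| // payload; for write-after-read (TT_WAR) the sync must keep one side's |
| // payload write from overtaking the partner's earlier payload read. |
| // The enums below parameterize how that synchronization is expressed. |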
| typedef enum |
| { |
| TT_MP = 0, // message passing |
| TT_WAR, // write-after-read hazard |
| } TestType; |
| |
| typedef enum |
| { |
| ST_FENCE_FENCE = 0, |
| ST_FENCE_ATOMIC, |
| ST_ATOMIC_FENCE, |
| ST_ATOMIC_ATOMIC, |
| ST_CONTROL_BARRIER, |
| ST_CONTROL_AND_MEMORY_BARRIER, |
| } SyncType; |
| |
| typedef enum |
| { |
| SC_BUFFER = 0, |
| SC_IMAGE, |
| SC_WORKGROUP, |
| SC_PHYSBUFFER, |
| } StorageClass; |
| |
| typedef enum |
| { |
| SCOPE_DEVICE = 0, |
| SCOPE_QUEUEFAMILY, |
| SCOPE_WORKGROUP, |
| SCOPE_SUBGROUP, |
| } Scope; |
| |
| typedef enum |
| { |
| STAGE_COMPUTE = 0, |
| STAGE_VERTEX, |
| STAGE_FRAGMENT, |
| } Stage; |
| |
| typedef enum |
| { |
| DATA_TYPE_UINT = 0, |
| DATA_TYPE_UINT64, |
| DATA_TYPE_FLOAT32, |
| DATA_TYPE_FLOAT64, |
| } DataType; |
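| // DATA_TYPE_FLOAT32/FLOAT64 exercise the atomics added by |
| // VK_EXT_shader_atomic_float; the required features are checked in |
| // checkSupport() below. |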
| |
| const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; |
| const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; |
| |
| struct CaseDef |
| { |
| bool payloadMemLocal; // payload resource lives in device-local memory |
| bool guardMemLocal; // guard resource lives in device-local memory |
| bool coherent; // use coherent qualifiers instead of explicit availability/visibility operations |
| bool core11; // restrict to core Vulkan 1.1 (no vulkanMemoryModel feature) |
| bool atomicRMW; // use atomicExchange instead of atomicStore/atomicLoad on the guard |
| TestType testType; // message passing (TT_MP) or write-after-read (TT_WAR) |
| StorageClass payloadSC; // storage class of the payload |
| StorageClass guardSC; // storage class of the guard |
| Scope scope; // scope the invocation pairs synchronize at |
| SyncType syncType; // how release/acquire are expressed (fences, atomics, barriers) |
| Stage stage; // shader stage that runs the test |
| DataType dataType; // type of the payload and guard values |
| bool transitive; // test availability/visibility chains (see initProgramsTransitive) |
| bool transitiveVis; // device-scope visibility performed only by invocation (0,0) |
| }; |
| |
| class MemoryModelTestInstance : public TestInstance |
| { |
| public: |
| MemoryModelTestInstance (Context& context, const CaseDef& data); |
| ~MemoryModelTestInstance (void); |
| tcu::TestStatus iterate (void); |
| private: |
| CaseDef m_data; |
| |
| enum |
| { |
| WIDTH = 256, |
| HEIGHT = 256 |
| }; |
| }; |
| |
| MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data) |
| : vkt::TestInstance (context) |
| , m_data (data) |
| { |
| } |
| |
| MemoryModelTestInstance::~MemoryModelTestInstance (void) |
| { |
| } |
| |
| class MemoryModelTestCase : public TestCase |
| { |
| public: |
| MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data); |
| ~MemoryModelTestCase (void); |
| virtual void initPrograms (SourceCollections& programCollection) const; |
| virtual void initProgramsTransitive(SourceCollections& programCollection) const; |
| virtual TestInstance* createInstance (Context& context) const; |
| virtual void checkSupport (Context& context) const; |
| |
| private: |
| CaseDef m_data; |
| }; |
| |
| MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data) |
| : vkt::TestCase (context, name, desc) |
| , m_data (data) |
| { |
| } |
| |
| MemoryModelTestCase::~MemoryModelTestCase (void) |
| { |
| } |
| |
| void MemoryModelTestCase::checkSupport(Context& context) const |
| { |
| if (!context.contextSupports(vk::ApiVersion(1, 1, 0))) |
| { |
| TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported"); |
| } |
| |
| if (!m_data.core11) |
| { |
| if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel) |
| { |
| TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported"); |
| } |
| |
| if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope) |
| { |
| TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported"); |
| } |
| } |
| |
| if (m_data.scope == SCOPE_SUBGROUP) |
| { |
| // Check for subgroup support for scope_subgroup tests. |
| VkPhysicalDeviceSubgroupProperties subgroupProperties; |
| subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; |
| subgroupProperties.pNext = DE_NULL; |
| subgroupProperties.supportedOperations = 0; |
| |
| VkPhysicalDeviceProperties2 properties; |
| properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; |
| properties.pNext = &subgroupProperties; |
| |
| context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties); |
| |
| if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) || |
| !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) || |
| !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)) |
| { |
| TCU_THROW(NotSupportedError, "Subgroup features not supported"); |
| } |
| |
| VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT; |
| switch (m_data.stage) |
| { |
| default: DE_ASSERT(0); // fall through |
| case STAGE_COMPUTE: stage = VK_SHADER_STAGE_COMPUTE_BIT; break; |
| case STAGE_VERTEX: stage = VK_SHADER_STAGE_VERTEX_BIT; break; |
| case STAGE_FRAGMENT: stage = VK_SHADER_STAGE_FRAGMENT_BIT; break; |
| } |
| |
| if ((subgroupProperties.supportedStages & stage) == 0) |
| { |
| TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage"); |
| } |
| } |
| if (m_data.dataType == DATA_TYPE_UINT64) |
| { |
| if (!context.getDeviceFeatures().shaderInt64) |
| { |
| TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported"); |
| } |
| if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics && |
| (m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER)) |
| { |
| TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported"); |
| } |
| if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics && |
| m_data.guardSC == SC_WORKGROUP) |
| { |
| TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported"); |
| } |
| } |
| |
| if (m_data.dataType == DATA_TYPE_FLOAT32) |
| { |
| if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float")) |
| TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float"); |
| |
| if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) && |
| (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported"); |
| } |
| |
| if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics)) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported"); |
| } |
| |
| if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported"); |
| } |
| } |
| |
| if (m_data.dataType == DATA_TYPE_FLOAT64) |
| { |
| if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float")) |
| TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float"); |
| |
| if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) && |
| (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported"); |
| } |
| |
| if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported"); |
| } |
| |
| if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)) |
| { |
| TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported"); |
| } |
| } |
| |
| if (m_data.transitive && |
| !context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains) |
| TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported"); |
| |
| if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) && !context.isBufferDeviceAddressSupported()) |
| TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported"); |
| |
| if (m_data.stage == STAGE_VERTEX) |
| { |
| if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics) |
| { |
| TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported"); |
| } |
| } |
| if (m_data.stage == STAGE_FRAGMENT) |
| { |
| if (!context.getDeviceFeatures().fragmentStoresAndAtomics) |
| { |
| TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported"); |
| } |
| } |
| } |
| |
| |
| void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const |
| { |
| if (m_data.transitive) |
| { |
| initProgramsTransitive(programCollection); |
| return; |
| } |
| DE_ASSERT(!m_data.transitiveVis); |
| |
| Scope invocationMapping = m_data.scope; |
| if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) && |
| (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)) |
| { |
| invocationMapping = SCOPE_WORKGROUP; |
| } |
| |
| const char *scopeStr; |
| switch (m_data.scope) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SCOPE_DEVICE: scopeStr = "gl_ScopeDevice"; break; |
| case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break; |
| case SCOPE_WORKGROUP: scopeStr = "gl_ScopeWorkgroup"; break; |
| case SCOPE_SUBGROUP: scopeStr = "gl_ScopeSubgroup"; break; |
| } |
| |
| const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" : |
| (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint"; |
| const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64); |
| |
| // Construct storageSemantics strings. Both release and acquire |
| // always have the payload storage class. They only include the |
| // guard storage class if they're using FENCE for that side of the |
| // sync. |
| std::stringstream storageSemanticsRelease; |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break; |
| case SC_IMAGE: storageSemanticsRelease << "gl_StorageSemanticsImage"; break; |
| case SC_WORKGROUP: storageSemanticsRelease << "gl_StorageSemanticsShared"; break; |
| } |
| std::stringstream storageSemanticsAcquire; |
| storageSemanticsAcquire << storageSemanticsRelease.str(); |
| if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break; |
| case SC_IMAGE: storageSemanticsRelease << " | gl_StorageSemanticsImage"; break; |
| case SC_WORKGROUP: storageSemanticsRelease << " | gl_StorageSemanticsShared"; break; |
| } |
| } |
| if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break; |
| case SC_IMAGE: storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break; |
| case SC_WORKGROUP: storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break; |
| } |
| } |
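| // For illustration: with payloadSC=SC_BUFFER, guardSC=SC_IMAGE and |
| // syncType=ST_FENCE_FENCE, both strings end up as |
| // "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage". |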
| |
| std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease; |
| |
| semanticsRelease << "gl_SemanticsRelease"; |
| semanticsAcquire << "gl_SemanticsAcquire"; |
| semanticsAcquireRelease << "gl_SemanticsAcquireRelease"; |
| if (!m_data.coherent && m_data.testType != TT_WAR) |
| { |
| DE_ASSERT(!m_data.core11); |
| semanticsRelease << " | gl_SemanticsMakeAvailable"; |
| semanticsAcquire << " | gl_SemanticsMakeVisible"; |
| semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible"; |
| } |
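| // For illustration: in the non-coherent message-passing case, |
| // semanticsAcquire becomes "gl_SemanticsAcquire | gl_SemanticsMakeVisible", |
| // so the acquire also performs an explicit visibility operation. |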
| |
| std::stringstream css; |
| css << "#version 450 core\n"; |
| if (!m_data.core11) |
| { |
| css << "#pragma use_vulkan_memory_model\n"; |
| } |
| if (!intType) |
| { |
| css << |
| "#extension GL_EXT_shader_atomic_float : enable\n" |
| "#extension GL_KHR_memory_scope_semantics : enable\n"; |
| } |
| css << |
| "#extension GL_KHR_shader_subgroup_basic : enable\n" |
| "#extension GL_KHR_shader_subgroup_shuffle : enable\n" |
| "#extension GL_KHR_shader_subgroup_ballot : enable\n" |
| "#extension GL_KHR_memory_scope_semantics : enable\n" |
| "#extension GL_ARB_gpu_shader_int64 : enable\n" |
| "#extension GL_EXT_buffer_reference : enable\n" |
| "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n" |
| "layout(constant_id = 0) const int DIM = 1;\n" |
| "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n" |
| "struct S { " << typeStr << " x[DIM*DIM]; };\n"; |
| |
| if (m_data.stage == STAGE_COMPUTE) |
| { |
| // Both local_size_x and local_size_y are specialized from constant_id 0 (DIM). |
| css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n"; |
| } |
| |
| const char *memqual = ""; |
| if (m_data.coherent) |
| { |
| if (m_data.core11) |
| { |
| // Vulkan 1.1 only has "coherent", use it regardless of scope |
| memqual = "coherent"; |
| } |
| else |
| { |
| switch (m_data.scope) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SCOPE_DEVICE: memqual = "devicecoherent"; break; |
| case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break; |
| case SCOPE_WORKGROUP: memqual = "workgroupcoherent"; break; |
| case SCOPE_SUBGROUP: memqual = "subgroupcoherent"; break; |
| } |
| } |
| } |
| else |
| { |
| DE_ASSERT(!m_data.core11); |
| memqual = "nonprivate"; |
| } |
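| // For illustration: SCOPE_DEVICE with coherent=true yields "devicecoherent", |
| // while every non-coherent case yields "nonprivate" and relies on explicit |
| // availability/visibility operations instead. |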
| |
| stringstream pushConstMembers; |
| |
| // Declare payload, guard, and fail resources |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n"; |
| pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break; |
| case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break; |
| case SC_IMAGE: |
| if (intType) |
| css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; |
| else |
| css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n"; |
| break; |
| case SC_WORKGROUP: css << "shared S payload;\n"; break; |
| } |
| if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER) |
| { |
| // The guard variable is only accessed with atomics and need not be declared coherent. |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n"; |
| pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break; |
| case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break; |
| case SC_IMAGE: |
| if (intType) |
| css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n"; |
| else |
| css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n"; |
| break; |
| case SC_WORKGROUP: css << "shared S guard;\n"; break; |
| } |
| } |
| |
| css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n"; |
| |
| if (!pushConstMembers.str().empty()) |
| { |
| css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n"; |
| } |
| |
| css << |
| "void main()\n" |
| "{\n" |
| " bool pass = true;\n" |
| " bool skip = false;\n"; |
| |
| if (m_data.payloadSC == SC_PHYSBUFFER) |
| css << " " << memqual << " PayloadRef payload = payloadref;\n"; |
| |
| if (m_data.stage == STAGE_FRAGMENT) |
| { |
| // Kill helper invocations so they don't load outside the bounds of the SSBO. |
| // Helper pixels are also initially "active" and if a thread gets one as its |
| // partner in SCOPE_SUBGROUP mode, it can't run the test. |
| css << " if (gl_HelperInvocation) { return; }\n"; |
| } |
| |
| // Compute coordinates based on the storage class and scope. |
| // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID. |
| // For device scope, we pair up GlobalInvocationID and DIM*NUM_WORKGROUP_EACH_DIM-1-GlobalInvocationID. |
| // For subgroup scope, we pair up each invocation with the one at gl_SubgroupInvocationID XOR (gl_SubgroupSize-1). |
| switch (invocationMapping) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SCOPE_SUBGROUP: |
| // If the partner invocation isn't active, the shuffle below will be undefined. Bail. |
| css << " uvec4 ballot = subgroupBallot(true);\n" |
| " if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n"; |
| |
| switch (m_data.stage) |
| { |
| default: DE_ASSERT(0); // fall through |
| case STAGE_COMPUTE: |
| css << |
| " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n" |
| " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n" |
| " uint sharedCoord = localId.y * DIM + localId.x;\n" |
| " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n" |
| " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n" |
| " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n" |
| " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n" |
| " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n"; |
| break; |
| case STAGE_VERTEX: |
| css << |
| " uint bufferCoord = gl_VertexIndex;\n" |
| " uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n" |
| " ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n" |
| " ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n" |
| " gl_PointSize = 1.0f;\n" |
| " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n"; |
| break; |
| case STAGE_FRAGMENT: |
| css << |
| " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n" |
| " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n" |
| " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n" |
| " ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n" |
| " uint sharedCoord = localId.y * DIM + localId.x;\n" |
| " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n" |
| " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n" |
| " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n" |
| " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n" |
| " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n"; |
| break; |
| } |
| break; |
| case SCOPE_WORKGROUP: |
| css << |
| " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n" |
| " ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n" |
| " uint sharedCoord = localId.y * DIM + localId.x;\n" |
| " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n" |
| " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n" |
| " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n" |
| " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n" |
| " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n"; |
| break; |
| case SCOPE_QUEUEFAMILY: |
| case SCOPE_DEVICE: |
| switch (m_data.stage) |
| { |
| default: DE_ASSERT(0); // fall through |
| case STAGE_COMPUTE: |
| css << |
| " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n" |
| " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n" |
| " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n" |
| " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n" |
| " ivec2 imageCoord = globalId;\n" |
| " ivec2 partnerImageCoord = partnerGlobalId;\n"; |
| break; |
| case STAGE_VERTEX: |
| css << |
| " ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n" |
| " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n" |
| " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n" |
| " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n" |
| " ivec2 imageCoord = globalId;\n" |
| " ivec2 partnerImageCoord = partnerGlobalId;\n" |
| " gl_PointSize = 1.0f;\n" |
| " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n"; |
| break; |
| case STAGE_FRAGMENT: |
| css << |
| " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n" |
| " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n" |
| " ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n" |
| " ivec2 partnerGroupId = groupId;\n" |
| " uint sharedCoord = localId.y * DIM + localId.x;\n" |
| " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n" |
| " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n" |
| " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n" |
| " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n" |
| " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n"; |
| break; |
| } |
| break; |
| } |
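| // Whatever the mapping, bufferCoord/partnerBufferCoord (and the image and |
| // shared equivalents) now index this invocation's own element and its |
| // partner's element, respectively. |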
| |
| // Initialize shared memory, followed by a barrier |
| if (m_data.payloadSC == SC_WORKGROUP) |
| { |
| css << " payload.x[sharedCoord] = 0;\n"; |
| } |
| if (m_data.guardSC == SC_WORKGROUP) |
| { |
| css << " guard.x[sharedCoord] = 0;\n"; |
| } |
| if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP) |
| { |
| switch (invocationMapping) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SCOPE_SUBGROUP: css << " subgroupBarrier();\n"; break; |
| case SCOPE_WORKGROUP: css << " barrier();\n"; break; |
| } |
| } |
| |
| if (m_data.testType == TT_MP) |
| { |
| if (intType) |
| { |
| // Store payload |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break; |
| case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break; |
| case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break; |
| } |
| } |
| else |
| { |
| // Store payload |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break; |
| case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break; |
| case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n"; break; |
| } |
| } |
| } |
| else |
| { |
| DE_ASSERT(m_data.testType == TT_WAR); |
| // Load payload |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break; |
| case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break; |
| case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break; |
| } |
| } |
| if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER) |
| { |
| // Acquire and release separate from control barrier |
| css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n" |
| " controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n" |
| " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n"; |
| } |
| else if (m_data.syncType == ST_CONTROL_BARRIER) |
| { |
| // Control barrier performs both acquire and release |
| css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", " |
| << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", " |
| << semanticsAcquireRelease.str() << ");\n"; |
| } |
| else |
| { |
| // Don't type cast for 64-bit image atomics |
| const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr; |
| // Release barrier |
| std::stringstream atomicReleaseSemantics; |
| if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE) |
| { |
| css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"; |
| atomicReleaseSemantics << ", 0, 0"; |
| } |
| else |
| { |
| atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str(); |
| } |
| // Atomic store guard |
| if (m_data.atomicRMW) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| case SC_IMAGE: css << " imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| case SC_WORKGROUP: css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| } |
| } |
| else |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| case SC_WORKGROUP: css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break; |
| } |
| } |
| |
| std::stringstream atomicAcquireSemantics; |
| if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE) |
| { |
| atomicAcquireSemantics << ", 0, 0"; |
| } |
| else |
| { |
| atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str(); |
| } |
| // Atomic load guard |
| if (m_data.atomicRMW) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| case SC_IMAGE: css << " skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| case SC_WORKGROUP: css << " skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| } |
| } |
| else |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| case SC_WORKGROUP: css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break; |
| } |
| } |
| // Acquire barrier |
| if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE) |
| { |
| css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n"; |
| } |
| } |
| if (m_data.testType == TT_MP) |
| { |
| // Load payload |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break; |
| case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break; |
| case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break; |
| } |
| css << |
| " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n" |
| "}\n"; |
| } |
| else |
| { |
| DE_ASSERT(m_data.testType == TT_WAR); |
| // Store payload, only if the partner invocation has already done its read |
| css << " if (!skip) {\n "; |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n"; break; |
| case SC_IMAGE: |
| if (intType) { |
| css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; |
| } |
| else { |
| css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n"; |
| } |
| break; |
| case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n"; break; |
| } |
| css << |
| " }\n" |
| " if (r != 0) { fail.x[bufferCoord] = 1; }\n" |
| "}\n"; |
| } |
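| // For illustration, the generated main() body for a typical message-passing |
| // case (TT_MP, ST_FENCE_FENCE, SC_BUFFER payload and guard, SCOPE_DEVICE, |
| // coherent, uint, compute) is roughly: |
| // |
| // payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31); |
| // memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsBuffer, gl_SemanticsRelease); |
| // atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice, 0, 0); |
| // skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice, 0, 0) == 0; |
| // memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer | gl_StorageSemanticsBuffer, gl_SemanticsAcquire); |
| // uint r = payload.x[partnerBufferCoord]; |
| // if (!skip && r != uint(partnerBufferCoord)) { fail.x[bufferCoord] = 1; } |
| // |
| // (The payload and guard storage classes OR together, both Buffer here; the |
| // >>31 term reads the partner's element to create a data dependency and |
| // evaluates to zero for the values the test writes.) |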
| |
| // Draw a fullscreen triangle strip based on gl_VertexIndex |
| std::stringstream vss; |
| vss << |
| "#version 450 core\n" |
| "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n" |
| "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n"; |
| |
| const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u); |
| |
| switch (m_data.stage) |
| { |
| default: DE_ASSERT(0); // fall through |
| case STAGE_COMPUTE: |
| programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions; |
| break; |
| case STAGE_VERTEX: |
| programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions; |
| break; |
| case STAGE_FRAGMENT: |
| programCollection.glslSources.add("vert") << glu::VertexSource(vss.str()); |
| programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions; |
| break; |
| } |
| } |
| |
| |
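| // The transitive variants test availability/visibility chains: every |
| // invocation publishes its payload at workgroup scope, invocation (0,0) of |
| // each workgroup then releases a guard at device scope, and the partner |
| // workgroup acquires it at device scope. With transitiveVis, only the |
| // partner's invocation (0,0) performs the device-scope acquire and forwards |
| // the result to the rest of its workgroup through shared memory and a |
| // workgroup-scope barrier. |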
| void MemoryModelTestCase::initProgramsTransitive (SourceCollections& programCollection) const |
| { |
| Scope invocationMapping = m_data.scope; |
| |
| const char* typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" : |
| (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint"; |
| const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64); |
| |
| // Construct the storageSemantics strings for the payload and the guard |
| // storage classes. storageSemanticsAll is their union and is used by the |
| // device-scope fences. |
| std::stringstream storageSemanticsPayload; |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: storageSemanticsPayload << "gl_StorageSemanticsBuffer"; break; |
| case SC_IMAGE: storageSemanticsPayload << "gl_StorageSemanticsImage"; break; |
| } |
| std::stringstream storageSemanticsGuard; |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: storageSemanticsGuard << "gl_StorageSemanticsBuffer"; break; |
| case SC_IMAGE: storageSemanticsGuard << "gl_StorageSemanticsImage"; break; |
| } |
| std::stringstream storageSemanticsAll; |
| storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str(); |
| |
| std::stringstream css; |
| css << "#version 450 core\n"; |
| css << "#pragma use_vulkan_memory_model\n"; |
| if (!intType) |
| { |
| css << |
| "#extension GL_EXT_shader_atomic_float : enable\n" |
| "#extension GL_KHR_memory_scope_semantics : enable\n"; |
| } |
| css << |
| "#extension GL_KHR_shader_subgroup_basic : enable\n" |
| "#extension GL_KHR_shader_subgroup_shuffle : enable\n" |
| "#extension GL_KHR_shader_subgroup_ballot : enable\n" |
| "#extension GL_KHR_memory_scope_semantics : enable\n" |
| "#extension GL_ARB_gpu_shader_int64 : enable\n" |
| "#extension GL_EXT_buffer_reference : enable\n" |
| "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n" |
| "layout(constant_id = 0) const int DIM = 1;\n" |
| "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n" |
| "shared bool sharedSkip;\n"; |
| |
| css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n"; |
| |
| const char *memqual = ""; |
| const char *semAvail = ""; |
| const char *semVis = ""; |
| if (m_data.coherent) |
| { |
| memqual = "workgroupcoherent"; |
| } |
| else |
| { |
| memqual = "nonprivate"; |
| semAvail = " | gl_SemanticsMakeAvailable"; |
| semVis = " | gl_SemanticsMakeVisible"; |
| } |
| |
| stringstream pushConstMembers; |
| |
| // Declare payload, guard, and fail resources |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n"; |
| pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break; |
| case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break; |
| case SC_IMAGE: |
| if (intType) |
| css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; |
| else |
| css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n"; |
| break; |
| } |
| // The guard variable is only accessed with atomics and need not be declared coherent. |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n"; |
| pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break; |
| case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break; |
| case SC_IMAGE: |
| if (intType) |
| css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n"; |
| else |
| css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n"; |
| break; |
| } |
| |
| css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n"; |
| |
| if (!pushConstMembers.str().empty()) |
| { |
| css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n"; |
| } |
| |
| css << |
| "void main()\n" |
| "{\n" |
| " bool pass = true;\n" |
| " bool skip = false;\n" |
| " sharedSkip = false;\n"; |
| |
| if (m_data.payloadSC == SC_PHYSBUFFER) |
| css << " " << memqual << " PayloadRef payload = payloadref;\n"; |
| |
| // Compute coordinates based on the storage class and scope. |
| switch (invocationMapping) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SCOPE_DEVICE: |
| css << |
| " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n" |
| " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n" |
| " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n" |
| " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n" |
| " ivec2 imageCoord = globalId;\n" |
| " ivec2 partnerImageCoord = partnerGlobalId;\n" |
| " ivec2 globalId00 = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n" |
| " ivec2 partnerGlobalId00 = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_WorkGroupID.xy));\n" |
| " uint bufferCoord00 = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n" |
| " uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId00.x;\n" |
| " ivec2 imageCoord00 = globalId00;\n" |
| " ivec2 partnerImageCoord00 = partnerGlobalId00;\n"; |
| break; |
| } |
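| // The *00 coordinates address element (0,0) of this workgroup and of the |
| // partner workgroup; the guard is published there by invocation (0,0) and |
| // polled there by the acquiring side. |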
| |
| // Store payload |
| if (intType) |
| { |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break; |
| case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break; |
| } |
| } |
| else |
| { |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break; |
| case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break; |
| } |
| } |
| |
| // Sync to other threads in the workgroup |
| css << " controlBarrier(gl_ScopeWorkgroup, " |
| "gl_ScopeWorkgroup, " << |
| storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, " |
| "gl_SemanticsAcquireRelease" << semAvail << ");\n"; |
| |
| // Device-scope release/availability in invocation(0,0) |
| css << " if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n"; |
| const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr; |
| if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break; |
| case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break; |
| } |
| } |
| else |
| { |
| css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break; |
| case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break; |
| } |
| } |
| |
| // Device-scope acquire/visibility either in invocation(0,0) or in every invocation |
| if (!m_data.transitiveVis) |
| { |
| css << " }\n"; |
| } |
| if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC) |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break; |
| case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break; |
| } |
| } |
| else |
| { |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n"; break; |
| case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n"; break; |
| } |
| css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n"; |
| } |
| |
| // If invocation(0,0) did the acquire then store "skip" to shared memory and |
| // synchronize with the workgroup |
| if (m_data.transitiveVis) |
| { |
| css << " sharedSkip = skip;\n"; |
| css << " }\n"; |
| |
| css << " controlBarrier(gl_ScopeWorkgroup, " |
| "gl_ScopeWorkgroup, " << |
| storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, " |
| "gl_SemanticsAcquireRelease" << semVis << ");\n"; |
| css << " skip = sharedSkip;\n"; |
| } |
| |
| // Load payload |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: // fall through |
| case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break; |
| case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break; |
| } |
| css << |
| " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n" |
| "}\n"; |
| |
| const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u); |
| |
| programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions; |
| } |
| |
| TestInstance* MemoryModelTestCase::createInstance (Context& context) const |
| { |
| return new MemoryModelTestInstance(context, m_data); |
| } |
| |
| tcu::TestStatus MemoryModelTestInstance::iterate (void) |
| { |
| const DeviceInterface& vk = m_context.getDeviceInterface(); |
| const VkDevice device = m_context.getDevice(); |
| Allocator& allocator = m_context.getDefaultAllocator(); |
| |
| VkPhysicalDeviceProperties2 properties; |
| properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; |
| properties.pNext = DE_NULL; |
| |
| m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties); |
| |
| deUint32 DIM = 31; |
| deUint32 NUM_WORKGROUP_EACH_DIM = 8; |
| // If necessary, shrink workgroup size to fit HW limits |
| if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations) |
| { |
| DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations); |
| } |
| deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM); |
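| // For illustration: with the default DIM=31 and NUM_WORKGROUP_EACH_DIM=8 |
| // this is 31*31 invocations in each of 8*8 workgroups, i.e. 61504 |
| // invocations forming 30752 producer/consumer pairs. |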
| |
| VkDeviceSize bufferSizes[3]; |
| de::MovePtr<BufferWithMemory> buffers[3]; |
| vk::VkDescriptorBufferInfo bufferDescriptors[3]; |
| de::MovePtr<BufferWithMemory> copyBuffer; |
| |
| for (deUint32 i = 0; i < 3; ++i) |
| { |
| size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64)? sizeof(deUint64) : sizeof(deUint32); |
| // buffer2 is the "fail" buffer, and is always uint |
| if (i == 2) |
| elementSize = sizeof(deUint32); |
| bufferSizes[i] = NUM_INVOCATIONS * elementSize; |
| |
| vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; |
| |
| bool memoryDeviceAddress = false; |
| |
| bool local; |
| switch (i) |
| { |
| default: DE_ASSERT(0); // fall through |
| case 0: |
| if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER) |
| continue; |
| local = m_data.payloadMemLocal; |
| if (m_data.payloadSC == SC_PHYSBUFFER) |
| { |
| usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; |
| if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address")) |
| memoryDeviceAddress = true; |
| } |
| break; |
| case 1: |
| if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER) |
| continue; |
| local = m_data.guardMemLocal; |
| if (m_data.guardSC == SC_PHYSBUFFER) |
| { |
| usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; |
| if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address")) |
| memoryDeviceAddress = true; |
| } |
| break; |
| case 2: local = true; break; |
| } |
| |
| try |
| { |
| buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory( |
| vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags), |
| (memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) | |
| (local ? MemoryRequirement::Local : MemoryRequirement::NonLocal))); |
| } |
| catch (const tcu::NotSupportedError&) |
| { |
| if (!local) |
| { |
| TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported"); |
| } |
| throw; |
| } |
| bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]); |
| } |
| |
| // Try to use cached host memory for the buffer the CPU will read from; if unavailable, fall back to plain host-visible memory. |
| try |
| { |
| copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory( |
| vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached)); |
| } |
| catch (const tcu::NotSupportedError&) |
| { |
| copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory( |
| vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible)); |
| } |
| |
| VkFormat imageFormat; |
| switch (m_data.dataType) |
| { |
| case DATA_TYPE_UINT: |
| case DATA_TYPE_UINT64: |
| imageFormat = VK_FORMAT_R32_UINT; |
| break; |
| case DATA_TYPE_FLOAT32: |
| case DATA_TYPE_FLOAT64: |
| imageFormat = VK_FORMAT_R32_SFLOAT; |
| break; |
| default: |
| TCU_FAIL("Invalid data type."); |
| } |
| |
| const VkImageCreateInfo imageCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkImageCreateFlags)0u, // VkImageCreateFlags flags; |
| VK_IMAGE_TYPE_2D, // VkImageType imageType; |
| imageFormat, // VkFormat format; |
| { |
| DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 width; |
| DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 height; |
| 1u // deUint32 depth; |
| }, // VkExtent3D extent; |
| 1u, // deUint32 mipLevels; |
| 1u, // deUint32 arrayLayers; |
| VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; |
| VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling; |
| VK_IMAGE_USAGE_STORAGE_BIT |
| | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
| | VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage; |
| VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; |
| 0u, // deUint32 queueFamilyIndexCount; |
| DE_NULL, // const deUint32* pQueueFamilyIndices; |
| VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout; |
| }; |
| VkImageViewCreateInfo imageViewCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags; |
| DE_NULL, // VkImage image; |
| VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType; |
| imageFormat, // VkFormat format; |
| { |
| VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r; |
| VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g; |
| VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b; |
| VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a; |
| }, // VkComponentMapping components; |
| { |
| VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask; |
| 0u, // deUint32 baseMipLevel; |
| 1u, // deUint32 levelCount; |
| 0u, // deUint32 baseArrayLayer; |
| 1u // deUint32 layerCount; |
| } // VkImageSubresourceRange subresourceRange; |
| }; |
| |
| |
| de::MovePtr<ImageWithMemory> images[2]; |
| Move<VkImageView> imageViews[2]; |
| vk::VkDescriptorImageInfo imageDescriptors[2]; |
| |
| for (deUint32 i = 0; i < 2; ++i) |
| { |
| |
| bool local; |
| switch (i) |
| { |
| default: DE_ASSERT(0); // fall through |
| case 0: |
| if (m_data.payloadSC != SC_IMAGE) |
| continue; |
| local = m_data.payloadMemLocal; |
| break; |
| case 1: |
| if (m_data.guardSC != SC_IMAGE) |
| continue; |
| local = m_data.guardMemLocal; |
| break; |
| } |
| |
| try |
| { |
| images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory( |
| vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)); |
| } |
| catch (const tcu::NotSupportedError&) |
| { |
| if (!local) |
| { |
| TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported"); |
| } |
| throw; |
| } |
| imageViewCreateInfo.image = **images[i]; |
| imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, DE_NULL); |
| |
| imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL); |
| } |
| |
| vk::DescriptorSetLayoutBuilder layoutBuilder; |
| |
| switch (m_data.payloadSC) |
| { |
| default: |
| case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break; |
| case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break; |
| } |
| switch (m_data.guardSC) |
| { |
| default: |
| case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break; |
| case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break; |
| } |
| layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); |
| |
| vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device)); |
| |
| vk::Unique<vk::VkDescriptorPool> descriptorPool(vk::DescriptorPoolBuilder() |
| .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u) |
| .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u) |
| .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u)); |
| vk::Unique<vk::VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout)); |
| |
| vk::DescriptorSetUpdateBuilder setUpdateBuilder; |
| switch (m_data.payloadSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: |
| case SC_WORKGROUP: |
| break; |
| case SC_BUFFER: |
| setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0), |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]); |
| break; |
| case SC_IMAGE: |
| setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0), |
| VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]); |
| break; |
| } |
| switch (m_data.guardSC) |
| { |
| default: DE_ASSERT(0); // fall through |
| case SC_PHYSBUFFER: |
| case SC_WORKGROUP: |
| break; |
| case SC_BUFFER: |
| setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1), |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]); |
| break; |
| case SC_IMAGE: |
| setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1), |
| VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]); |
| break; |
| } |
| setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2), |
| VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]); |
| |
| setUpdateBuilder.update(vk, device); |
| |
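| // The push constant range below reserves 16 bytes: two 8-byte buffer device |
| // addresses (payloadref at offset 0, guard at offset 8) used by the |
| // SC_PHYSBUFFER variants. |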
| const VkPushConstantRange pushConstRange = |
| { |
| allShaderStages, // VkShaderStageFlags stageFlags |
| 0, // deUint32 offset |
| 16 // deUint32 size |
| }; |
| |
| const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (VkPipelineLayoutCreateFlags)0, |
| 1, // setLayoutCount |
| &descriptorSetLayout.get(), // pSetLayouts |
| 1u, // pushConstantRangeCount |
| &pushConstRange, // pPushConstantRanges |
| }; |
| |
| Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, DE_NULL); |
| |
| Move<VkPipeline> pipeline; |
| Move<VkRenderPass> renderPass; |
| Move<VkFramebuffer> framebuffer; |
| |
| VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS; |
| |
| const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM}; |
| |
| const vk::VkSpecializationMapEntry entries[2] = |
| { |
| {0, sizeof(deUint32) * 0, sizeof(deUint32)}, |
| {1, sizeof(deUint32) * 1, sizeof(deUint32)}, |
| }; |
| |
| const vk::VkSpecializationInfo specInfo = |
| { |
| 2, // mapEntryCount |
| entries, // pMapEntries |
| sizeof(specData), // dataSize |
| specData // pData |
| }; |
| |
| if (m_data.stage == STAGE_COMPUTE) |
| { |
| const Unique<VkShaderModule> shader (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0)); |
| |
| const VkPipelineShaderStageCreateInfo shaderCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| DE_NULL, |
| (VkPipelineShaderStageCreateFlags)0, |
| VK_SHADER_STAGE_COMPUTE_BIT, // stage |
| *shader, // shader |
| "main", |
| &specInfo, // pSpecializationInfo |
| }; |
| |
| const VkComputePipelineCreateInfo pipelineCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| DE_NULL, |
| 0u, // flags |
| shaderCreateInfo, // cs |
| *pipelineLayout, // layout |
| (vk::VkPipeline)0, // basePipelineHandle |
| 0u, // basePipelineIndex |
| }; |
| pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, DE_NULL); |
| } |
| else |
| { |
| |
| const vk::VkSubpassDescription subpassDesc = |
| { |
| (vk::VkSubpassDescriptionFlags)0, |
| vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint |
| 0u, // inputCount |
| DE_NULL, // pInputAttachments |
| 0u, // colorCount |
| DE_NULL, // pColorAttachments |
| DE_NULL, // pResolveAttachments |
| DE_NULL, // depthStencilAttachment |
| 0u, // preserveCount |
| DE_NULL, // pPreserveAttachments |
| }; |
| const vk::VkRenderPassCreateInfo renderPassParams = |
| { |
| vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (vk::VkRenderPassCreateFlags)0, |
| 0u, // attachmentCount |
| DE_NULL, // pAttachments |
| 1u, // subpassCount |
| &subpassDesc, // pSubpasses |
| 0u, // dependencyCount |
| DE_NULL, // pDependencies |
| }; |
| |
| renderPass = createRenderPass(vk, device, &renderPassParams); |
| |
| const vk::VkFramebufferCreateInfo framebufferParams = |
| { |
| vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType |
| DE_NULL, // pNext |
| (vk::VkFramebufferCreateFlags)0, |
| *renderPass, // renderPass |
| 0u, // attachmentCount |
| DE_NULL, // pAttachments |
| DIM*NUM_WORKGROUP_EACH_DIM, // width |
| DIM*NUM_WORKGROUP_EACH_DIM, // height |
| 1u, // layers |
| }; |
| |
| framebuffer = createFramebuffer(vk, device, &framebufferParams); |
| |
| const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags; |
| 0u, // deUint32 vertexBindingDescriptionCount; |
| DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions; |
| 0u, // deUint32 vertexAttributeDescriptionCount; |
| DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; |
| }; |
| |
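| // Vertex-stage tests draw one point per invocation; fragment-stage tests |
| // draw a single full-screen quad (see the draw calls below). |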
| const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags; |
| (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology; |
| VK_FALSE // VkBool32 primitiveRestartEnable; |
| }; |
| |
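| // Vertex-stage tests discard primitives after vertex shading, since only |
| // the vertex shader's memory side effects are being checked. |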
| const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags; |
| VK_FALSE, // VkBool32 depthClampEnable; |
| (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable; |
| VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode; |
| VK_CULL_MODE_NONE, // VkCullModeFlags cullMode; |
| VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace; |
| VK_FALSE, // VkBool32 depthBiasEnable; |
| 0.0f, // float depthBiasConstantFactor; |
| 0.0f, // float depthBiasClamp; |
| 0.0f, // float depthBiasSlopeFactor; |
| 1.0f // float lineWidth; |
| }; |
| |
| const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| 0u, // VkPipelineMultisampleStateCreateFlags flags |
| VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples |
| VK_FALSE, // VkBool32 sampleShadingEnable |
| 1.0f, // float minSampleShading |
| DE_NULL, // const VkSampleMask* pSampleMask |
| VK_FALSE, // VkBool32 alphaToCoverageEnable |
| VK_FALSE // VkBool32 alphaToOneEnable |
| }; |
| |
| VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM); |
| VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM); |
| |
| const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags |
| 1u, // deUint32 viewportCount |
| &viewport, // const VkViewport* pViewports |
| 1u, // deUint32 scissorCount |
| &scissor // const VkRect2D* pScissors |
| }; |
| |
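| // Select shader modules: vertex-stage tests run the "test" shader as the |
| // vertex stage (the fragment module is an unused placeholder), while |
| // fragment-stage tests pair the separate "vert" vertex shader with the |
| // "test" fragment shader. |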
| Move<VkShaderModule> fs; |
| Move<VkShaderModule> vs; |
| |
| deUint32 numStages; |
| if (m_data.stage == STAGE_VERTEX) |
| { |
| vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); |
| fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // placeholder; stageCount is 1, so this stage is never used |
| numStages = 1u; |
| } |
| else |
| { |
| vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0); |
| fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); |
| numStages = 2u; |
| } |
| |
| const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = |
| { |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| DE_NULL, |
| (VkPipelineShaderStageCreateFlags)0, |
| VK_SHADER_STAGE_VERTEX_BIT, // stage |
| *vs, // shader |
| "main", |
| &specInfo, // pSpecializationInfo |
| }, |
| { |
| VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| DE_NULL, |
| (VkPipelineShaderStageCreateFlags)0, |
| VK_SHADER_STAGE_FRAGMENT_BIT, // stage |
| *fs, // shader |
| "main", |
| &specInfo, // pSpecializationInfo |
| } |
| }; |
| |
| const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo = |
| { |
| VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags; |
| numStages, // deUint32 stageCount; |
| &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages; |
| &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState; |
| &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; |
| DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState; |
| &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState; |
| &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState; |
| &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState; |
| DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; |
| DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState; |
| DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState; |
| pipelineLayout.get(), // VkPipelineLayout layout; |
| renderPass.get(), // VkRenderPass renderPass; |
| 0u, // deUint32 subpass; |
| DE_NULL, // VkPipeline basePipelineHandle; |
| 0 // int basePipelineIndex; |
| }; |
| |
| pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo); |
| } |
| |
| const VkQueue queue = m_context.getUniversalQueue(); |
| Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex()); |
| Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY); |
| |
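| // Used to query device addresses for physical-storage-buffer cases; |
| // the buffer handle is filled in before each query below. |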
| VkBufferDeviceAddressInfo addrInfo = |
| { |
| VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType; |
| DE_NULL, // const void* pNext; |
| 0, // VkBuffer buffer |
| }; |
| |
| VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u); |
| VkClearValue clearColor = makeClearValueColorU32(0,0,0,0); |
| |
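| // Reusable memory barrier; the access masks are updated before each use. |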
| VkMemoryBarrier memBarrier = |
| { |
| VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType |
| DE_NULL, // pNext |
| 0u, // srcAccessMask |
| 0u, // dstAccessMask |
| }; |
| |
| const VkBufferCopy copyParams = |
| { |
| (VkDeviceSize)0u, // srcOffset |
| (VkDeviceSize)0u, // dstOffset |
| bufferSizes[2] // size |
| }; |
| |
| const deUint32 NUM_SUBMITS = 4; |
| |
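| // Record and submit NUM_SUBMITS command buffers. Each one re-clears the |
| // payload and guard resources and runs the test shader 50 times, giving |
| // any memory-ordering violation many opportunities to show up. |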
| for (deUint32 x = 0; x < NUM_SUBMITS; ++x) |
| { |
| beginCommandBuffer(vk, *cmdBuffer, 0u); |
| |
| if (x == 0) |
| vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0); |
| |
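| // Transition any test images from UNDEFINED to GENERAL so the transfer |
| // clears and shader accesses below are valid. |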
| for (deUint32 i = 0; i < 2; ++i) |
| { |
| if (!images[i]) |
| continue; |
| |
| const VkImageMemoryBarrier imageBarrier = |
| { |
| VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType |
| DE_NULL, // const void* pNext |
| 0u, // VkAccessFlags srcAccessMask |
| VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask |
| VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout |
| VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout |
| VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex |
| VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex |
| **images[i], // VkImage image |
| { |
| VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask |
| 0u, // uint32_t baseMipLevel |
| 1u, // uint32_t levelCount |
| 0u, // uint32_t baseArrayLayer |
| 1u, // uint32_t layerCount |
| } |
| }; |
| |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| (VkDependencyFlags)0, |
| 0, (const VkMemoryBarrier*)DE_NULL, |
| 0, (const VkBufferMemoryBarrier*)DE_NULL, |
| 1, &imageBarrier); |
| } |
| |
| vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL); |
| vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline); |
| |
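| // For physical-storage-buffer cases, fetch the buffer device address |
| // (via the KHR entry point when the extension is available, EXT otherwise) |
| // and pass it to the shaders through push constants: payload address at |
| // offset 0, guard address at offset 8. |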
| if (m_data.payloadSC == SC_PHYSBUFFER) |
| { |
| const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"); |
| addrInfo.buffer = **buffers[0]; |
| VkDeviceAddress addr; |
| if (useKHR) |
| addr = vk.getBufferDeviceAddress(device, &addrInfo); |
| else |
| addr = vk.getBufferDeviceAddressEXT(device, &addrInfo); |
| vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, |
| 0, sizeof(VkDeviceSize), &addr); |
| } |
| if (m_data.guardSC == SC_PHYSBUFFER) |
| { |
| const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"); |
| addrInfo.buffer = **buffers[1]; |
| VkDeviceAddress addr; |
| if (useKHR) |
| addr = vk.getBufferDeviceAddress(device, &addrInfo); |
| else |
| addr = vk.getBufferDeviceAddressEXT(device, &addrInfo); |
| vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages, |
| 8, sizeof(VkDeviceSize), &addr); |
| } |
| |
| for (deUint32 iters = 0; iters < 50; ++iters) |
| { |
| for (deUint32 i = 0; i < 2; ++i) |
| { |
| if (buffers[i]) |
| vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0); |
| if (images[i]) |
| vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range); |
| } |
| |
| memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| |
| if (m_data.stage == STAGE_COMPUTE) |
| { |
| vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1); |
| } |
| else |
| { |
| beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, |
| makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM), |
| 0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE); |
| // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing |
| if (m_data.stage == STAGE_VERTEX) |
| { |
| vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u); |
| } |
| else |
| { |
| vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u); |
| } |
| endRenderPass(vk, *cmdBuffer); |
| } |
| |
| memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| } |
| |
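| // On the last submit, copy the result buffer back and make the transfer |
| // write visible to host reads. |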
| if (x == NUM_SUBMITS - 1) |
| { |
| vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, ©Params); |
| memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; |
| vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, |
| 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL); |
| } |
| |
| endCommandBuffer(vk, *cmdBuffer); |
| |
| submitCommandsAndWait(vk, device, queue, cmdBuffer.get()); |
| |
| vk.resetCommandBuffer(*cmdBuffer, 0u); |
| } |
| |
| tcu::TestLog& log = m_context.getTestContext().getLog(); |
| |
| invalidateAlloc(vk, device, copyBuffer->getAllocation()); |
| deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr(); |
| qpTestResult res = QP_TEST_RESULT_PASS; |
| |
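| // The result buffer was zero-filled before the test ran; a nonzero slot |
| // means the corresponding invocation observed a memory model violation. |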
| deUint32 numErrors = 0; |
| for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i) |
| { |
| if (ptr[i] != 0) |
| { |
| if (numErrors < 256) |
| { |
| log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage; |
| } |
| numErrors++; |
| res = QP_TEST_RESULT_FAIL; |
| } |
| } |
| |
| if (numErrors) |
| { |
| log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage; |
| } |
| |
| return tcu::TestStatus(res, qpGetTestResultName(res)); |
| } |
| |
| } // anonymous |
| |
| tcu::TestCaseGroup* createTests (tcu::TestContext& testCtx) |
| { |
| de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup( |
| testCtx, "memory_model", "Memory model tests")); |
| |
| typedef struct |
| { |
| deUint32 value; |
| const char* name; |
| const char* description; |
| } TestGroupCase; |
| |
| TestGroupCase ttCases[] = |
| { |
| { TT_MP, "message_passing", "message passing" }, |
| { TT_WAR, "write_after_read", "write after read" }, |
| }; |
| |
| TestGroupCase core11Cases[] = |
| { |
| { 1, "core11", "Supported by Vulkan1.1" }, |
| { 0, "ext", "Requires VK_KHR_vulkan_memory_model extension" }, |
| }; |
| |
| TestGroupCase dtCases[] = |
| { |
| { DATA_TYPE_UINT, "u32", "uint32_t atomics" }, |
| { DATA_TYPE_UINT64, "u64", "uint64_t atomics" }, |
| { DATA_TYPE_FLOAT32, "f32", "float32 atomics" }, |
| { DATA_TYPE_FLOAT64, "f64", "float64 atomics" }, |
| }; |
| |
| TestGroupCase cohCases[] = |
| { |
| { 1, "coherent", "coherent payload variable" }, |
| { 0, "noncoherent", "noncoherent payload variable" }, |
| }; |
| |
| TestGroupCase stCases[] = |
| { |
| { ST_FENCE_FENCE, "fence_fence", "release fence, acquire fence" }, |
| { ST_FENCE_ATOMIC, "fence_atomic", "release fence, atomic acquire" }, |
| { ST_ATOMIC_FENCE, "atomic_fence", "atomic release, acquire fence" }, |
| { ST_ATOMIC_ATOMIC, "atomic_atomic", "atomic release, atomic acquire" }, |
| { ST_CONTROL_BARRIER, "control_barrier", "control barrier" }, |
| { ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier", "control barrier with release/acquire" }, |
| }; |
| |
| TestGroupCase rmwCases[] = |
| { |
| { 0, "atomicwrite", "atomic write" }, |
| { 1, "atomicrmw", "atomic rmw" }, |
| }; |
| |
| TestGroupCase scopeCases[] = |
| { |
| { SCOPE_DEVICE, "device", "device scope" }, |
| { SCOPE_QUEUEFAMILY, "queuefamily", "queuefamily scope" }, |
| { SCOPE_WORKGROUP, "workgroup", "workgroup scope" }, |
| { SCOPE_SUBGROUP, "subgroup", "subgroup scope" }, |
| }; |
| |
| TestGroupCase plCases[] = |
| { |
| { 0, "payload_nonlocal", "payload variable in non-local memory" }, |
| { 1, "payload_local", "payload variable in local memory" }, |
| }; |
| |
| TestGroupCase pscCases[] = |
| { |
| { SC_BUFFER, "buffer", "payload variable in buffer memory" }, |
| { SC_IMAGE, "image", "payload variable in image memory" }, |
| { SC_WORKGROUP, "workgroup", "payload variable in workgroup memory" }, |
| { SC_PHYSBUFFER,"physbuffer", "payload variable in physical storage buffer memory" }, |
| }; |
| |
| TestGroupCase glCases[] = |
| { |
| { 0, "guard_nonlocal", "guard variable in non-local memory" }, |
| { 1, "guard_local", "guard variable in local memory" }, |
| }; |
| |
| TestGroupCase gscCases[] = |
| { |
| { SC_BUFFER, "buffer", "guard variable in buffer memory" }, |
| { SC_IMAGE, "image", "guard variable in image memory" }, |
| { SC_WORKGROUP, "workgroup", "guard variable in workgroup memory" }, |
| { SC_PHYSBUFFER,"physbuffer", "guard variable in physical storage buffer memory" }, |
| }; |
| |
| TestGroupCase stageCases[] = |
| { |
| { STAGE_COMPUTE, "comp", "compute shader" }, |
| { STAGE_VERTEX, "vert", "vertex shader" }, |
| { STAGE_FRAGMENT, "frag", "fragment shader" }, |
| }; |
| |
| |
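| // Build the main test hierarchy. Each nesting level below corresponds to |
| // one CaseDef field; invalid or redundant combinations are filtered out |
| // inside the innermost loop. |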
| for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description)); |
| for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description)); |
| for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description)); |
| for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description)); |
| for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description)); |
| for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description)); |
| for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description)); |
| for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description)); |
| for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description)); |
| for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description)); |
| for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description)); |
| for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++) |
| { |
| CaseDef c = |
| { |
| !!plCases[plNdx].value, // bool payloadMemLocal; |
| !!glCases[glNdx].value, // bool guardMemLocal; |
| !!cohCases[cohNdx].value, // bool coherent; |
| !!core11Cases[core11Ndx].value, // bool core11; |
| !!rmwCases[rmwNdx].value, // bool atomicRMW; |
| (TestType)ttCases[ttNdx].value, // TestType testType; |
| (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC; |
| (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC; |
| (Scope)scopeCases[scopeNdx].value, // Scope scope; |
| (SyncType)stCases[stNdx].value, // SyncType syncType; |
| (Stage)stageCases[stageNdx].value, // Stage stage; |
| (DataType)dtCases[dtNdx].value, // DataType dataType; |
| false, // bool transitive; |
| false, // bool transitiveVis; |
| }; |
| |
| // Mustpass11 tests should only exercise things we expect to work on |
| // existing implementations. Exclude noncoherent tests which require |
| // new extensions, and assume atomic synchronization wouldn't work |
| // (i.e. atomics may be implemented as relaxed atomics). Exclude |
| // queuefamily scope which doesn't exist in Vulkan 1.1. Exclude |
| // physical storage buffer which doesn't support the legacy decorations. |
| if (c.core11 && |
| (c.coherent == 0 || |
| c.syncType == ST_FENCE_ATOMIC || |
| c.syncType == ST_ATOMIC_FENCE || |
| c.syncType == ST_ATOMIC_ATOMIC || |
| c.dataType == DATA_TYPE_UINT64 || |
| c.dataType == DATA_TYPE_FLOAT64 || |
| c.scope == SCOPE_QUEUEFAMILY || |
| c.payloadSC == SC_PHYSBUFFER || |
| c.guardSC == SC_PHYSBUFFER)) |
| { |
| continue; |
| } |
| |
| if (c.stage != STAGE_COMPUTE && |
| c.scope == SCOPE_WORKGROUP) |
| { |
| continue; |
| } |
| |
| // Don't exercise local and non-local for workgroup memory |
| // Also don't exercise workgroup memory for non-compute stages |
| if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE)) |
| { |
| continue; |
| } |
| if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE)) |
| { |
| continue; |
| } |
| // Can't do control barrier with larger than workgroup scope, or non-compute stages |
| if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) && |
| (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE)) |
| { |
| continue; |
| } |
| |
| // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases |
| if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC) |
| { |
| continue; |
| } |
| |
| // uint64/float32/float64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC |
| const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 || c.dataType == DATA_TYPE_FLOAT32 || c.dataType == DATA_TYPE_FLOAT64); |
| if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC) |
| { |
| continue; |
| } |
| |
| // No 64-bit image types, so skip tests with both payload and guard in image memory |
| if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE) |
| { |
| continue; |
| } |
| |
| // No support for atomic operations on 64-bit floating point images |
| if (c.dataType == DATA_TYPE_FLOAT64 && (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE)) |
| { |
| continue; |
| } |
| // Control barrier tests don't use a guard variable, so only run them |
| // with the default guard storage class (buffer) and nonlocal guard memory |
| if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) && |
| (c.guardSC != SC_BUFFER || c.guardMemLocal != 0)) |
| { |
| continue; |
| } |
| |
| gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c)); |
| } |
| glGroup->addChild(gscGroup.release()); |
| } |
| pscGroup->addChild(glGroup.release()); |
| } |
| plGroup->addChild(pscGroup.release()); |
| } |
| scopeGroup->addChild(plGroup.release()); |
| } |
| rmwGroup->addChild(scopeGroup.release()); |
| } |
| stGroup->addChild(rmwGroup.release()); |
| } |
| cohGroup->addChild(stGroup.release()); |
| } |
| dtGroup->addChild(cohGroup.release()); |
| } |
| core11Group->addChild(dtGroup.release()); |
| } |
| ttGroup->addChild(core11Group.release()); |
| } |
| group->addChild(ttGroup.release()); |
| } |
| |
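| // Transitive visibility tests: the payload must become visible through a |
| // chain of release/acquire operations rather than a single pair. These |
| // run compute-only at device scope with u32 data; workgroup storage and |
| // control-barrier sync types are filtered out below. |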
| TestGroupCase transVisCases[] = |
| { |
| { 0, "nontransvis", "destination invocation acquires" }, |
| { 1, "transvis", "invocation 0,0 acquires" }, |
| }; |
| |
| de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive", "transitive")); |
| for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description)); |
| for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description)); |
| for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description)); |
| for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description)); |
| for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description)); |
| for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++) |
| { |
| de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description)); |
| for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++) |
| { |
| CaseDef c = |
| { |
| !!plCases[plNdx].value, // bool payloadMemLocal; |
| !!glCases[glNdx].value, // bool guardMemLocal; |
| !!cohCases[cohNdx].value, // bool coherent; |
| false, // bool core11; |
| false, // bool atomicRMW; |
| TT_MP, // TestType testType; |
| (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC; |
| (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC; |
| SCOPE_DEVICE, // Scope scope; |
| (SyncType)stCases[stNdx].value, // SyncType syncType; |
| STAGE_COMPUTE, // Stage stage; |
| DATA_TYPE_UINT, // DataType dataType; |
| true, // bool transitive; |
| !!transVisCases[visNdx].value, // bool transitiveVis; |
| }; |
| if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP) |
| { |
| continue; |
| } |
| if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) |
| { |
| continue; |
| } |
| gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, transVisCases[visNdx].description, c)); |
| } |
| glGroup->addChild(gscGroup.release()); |
| } |
| pscGroup->addChild(glGroup.release()); |
| } |
| plGroup->addChild(pscGroup.release()); |
| } |
| stGroup->addChild(plGroup.release()); |
| } |
| cohGroup->addChild(stGroup.release()); |
| } |
| transGroup->addChild(cohGroup.release()); |
| } |
| group->addChild(transGroup.release()); |
| |
| // Padding tests. |
| group->addChild(createPaddingTests(testCtx)); |
| // Shared memory layout tests. |
| group->addChild(createSharedMemoryLayoutTests(testCtx)); |
| |
| return group.release(); |
| } |
| |
| } // MemoryModel |
| } // vkt |