// blob: 9f7de051c9fca995c3baf6755da2b07313bfcfb4 [file] [log] [blame]
/*------------------------------------------------------------------------
* OpenGL Conformance Tests
* ------------------------
*
* Copyright (c) 2017-2019 The Khronos Group Inc.
* Copyright (c) 2017 Codeplay Software Ltd.
* Copyright (c) 2019 NVIDIA Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/ /*!
* \file
* \brief Subgroups Tests
*/ /*--------------------------------------------------------------------*/
#include "glcSubgroupsBasicTests.hpp"
#include "glcSubgroupsTestsUtils.hpp"
#include <string>
#include <vector>
#include "tcuStringTemplate.hpp"
using namespace tcu;
using namespace std;
namespace glc
{
namespace subgroups
{
namespace
{
// Marker the generated shaders write for the invocation on which subgroupElect() returned true.
// Also used as a `case` label by the result checkers in this file.
static const deUint32 ELECTED_VALUE = 42u;
// Marker the generated shaders write for every invocation on which subgroupElect() returned false.
static const deUint32 UNELECTED_VALUE = 13u;
// Sizing constant for the shader-side tempBuffer arrays: the shaders declare
// SHADER_BUFFER_SIZE/4 uint elements (presumably the size is in bytes - TODO confirm).
static const deUint64 SHADER_BUFFER_SIZE = 4096ull;
static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
deUint32 width, deUint32 height, deUint32)
{
const float* const resultData = reinterpret_cast<const float*>(datas[0]);
for (deUint32 x = 0u; x < width; ++x)
{
for (deUint32 y = 0u; y < height; ++y)
{
const deUint32 ndx = (x * height + y) * 4u;
if (1.0f == resultData[ndx +2])
{
if(resultData[ndx] != resultData[ndx +1])
{
return false;
}
}
else if (resultData[ndx] != resultData[ndx +3])
{
return false;
}
}
}
return true;
}
static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
deUint32 width, deUint32)
{
const float* const resultData = reinterpret_cast<const float*>(datas[0]);
float poisonValuesFound = 0.0f;
float numSubgroupsUsed = 0.0f;
for (deUint32 x = 0; x < width; ++x)
{
deUint32 val = static_cast<deUint32>(resultData[x * 2]);
numSubgroupsUsed += resultData[x * 2 + 1];
switch (val)
{
default:
// some garbage value was found!
return false;
case UNELECTED_VALUE:
break;
case ELECTED_VALUE:
poisonValuesFound += 1.0f;
break;
}
}
return numSubgroupsUsed == poisonValuesFound;
}
static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
deUint32 width, deUint32)
{
const deUint32* const resultData =
reinterpret_cast<const deUint32*>(datas[0]);
deUint32 poisonValuesFound = 0;
for (deUint32 x = 0; x < width; ++x)
{
deUint32 val = resultData[x];
switch (val)
{
default:
// some garbage value was found!
return false;
case UNELECTED_VALUE:
break;
case ELECTED_VALUE:
poisonValuesFound++;
break;
}
}
// we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
const deUint32 numSubgroupsUsed =
*reinterpret_cast<const deUint32*>(datas[1]);
return numSubgroupsUsed == poisonValuesFound;
}
static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
deUint32 width, deUint32)
{
const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
// We used this SSBO to generate our unique value!
const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[1]);
for (deUint32 x = 0; x < width; ++x)
{
deUint32 val = resultData[x];
if (val != ref)
return false;
}
return true;
}
static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
deUint32 width, deUint32)
{
const float* const resultData = reinterpret_cast<const float*>(datas[0]);
for (deUint32 x = 0u; x < width; ++x)
{
const deUint32 ndx = x*4u;
if (1.0f == resultData[ndx +2])
{
if(resultData[ndx] != resultData[ndx +1])
return false;
}
else if (resultData[ndx] != resultData[ndx +3])
{
return false;
}
}
return true;
}
static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void*> datas,
deUint32 width, deUint32)
{
const float* const resultData = reinterpret_cast<const float*>(datas[0]);
for (deUint32 x = 0u; x < width; ++x)
{
const deUint32 ndx = x*4u;
if (0.0f == resultData[ndx +2] && resultData[ndx] != resultData[ndx +3])
{
return false;
}
}
return true;
}
static bool checkComputeSubgroupElect(std::vector<const void*> datas,
const deUint32 numWorkgroups[3], const deUint32 localSize[3],
deUint32)
{
return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
}
static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
const deUint32 numWorkgroups[3], const deUint32 localSize[3],
deUint32)
{
// We used this SSBO to generate our unique value!
const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[1]);
return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, ref);
}
/*!
 * \brief Subgroup "basic" operations covered by these tests.
 *
 * The enumerators are consecutive starting at zero; OPTYPE_LAST is a sentinel
 * equal to the number of real operations.
 */
enum OpType
{
	OPTYPE_ELECT							= 0,	//!< subgroupElect()
	OPTYPE_SUBGROUP_BARRIER					= 1,	//!< subgroupBarrier()
	OPTYPE_SUBGROUP_MEMORY_BARRIER			= 2,	//!< subgroupMemoryBarrier()
	OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER	= 3,	//!< subgroupMemoryBarrierBuffer()
	OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED	= 4,	//!< subgroupMemoryBarrierShared()
	OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE	= 5,	//!< subgroupMemoryBarrierImage()
	OPTYPE_LAST								= 6		//!< Sentinel, not an operation
};
/*!
 * \brief Maps an OpType value to the name of the GLSL built-in it exercises.
 *
 * The returned name is also spliced into generated shader source as a function
 * call, so it must match the GLSL spelling exactly.
 *
 * \param opType  One of the OPTYPE_* enumerators.
 * \return The GLSL function name; for any other value (including OPTYPE_LAST)
 *         DE_FATAL is raised and an empty string is returned.
 */
std::string getOpTypeName(int opType)
{
	switch (opType)
	{
		case OPTYPE_ELECT:							return "subgroupElect";
		case OPTYPE_SUBGROUP_BARRIER:				return "subgroupBarrier";
		case OPTYPE_SUBGROUP_MEMORY_BARRIER:		return "subgroupMemoryBarrier";
		case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:	return "subgroupMemoryBarrierBuffer";
		case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:	return "subgroupMemoryBarrierShared";
		case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:	return "subgroupMemoryBarrierImage";
		default:									break;
	}

	DE_FATAL("Unsupported op type");
	return "";
}
// Parameters of a single test case: which operation to exercise and in which
// shader stage. Passed by value to the program-initialisation and test functions.
struct CaseDefinition
{
// One of the OPTYPE_* enumerators (stored as int; compared against OPTYPE_* throughout this file).
int opType;
// Stage under test; compared for equality against single subgroups::SHADER_STAGE_*_BIT values.
subgroups::ShaderStageFlags shaderStage;
};
// Builds the GLSL sources for the framebuffer ("no SSBO") variants of the tests and
// registers them in programCollection. Results are communicated through the colour
// output rather than through shader storage buffers.
//
// Steps:
//  1. Add the helper stages: a pass-through fragment shader when the stage under test
//     is not fragment, and a vertex shader when the stage under test is not vertex.
//  2. OPTYPE_ELECT: the stage under test writes ELECTED_VALUE/UNELECTED_VALUE to
//     out_color.r and a flag to out_color.g. Only vertex, geometry, tessellation
//     control and tessellation evaluation are handled here; any other stage
//     (including fragment) reaches DE_FATAL.
//  3. Any other op type: a shared body snippet (`bdy`) is generated and embedded in
//     the shader for the stage under test, which reads its inputs from two uniform
//     blocks (and an image for the image barrier) and writes
//     r = tempResult, g = value, b = elected flag, a = tempResult2.
//
// programCollection - receives the generated sources under the names "vert", "tesc",
//                     "tese", "geometry" and "fragment".
// caseDef           - operation and stage under test.
void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
{
// Stage under test is not fragment: add a fragment shader that forwards in_color unchanged.
if(subgroups::SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
{
const string fragmentGLSL =
"${VERSION_DECL}\n"
"layout(location = 0) in highp vec4 in_color;\n"
"layout(location = 0) out highp vec4 out_color;\n"
"void main()\n"
"{\n"
" out_color = in_color;\n"
"}\n";
programCollection.add("fragment") << glu::FragmentSource(fragmentGLSL);
}
// Stage under test is fragment: add a vertex shader that derives its position from gl_VertexID.
if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
{
const string vertexGLSL =
"${VERSION_DECL}\n"
"void main (void)\n"
"{\n"
" vec2 uv = vec2((gl_VertexID << 1) & 2, gl_VertexID & 2);\n"
" gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
" gl_PointSize = 1.0f;\n"
"}\n";
programCollection.add("vert") << glu::VertexSource(vertexGLSL);
}
// Geometry/tessellation under test: the vertex shader comes from the shared utility.
else if (subgroups::SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
subgroups::setVertexShaderFrameBuffer(programCollection);
if (OPTYPE_ELECT == caseDef.opType)
{
// Decimal text of the two marker constants, spliced into the shader sources below.
std::ostringstream electedValue ;
std::ostringstream unelectedValue;
electedValue << ELECTED_VALUE;
unelectedValue << UNELECTED_VALUE;
if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
{
const string vertexGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"layout(location = 0) out vec4 out_color;\n"
"layout(location = 0) in highp vec4 in_position;\n"
"\n"
"void main (void)\n"
"{\n"
" if (subgroupElect())\n"
" {\n"
" out_color.r = " + electedValue.str() + ".0f;\n"
" out_color.g = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" out_color.r = " + unelectedValue.str() + ".0f;\n"
" out_color.g = 0.0f;\n"
" }\n"
" gl_Position = in_position;\n"
" gl_PointSize = 1.0f;\n"
"}\n";
programCollection.add("vert") << glu::VertexSource(vertexGLSL);
}
else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
{
const string geometryGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"layout(points) in;\n"
"layout(points, max_vertices = 1) out;\n"
"layout(location = 0) out vec4 out_color;\n"
"void main (void)\n"
"{\n"
" if (subgroupElect())\n"
" {\n"
" out_color.r = " + electedValue.str() + ".0f;\n"
" out_color.g = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" out_color.r = " + unelectedValue.str() + ".0f;\n"
" out_color.g = 0.0f;\n"
" }\n"
" gl_Position = gl_in[0].gl_Position;\n"
" EmitVertex();\n"
" EndPrimitive();\n"
"}\n";
programCollection.add("geometry") << glu::GeometrySource(geometryGLSL);
}
else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
{
// Control shader only sets the tessellation levels and forwards positions.
const string controlSourceGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_EXT_tessellation_shader : require\n"
"layout(vertices = 2) out;\n"
"void main (void)\n"
"{\n"
" if (gl_InvocationID == 0)\n"
" {\n"
" gl_TessLevelOuter[0] = 1.0f;\n"
" gl_TessLevelOuter[1] = 1.0f;\n"
" }\n"
" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
"}\n";
programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
// NOTE(review): unlike the other stages, the elected branch here writes
// 2*ELECTED_VALUE - UNELECTED_VALUE and 2.0 instead of ELECTED_VALUE and 1.0;
// the reason is not visible in this file - confirm against the result check.
const string evaluationSourceGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_EXT_tessellation_shader : require\n"
"layout(isolines, equal_spacing, ccw ) in;\n"
"layout(location = 0) out vec4 out_color;\n"
"\n"
"void main (void)\n"
"{\n"
" if (subgroupElect())\n"
" {\n"
" out_color.r = 2.0f * " + electedValue.str() + ".0f - " + unelectedValue.str() + ".0f;\n"
" out_color.g = 2.0f;\n"
" }\n"
" else\n"
" {\n"
" out_color.r = " + unelectedValue.str() + ".0f;\n"
" out_color.g = 0.0f;\n"
" }\n"
" gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
"}\n";
programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
}
else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
{
// Control shader performs the election and writes the per-vertex colour.
const string controlSourceGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_EXT_tessellation_shader : require\n"
"layout(vertices = 2) out;\n"
"layout(location = 0) out vec4 out_color[];\n"
"void main (void)\n"
"{\n"
" if (gl_InvocationID == 0)\n"
" {\n"
" gl_TessLevelOuter[0] = 1.0f;\n"
" gl_TessLevelOuter[1] = 1.0f;\n"
" }\n"
" if (subgroupElect())\n"
" {\n"
" out_color[gl_InvocationID].r = " + electedValue.str() + ".0f;\n"
" out_color[gl_InvocationID].g = 1.0f;\n"
" }\n"
" else\n"
" {\n"
" out_color[gl_InvocationID].r = " + unelectedValue.str() + ".0f;\n"
" out_color[gl_InvocationID].g = 0.0f;\n"
" }\n"
" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
"}\n";
programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
// Evaluation shader forwards the colour of the first control point.
const string evaluationSourceGLSL =
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"#extension GL_EXT_tessellation_shader : require\n"
"layout(isolines, equal_spacing, ccw ) in;\n"
"layout(location = 0) in vec4 in_color[];\n"
"layout(location = 0) out vec4 out_color;\n"
"\n"
"void main (void)\n"
"{\n"
" gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
" out_color = in_color[0];\n"
"}\n";
programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
}
else
{
DE_FATAL("Unsupported shader stage");
}
}
else
{
// Barrier op types: `bdy` is the stage-independent core of the shader. It expects the
// surrounding shader to declare `id`, `tempResult`, `tempResult2`, `value` and
// `tempBuffer` (or `tempImage` for the image barrier).
std::ostringstream bdy;
// Statement that raises the "elected" flag in the blue channel; the tessellation control
// stage writes to a per-vertex output array, hence the indexed form.
string color = (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n";
switch (caseDef.opType)
{
default:
DE_FATAL("Unhandled op type!");
break;
case OPTYPE_SUBGROUP_BARRIER:
case OPTYPE_SUBGROUP_MEMORY_BARRIER:
case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
{
// Elected invocation takes `value`, all others take tempBuffer[id]; the barrier call follows.
bdy << " tempResult2 = tempBuffer[id];\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " tempResult = value;\n"
<< " " << color
<< " }\n"
<< " else\n"
<< " {\n"
<< " tempResult = tempBuffer[id];\n"
<< " }\n"
<< " " << getOpTypeName(caseDef.opType) << "();\n";
break;
}
case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
// Same pattern, but the comparison data is loaded from tempImage instead of tempBuffer.
bdy << " tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " tempResult = value;\n"
<< " " << color
<< " }\n"
<< " else\n"
<< " {\n"
<< " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
<< " }\n"
<< " subgroupMemoryBarrierImage();\n";
break;
}
if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
{
// Fragment stage: `id` is the x coordinate of the elected fragment, broadcast to the subgroup.
std::ostringstream fragment;
fragment << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<< "precision highp int;\n"
<< "layout(location = 0) out highp vec4 out_color;\n"
<< "\n"
<< "layout(binding = 0, std140) uniform Buffer1\n"
<< "{\n"
<< " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
<< "};\n"
<< "\n"
<< "layout(binding = 1, std140) uniform Buffer2\n"
<< "{\n"
<< " uint value;\n"
<< "};\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
<< "void main (void)\n"
<< "{\n"
<< " if (gl_HelperInvocation) return;\n"
<< " uint id = 0u;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " id = uint(gl_FragCoord.x);\n"
<< " }\n"
<< " id = subgroupBroadcastFirst(id);\n"
<< " uint localId = id;\n"
<< " uint tempResult = 0u;\n"
<< " uint tempResult2 = 0u;\n"
<< " out_color.b = 0.0f;\n"
<< bdy.str()
<< " out_color.r = float(tempResult);\n"
<< " out_color.g = float(value);\n"
<< " out_color.a = float(tempResult2);\n"
<< "}\n";
programCollection.add("fragment") << glu::FragmentSource(fragment.str());
}
else if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
{
// Vertex stage: `id` is the gl_VertexID of the elected invocation.
std::ostringstream vertex;
vertex << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<<"\n"
<< "layout(location = 0) out vec4 out_color;\n"
<< "layout(location = 0) in highp vec4 in_position;\n"
<< "\n"
<< "layout(binding = 0, std140) uniform Buffer1\n"
<< "{\n"
<< " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
<< "};\n"
<< "\n"
<< "layout(binding = 1, std140) uniform Buffer2\n"
<< "{\n"
<< " uint value;\n"
<< "};\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
<< "void main (void)\n"
<< "{\n"
<< " uint id = 0u;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " id = uint(gl_VertexID);\n"
<< " }\n"
<< " id = subgroupBroadcastFirst(id);\n"
<< " uint tempResult = 0u;\n"
<< " uint tempResult2 = 0u;\n"
<< " out_color.b = 0.0f;\n"
<< bdy.str()
<< " out_color.r = float(tempResult);\n"
<< " out_color.g = float(value);\n"
<< " out_color.a = float(tempResult2);\n"
<< " gl_Position = in_position;\n"
<< " gl_PointSize = 1.0f;\n"
<< "}\n";
programCollection.add("vert") << glu::VertexSource(vertex.str());
}
else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
{
// Geometry stage: `id` is the gl_InvocationID of the elected invocation.
std::ostringstream geometry;
geometry << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout(points) in;\n"
<< "layout(points, max_vertices = 1) out;\n"
<< "layout(location = 0) out vec4 out_color;\n"
<< "layout(binding = 0, std140) uniform Buffer1\n"
<< "{\n"
<< " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
<< "};\n"
<< "\n"
<< "layout(binding = 1, std140) uniform Buffer2\n"
<< "{\n"
<< " uint value;\n"
<< "};\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
<< "void main (void)\n"
<< "{\n"
<< " uint id = 0u;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " id = uint(gl_InvocationID);\n"
<< " }\n"
<< " id = subgroupBroadcastFirst(id);\n"
<< " uint tempResult = 0u;\n"
<< " uint tempResult2 = 0u;\n"
<< " out_color.b = 0.0f;\n"
<< bdy.str()
<< " out_color.r = float(tempResult);\n"
<< " out_color.g = float(value);\n"
<< " out_color.a = float(tempResult2);\n"
<< " gl_Position = gl_in[0].gl_Position;\n"
<< " EmitVertex();\n"
<< " EndPrimitive();\n"
<< "}\n";
programCollection.add("geometry") << glu::GeometrySource(geometry.str());
}
else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
{
// Tessellation evaluation under test: the control shader is a plain pass-through,
// the evaluation shader uses gl_PrimitiveID of the elected invocation as `id`.
std::ostringstream controlSource;
std::ostringstream evaluationSource;
controlSource << "${VERSION_DECL}\n"
<< "#extension GL_EXT_tessellation_shader : require\n"
<< "layout(vertices = 2) out;\n"
<< "void main (void)\n"
<< "{\n"
<< " if (gl_InvocationID == 0)\n"
<<" {\n"
<< " gl_TessLevelOuter[0] = 1.0f;\n"
<< " gl_TessLevelOuter[1] = 1.0f;\n"
<< " }\n"
<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
<< "}\n";
evaluationSource << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<< "#extension GL_EXT_tessellation_shader : require\n"
<< "layout(isolines, equal_spacing, ccw ) in;\n"
<< "layout(location = 0) out vec4 out_color;\n"
<< "layout(binding = 0, std140) uniform Buffer1\n"
<< "{\n"
<< " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
<< "};\n"
<< "\n"
<< "layout(binding = 1, std140) uniform Buffer2\n"
<< "{\n"
<< " uint value;\n"
<< "};\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
<< "void main (void)\n"
<< "{\n"
<< " uint id = 0u;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " id = uint(gl_PrimitiveID);\n"
<< " }\n"
<< " id = subgroupBroadcastFirst(id);\n"
<< " uint tempResult = 0u;\n"
<< " uint tempResult2 = 0u;\n"
<< " out_color.b = 0.0f;\n"
<< bdy.str()
<< " out_color.r = float(tempResult);\n"
<< " out_color.g = float(value);\n"
<< " out_color.a = float(tempResult2);\n"
<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
<< "}\n";
programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
}
else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
{
// Tessellation control under test: it runs the body with gl_InvocationID of the elected
// invocation as `id`; the evaluation shader forwards the colour of control point 0.
std::ostringstream controlSource;
std::ostringstream evaluationSource;
controlSource << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<< "#extension GL_EXT_tessellation_shader : require\n"
<< "layout(vertices = 2) out;\n"
<< "layout(location = 0) out vec4 out_color[];\n"
<< "layout(binding = 0, std140) uniform Buffer1\n"
<< "{\n"
<< " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
<< "};\n"
<< "\n"
<< "layout(binding = 1, std140) uniform Buffer2\n"
<< "{\n"
<< " uint value;\n"
<< "};\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
<< "void main (void)\n"
<< "{\n"
<< " uint id = 0u;\n"
<< " if (gl_InvocationID == 0)\n"
<<" {\n"
<< " gl_TessLevelOuter[0] = 1.0f;\n"
<< " gl_TessLevelOuter[1] = 1.0f;\n"
<< " }\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " id = uint(gl_InvocationID);\n"
<< " }\n"
<< " id = subgroupBroadcastFirst(id);\n"
<< " uint tempResult = 0u;\n"
<< " uint tempResult2 = 0u;\n"
<< " out_color[gl_InvocationID].b = 0.0f;\n"
<< bdy.str()
<< " out_color[gl_InvocationID].r = float(tempResult);\n"
<< " out_color[gl_InvocationID].g = float(value);\n"
<< " out_color[gl_InvocationID].a = float(tempResult2);\n"
<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
<< "}\n";
evaluationSource << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
<< "#extension GL_EXT_tessellation_shader : require\n"
<< "layout(isolines, equal_spacing, ccw ) in;\n"
<< "layout(location = 0) in vec4 in_color[];\n"
<< "layout(location = 0) out vec4 out_color;\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
<< " out_color = in_color[0];\n"
<< "}\n";
programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
}
else
{
DE_FATAL("Unsupported shader stage");
}
}
}
// Builds the GLSL sources for the SSBO-based variants of the tests and registers them
// in programCollection.
//
// OPTYPE_ELECT:
//  - compute: one shader storing, per invocation, the bit count of a ballot over
//    "this invocation was elected".
//  - otherwise: vert/tesc/tese/geometry/fragment shaders are all generated. Each stage
//    records ELECTED_VALUE/UNELECTED_VALUE (SSBO bindings 0-3; the fragment stage
//    writes its colour output instead) and increments its own counter with atomicAdd
//    (SSBO bindings 4-8) once per elected invocation.
//
// Barrier op types:
//  - A body template (`bdyTemplate`) is generated with the placeholders ${SSBO1}
//    (binding number of the value/tempBuffer block) and ${IMG1} (image suffix), and is
//    specialised per stage.
//  - compute: ${SSBO1}=1, ${IMG1}=0.
//  - otherwise, per stage (value block / subgroup-id counter block / image binding):
//    vert 4/5/0, tesc 6/7/1, tese 8/9/2, geometry 10/11/3, fragment 12/13/4.
//
// In both graphics paths subgroups::addNoSubgroupShader() is called last.
//
// programCollection - receives the generated sources.
// caseDef           - operation and stage under test; the stage only distinguishes
//                     compute from non-compute here.
void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
{
if (OPTYPE_ELECT == caseDef.opType)
{
if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
{
// `offset` linearises gl_GlobalInvocationID over the whole dispatch. The helper text
// returned by getSharedMemoryBallotHelper() is expected to define sharedMemoryBallot().
std::ostringstream src;
src << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
<< "layout(binding = 0, std430) buffer Buffer1\n"
<< "{\n"
<< " uint result[];\n"
<< "};\n"
<< "\n"
<< subgroups::getSharedMemoryBallotHelper()
<< "void main (void)\n"
<< "{\n"
<< " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
<< " highp uint offset = globalSize.x * ((globalSize.y * "
"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
"gl_GlobalInvocationID.x;\n"
<< " uint value = " << UNELECTED_VALUE << "u;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " value = " << ELECTED_VALUE << "u;\n"
<< " }\n"
<< " uvec4 bits = uvec4(bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "u)));\n"
<< " result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
<< "}\n";
programCollection.add("comp") << glu::ComputeSource(src.str());
}
else
{
// Vertex: results in binding 0 indexed by gl_VertexID, counter in binding 4.
{
std::ostringstream vertex;
vertex << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout(binding = 0, std430) buffer Buffer0\n"
<< "{\n"
<< " uint result[];\n"
<< "} b0;\n"
<< "layout(binding = 4, std430) buffer Buffer4\n"
<< "{\n"
<< " uint numSubgroupsExecuted;\n"
<< "} b4;\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " b0.result[gl_VertexID] = " << ELECTED_VALUE << "u;\n"
<< " atomicAdd(b4.numSubgroupsExecuted, 1u);\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " b0.result[gl_VertexID] = " << UNELECTED_VALUE << "u;\n"
<< " }\n"
<< " float pixelSize = 2.0f/1024.0f;\n"
<< " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
<< " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
<< " gl_PointSize = 1.0f;\n"
<< "}\n";
programCollection.add("vert") << glu::VertexSource(vertex.str());
}
// Tessellation control: results in binding 1 indexed by gl_PrimitiveID, counter in binding 5.
{
std::ostringstream tesc;
tesc << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout(vertices=1) out;\n"
<< "layout(binding = 1, std430) buffer Buffer1\n"
<< "{\n"
<< " uint result[];\n"
<< "} b1;\n"
<< "layout(binding = 5, std430) buffer Buffer5\n"
<< "{\n"
<< " uint numSubgroupsExecuted;\n"
<< "} b5;\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " b1.result[gl_PrimitiveID] = " << ELECTED_VALUE << "u;\n"
<< " atomicAdd(b5.numSubgroupsExecuted, 1u);\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " b1.result[gl_PrimitiveID] = " << UNELECTED_VALUE << "u;\n"
<< " }\n"
<< " if (gl_InvocationID == 0)\n"
<< " {\n"
<< " gl_TessLevelOuter[0] = 1.0f;\n"
<< " gl_TessLevelOuter[1] = 1.0f;\n"
<< " }\n"
<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
<< "}\n";
programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
}
// Tessellation evaluation: two result slots per primitive (selected by rounding
// gl_TessCoord.x) in binding 2, counter in binding 6.
{
std::ostringstream tese;
tese << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout(isolines) in;\n"
<< "layout(binding = 2, std430) buffer Buffer2\n"
<< "{\n"
<< " uint result[];\n"
<< "} b2;\n"
<< "layout(binding = 6, std430) buffer Buffer6\n"
<< "{\n"
<< " uint numSubgroupsExecuted;\n"
<< "} b6;\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << "u;\n"
<< " atomicAdd(b6.numSubgroupsExecuted, 1u);\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << "u;\n"
<< " }\n"
<< " float pixelSize = 2.0f/1024.0f;\n"
<< " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
<< "}\n";
programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
}
// Geometry: results in binding 3 indexed by gl_PrimitiveIDIn, counter in binding 7.
// NOTE(review): this template has no ${VERSION_DECL} line and leaves ${TOPOLOGY} open;
// presumably addGeometryShadersFromTemplate() supplies both - confirm in the utils.
{
std::ostringstream geometry;
geometry << "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout(${TOPOLOGY}) in;\n"
<< "layout(points, max_vertices = 1) out;\n"
<< "layout(binding = 3, std430) buffer Buffer3\n"
<< "{\n"
<< " uint result[];\n"
<< "} b3;\n"
<< "layout(binding = 7, std430) buffer Buffer7\n"
<< "{\n"
<< " uint numSubgroupsExecuted;\n"
<< "} b7;\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " b3.result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << "u;\n"
<< " atomicAdd(b7.numSubgroupsExecuted, 1u);\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " b3.result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << "u;\n"
<< " }\n"
<< " gl_Position = gl_in[0].gl_Position;\n"
<< " EmitVertex();\n"
<< " EndPrimitive();\n"
<< "}\n";
subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
}
// Fragment: the marker goes to the uint colour output, counter in binding 8.
{
std::ostringstream fragment;
fragment << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "precision highp int;\n"
<< "layout(location = 0) out uint data;\n"
<< "layout(binding = 8, std430) buffer Buffer8\n"
<< "{\n"
<< " uint numSubgroupsExecuted;\n"
<< "} b8;\n"
<< "void main (void)\n"
<< "{\n"
<< " if (gl_HelperInvocation) return;\n"
<< " if (subgroupElect())\n"
<< " {\n"
<< " data = " << ELECTED_VALUE << "u;\n"
<< " atomicAdd(b8.numSubgroupsExecuted, 1u);\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " data = " << UNELECTED_VALUE << "u;\n"
<< " }\n"
<< "}\n";
programCollection.add("fragment") << glu::FragmentSource(fragment.str());
}
subgroups::addNoSubgroupShader(programCollection);
}
}
else
{
// Barrier op types. In every variant the elected invocation stores b${SSBO1}.value,
// the barrier under test is issued, and then every invocation reads the stored value
// back into tempResult. The surrounding shader must declare `id`, `localId` and
// `tempResult`.
std::ostringstream bdy;
switch (caseDef.opType)
{
default:
DE_FATAL("Unhandled op type!");
break;
case OPTYPE_SUBGROUP_BARRIER:
case OPTYPE_SUBGROUP_MEMORY_BARRIER:
case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
// Communication through the SSBO array tempBuffer.
bdy << " if (subgroupElect())\n"
<< " {\n"
<< " b${SSBO1}.tempBuffer[id] = b${SSBO1}.value;\n"
<< " }\n"
<< " " << getOpTypeName(caseDef.opType) << "();\n"
<< " tempResult = b${SSBO1}.tempBuffer[id];\n";
break;
case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
// Communication through the shared array tempShared, which only the compute shader below declares.
bdy << " if (subgroupElect())\n"
<< " {\n"
<< " tempShared[localId] = b${SSBO1}.value;\n"
<< " }\n"
<< " subgroupMemoryBarrierShared();\n"
<< " tempResult = tempShared[localId];\n";
break;
case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
// Communication through the image tempImage${IMG1}.
bdy << " if (subgroupElect())\n"
<< " {\n"
<< " imageStore(tempImage${IMG1}, ivec2(id, 0), uvec4(b${SSBO1}.value));\n"
<< " }\n"
<< " subgroupMemoryBarrierImage();\n"
<< " tempResult = imageLoad(tempImage${IMG1}, ivec2(id, 0)).x;\n";
break;
}
tcu::StringTemplate bdyTemplate(bdy.str());
if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
{
// Compute: value/tempBuffer block is binding 1, image is tempImage0. `localId` is
// gl_SubgroupID and `id` adds a linearised workgroup index to it.
std::ostringstream src;
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "1"));
bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
src << "${VERSION_DECL}\n"
<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
<< "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
<< "layout(binding = 0, std430) buffer Buffer0\n"
<< "{\n"
<< " uint result[];\n"
<< "} b0;\n"
<< "layout(binding = 1, std430) buffer Buffer1\n"
<< "{\n"
<< " uint value;\n"
<< " uint tempBuffer[];\n"
<< "} b1;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "\n")
<< "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
<< "\n"
<< "void main (void)\n"
<< "{\n"
<< " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
<< " highp uint offset = globalSize.x * ((globalSize.y * "
"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
"gl_GlobalInvocationID.x;\n"
<< " uint localId = gl_SubgroupID;\n"
<< " uint id = globalSize.x * ((globalSize.y * "
"gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
"gl_WorkGroupID.x + localId;\n"
<< " uint tempResult = 0u;\n"
<< bdyTemplate.specialize(bufferNameMapping)
<< " b0.result[offset] = tempResult;\n"
<< "}\n";
programCollection.add("comp") << glu::ComputeSource(src.str());
}
else
{
// Graphics stages. In each shader the elected invocation obtains a unique `id` by
// atomically incrementing that stage's subgroupID counter and broadcasts it to the
// rest of its subgroup.
// Vertex: results binding 0, value block binding 4, counter binding 5, image 0.
{
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "4"));
bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
std::ostringstream vertex;
vertex <<
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"layout(binding = 0, std430) buffer Buffer0\n"
"{\n"
" uint result[];\n"
"} b0;\n"
"layout(binding = 4, std430) buffer Buffer4\n"
"{\n"
" uint value;\n"
" uint tempBuffer[];\n"
"} b4;\n"
"layout(binding = 5, std430) buffer Buffer5\n"
"{\n"
" uint subgroupID;\n"
"} b5;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "")
<< "void main (void)\n"
"{\n"
" uint id = 0u;\n"
" if (subgroupElect())\n"
" {\n"
" id = atomicAdd(b5.subgroupID, 1u);\n"
" }\n"
" id = subgroupBroadcastFirst(id);\n"
" uint localId = id;\n"
" uint tempResult = 0u;\n"
+ bdyTemplate.specialize(bufferNameMapping) +
" b0.result[gl_VertexID] = tempResult;\n"
" float pixelSize = 2.0f/1024.0f;\n"
" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
" gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
" gl_PointSize = 1.0f;\n"
"}\n";
programCollection.add("vert") << glu::VertexSource(vertex.str());
}
// Tessellation control: results binding 1, value block binding 6, counter binding 7, image 1.
{
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "6"));
bufferNameMapping.insert(pair<string, string>("IMG1", "1"));
std::ostringstream tesc;
tesc <<
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"layout(vertices=1) out;\n"
"layout(binding = 1, std430) buffer Buffer1\n"
"{\n"
" uint result[];\n"
"} b1;\n"
"layout(binding = 6, std430) buffer Buffer6\n"
"{\n"
" uint value;\n"
" uint tempBuffer[];\n"
"} b6;\n"
"layout(binding = 7, std430) buffer Buffer7\n"
"{\n"
" uint subgroupID;\n"
"} b7;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 1, r32ui) uniform highp uimage2D tempImage1;\n" : "")
<< "void main (void)\n"
"{\n"
" uint id = 0u;\n"
" if (subgroupElect())\n"
" {\n"
" id = atomicAdd(b7.subgroupID, 1u);\n"
" }\n"
" id = subgroupBroadcastFirst(id);\n"
" uint localId = id;\n"
" uint tempResult = 0u;\n"
+ bdyTemplate.specialize(bufferNameMapping) +
" b1.result[gl_PrimitiveID] = tempResult;\n"
" if (gl_InvocationID == 0)\n"
" {\n"
" gl_TessLevelOuter[0] = 1.0f;\n"
" gl_TessLevelOuter[1] = 1.0f;\n"
" }\n"
" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
"}\n";
programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
}
// Tessellation evaluation: results binding 2, value block binding 8, counter binding 9, image 2.
{
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "8"));
bufferNameMapping.insert(pair<string, string>("IMG1", "2"));
std::ostringstream tese;
tese <<
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"layout(isolines) in;\n"
"layout(binding = 2, std430) buffer Buffer2\n"
"{\n"
" uint result[];\n"
"} b2;\n"
"layout(binding = 8, std430) buffer Buffer8\n"
"{\n"
" uint value;\n"
" uint tempBuffer[];\n"
"} b8;\n"
"layout(binding = 9, std430) buffer Buffer9\n"
"{\n"
" uint subgroupID;\n"
"} b9;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 2, r32ui) uniform highp uimage2D tempImage2;\n" : "")
<< "void main (void)\n"
"{\n"
" uint id = 0u;\n"
" if (subgroupElect())\n"
" {\n"
" id = atomicAdd(b9.subgroupID, 1u);\n"
" }\n"
" id = subgroupBroadcastFirst(id);\n"
" uint localId = id;\n"
" uint tempResult = 0u;\n"
+ bdyTemplate.specialize(bufferNameMapping) +
" b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult;\n"
" float pixelSize = 2.0f/1024.0f;\n"" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
"}\n";
programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
}
// Geometry: results binding 3, value block binding 10, counter binding 11, image 3.
// NOTE(review): no ${VERSION_DECL} line here and ${TOPOLOGY} is left open; presumably
// addGeometryShadersFromTemplate() supplies both - confirm in the utils.
{
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "10"));
bufferNameMapping.insert(pair<string, string>("IMG1", "3"));
std::ostringstream geometry;
geometry <<
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"layout(${TOPOLOGY}) in;\n"
"layout(points, max_vertices = 1) out;\n"
"layout(binding = 3, std430) buffer Buffer3\n"
"{\n"
" uint result[];\n"
"} b3;\n"
"layout(binding = 10, std430) buffer Buffer10\n"
"{\n"
" uint value;\n"
" uint tempBuffer[];\n"
"} b10;\n"
"layout(binding = 11, std430) buffer Buffer11\n"
"{\n"
" uint subgroupID;\n"
"} b11;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 3, r32ui) uniform highp uimage2D tempImage3;\n" : "")
<< "void main (void)\n"
"{\n"
" uint id = 0u;\n"
" if (subgroupElect())\n"
" {\n"
" id = atomicAdd(b11.subgroupID, 1u);\n"
" }\n"
" id = subgroupBroadcastFirst(id);\n"
" uint localId = id;\n"
" uint tempResult = 0u;\n"
+ bdyTemplate.specialize(bufferNameMapping) +
" b3.result[gl_PrimitiveIDIn] = tempResult;\n"
" gl_Position = gl_in[0].gl_Position;\n"
" EmitVertex();\n"
" EndPrimitive();\n"
"}\n";
subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
}
// Fragment: result goes to the uint colour output, value block binding 12,
// counter binding 13, image 4.
{
map<string, string> bufferNameMapping;
bufferNameMapping.insert(pair<string, string>("SSBO1", "12"));
bufferNameMapping.insert(pair<string, string>("IMG1", "4"));
std::ostringstream fragment;
fragment <<
"${VERSION_DECL}\n"
"#extension GL_KHR_shader_subgroup_basic: enable\n"
"#extension GL_KHR_shader_subgroup_ballot: enable\n"
"precision highp int;\n"
"layout(location = 0) out uint result;\n"
"layout(binding = 12, std430) buffer Buffer12\n"
"{\n"
" uint value;\n"
" uint tempBuffer[];\n"
"} b12;\n"
"layout(binding = 13, std430) buffer Buffer13\n"
"{\n"
" uint subgroupID;\n"
"} b13;\n"
<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 4, r32ui) uniform highp uimage2D tempImage4;\n" : "")
<< "void main (void)\n"
"{\n"
" if (gl_HelperInvocation) return;\n"
" uint id = 0u;\n"
" if (subgroupElect())\n"
" {\n"
" id = atomicAdd(b13.subgroupID, 1u);\n"
" }\n"
" id = subgroupBroadcastFirst(id);\n"
" uint localId = id;\n"
" uint tempResult = 0u;\n"
+ bdyTemplate.specialize(bufferNameMapping) +
" result = tempResult;\n"
"}\n";
programCollection.add("fragment") << glu::FragmentSource(fragment.str());
}
subgroups::addNoSubgroupShader(programCollection);
}
}
}
// Support check registered with every basic subgroup case.
//
// Throws NotSupportedError when subgroups::isSubgroupSupported() reports false
// for the given context. The case definition itself is not consulted.
void supportedCheck (Context& context, CaseDefinition caseDef)
{
	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);

	// The outcome does not depend on the operation or stage under test.
	DE_UNREF(caseDef);

	if (!subgroupsAvailable)
	{
		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
	}
}
// Framebuffer variant of the basic subgroup test for a single graphics stage.
//
// Returns a failing status when a capability that this code treats as mandatory
// is missing, throws NotSupportedError for optional capabilities, and otherwise
// hands off to the framebuffer test helper matching caseDef.shaderStage.
tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
{
	const bool	electCase	= (OPTYPE_ELECT == caseDef.opType);
	const bool	imageCase	= (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType);

	// Stage-level subgroup support: a failure if the stage is required to
	// support subgroup operations, otherwise the case is simply skipped.
	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
	{
		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
		{
			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
		}

		return tcu::TestStatus::fail(
			"Shader stage " +
			subgroups::getShaderStageName(caseDef.shaderStage) +
			" is required to support subgroup operations!");
	}

	// The basic feature is treated as mandatory.
	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
	{
		return tcu::TestStatus::fail(
			"Subgroup feature " +
			subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
			" is a required capability!");
	}

	// Every non-elect case outside the compute stage additionally needs ballot support.
	if (!electCase &&
		subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage &&
		!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
	{
		TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
	}

	// The image memory barrier case needs image support in the tested stage.
	if (imageCase && !subgroups::isImageSupportedForStageOnDevice(context, caseDef.shaderStage))
	{
		TCU_THROW(NotSupportedError, "Subgroup basic memory barrier image test for " +
									 subgroups::getShaderStageName(caseDef.shaderStage) +
									 " stage requires that image uniforms be supported on this stage");
	}

	// Extra inputs for the non-elect cases: two std140 buffers, plus one image
	// for the image memory barrier case.
	const deUint32						inputDatasCount	= imageCase ? 3u : 2u;
	std::vector<subgroups::SSBOData>	inputDatas		(inputDatasCount);

	{
		subgroups::SSBOData& valueBuffer = inputDatas[0];

		valueBuffer.binding			= 0u;
		valueBuffer.format			= subgroups::FORMAT_R32_UINT;
		valueBuffer.layout			= subgroups::SSBOData::LayoutStd140;
		valueBuffer.numElements		= SHADER_BUFFER_SIZE/4ull;
		valueBuffer.initializeType	= subgroups::SSBOData::InitializeNonZero;
	}

	{
		subgroups::SSBOData& singleValue = inputDatas[1];

		singleValue.binding			= 1u;
		singleValue.format			= subgroups::FORMAT_R32_UINT;
		singleValue.layout			= subgroups::SSBOData::LayoutStd140;
		singleValue.numElements		= 1ull;
		singleValue.initializeType	= subgroups::SSBOData::InitializeNonZero;
	}

	if (imageCase)
	{
		subgroups::SSBOData& image = inputDatas[2];

		image.binding			= 0u;
		image.isImage			= true;
		image.format			= subgroups::FORMAT_R32_UINT;
		image.layout			= subgroups::SSBOData::LayoutPacked;
		image.numElements		= SHADER_BUFFER_SIZE;
		image.initializeType	= subgroups::SSBOData::InitializeNone;
	}

	subgroups::SSBOData* const barrierInputs = &inputDatas[0];

	// The fragment stage has a single path that does not look at the op type.
	if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
	{
		return subgroups::makeFragmentFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, barrierInputs, inputDatasCount, checkFragmentSubgroupBarriersNoSSBO);
	}

	// For the remaining stages the elect case runs without extra inputs and
	// renders to a two-component target; all other cases use four components.
	if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
	{
		if (electCase)
		{
			return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
		}

		return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, barrierInputs, inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
	}

	if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
	{
		if (electCase)
		{
			return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
		}

		return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, barrierInputs, inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
	}

	// Any other stage is routed to the tessellation helper, which is told the
	// stage explicitly.
	if (electCase)
	{
		return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
	}

	// Tessellation control reuses the vertex-pipeline barrier checker; the
	// other stages reaching this point use the tessellation evaluation one.
	return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, barrierInputs, inputDatasCount,
		(subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ? checkVertexPipelineStagesSubgroupBarriersNoSSBO : checkTessellationEvaluationSubgroupBarriersNoSSBO,
		caseDef.shaderStage);
}
// SSBO-based variant of the basic subgroup test.
//
// A compute case definition runs through subgroups::makeComputeTest; any other
// case definition runs through subgroups::allStages on the requested stages
// that the implementation reports as supporting subgroups. Returns a failing
// status when a capability treated as mandatory is missing and throws
// NotSupportedError for optional ones.
tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
{
	const bool	electCase	= (OPTYPE_ELECT == caseDef.opType);
	const bool	computeCase	= (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);

	// The basic feature is treated as mandatory.
	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
	{
		return tcu::TestStatus::fail(
			"Subgroup feature " +
			subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
			" is a required capability!");
	}

	// Every non-elect case outside the compute stage additionally needs ballot support.
	if (!electCase && !computeCase &&
		!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
	{
		TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
	}

	if (computeCase)
	{
		// Missing subgroup support in the compute stage is reported as a failure.
		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
		{
			return tcu::TestStatus::fail("Shader stage " +
										 subgroups::getShaderStageName(caseDef.shaderStage) +
										 " is required to support subgroup operations!");
		}

		// The elect case runs without any extra inputs.
		if (electCase)
		{
			return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect);
		}

		// All other compute cases get one std430 buffer and one image.
		const deUint32		inputDatasCount	= 2;
		subgroups::SSBOData	inputDatas[inputDatasCount];

		subgroups::SSBOData& valueBuffer = inputDatas[0];
		valueBuffer.binding			= 1u;
		valueBuffer.format			= subgroups::FORMAT_R32_UINT;
		valueBuffer.layout			= subgroups::SSBOData::LayoutStd430;
		valueBuffer.numElements		= 1 + SHADER_BUFFER_SIZE;
		valueBuffer.initializeType	= subgroups::SSBOData::InitializeNonZero;

		subgroups::SSBOData& image = inputDatas[1];
		image.binding			= 0u;
		image.isImage			= true;
		image.format			= subgroups::FORMAT_R32_UINT;
		image.layout			= subgroups::SSBOData::LayoutPacked;
		image.numElements		= SHADER_BUFFER_SIZE;
		image.initializeType	= subgroups::SSBOData::InitializeNone;

		return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
	}

	// Graphics path from here on.
	if (!subgroups::isFragmentSSBOSupportedForDevice(context))
	{
		TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
	}

	int supportedStages	= context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
	int combinedSSBOs	= context.getDeqpContext().getContextInfo().getInt(GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS);

	// Restrict the requested stages to those reported by the implementation.
	subgroups::ShaderStageFlags stages = (subgroups::ShaderStageFlags)(caseDef.shaderStage & supportedStages);

	// Without vertex-stage SSBO support only the fragment stage can be kept;
	// if it was not among the remaining stages there is nothing left to run.
	if (subgroups::SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
	{
		const bool fragmentRemaining = ((stages & subgroups::SHADER_STAGE_FRAGMENT_BIT) != 0);

		if (!fragmentRemaining)
		{
			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
		}

		stages = subgroups::SHADER_STAGE_FRAGMENT_BIT;
	}

	if ((subgroups::ShaderStageFlags)0u == stages)
	{
		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
	}

	// With sufficient effort the binding points could be assigned dynamically
	// from the stages actually supported, but the framebuffer tests already
	// cover those cases, so there seems to be little benefit in doing so.
	// The highest buffer binding assigned below is 13.
	if (combinedSSBOs < 14)
	{
		TCU_THROW(NotSupportedError, "Device does not support enough combined SSBOs for this test (14)");
	}

	// Stage owning each group of inputs, in binding order.
	const subgroups::ShaderStageFlags stagesBits[] =
	{
		subgroups::SHADER_STAGE_VERTEX_BIT,
		subgroups::SHADER_STAGE_TESS_CONTROL_BIT,
		subgroups::SHADER_STAGE_TESS_EVALUATION_BIT,
		subgroups::SHADER_STAGE_GEOMETRY_BIT,
		subgroups::SHADER_STAGE_FRAGMENT_BIT,
	};
	const deUint32 stageCount = DE_LENGTH_OF_ARRAY(stagesBits);

	if (electCase)
	{
		// One zero-initialised single-element buffer per stage, at bindings 4..8.
		const deUint32		inputCount	= stageCount;
		subgroups::SSBOData	inputData[inputCount];

		for (deUint32 stageNdx = 0u; stageNdx < stageCount; ++stageNdx)
		{
			subgroups::SSBOData& counter = inputData[stageNdx];

			counter.binding			= 4u + stageNdx;
			counter.stages			= stagesBits[stageNdx];
			counter.format			= subgroups::FORMAT_R32_UINT;
			counter.layout			= subgroups::SSBOData::LayoutStd430;
			counter.numElements		= 1;
			counter.initializeType	= subgroups::SSBOData::InitializeZero;
		}

		return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages);
	}

	// Every stage gets three consecutive entries: a value buffer, a
	// zero-initialised single-element buffer and an image. Buffer bindings
	// advance by two per stage starting at 4; image bindings by one starting at 0.
	const deUint32		entriesPerStage	= 3u;
	const deUint32		inputDatasCount	= stageCount * entriesPerStage;
	subgroups::SSBOData	inputDatas[inputDatasCount];

	for (deUint32 stageNdx = 0u; stageNdx < stageCount; ++stageNdx)
	{
		const deUint32					first			= stageNdx * entriesPerStage;
		const deUint32					bufferBinding	= 4u + stageNdx * 2u;
		const subgroups::ShaderStageFlags	owner			= stagesBits[stageNdx];

		subgroups::SSBOData& valueBuffer = inputDatas[first];
		valueBuffer.binding			= bufferBinding;
		valueBuffer.stages			= owner;
		valueBuffer.format			= subgroups::FORMAT_R32_UINT;
		valueBuffer.layout			= subgroups::SSBOData::LayoutStd430;
		valueBuffer.numElements		= 1 + SHADER_BUFFER_SIZE;
		valueBuffer.initializeType	= subgroups::SSBOData::InitializeNonZero;

		subgroups::SSBOData& counter = inputDatas[first + 1u];
		counter.binding			= bufferBinding + 1u;
		counter.stages			= owner;
		counter.format			= subgroups::FORMAT_R32_UINT;
		counter.layout			= subgroups::SSBOData::LayoutStd430;
		counter.numElements		= 1;
		counter.initializeType	= subgroups::SSBOData::InitializeZero;

		subgroups::SSBOData& image = inputDatas[first + 2u];
		image.binding			= stageNdx;
		image.stages			= owner;
		image.isImage			= true;
		image.format			= subgroups::FORMAT_R32_UINT;
		image.layout			= subgroups::SSBOData::LayoutPacked;
		image.numElements		= SHADER_BUFFER_SIZE;
		image.initializeType	= subgroups::SSBOData::InitializeNone;
	}

	return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages);
}
}
// Builds the "basic" subgroup test group with its three children, added in
// the order "graphics", "compute", "framebuffer". Ownership of the returned
// group is released to the caller.
deqp::TestCaseGroup* createSubgroupsBasicTests(deqp::Context& testCtx)
{
	de::MovePtr<deqp::TestCaseGroup> graphicsTests(new deqp::TestCaseGroup(
		testCtx, "graphics", "Subgroup basic category tests: graphics"));
	de::MovePtr<deqp::TestCaseGroup> computeTests(new deqp::TestCaseGroup(
		testCtx, "compute", "Subgroup basic category tests: compute"));
	de::MovePtr<deqp::TestCaseGroup> framebufferTests(new deqp::TestCaseGroup(
		testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));

	// Stages that get an individual framebuffer case per operation.
	const subgroups::ShaderStageFlags framebufferStages[] =
	{
		SHADER_STAGE_FRAGMENT_BIT,
		SHADER_STAGE_VERTEX_BIT,
		SHADER_STAGE_TESS_EVALUATION_BIT,
		SHADER_STAGE_TESS_CONTROL_BIT,
		SHADER_STAGE_GEOMETRY_BIT,
	};

	for (int opType = 0; opType < OPTYPE_LAST; ++opType)
	{
		const std::string opName = de::toLower(getOpTypeName(opType));

		// Every operation has a compute case.
		const CaseDefinition computeCase = {opType, SHADER_STAGE_COMPUTE_BIT};
		SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeTests.get(), opName, "",
			supportedCheck, initPrograms, test, computeCase);

		// Shared isn't available in non compute shaders, so that operation
		// gets neither a graphics nor a framebuffer case.
		if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED != opType)
		{
			const CaseDefinition graphicsCase = {opType, SHADER_STAGE_ALL_GRAPHICS};
			SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicsTests.get(),
				opName, "",
				supportedCheck, initPrograms, test, graphicsCase);

			for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
			{
				// Elect is not tested for the fragment stage. The reason is not
				// documented; presumably it is because noSSBOtest sends the
				// fragment stage to the barrier checker whatever the op type.
				const bool skipped = (OPTYPE_ELECT == opType) &&
									 (SHADER_STAGE_FRAGMENT_BIT == framebufferStages[stageNdx]);

				if (!skipped)
				{
					const CaseDefinition framebufferCase = {opType, framebufferStages[stageNdx]};
					SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferTests.get(),
						opName + "_" + getShaderStageName(framebufferCase.shaderStage), "",
						supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
				}
			}
		}
	}

	de::MovePtr<deqp::TestCaseGroup> basicTests(new deqp::TestCaseGroup(
		testCtx, "basic", "Subgroup basic category tests"));

	basicTests->addChild(graphicsTests.release());
	basicTests->addChild(computeTests.release());
	basicTests->addChild(framebufferTests.release());

	return basicTests.release();
}
} // subgroups
} // glc