| /*------------------------------------------------------------------------- |
| * drawElements Quality Program OpenGL ES 3.1 Module |
| * ------------------------------------------------- |
| * |
| * Copyright 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| *//*! |
| * \file |
| * \brief Indirect compute dispatch tests. |
| *//*--------------------------------------------------------------------*/ |
| |
| #include "es31fIndirectComputeDispatchTests.hpp" |
| #include "gluObjectWrapper.hpp" |
| #include "gluRenderContext.hpp" |
| #include "gluShaderProgram.hpp" |
| #include "glwFunctions.hpp" |
| #include "glwEnums.hpp" |
| #include "tcuVector.hpp" |
| #include "tcuStringTemplate.hpp" |
| #include "tcuTestLog.hpp" |
| #include "deStringUtil.hpp" |
| |
| #include <vector> |
| #include <string> |
| #include <map> |
| |
| namespace deqp |
| { |
| namespace gles31 |
| { |
| namespace Functional |
| { |
| |
| using tcu::UVec3; |
| using tcu::TestLog; |
| using std::vector; |
| using std::string; |
| using std::map; |
| |
// \todo [2014-02-17 pyry] Should be extended with the following:

// Negative:
//  - no active shader program
//  - indirect negative or not aligned
//  - indirect + size outside buffer bounds
//  - no buffer bound to DISPATCH_INDIRECT_BUFFER
//  - (implicit) buffer mapped

// Robustness:
//  - lots of small work group launches
//  - very large work group size
//  - no synchronization, touched by gpu
//  - compute program overwriting buffer
| |
| namespace |
| { |
| |
| enum |
| { |
| RESULT_BLOCK_BASE_SIZE = (3+1)*(int)sizeof(deUint32), // uvec3 + uint |
| RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0, |
| RESULT_BLOCK_NUM_PASSED_OFFSET = 3*(int)sizeof(deUint32), |
| |
| INDIRECT_COMMAND_SIZE = 3*(int)sizeof(deUint32) |
| }; |
| |
//! Selects how the contents of the indirect command buffer are produced.
enum GenBuffer
{
	GEN_BUFFER_UPLOAD	= 0,	//!< Commands are written on the host and uploaded with glBufferData().
	GEN_BUFFER_COMPUTE	= 1,	//!< Commands are written by a generated compute shader.

	GEN_BUFFER_LAST
};
| |
| glu::ProgramSources genVerifySources (const UVec3& workGroupSize) |
| { |
| static const char* s_verifyDispatchTmpl = |
| "#version 310 es\n" |
| "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n" |
| "layout(binding = 0, std430) buffer Result\n" |
| "{\n" |
| " uvec3 expectedGroupCount;\n" |
| " coherent uint numPassed;\n" |
| "} result;\n" |
| "void main (void)\n" |
| "{\n" |
| " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n" |
| " atomicAdd(result.numPassed, 1u);\n" |
| "}\n"; |
| |
| map<string, string> args; |
| |
| args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x()); |
| args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y()); |
| args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z()); |
| |
| return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args)); |
| } |
| |
| class IndirectDispatchCase : public TestCase |
| { |
| public: |
| IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer); |
| ~IndirectDispatchCase (void); |
| |
| IterateResult iterate (void); |
| |
| protected: |
| struct DispatchCommand |
| { |
| deIntptr offset; |
| UVec3 numWorkGroups; |
| |
| DispatchCommand (void) : offset(0) {} |
| DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {} |
| }; |
| |
| GenBuffer m_genBuffer; |
| deUintptr m_bufferSize; |
| UVec3 m_workGroupSize; |
| vector<DispatchCommand> m_commands; |
| |
| void createCommandBuffer (deUint32 buffer) const; |
| void createResultBuffer (deUint32 buffer) const; |
| |
| bool verifyResultBuffer (deUint32 buffer); |
| |
| void createCmdBufferUpload (deUint32 buffer) const; |
| void createCmdBufferCompute (deUint32 buffer) const; |
| |
| private: |
| IndirectDispatchCase (const IndirectDispatchCase&); |
| IndirectDispatchCase& operator= (const IndirectDispatchCase&); |
| }; |
| |
| IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer) |
| : TestCase (context, name, description) |
| , m_genBuffer (genBuffer) |
| , m_bufferSize (0) |
| { |
| } |
| |
| IndirectDispatchCase::~IndirectDispatchCase (void) |
| { |
| } |
| |
| static int getResultBlockAlignedSize (const glw::Functions& gl) |
| { |
| const int baseSize = RESULT_BLOCK_BASE_SIZE; |
| int alignment = 0; |
| gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment); |
| |
| if (alignment == 0 || (baseSize % alignment == 0)) |
| return baseSize; |
| else |
| return (baseSize/alignment + 1)*alignment; |
| } |
| |
| void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const |
| { |
| switch (m_genBuffer) |
| { |
| case GEN_BUFFER_UPLOAD: createCmdBufferUpload (buffer); break; |
| case GEN_BUFFER_COMPUTE: createCmdBufferCompute (buffer); break; |
| default: |
| DE_ASSERT(false); |
| } |
| } |
| |
| void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const |
| { |
| const glw::Functions& gl = m_context.getRenderContext().getFunctions(); |
| vector<deUint8> data (m_bufferSize); |
| |
| for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) |
| { |
| DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3); |
| DE_ASSERT(cmdIter->offset >= 0); |
| DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0); |
| DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize); |
| |
| deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset]; |
| |
| dstPtr[0] = cmdIter->numWorkGroups[0]; |
| dstPtr[1] = cmdIter->numWorkGroups[1]; |
| dstPtr[2] = cmdIter->numWorkGroups[2]; |
| } |
| |
| gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); |
| gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW); |
| } |
| |
| void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const |
| { |
| std::ostringstream src; |
| |
| // Header |
| src << |
| "#version 310 es\n" |
| "layout(local_size_x = 1) in;\n" |
| "layout(std430, binding = 1) buffer Out\n" |
| "{\n" |
| " highp uint data[];\n" |
| "};\n" |
| "void writeCmd (uint offset, uvec3 numWorkGroups)\n" |
| "{\n" |
| " data[offset+0u] = numWorkGroups.x;\n" |
| " data[offset+1u] = numWorkGroups.y;\n" |
| " data[offset+2u] = numWorkGroups.z;\n" |
| "}\n" |
| "void main (void)\n" |
| "{\n"; |
| |
| // Commands |
| for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) |
| { |
| const deUint32 offs = (deUint32)(cmdIter->offset/4); |
| DE_ASSERT((deIntptr)offs*4 == cmdIter->offset); |
| |
| src << "\twriteCmd(" << offs << "u, uvec3(" |
| << cmdIter->numWorkGroups.x() << "u, " |
| << cmdIter->numWorkGroups.y() << "u, " |
| << cmdIter->numWorkGroups.z() << "u));\n"; |
| } |
| |
| src << "}\n"; |
| |
| { |
| const glw::Functions& gl = m_context.getRenderContext().getFunctions(); |
| glu::ShaderProgram program (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str())); |
| |
| m_testCtx.getLog() << program; |
| if (!program.isOk()) |
| TCU_FAIL("Compile failed"); |
| |
| gl.useProgram(program.getProgram()); |
| |
| gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); |
| gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW); |
| gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); |
| |
| gl.dispatchCompute(1,1,1); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed"); |
| |
| gl.memoryBarrier(GL_COMMAND_BARRIER_BIT); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed"); |
| } |
| } |
| |
| void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const |
| { |
| const glw::Functions& gl = m_context.getRenderContext().getFunctions(); |
| const int resultBlockSize = getResultBlockAlignedSize(gl); |
| const int resultBufferSize = resultBlockSize*(int)m_commands.size(); |
| vector<deUint8> data (resultBufferSize); |
| |
| for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) |
| { |
| deUint8* const dstPtr = &data[resultBlockSize*cmdNdx]; |
| |
| *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0]; |
| *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1]; |
| *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2]; |
| *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0; |
| } |
| |
| gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); |
| gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ); |
| } |
| |
| deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups) |
| { |
| const int numInvocationsPerGroup = workGroupSize[0]*workGroupSize[1]*workGroupSize[2]; |
| const int numGroups = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2]; |
| |
| return numInvocationsPerGroup*numGroups; |
| } |
| |
| bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer) |
| { |
| const glw::Functions& gl = m_context.getRenderContext().getFunctions(); |
| |
| const int resultBlockSize = getResultBlockAlignedSize(gl); |
| const int resultBufferSize = resultBlockSize*(int)m_commands.size(); |
| |
| void* mapPtr = DE_NULL; |
| bool allOk = true; |
| |
| try |
| { |
| gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); |
| mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT); |
| |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed"); |
| TCU_CHECK(mapPtr); |
| |
| for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) |
| { |
| const DispatchCommand& cmd = m_commands[cmdNdx]; |
| const deUint8* const srcPtr = (const deUint8*)mapPtr + cmdNdx*resultBlockSize; |
| const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET); |
| const deUint32 expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups); |
| |
| // Verify numPassed. |
| if (numPassed != expectedCount) |
| { |
| m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx |
| << ": got numPassed = " << numPassed << ", expected " << expectedCount |
| << TestLog::EndMessage; |
| allOk = false; |
| } |
| } |
| } |
| catch (...) |
| { |
| if (mapPtr) |
| gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); |
| } |
| |
| gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed"); |
| |
| return allOk; |
| } |
| |
| IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void) |
| { |
| const glu::RenderContext& renderCtx = m_context.getRenderContext(); |
| const glw::Functions& gl = renderCtx.getFunctions(); |
| |
| const glu::ShaderProgram program (renderCtx, genVerifySources(m_workGroupSize)); |
| |
| glu::Buffer cmdBuffer (renderCtx); |
| glu::Buffer resultBuffer (renderCtx); |
| |
| m_testCtx.getLog() << program; |
| TCU_CHECK_MSG(program.isOk(), "Compile failed"); |
| |
| m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage; |
| { |
| tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)"); |
| |
| for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) |
| m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset |
| << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups |
| << TestLog::EndMessage; |
| } |
| |
| createResultBuffer(*resultBuffer); |
| createCommandBuffer(*cmdBuffer); |
| |
| gl.useProgram(program.getProgram()); |
| gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed"); |
| |
| { |
| const int resultBlockAlignedSize = getResultBlockAlignedSize(gl); |
| deIntptr curOffset = 0; |
| |
| for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) |
| { |
| gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize); |
| gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset); |
| |
| curOffset += resultBlockAlignedSize; |
| } |
| } |
| |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed"); |
| |
| if (verifyResultBuffer(*resultBuffer)) |
| m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); |
| else |
| m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer"); |
| |
| return STOP; |
| } |
| |
| class SingleDispatchCase : public IndirectDispatchCase |
| { |
| public: |
| SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups) |
| : IndirectDispatchCase(context, name, description, genBuffer) |
| { |
| m_bufferSize = bufferSize; |
| m_workGroupSize = workGroupSize; |
| m_commands.push_back(DispatchCommand(offset, numWorkGroups)); |
| } |
| }; |
| |
| class MultiDispatchCase : public IndirectDispatchCase |
| { |
| public: |
| MultiDispatchCase (Context& context, GenBuffer genBuffer) |
| : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer) |
| { |
| m_bufferSize = 1<<10; |
| m_workGroupSize = UVec3(3,1,2); |
| |
| m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); |
| m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE, UVec3(2,1,1))); |
| m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); |
| m_commands.push_back(DispatchCommand(40, UVec3(1,1,7))); |
| m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); |
| } |
| }; |
| |
| class MultiDispatchReuseCommandCase : public IndirectDispatchCase |
| { |
| public: |
| MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer) |
| : IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer) |
| { |
| m_bufferSize = 1<<10; |
| m_workGroupSize = UVec3(3,1,2); |
| |
| m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); |
| m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); |
| m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); |
| m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); |
| m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); |
| m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); |
| m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); |
| } |
| }; |
| |
| } // anonymous |
| |
| IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context) |
| : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests") |
| { |
| } |
| |
| IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void) |
| { |
| } |
| |
| void IndirectComputeDispatchTests::init (void) |
| { |
| static const struct |
| { |
| const char* name; |
| GenBuffer gen; |
| } s_genBuffer[] = |
| { |
| { "upload_buffer", GEN_BUFFER_UPLOAD }, |
| { "gen_in_compute", GEN_BUFFER_COMPUTE } |
| }; |
| |
| static const struct |
| { |
| const char* name; |
| const char* description; |
| deUintptr bufferSize; |
| deUintptr offset; |
| UVec3 workGroupSize; |
| UVec3 numWorkGroups; |
| } s_singleDispatchCases[] = |
| { |
| // Name Desc BufferSize Offs WorkGroupSize NumWorkGroups |
| { "single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(1,1,1) }, |
| { "multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(2,3,5) }, |
| { "multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(2,3,1), UVec3(1,2,3) }, |
| { "small_offset", "Small offset", 16+INDIRECT_COMMAND_SIZE, 16, UVec3(1,1,1), UVec3(1,1,1) }, |
| { "large_offset", "Large offset", (2<<20), (1<<20) + 12, UVec3(1,1,1), UVec3(1,1,1) }, |
| { "large_offset_multiple_invocations", "Large offset, multiple invocations", (2<<20), (1<<20) + 12, UVec3(2,3,1), UVec3(1,2,3) }, |
| { "empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(0,0,0) }, |
| }; |
| |
| for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++) |
| { |
| const GenBuffer genBuf = s_genBuffer[genNdx].gen; |
| tcu::TestCaseGroup* const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, ""); |
| addChild(genGroup); |
| |
| for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++) |
| genGroup->addChild(new SingleDispatchCase(m_context, |
| s_singleDispatchCases[ndx].name, |
| s_singleDispatchCases[ndx].description, |
| genBuf, |
| s_singleDispatchCases[ndx].bufferSize, |
| s_singleDispatchCases[ndx].offset, |
| s_singleDispatchCases[ndx].workGroupSize, |
| s_singleDispatchCases[ndx].numWorkGroups)); |
| |
| genGroup->addChild(new MultiDispatchCase (m_context, genBuf)); |
| genGroup->addChild(new MultiDispatchReuseCommandCase (m_context, genBuf)); |
| } |
| } |
| |
| } // Functional |
| } // gles31 |
| } // deqp |