/*------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2020 The Khronos Group Inc.
* Copyright (c) 2020 Google LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief VK_KHR_zero_initialize_workgroup_memory tests
*//*--------------------------------------------------------------------*/
#include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"
#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include <algorithm>
#include <vector>
using namespace vk;
namespace vkt
{
namespace compute
{
namespace
{
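// Shared helper: dispatches the "comp" shader from the context's binary collection
// over a single host-visible storage buffer and verifies the result on the host.
// With increment == 0 the buffer is seeded with 0xff and every word must read back
// as zero; with increment != 0 it is seeded with zeros and every word must equal
// numWGX * numWGY * numWGZ (each workgroup atomically adds 1 per verified word).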
tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
const std::vector<deUint32>& specValues = {},
deUint32 increment = 0)
{
const DeviceInterface& vk = context.getDeviceInterface();
const VkDevice device = context.getDevice();
Allocator& allocator = context.getDefaultAllocator();
tcu::TestLog& log = context.getTestContext().getLog();
de::MovePtr<BufferWithMemory> buffer;
VkDescriptorBufferInfo bufferDescriptor;
VkDeviceSize size = bufferSize;
buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
MemoryRequirement::HostVisible | MemoryRequirement::Cached));
bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
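// Seed the buffer: 0xff in zero-check mode so stale data is detectable, 0 in
// increment mode so the atomic adds start from a known value.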
deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
DescriptorSetLayoutBuilder layoutBuilder;
layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
std::vector<VkSpecializationMapEntry> entries(specValues.size());
if (!specValues.empty())
{
for (deUint32 i = 0; i < specValues.size(); ++i)
{
entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
}
}
const VkSpecializationInfo specInfo =
{
(deUint32)specValues.size(),
entries.data(),
specValues.size() * sizeof(deUint32),
specValues.data(),
};
const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
DE_NULL,
(VkPipelineLayoutCreateFlags)0,
1,
&descriptorSetLayout.get(),
0u,
DE_NULL,
};
Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
flushAlloc(vk, device, buffer->getAllocation());
const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
const VkPipelineShaderStageCreateInfo shaderInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
DE_NULL,
0,
VK_SHADER_STAGE_COMPUTE_BIT,
*shader,
"main",
specValues.empty() ? DE_NULL : &specInfo,
};
const VkComputePipelineCreateInfo pipelineInfo =
{
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
DE_NULL,
0u,
shaderInfo,
*pipelineLayout,
(VkPipeline)0,
0u,
};
Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
const VkQueue queue = context.getUniversalQueue();
Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
context.getUniversalQueueFamilyIndex());
Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
DescriptorSetUpdateBuilder setUpdateBuilder;
setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
setUpdateBuilder.update(vk, device);
beginCommandBuffer(vk, *cmdBuffer, 0);
vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
endCommandBuffer(vk, *cmdBuffer);
submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
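// Read back the buffer and check every 32-bit word against the expected value.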
invalidateAlloc(vk, device, buffer->getAllocation());
for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
{
deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
if (ptr[i] != expected)
{
log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
return tcu::TestStatus::fail("compute failed");
}
}
return tcu::TestStatus::pass("compute succeeded");
}
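// Covers the full maxComputeSharedMemorySize limit: a zero-initialized shared uvec4
// array spans the whole limit, every invocation verifies its own words, and invocation
// 0 additionally verifies any words beyond the workgroup size. Workgroup dimensions
// are rounded down to multiples of 13, presumably to exercise non-power-of-two sizes.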
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
MaxWorkgroupMemoryInstance(Context& context, deUint32 numWorkgroups)
: TestInstance(context),
m_numWorkgroups(numWorkgroups)
{
}
tcu::TestStatus iterate(void);
private:
deUint32 m_numWorkgroups;
};
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
deUint32 numWorkgroups)
: TestCase(testCtx, name, description),
m_numWorkgroups(numWorkgroups)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups);
}
virtual void checkSupport(Context& context) const;
private:
deUint32 m_numWorkgroups;
};
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
src << "shared uvec4 wg_mem[num_elems] = {};\n";
src << "void main() {\n";
src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
src << " uint idx_x = gl_LocalInvocationID.x;\n";
src << " uint idx = idx_x + idx_y + idx_z;\n";
src << " uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
src << " for (uint i = 0; i < num_elems; ++i) {\n";
src << " for (uint j = 0; j < 4; ++j) {\n";
src << " uint shared_idx = 4*i + j;\n";
src << " uint wg_val = wg_mem[i][j];\n";
src << " if (idx == shared_idx) {\n";
src << " atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
src << " } else if (idx == 0 && shared_idx >= wg_size) {\n";
src << " atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
src << " }\n";
src << " }\n";
src << " }\n";
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
{
VkPhysicalDeviceProperties properties;
m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;
const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
deUint32 wgy = 1;
deUint32 wgz = 1;
if (wgx < maxWG)
{
wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
}
if ((wgx * wgy) < maxWG)
{
wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
}
const deUint32 size = maxMemSize;
const deUint32 numElems = maxMemSize / 16;
return runCompute(m_context, size, m_numWorkgroups, 1, 1, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}
void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group)
{
std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
for (deUint32 i = 0; i < workgroups.size(); ++i) {
deUint32 numWG = workgroups[i];
group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
de::toString(numWG), de::toString(numWG) + " workgroups", numWG));
}
}
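// Per-type tests: declare numVariables zero-initialized shared variables of a single
// scalar, vector, or matrix type and write a 0/1 flag per component to the output
// buffer. Note that for matNxM types, numRows holds N (the column count) and
// numElements holds M (components per column), despite the field names.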
struct TypeCaseDef
{
std::string typeName;
deUint32 typeSize;
deUint32 numElements;
deUint32 numRows;
deUint32 numVariables;
};
class TypeTestInstance : public vkt::TestInstance
{
public:
TypeTestInstance(Context& context, const TypeCaseDef& caseDef)
: TestInstance(context),
m_caseDef(caseDef)
{
}
tcu::TestStatus iterate(void);
private:
TypeCaseDef m_caseDef;
};
class TypeTest : public vkt::TestCase
{
public:
TypeTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
const TypeCaseDef& caseDef)
: TestCase(testCtx, name, description),
m_caseDef(caseDef)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new TypeTestInstance(context, m_caseDef);
}
virtual void checkSupport(Context& context) const;
private:
TypeCaseDef m_caseDef;
};
void TypeTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
f16_i8_features.pNext = DE_NULL;
VkPhysicalDeviceFeatures2 features2;
deMemset(&features2, 0, sizeof(features2));
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &f16_i8_features;
context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
if (m_caseDef.typeName == "float16_t" ||
m_caseDef.typeName == "f16vec2" ||
m_caseDef.typeName == "f16vec3" ||
m_caseDef.typeName == "f16vec4" ||
m_caseDef.typeName == "f16mat2x2" ||
m_caseDef.typeName == "f16mat2x3" ||
m_caseDef.typeName == "f16mat2x4" ||
m_caseDef.typeName == "f16mat3x2" ||
m_caseDef.typeName == "f16mat3x3" ||
m_caseDef.typeName == "f16mat3x4" ||
m_caseDef.typeName == "f16mat4x2" ||
m_caseDef.typeName == "f16mat4x3" ||
m_caseDef.typeName == "f16mat4x4")
{
if (f16_i8_features.shaderFloat16 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
}
if (m_caseDef.typeName == "float64_t" ||
m_caseDef.typeName == "f64vec2" ||
m_caseDef.typeName == "f64vec3" ||
m_caseDef.typeName == "f64vec4"||
m_caseDef.typeName == "f64mat2x2" ||
m_caseDef.typeName == "f64mat2x3" ||
m_caseDef.typeName == "f64mat2x4" ||
m_caseDef.typeName == "f64mat3x2" ||
m_caseDef.typeName == "f64mat3x3" ||
m_caseDef.typeName == "f64mat3x4" ||
m_caseDef.typeName == "f64mat4x2" ||
m_caseDef.typeName == "f64mat4x3" ||
m_caseDef.typeName == "f64mat4x4")
{
if (features2.features.shaderFloat64 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
}
if (m_caseDef.typeName == "int8_t" ||
m_caseDef.typeName == "i8vec2" ||
m_caseDef.typeName == "i8vec3" ||
m_caseDef.typeName == "i8vec4" ||
m_caseDef.typeName == "uint8_t" ||
m_caseDef.typeName == "u8vec2" ||
m_caseDef.typeName == "u8vec3" ||
m_caseDef.typeName == "u8vec4")
{
if (f16_i8_features.shaderInt8 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt8 not supported");
}
if (m_caseDef.typeName == "int16_t" ||
m_caseDef.typeName == "i16vec2" ||
m_caseDef.typeName == "i16vec3" ||
m_caseDef.typeName == "i16vec4" ||
m_caseDef.typeName == "uint16_t" ||
m_caseDef.typeName == "u16vec2" ||
m_caseDef.typeName == "u16vec3" ||
m_caseDef.typeName == "u16vec4")
{
if (features2.features.shaderInt16 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt16 not supported");
}
if (m_caseDef.typeName == "int64_t" ||
m_caseDef.typeName == "i64vec2" ||
m_caseDef.typeName == "i64vec3" ||
m_caseDef.typeName == "i64vec4" ||
m_caseDef.typeName == "uint64_t" ||
m_caseDef.typeName == "u64vec2" ||
m_caseDef.typeName == "u64vec3" ||
m_caseDef.typeName == "u64vec4")
{
if (features2.features.shaderInt64 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}
}
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
}
src << "void main() {\n";
if (m_caseDef.numRows > 1)
{
src << " uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
src << " uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
}
std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
{
if (m_caseDef.numElements == 1)
{
// Scalars.
src << " a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
}
else if (m_caseDef.numRows == 1)
{
// Vectors.
src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
}
else
{
// Matrices.
src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
}
}
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus TypeTestInstance::iterate(void)
{
const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1);
}
void AddTypeTests(tcu::TestCaseGroup* group)
{
deRandom rnd;
deRandom_init(&rnd, 0);
std::vector<TypeCaseDef> cases =
{
{"bool", 1, 1, 1, 0},
{"bvec2", 1, 2, 1, 0},
{"bvec3", 1, 3, 1, 0},
{"bvec4", 1, 4, 1, 0},
{"uint32_t", 4, 1, 1, 0},
{"uvec2", 4, 2, 1, 0},
{"uvec3", 4, 3, 1, 0},
{"uvec4", 4, 4, 1, 0},
{"int32_t", 4, 1, 1, 0},
{"ivec2", 4, 2, 1, 0},
{"ivec3", 4, 3, 1, 0},
{"ivec4", 4, 4, 1, 0},
{"uint8_t", 1, 1, 1, 0},
{"u8vec2", 1, 2, 1, 0},
{"u8vec3", 1, 3, 1, 0},
{"u8vec4", 1, 4, 1, 0},
{"int8_t", 1, 1, 1, 0},
{"i8vec2", 1, 2, 1, 0},
{"i8vec3", 1, 3, 1, 0},
{"i8vec4", 1, 4, 1, 0},
{"uint16_t", 2, 1, 1, 0},
{"u16vec2", 2, 2, 1, 0},
{"u16vec3", 2, 3, 1, 0},
{"u16vec4", 2, 4, 1, 0},
{"int16_t", 2, 1, 1, 0},
{"i16vec2", 2, 2, 1, 0},
{"i16vec3", 2, 3, 1, 0},
{"i16vec4", 2, 4, 1, 0},
{"uint64_t", 8, 1, 1, 0},
{"u64vec2", 8, 2, 1, 0},
{"u64vec3", 8, 3, 1, 0},
{"u64vec4", 8, 4, 1, 0},
{"int64_t", 8, 1, 1, 0},
{"i64vec2", 8, 2, 1, 0},
{"i64vec3", 8, 3, 1, 0},
{"i64vec4", 8, 4, 1, 0},
{"float32_t", 4, 1, 1, 0},
{"f32vec2", 4, 2, 1, 0},
{"f32vec3", 4, 3, 1, 0},
{"f32vec4", 4, 4, 1, 0},
{"f32mat2x2", 4, 2, 2, 0},
{"f32mat2x3", 4, 3, 2, 0},
{"f32mat2x4", 4, 4, 2, 0},
{"f32mat3x2", 4, 2, 3, 0},
{"f32mat3x3", 4, 3, 3, 0},
{"f32mat3x4", 4, 4, 3, 0},
{"f32mat4x2", 4, 2, 4, 0},
{"f32mat4x3", 4, 3, 4, 0},
{"f32mat4x4", 4, 4, 4, 0},
{"float16_t", 2, 1, 1, 0},
{"f16vec2", 2, 2, 1, 0},
{"f16vec3", 2, 3, 1, 0},
{"f16vec4", 2, 4, 1, 0},
{"f16mat2x2", 2, 2, 2, 0},
{"f16mat2x3", 2, 3, 2, 0},
{"f16mat2x4", 2, 4, 2, 0},
{"f16mat3x2", 2, 2, 3, 0},
{"f16mat3x3", 2, 3, 3, 0},
{"f16mat3x4", 2, 4, 3, 0},
{"f16mat4x2", 2, 2, 4, 0},
{"f16mat4x3", 2, 3, 4, 0},
{"f16mat4x4", 2, 4, 4, 0},
{"float64_t", 8, 1, 1, 0},
{"f64vec2", 8, 2, 1, 0},
{"f64vec3", 8, 3, 1, 0},
{"f64vec4", 8, 4, 1, 0},
{"f64mat2x2", 8, 2, 2, 0},
{"f64mat2x3", 8, 3, 2, 0},
{"f64mat2x4", 8, 4, 2, 0},
{"f64mat3x2", 8, 2, 3, 0},
{"f64mat3x3", 8, 3, 3, 0},
{"f64mat3x4", 8, 4, 3, 0},
{"f64mat4x2", 8, 2, 4, 0},
{"f64mat4x3", 8, 3, 4, 0},
{"f64mat4x4", 8, 4, 4, 0},
};
for (deUint32 i = 0; i < cases.size(); ++i)
{
cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
group->addChild(
new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i].typeName.c_str(), cases[i]));
}
}
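// Composite tests: zero-initialize arrays, structs, and nested aggregates whose sizes
// come from specialization constants. The index field is a bitmask of the optional
// shader features a case requires (0x1 float16, 0x2 float64, 0x4 int8, 0x8 int16,
// 0x10 int64), checked in CompositeTest::checkSupport.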
struct CompositeCaseDef
{
deUint32 index;
std::string typeDefinition;
std::string assignment;
deUint32 elements;
std::vector<deUint32> specValues;
};
class CompositeTestInstance : public vkt::TestInstance
{
public:
CompositeTestInstance(Context& context, const CompositeCaseDef& caseDef)
: TestInstance(context),
m_caseDef(caseDef)
{
}
tcu::TestStatus iterate(void);
private:
CompositeCaseDef m_caseDef;
};
class CompositeTest : public vkt::TestCase
{
public:
CompositeTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
const CompositeCaseDef& caseDef)
: TestCase(testCtx, name, description),
m_caseDef(caseDef)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new CompositeTestInstance(context, m_caseDef);
}
virtual void checkSupport(Context& context) const;
private:
CompositeCaseDef m_caseDef;
};
void CompositeTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
f16_i8_features.pNext = DE_NULL;
VkPhysicalDeviceFeatures2 features2;
deMemset(&features2, 0, sizeof(features2));
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &f16_i8_features;
context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
bool needsInt8 = (m_caseDef.index & 0x4) != 0;
bool needsInt16 = (m_caseDef.index & 0x8) != 0;
bool needsInt64 = (m_caseDef.index & 0x10) != 0;
if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt8 not supported");
if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt16 not supported");
if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}
void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
src << "\n";
for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
}
src << "\n";
src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
src << "\n";
src << m_caseDef.typeDefinition;
src << "\n";
src << "void main() {\n";
src << m_caseDef.assignment;
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus CompositeTestInstance::iterate(void)
{
const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
return runCompute(m_context, bufferSize, 1, 1, 1, m_caseDef.specValues);
}
void AddCompositeTests(tcu::TestCaseGroup* group)
{
std::vector<CompositeCaseDef> cases =
{
{0,
"shared uint wg_mem[specId0] = {};\n",
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[i] = wg_mem[i];\n"
"}\n",
16,
{16},
},
{0,
"shared float wg_mem[specId0][specId1] = {};\n",
"for (uint i = 0; i < specId0; ++i) {\n"
" for (uint j = 0; j < specId1; ++j) {\n"
" uint idx = i * specId1 + j;\n"
" a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
" }\n"
"}\n",
32,
{4, 8},
},
{0,
"struct Sa {\n"
" uint a;\n"
" uvec2 b;\n"
" uvec3 c;\n"
" uvec4 d;\n"
" float e;\n"
" vec2 f;\n"
" vec3 g;\n"
" vec4 h;\n"
" bool i;\n"
" bvec2 j;\n"
" bvec3 k;\n"
" bvec4 l;\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint i = 0;\n"
"a.a[i++] = wg_mem.a;\n"
"a.a[i++] = wg_mem.b.x;\n"
"a.a[i++] = wg_mem.b.y;\n"
"a.a[i++] = wg_mem.c.x;\n"
"a.a[i++] = wg_mem.c.y;\n"
"a.a[i++] = wg_mem.c.z;\n"
"a.a[i++] = wg_mem.d.x;\n"
"a.a[i++] = wg_mem.d.y;\n"
"a.a[i++] = wg_mem.d.z;\n"
"a.a[i++] = wg_mem.d.w;\n"
"a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
"a.a[i++] = wg_mem.i ? 1 : 0;\n"
"a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
"a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
"a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
"a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
"a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
"a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
"a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
"a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
"a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
30,
{},
},
{0,
"struct Sa {\n"
" uint a;\n"
"};\n"
"struct Sb {\n"
" uvec2 a;\n"
"};\n"
"struct Sc {\n"
" Sa a[specId0];\n"
" Sb b[specId1];\n"
"};\n"
"shared Sc wg_mem[specId2] = {};\n",
"uint idx = 0;\n"
"for (uint i = 0; i < specId2; ++i) {\n"
" for (uint j = 0; j < specId0; ++j) {\n"
" a.a[idx++] = wg_mem[i].a[j].a;\n"
" }\n"
" for (uint j = 0; j < specId1; ++j) {\n"
" a.a[idx++] = wg_mem[i].b[j].a.x;\n"
" a.a[idx++] = wg_mem[i].b[j].a.y;\n"
" }\n"
"}\n",
32,
{2,3,4},
},
{1,
"struct Sa {\n"
" f16vec2 a;\n"
" float16_t b[specId0];\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
"a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
"}\n",
18,
{16},
},
{2,
"struct Sa {\n"
" f64vec2 a;\n"
" float64_t b[specId0];\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
"a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
"}\n",
7,
{5},
},
{4,
"struct Sa {\n"
" i8vec2 a;\n"
" int8_t b[specId0];\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
"}\n",
34,
{32},
},
{8,
"struct Sa {\n"
" i16vec2 a;\n"
" int16_t b[specId0];\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
"}\n",
122,
{120},
},
{16,
"struct Sa {\n"
" i64vec2 a;\n"
" int64_t b[specId0];\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
"for (uint i = 0; i < specId0; ++i) {\n"
" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
"}\n",
63,
{61},
},
{0x1f,
"struct Sa {\n"
" float16_t a;\n"
" float b;\n"
" int8_t c;\n"
" int16_t d;\n"
" int e;\n"
" int64_t f;\n"
" float64_t g;\n"
"};\n"
"shared Sa wg_mem = {};\n",
"uint idx = 0;\n"
"a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
"a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
"a.a[idx++] = uint(wg_mem.c);\n"
"a.a[idx++] = uint(wg_mem.d);\n"
"a.a[idx++] = uint(wg_mem.e);\n"
"a.a[idx++] = uint(wg_mem.f);\n"
"a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
7,
{},
},
{0,
"struct Sa {\n"
" uint a;\n"
"};\n"
"struct Sb {\n"
" Sa a[specId0];\n"
" uint b;\n"
"};\n"
"struct Sc {\n"
" Sb b[specId1];\n"
" uint c;\n"
"};\n"
"struct Sd {\n"
" Sc c[specId2];\n"
" uint d;\n"
"};\n"
"struct Se {\n"
" Sd d[specId3];\n"
" uint e;\n"
"};\n"
"shared Se wg_mem[specId4] = {};\n",
"uint idx = 0;\n"
"for (uint i1 = 0; i1 < specId4; ++i1) {\n"
" a.a[idx++] = wg_mem[i1].e;\n"
" for (uint i2 = 0; i2 < specId3; ++i2) {\n"
" a.a[idx++] = wg_mem[i1].d[i2].d;\n"
" for (uint i3 = 0; i3 < specId2; ++i3) {\n"
" a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
" for (uint i4 = 0; i4 < specId1; ++i4) {\n"
" a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
" for (uint i5 = 0; i5 < specId0; ++i5) {\n"
" a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
" }\n"
" }\n"
" }\n"
" }\n"
"}\n",
872,
{6,5,4,3,2},
},
};
for (deUint32 i = 0; i < cases.size(); ++i)
{
group->addChild(
new CompositeTest(group->getTestContext(), de::toString(i), de::toString(i), cases[i]));
}
}
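// Max workgroups tests: dispatch 65535 workgroups along a single dimension; each
// workgroup swaps two zero-initialized shared words, barriers, and then atomically
// counts the words that read back as zero.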
enum Dim {
DimX,
DimY,
DimZ,
};
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
MaxWorkgroupsInstance(Context& context, Dim dim)
: TestInstance(context),
m_dim(dim)
{
}
tcu::TestStatus iterate(void);
private:
Dim m_dim;
};
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
MaxWorkgroupsTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
Dim dim)
: TestCase(testCtx, name, description),
m_dim(dim)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new MaxWorkgroupsInstance(context, m_dim);
}
virtual void checkSupport(Context& context) const;
private:
Dim m_dim;
};
void MaxWorkgroupsTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "\n";
src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
src << "shared uint wg_mem[2] = {};\n";
std::string dim;
switch (m_dim) {
case DimX:
dim = "x";
break;
case DimY:
dim = "y";
break;
case DimZ:
dim = "z";
break;
}
src << "\n";
src << "void main() {\n";
src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
src << " uint idx_x = gl_LocalInvocationID.x;\n";
src << " uint idx = idx_x + idx_y + idx_z;\n";
src << " if (gl_LocalInvocationID.x == 0) {\n";
src << " wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
src << " }\n";
src << " barrier();\n";
src << " atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
{
VkPhysicalDeviceProperties properties;
deMemset(&properties, 0, sizeof(properties));
m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
deUint32 wgy = 1;
deUint32 wgz = 1;
if (wgx < maxWG)
{
wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
}
if ((wgx * wgy) < maxWG)
{
wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
}
deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, {wgx, wgy, wgz}, /*increment*/ 1);
}
void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group)
{
group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", "max x dim workgroups", DimX));
group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", "max y dim workgroups", DimY));
group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", "max z dim workgroups", DimZ));
}
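// Specialization tests: both the workgroup size and the dimensions of the
// zero-initialized shared array come from specialization constants, covering
// every size from 1x1x1 to 8x8x8.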
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
SpecializeWorkgroupInstance(Context& context, deUint32 x, deUint32 y, deUint32 z)
: TestInstance(context),
m_x(x),
m_y(y),
m_z(z)
{
}
tcu::TestStatus iterate(void);
private:
deUint32 m_x;
deUint32 m_y;
deUint32 m_z;
};
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
SpecializeWorkgroupTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
deUint32 x, deUint32 y, deUint32 z)
: TestCase(testCtx, name, description),
m_x(x),
m_y(y),
m_z(z)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z);
}
virtual void checkSupport(Context& context) const;
private:
deUint32 m_x;
deUint32 m_y;
deUint32 m_z;
};
void SpecializeWorkgroupTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
VkPhysicalDeviceProperties properties;
deMemset(&properties, 0, sizeof(properties));
context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}
void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "\n";
src << "layout(constant_id = 0) const uint WGX = 1;\n";
src << "layout(constant_id = 1) const uint WGY = 1;\n";
src << "layout(constant_id = 2) const uint WGZ = 1;\n";
src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
src << "\n";
src << "void main() {\n";
src << " a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
{
const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
return runCompute(m_context, size, 1, 1, 1, {m_x, m_y, m_z});
}
void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group)
{
for (deUint32 z = 1; z <= 8; ++z)
{
for (deUint32 y = 1; y <= 8; ++y)
{
for (deUint32 x = 1; x <= 8; ++x)
{
group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
x, y, z));
}
}
}
}
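// Repeated pipeline tests: record the command buffer once, then resubmit it m_repeat
// times. Only the row selected by m_odd is written from buffer B, so the other row
// must read back as zero after every submission, proving shared memory is re-zeroed
// per dispatch.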
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
RepeatedPipelineInstance(Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
: TestInstance(context),
m_xSize(xSize),
m_repeat(repeat),
m_odd(odd)
{
}
tcu::TestStatus iterate(void);
private:
deUint32 m_xSize;
deUint32 m_repeat;
deUint32 m_odd;
};
class RepeatedPipelineTest : public vkt::TestCase
{
public:
RepeatedPipelineTest(tcu::TestContext& testCtx,
const std::string& name,
const std::string& description,
deUint32 xSize, deUint32 repeat, deUint32 odd)
: TestCase(testCtx, name, description),
m_xSize(xSize),
m_repeat(repeat),
m_odd(odd)
{
}
void initPrograms(SourceCollections& sourceCollections) const;
TestInstance* createInstance(Context& context) const
{
return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
}
virtual void checkSupport(Context& context) const;
private:
deUint32 m_xSize;
deUint32 m_repeat;
deUint32 m_odd;
};
void RepeatedPipelineTest::checkSupport(Context& context) const
{
context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
{
std::ostringstream src;
src << "#version 450\n";
src << "#extension GL_EXT_null_initializer : enable\n";
src << "\n";
src << "layout(constant_id = 0) const uint WGX = 1;\n";
src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
src << "\n";
src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
src << "\n";
src << "shared uint wg_mem[WGX][2] = {};\n";
src << "void main() {\n";
src << " if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
src << " wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
src << " }\n";
src << " barrier();\n";
src << " a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
src << "}\n";
sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
tcu::TestStatus RepeatedPipelineInstance::iterate(void)
{
Context& context = m_context;
const deUint32 bufferSize = m_xSize * 2 * (deUint32)sizeof(deUint32);
const deUint32 numBuffers = 2;
const DeviceInterface& vk = context.getDeviceInterface();
const VkDevice device = context.getDevice();
Allocator& allocator = context.getDefaultAllocator();
tcu::TestLog& log = context.getTestContext().getLog();
de::MovePtr<BufferWithMemory> buffers[numBuffers];
VkDescriptorBufferInfo bufferDescriptors[numBuffers];
VkDeviceSize size = bufferSize;
for (deUint32 i = 0; i < numBuffers; ++i)
{
buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
MemoryRequirement::HostVisible | MemoryRequirement::Cached));
bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
}
deUint32* ptrs[numBuffers];
for (deUint32 i = 0; i < numBuffers; ++i)
{
ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
}
for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
{
ptrs[1][i] = i;
}
deMemset(ptrs[0], 0xff, (size_t)size);
DescriptorSetLayoutBuilder layoutBuilder;
for (deUint32 i = 0; i < numBuffers; ++i)
{
layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
}
Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
const deUint32 specData[1] =
{
m_xSize,
};
const vk::VkSpecializationMapEntry entries[1] =
{
{0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
};
const vk::VkSpecializationInfo specInfo =
{
1,
entries,
sizeof(specData),
specData
};
const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
DE_NULL,
(VkPipelineLayoutCreateFlags)0,
1,
&descriptorSetLayout.get(),
0u,
DE_NULL,
};
Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
for (deUint32 i = 0; i < numBuffers; ++i)
{
flushAlloc(vk, device, buffers[i]->getAllocation());
}
const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
const VkPipelineShaderStageCreateInfo shaderInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
DE_NULL,
0,
VK_SHADER_STAGE_COMPUTE_BIT,
*shader,
"main",
&specInfo,
};
const VkComputePipelineCreateInfo pipelineInfo =
{
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
DE_NULL,
0u,
shaderInfo,
*pipelineLayout,
(VkPipeline)0,
0u,
};
Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
const VkQueue queue = context.getUniversalQueue();
Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
context.getUniversalQueueFamilyIndex());
Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
DescriptorSetUpdateBuilder setUpdateBuilder;
for (deUint32 i = 0; i < numBuffers; ++i)
{
setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
}
setUpdateBuilder.update(vk, device);
beginCommandBuffer(vk, *cmdBuffer, 0);
vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
endCommandBuffer(vk, *cmdBuffer);
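// Resubmit the same command buffer repeatedly; shared memory must be zero-initialized
// anew for each dispatch.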
for (deUint32 r = 0; r < m_repeat; ++r)
{
submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
invalidateAlloc(vk, device, buffers[0]->getAllocation());
for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
{
deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
if (ptrs[0][i] != expected)
{
log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
return tcu::TestStatus::fail("compute failed");
}
}
deMemset(ptrs[0], 0xff, (size_t)size);
flushAlloc(vk, device, buffers[0]->getAllocation());
setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
setUpdateBuilder.update(vk, device);
}
return tcu::TestStatus::pass("compute succeeded");
}
void AddRepeatedPipelineTests(tcu::TestCaseGroup* group)
{
std::vector<deUint32> xSizes = {4, 16, 32, 64};
std::vector<deUint32> odds = {0, 1};
std::vector<deUint32> repeats = {2, 4, 8, 16};
for (deUint32 i = 0; i < xSizes.size(); ++i)
{
deUint32 x = xSizes[i];
for (deUint32 j = 0; j < odds.size(); ++j)
{
deUint32 odd = odds[j];
for (deUint32 k = 0; k < repeats.size(); ++k)
{
deUint32 repeat = repeats[k];
group->addChild(new RepeatedPipelineTest(group->getTestContext(),
std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
x, repeat, odd));
}
}
}
}
} // anonymous
tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests(tcu::TestContext& testCtx)
{
de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory", "VK_KHR_zero_initialize_workgroup_memory tests"));
tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
new tcu::TestCaseGroup(testCtx, "max_workgroup_memory", "Read initialization of max workgroup memory");
AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup);
tests->addChild(maxWorkgroupMemoryGroup);
tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types", "basic type tests");
AddTypeTests(typeGroup);
tests->addChild(typeGroup);
tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites", "composite type tests");
AddCompositeTests(compositeGroup);
tests->addChild(compositeGroup);
tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups", "max workgroups");
AddMaxWorkgroupsTests(maxWorkgroupsGroup);
tests->addChild(maxWorkgroupsGroup);
tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup", "specialize workgroup size");
AddSpecializeWorkgroupTests(specializeWorkgroupGroup);
tests->addChild(specializeWorkgroupGroup);
tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline", "repeated pipeline run");
AddRepeatedPipelineTests(repeatPipelineGroup);
tests->addChild(repeatPipelineGroup);
return tests.release();
}
} // compute
} // vkt