/*------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2015 The Khronos Group Inc.
* Copyright (c) 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Atomic operations (OpAtomic*) tests.
*//*--------------------------------------------------------------------*/
#include "vktAtomicOperationTests.hpp"
#include "vktShaderExecutor.hpp"
#include "vkRefUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuResultCollector.hpp"
#include "deFloat16.h"
#include "deMath.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deRandom.hpp"
#include "deArrayUtil.hpp"
#include <string>
#include <memory>
#include <cmath>
namespace vkt
{
namespace shaderexecutor
{
namespace
{
using de::UniquePtr;
using de::MovePtr;
using std::vector;
using namespace vk;
enum class AtomicMemoryType
{
BUFFER = 0, // Normal buffer.
SHARED, // Shared global struct in a compute workgroup.
REFERENCE, // Buffer passed as a reference.
};
// Helper class to indicate the shader type and the type of memory the atomic operations act on.
class AtomicShaderType
{
public:
AtomicShaderType (glu::ShaderType type, AtomicMemoryType memoryType)
: m_type (type)
, m_atomicMemoryType (memoryType)
{
// Shared memory can only be used with compute shaders.
DE_ASSERT(memoryType != AtomicMemoryType::SHARED || type == glu::SHADERTYPE_COMPUTE);
}
glu::ShaderType getType (void) const { return m_type; }
AtomicMemoryType getMemoryType (void) const { return m_atomicMemoryType; }
private:
glu::ShaderType m_type;
AtomicMemoryType m_atomicMemoryType;
};
// Buffer helper
class Buffer
{
public:
Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef);
VkBuffer getBuffer (void) const { return *m_buffer; }
void* getHostPtr (void) const { return m_allocation->getHostPtr(); }
void flush (void);
void invalidate (void);
private:
const DeviceInterface& m_vkd;
const VkDevice m_device;
const VkQueue m_queue;
const deUint32 m_queueIndex;
const Unique<VkBuffer> m_buffer;
const UniquePtr<Allocation> m_allocation;
};
typedef de::SharedPtr<Buffer> BufferSp;
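// Creates a buffer of the given size and usage flags, using exclusive sharing mode.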
Move<VkBuffer> createBuffer (const DeviceInterface& vkd, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usageFlags)
{
const VkBufferCreateInfo createInfo =
{
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
DE_NULL,
(VkBufferCreateFlags)0,
size,
usageFlags,
VK_SHARING_MODE_EXCLUSIVE,
0u,
DE_NULL
};
return createBuffer(vkd, device, &createInfo);
}
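// Allocates host-visible memory for the buffer (also requesting a device address when buffer
// references are used) and binds it to the buffer.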
MovePtr<Allocation> allocateAndBindMemory (const DeviceInterface& vkd, VkDevice device, Allocator& allocator, VkBuffer buffer, bool useRef)
{
const MemoryRequirement allocationType = (MemoryRequirement::HostVisible | (useRef ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any));
MovePtr<Allocation> alloc(allocator.allocate(getBufferMemoryRequirements(vkd, device, buffer), allocationType));
VK_CHECK(vkd.bindBufferMemory(device, buffer, alloc->getMemory(), alloc->getOffset()));
return alloc;
}
Buffer::Buffer (Context& context, VkBufferUsageFlags usage, size_t size, bool useRef)
: m_vkd (context.getDeviceInterface())
, m_device (context.getDevice())
, m_queue (context.getUniversalQueue())
, m_queueIndex (context.getUniversalQueueFamilyIndex())
, m_buffer (createBuffer (context.getDeviceInterface(),
context.getDevice(),
(VkDeviceSize)size,
usage))
, m_allocation (allocateAndBindMemory (context.getDeviceInterface(),
context.getDevice(),
context.getDefaultAllocator(),
*m_buffer,
useRef))
{
}
void Buffer::flush (void)
{
flushMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
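// Makes device writes to the buffer visible to the host: submits a memory barrier from all
// commands to the host stage, then invalidates the mapped memory range.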
void Buffer::invalidate (void)
{
const auto cmdPool = vk::makeCommandPool(m_vkd, m_device, m_queueIndex);
const auto cmdBufferPtr = vk::allocateCommandBuffer(m_vkd, m_device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
const auto cmdBuffer = cmdBufferPtr.get();
const auto bufferBarrier = vk::makeBufferMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_buffer.get(), 0ull, VK_WHOLE_SIZE);
beginCommandBuffer(m_vkd, cmdBuffer);
m_vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
endCommandBuffer(m_vkd, cmdBuffer);
submitCommandsAndWait(m_vkd, m_device, m_queue, cmdBuffer);
invalidateMappedMemoryRange(m_vkd, m_device, m_allocation->getMemory(), m_allocation->getOffset(), VK_WHOLE_SIZE);
}
// Tests
enum AtomicOperation
{
ATOMIC_OP_EXCHANGE = 0,
ATOMIC_OP_COMP_SWAP,
ATOMIC_OP_ADD,
ATOMIC_OP_MIN,
ATOMIC_OP_MAX,
ATOMIC_OP_AND,
ATOMIC_OP_OR,
ATOMIC_OP_XOR,
ATOMIC_OP_LAST
};
std::string atomicOp2Str (AtomicOperation op)
{
static const char* const s_names[] =
{
"atomicExchange",
"atomicCompSwap",
"atomicAdd",
"atomicMin",
"atomicMax",
"atomicAnd",
"atomicOr",
"atomicXor"
};
return de::getSizedArrayElement<ATOMIC_OP_LAST>(s_names, op);
}
enum
{
NUM_ELEMENTS = 32
};
enum DataType
{
DATA_TYPE_FLOAT16 = 0,
DATA_TYPE_INT32,
DATA_TYPE_UINT32,
DATA_TYPE_FLOAT32,
DATA_TYPE_INT64,
DATA_TYPE_UINT64,
DATA_TYPE_FLOAT64,
DATA_TYPE_LAST
};
std::string dataType2Str(DataType type)
{
static const char* const s_names[] =
{
"float16_t",
"int",
"uint",
"float",
"int64_t",
"uint64_t",
"double",
};
return de::getSizedArrayElement<DATA_TYPE_LAST>(s_names, type);
}
class BufferInterface
{
public:
virtual void setBuffer(void* ptr) = 0;
virtual size_t bufferSize() = 0;
virtual void fillWithTestData(de::Random &rnd) = 0;
virtual void checkResults(tcu::ResultCollector& resultCollector) = 0;
virtual ~BufferInterface() {}
};
template<typename dataTypeT>
class TestBuffer : public BufferInterface
{
public:
TestBuffer(AtomicOperation atomicOp)
: m_atomicOp(atomicOp)
{}
template<typename T>
struct BufferData
{
// Use half the number of elements for inout to cause overlap between atomic operations.
// Each inout element at index i will have two atomic operations using input from
// indices i and i + NUM_ELEMENTS / 2.
T inout[NUM_ELEMENTS / 2];
T input[NUM_ELEMENTS];
T compare[NUM_ELEMENTS];
T output[NUM_ELEMENTS];
T invocationHitCount[NUM_ELEMENTS];
deInt32 index;
};
virtual void setBuffer(void* ptr)
{
m_ptr = static_cast<BufferData<dataTypeT>*>(ptr);
}
virtual size_t bufferSize()
{
return sizeof(BufferData<dataTypeT>);
}
virtual void fillWithTestData(de::Random &rnd)
{
dataTypeT pattern;
deMemset(&pattern, 0xcd, sizeof(dataTypeT));
for (int i = 0; i < NUM_ELEMENTS / 2; i++)
{
m_ptr->inout[i] = static_cast<dataTypeT>(rnd.getUint64());
// The first half of the compare elements matches the inout value at even indices.
// The second half matches at odd indices. This ensures that only one of the two
// overlapping compSwap operations succeeds for each inout element.
m_ptr->compare[i] = m_ptr->inout[i] + (i % 2);
m_ptr->compare[i + NUM_ELEMENTS / 2] = m_ptr->inout[i] + 1 - (i % 2);
}
for (int i = 0; i < NUM_ELEMENTS; i++)
{
m_ptr->input[i] = static_cast<dataTypeT>(rnd.getUint64());
m_ptr->output[i] = pattern;
m_ptr->invocationHitCount[i] = 0;
}
m_ptr->index = 0;
// Take a copy to be used when calculating expected values.
m_original = *m_ptr;
}
virtual void checkResults(tcu::ResultCollector& resultCollector)
{
checkOperation(m_original, *m_ptr, resultCollector);
}
template<typename T>
struct Expected
{
T m_inout;
T m_output[2];
Expected (T inout, T output0, T output1)
: m_inout(inout)
{
m_output[0] = output0;
m_output[1] = output1;
}
bool compare (T inout, T output0, T output1)
{
return (deMemCmp((const void*)&m_inout, (const void*)&inout, sizeof(inout)) == 0
&& deMemCmp((const void*)&m_output[0], (const void*)&output0, sizeof(output0)) == 0
&& deMemCmp((const void*)&m_output[1], (const void*)&output1, sizeof(output1)) == 0);
}
};
void checkOperation (const BufferData<dataTypeT>& original,
const BufferData<dataTypeT>& result,
tcu::ResultCollector& resultCollector);
const AtomicOperation m_atomicOp;
BufferData<dataTypeT>* m_ptr;
BufferData<dataTypeT> m_original;
};
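// Compares two floating-point values, treating any two NaNs as equal and other values as
// equal when they differ by less than a small absolute tolerance.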
template<typename T>
bool nanSafeSloppyEquals(T x, T y)
{
if (deIsIEEENaN(x) && deIsIEEENaN(y))
return true;
if (deIsIEEENaN(x) || deIsIEEENaN(y))
return false;
return fabs(deToDouble(x) - deToDouble(y)) < 0.00001;
}
template<typename dataTypeT>
class TestBufferFloatingPoint : public BufferInterface
{
public:
TestBufferFloatingPoint(AtomicOperation atomicOp)
: m_atomicOp(atomicOp)
{}
template<typename T>
struct BufferDataFloatingPoint
{
// Use half the number of elements for inout to cause overlap between atomic operations.
// Each inout element at index i will have two atomic operations using input from
// indices i and i + NUM_ELEMENTS / 2.
T inout[NUM_ELEMENTS / 2];
T input[NUM_ELEMENTS];
T compare[NUM_ELEMENTS];
T output[NUM_ELEMENTS];
deInt32 invocationHitCount[NUM_ELEMENTS];
deInt32 index;
};
virtual void setBuffer(void* ptr)
{
m_ptr = static_cast<BufferDataFloatingPoint<dataTypeT>*>(ptr);
}
virtual size_t bufferSize()
{
return sizeof(BufferDataFloatingPoint<dataTypeT>);
}
virtual void fillWithTestData(de::Random& rnd)
{
dataTypeT pattern;
deMemset(&pattern, 0xcd, sizeof(dataTypeT));
for (int i = 0; i < NUM_ELEMENTS / 2; i++)
{
m_ptr->inout[i] = deToFloatType<dataTypeT>(rnd.getFloat());
// These aren't used by any of the float tests
m_ptr->compare[i] = deToFloatType<dataTypeT>(0.0);
}
for (int i = 0; i < NUM_ELEMENTS; i++)
{
m_ptr->input[i] = deToFloatType<dataTypeT>(rnd.getFloat());
m_ptr->output[i] = pattern;
m_ptr->invocationHitCount[i] = 0;
}
// Add special cases for NaN and +/-0 after the random fill so they are not overwritten.
// 0: min(sNaN, x)
m_ptr->inout[0] = deSignalingNaN<dataTypeT>();
// 1: min(x, sNaN)
m_ptr->input[1 * 2 + 0] = deSignalingNaN<dataTypeT>();
// 2: min(qNaN, x)
m_ptr->inout[2] = deQuietNaN<dataTypeT>();
// 3: min(x, qNaN)
m_ptr->input[3 * 2 + 0] = deQuietNaN<dataTypeT>();
// 4: min(NaN, NaN)
m_ptr->inout[4] = deSignalingNaN<dataTypeT>();
m_ptr->input[4 * 2 + 0] = deQuietNaN<dataTypeT>();
m_ptr->input[4 * 2 + 1] = deQuietNaN<dataTypeT>();
// 5: min(+0, -0)
m_ptr->inout[5] = deToFloatType<dataTypeT>(-0.0);
m_ptr->input[5 * 2 + 0] = deToFloatType<dataTypeT>(0.0);
m_ptr->input[5 * 2 + 1] = deToFloatType<dataTypeT>(0.0);
m_ptr->index = 0;
// Take a copy to be used when calculating expected values.
m_original = *m_ptr;
}
virtual void checkResults(tcu::ResultCollector& resultCollector)
{
checkOperationFloatingPoint(m_original, *m_ptr, resultCollector);
}
template<typename T>
struct Expected
{
T m_inout;
T m_output[2];
Expected(T inout, T output0, T output1)
: m_inout(inout)
{
m_output[0] = output0;
m_output[1] = output1;
}
bool compare(T inout, T output0, T output1)
{
return nanSafeSloppyEquals(m_inout, inout) &&
nanSafeSloppyEquals(m_output[0], output0) &&
nanSafeSloppyEquals(m_output[1], output1);
}
};
void checkOperationFloatingPoint(const BufferDataFloatingPoint<dataTypeT>& original,
const BufferDataFloatingPoint<dataTypeT>& result,
tcu::ResultCollector& resultCollector);
const AtomicOperation m_atomicOp;
BufferDataFloatingPoint<dataTypeT>* m_ptr;
BufferDataFloatingPoint<dataTypeT> m_original;
};
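// Factory that selects the integer or floating-point test buffer implementation for the
// requested data type.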
static BufferInterface* createTestBuffer(DataType type, AtomicOperation atomicOp)
{
switch (type)
{
case DATA_TYPE_FLOAT16:
return new TestBufferFloatingPoint<deFloat16>(atomicOp);
case DATA_TYPE_INT32:
return new TestBuffer<deInt32>(atomicOp);
case DATA_TYPE_UINT32:
return new TestBuffer<deUint32>(atomicOp);
case DATA_TYPE_FLOAT32:
return new TestBufferFloatingPoint<float>(atomicOp);
case DATA_TYPE_INT64:
return new TestBuffer<deInt64>(atomicOp);
case DATA_TYPE_UINT64:
return new TestBuffer<deUint64>(atomicOp);
case DATA_TYPE_FLOAT64:
return new TestBufferFloatingPoint<double>(atomicOp);
default:
DE_ASSERT(false);
return DE_NULL;
}
}
// Use template to handle both signed and unsigned cases. SPIR-V should
// have separate operations for both.
template<typename T>
void TestBuffer<T>::checkOperation (const BufferData<T>& original,
const BufferData<T>& result,
tcu::ResultCollector& resultCollector)
{
// originalInout = original inout
// input0 = input at index i
// input1 = input at index i + NUM_ELEMENTS / 2
//
// The atomic operation returns the memory contents before
// the operation, and that value is stored as output. Two operations
// are executed for each InOut value (using input0 and input1).
//
// Since two operations overlap on each InOut element, the
// resulting InOut value and the outputs of the operations
// have two result candidates depending on the execution
// order. Verification passes if the results match one of
// these options.
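//
// For example (illustrative values), with atomicAdd and originalInout = 1, input0 = 2,
// input1 = 3: either output0 = 1 and output1 = 3 (operation 0 executed first), or
// output0 = 4 and output1 = 1 (operation 1 executed first); the final InOut value is 6
// in both cases.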
for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
{
// Needed when reinterpreting the data as signed values.
const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
// Expected results are collected to this vector.
vector<Expected<T> > exp;
switch (m_atomicOp)
{
case ATOMIC_OP_ADD:
{
exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout, originalInout + input0));
exp.push_back(Expected<T>(originalInout + input0 + input1, originalInout + input1, originalInout));
}
break;
case ATOMIC_OP_AND:
{
exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout, originalInout & input0));
exp.push_back(Expected<T>(originalInout & input0 & input1, originalInout & input1, originalInout));
}
break;
case ATOMIC_OP_OR:
{
exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout, originalInout | input0));
exp.push_back(Expected<T>(originalInout | input0 | input1, originalInout | input1, originalInout));
}
break;
case ATOMIC_OP_XOR:
{
exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout, originalInout ^ input0));
exp.push_back(Expected<T>(originalInout ^ input0 ^ input1, originalInout ^ input1, originalInout));
}
break;
case ATOMIC_OP_MIN:
{
exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), originalInout, de::min(originalInout, input0)));
exp.push_back(Expected<T>(de::min(de::min(originalInout, input0), input1), de::min(originalInout, input1), originalInout));
}
break;
case ATOMIC_OP_MAX:
{
exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), originalInout, de::max(originalInout, input0)));
exp.push_back(Expected<T>(de::max(de::max(originalInout, input0), input1), de::max(originalInout, input1), originalInout));
}
break;
case ATOMIC_OP_EXCHANGE:
{
exp.push_back(Expected<T>(input1, originalInout, input0));
exp.push_back(Expected<T>(input0, input1, originalInout));
}
break;
case ATOMIC_OP_COMP_SWAP:
{
if (elementNdx % 2 == 0)
{
exp.push_back(Expected<T>(input0, originalInout, input0));
exp.push_back(Expected<T>(input0, originalInout, originalInout));
}
else
{
exp.push_back(Expected<T>(input1, input1, originalInout));
exp.push_back(Expected<T>(input1, originalInout, originalInout));
}
}
break;
default:
DE_FATAL("Unexpected atomic operation.");
break;
}
const T resIo = result.inout[elementNdx];
const T resOutput0 = result.output[elementNdx];
const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
if (!exp[0].compare(resIo, resOutput0, resOutput1) && !exp[1].compare(resIo, resOutput0, resOutput1))
{
std::ostringstream errorMessage;
errorMessage << "ERROR: Result value check failed at index " << elementNdx
<< ". Expected one of the two outcomes: InOut = " << tcu::toHex(exp[0].m_inout)
<< ", Output0 = " << tcu::toHex(exp[0].m_output[0]) << ", Output1 = "
<< tcu::toHex(exp[0].m_output[1]) << ", or InOut = " << tcu::toHex(exp[1].m_inout)
<< ", Output0 = " << tcu::toHex(exp[1].m_output[0]) << ", Output1 = "
<< tcu::toHex(exp[1].m_output[1]) << ". Got: InOut = " << tcu::toHex(resIo)
<< ", Output0 = " << tcu::toHex(resOutput0) << ", Output1 = "
<< tcu::toHex(resOutput1) << ". Using Input0 = " << tcu::toHex(original.input[elementNdx])
<< " and Input1 = " << tcu::toHex(original.input[elementNdx + NUM_ELEMENTS / 2]) << ".";
resultCollector.fail(errorMessage.str());
}
}
}
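// Collects the set of results an atomic float min/max may produce when one of the operands
// is a NaN, or when +0 and -0 are compared; the verification later accepts any value in this set.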
template<typename T>
void handleExceptionalFloatMinMaxValues(vector<T> &values, T x, T y)
{
if (deIsSignalingNaN(x) && deIsSignalingNaN(y))
{
values.push_back(deQuietNaN<T>());
values.push_back(deSignalingNaN<T>());
}
else if (deIsSignalingNaN(x))
{
values.push_back(deQuietNaN<T>());
values.push_back(deSignalingNaN<T>());
if (!deIsIEEENaN(y))
values.push_back(y);
}
else if (deIsSignalingNaN(y))
{
values.push_back(deQuietNaN<T>());
values.push_back(deSignalingNaN<T>());
if (!deIsIEEENaN(x))
values.push_back(x);
}
else if (deIsIEEENaN(x) && deIsIEEENaN(y))
{
// Both quiet NaNs
values.push_back(deQuietNaN<T>());
}
else if (deIsIEEENaN(x))
{
// One quiet NaN and one non-NaN.
values.push_back(y);
}
else if (deIsIEEENaN(y))
{
// One quiet NaN and one non-NaN.
values.push_back(x);
}
else if ((deIsPositiveZero(x) && deIsNegativeZero(y)) || (deIsNegativeZero(x) && deIsPositiveZero(y)))
{
values.push_back(deToFloatType<T>(0.0));
values.push_back(deToFloatType<T>(-0.0));
}
}
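// Floating-point addition performed in double precision; returns a quiet NaN if either
// operand is a NaN.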
template<typename T>
T floatAdd(T x, T y)
{
if (deIsIEEENaN(x) || deIsIEEENaN(y))
return deQuietNaN<T>();
return deToFloatType<T>(deToDouble(x) + deToDouble(y));
}
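// Returns the set of acceptable results for min(x, y), including the special NaN and
// signed-zero cases handled above.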
template<typename T>
vector<T> floatMinValues(T x, T y)
{
vector<T> values;
handleExceptionalFloatMinMaxValues(values, x, y);
if (values.empty())
{
values.push_back(deToDouble(x) < deToDouble(y) ? x : y);
}
return values;
}
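// Returns the set of acceptable results for max(x, y), including the special NaN and
// signed-zero cases handled above.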
template<typename T>
vector<T> floatMaxValues(T x, T y)
{
vector<T> values;
handleExceptionalFloatMinMaxValues(values, x, y);
if (values.empty())
{
values.push_back(deToDouble(x) > deToDouble(y) ? x : y);
}
return values;
}
// Use a template to handle the float16, float and double cases. SPIR-V should
// have separate operations for each.
template<typename T>
void TestBufferFloatingPoint<T>::checkOperationFloatingPoint(const BufferDataFloatingPoint<T>& original,
const BufferDataFloatingPoint<T>& result,
tcu::ResultCollector& resultCollector)
{
// originalInout = original inout
// input0 = input at index i
// input1 = input at index i + NUM_ELEMENTS / 2
//
// The atomic operation returns the memory contents before
// the operation, and that value is stored as output. Two operations
// are executed for each InOut value (using input0 and input1).
//
// Since two operations overlap on each InOut element, the
// resulting InOut value and the outputs of the operations
// have several result candidates depending on the execution
// order (and on how NaNs and signed zeros are handled).
// Verification passes if the results match one of these options.
for (int elementNdx = 0; elementNdx < NUM_ELEMENTS / 2; elementNdx++)
{
// Needed when reinterpreting the data as signed values.
const T originalInout = *reinterpret_cast<const T*>(&original.inout[elementNdx]);
const T input0 = *reinterpret_cast<const T*>(&original.input[elementNdx]);
const T input1 = *reinterpret_cast<const T*>(&original.input[elementNdx + NUM_ELEMENTS / 2]);
// Expected results are collected to this vector.
vector<Expected<T> > exp;
switch (m_atomicOp)
{
case ATOMIC_OP_ADD:
{
exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), originalInout, floatAdd(originalInout, input0)));
exp.push_back(Expected<T>(floatAdd(floatAdd(originalInout, input0), input1), floatAdd(originalInout, input1), originalInout));
}
break;
case ATOMIC_OP_MIN:
{
// The case where input0 is combined first
vector<T> minOriginalAndInput0 = floatMinValues(originalInout, input0);
for (T x : minOriginalAndInput0)
{
vector<T> minAll = floatMinValues(x, input1);
for (T y : minAll)
{
exp.push_back(Expected<T>(y, originalInout, x));
}
}
// The case where input1 is combined first
vector<T> minOriginalAndInput1 = floatMinValues(originalInout, input1);
for (T x : minOriginalAndInput1)
{
vector<T> minAll = floatMinValues(x, input0);
for (T y : minAll)
{
exp.push_back(Expected<T>(y, x, originalInout));
}
}
}
break;
case ATOMIC_OP_MAX:
{
// The case where input0 is combined first
vector<T> maxOriginalAndInput0 = floatMaxValues(originalInout, input0);
for (T x : maxOriginalAndInput0)
{
vector<T> maxAll = floatMaxValues(x, input1);
for (T y : maxAll)
{
exp.push_back(Expected<T>(y, originalInout, x));
}
}
// The case where input1 is combined first
vector<T> maxOriginalAndInput1 = floatMaxValues(originalInout, input1);
for (T x : maxOriginalAndInput1)
{
vector<T> maxAll = floatMaxValues(x, input0);
for (T y : maxAll)
{
exp.push_back(Expected<T>(y, x, originalInout));
}
}
}
break;
case ATOMIC_OP_EXCHANGE:
{
exp.push_back(Expected<T>(input1, originalInout, input0));
exp.push_back(Expected<T>(input0, input1, originalInout));
}
break;
default:
DE_FATAL("Unexpected atomic operation.");
break;
}
const T resIo = result.inout[elementNdx];
const T resOutput0 = result.output[elementNdx];
const T resOutput1 = result.output[elementNdx + NUM_ELEMENTS / 2];
bool hasMatch = false;
for (Expected<T> e : exp)
{
if (e.compare(resIo, resOutput0, resOutput1))
{
hasMatch = true;
break;
}
}
if (!hasMatch)
{
std::ostringstream errorMessage;
errorMessage << "ERROR: Result value check failed at index " << elementNdx
<< ". Expected one of the outcomes:";
bool first = true;
for (Expected<T> e : exp)
{
if (!first)
errorMessage << ", or";
first = false;
errorMessage << " InOut = " << e.m_inout
<< ", Output0 = " << e.m_output[0]
<< ", Output1 = " << e.m_output[1];
}
errorMessage << ". Got: InOut = " << resIo
<< ", Output0 = " << resOutput0
<< ", Output1 = " << resOutput1
<< ". Using Input0 = " << original.input[elementNdx]
<< " and Input1 = " << original.input[elementNdx + NUM_ELEMENTS / 2] << ".";
resultCollector.fail(errorMessage.str());
}
}
}
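// Test instance: fills the test buffer with input data, runs the shader through the shader
// executor and verifies the results of the atomic operations on the host.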
class AtomicOperationCaseInstance : public TestInstance
{
public:
AtomicOperationCaseInstance (Context& context,
const ShaderSpec& shaderSpec,
AtomicShaderType shaderType,
DataType dataType,
AtomicOperation atomicOp);
virtual tcu::TestStatus iterate (void);
private:
const ShaderSpec& m_shaderSpec;
AtomicShaderType m_shaderType;
const DataType m_dataType;
AtomicOperation m_atomicOp;
};
AtomicOperationCaseInstance::AtomicOperationCaseInstance (Context& context,
const ShaderSpec& shaderSpec,
AtomicShaderType shaderType,
DataType dataType,
AtomicOperation atomicOp)
: TestInstance (context)
, m_shaderSpec (shaderSpec)
, m_shaderType (shaderType)
, m_dataType (dataType)
, m_atomicOp (atomicOp)
{
}
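// Creates the data buffer (and, for buffer references, an auxiliary uniform buffer holding its
// device address), sets up a single-binding descriptor set, runs the shader and checks the results.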
tcu::TestStatus AtomicOperationCaseInstance::iterate(void)
{
de::UniquePtr<BufferInterface> testBuffer (createTestBuffer(m_dataType, m_atomicOp));
tcu::TestLog& log = m_context.getTestContext().getLog();
const DeviceInterface& vkd = m_context.getDeviceInterface();
const VkDevice device = m_context.getDevice();
de::Random rnd (0x62a15e34);
const bool useRef = (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE);
const VkDescriptorType descType = (useRef ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
const VkBufferUsageFlags usageFlags = (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | (useRef ? static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : 0u));
// The main buffer will hold test data. When using buffer references, the buffer's address will be indirectly passed as part of
// a uniform buffer. If not, it will be passed directly as a descriptor.
Buffer buffer (m_context, usageFlags, testBuffer->bufferSize(), useRef);
std::unique_ptr<Buffer> auxBuffer;
if (useRef)
{
// Pass the main buffer address inside a uniform buffer.
const VkBufferDeviceAddressInfo addressInfo =
{
VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
nullptr, // const void* pNext;
buffer.getBuffer(), // VkBuffer buffer;
};
const auto address = vkd.getBufferDeviceAddress(device, &addressInfo);
auxBuffer.reset(new Buffer(m_context, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, sizeof(address), false));
deMemcpy(auxBuffer->getHostPtr(), &address, sizeof(address));
auxBuffer->flush();
}
testBuffer->setBuffer(buffer.getHostPtr());
testBuffer->fillWithTestData(rnd);
buffer.flush();
Move<VkDescriptorSetLayout> extraResourcesLayout;
Move<VkDescriptorPool> extraResourcesSetPool;
Move<VkDescriptorSet> extraResourcesSet;
const VkDescriptorSetLayoutBinding bindings[] =
{
{ 0u, descType, 1, VK_SHADER_STAGE_ALL, DE_NULL }
};
const VkDescriptorSetLayoutCreateInfo layoutInfo =
{
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
DE_NULL,
(VkDescriptorSetLayoutCreateFlags)0u,
DE_LENGTH_OF_ARRAY(bindings),
bindings
};
extraResourcesLayout = createDescriptorSetLayout(vkd, device, &layoutInfo);
const VkDescriptorPoolSize poolSizes[] =
{
{ descType, 1u }
};
const VkDescriptorPoolCreateInfo poolInfo =
{
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
DE_NULL,
(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
1u, // maxSets
DE_LENGTH_OF_ARRAY(poolSizes),
poolSizes
};
extraResourcesSetPool = createDescriptorPool(vkd, device, &poolInfo);
const VkDescriptorSetAllocateInfo allocInfo =
{
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
DE_NULL,
*extraResourcesSetPool,
1u,
&extraResourcesLayout.get()
};
extraResourcesSet = allocateDescriptorSet(vkd, device, &allocInfo);
VkDescriptorBufferInfo bufferInfo;
bufferInfo.buffer = (useRef ? auxBuffer->getBuffer() : buffer.getBuffer());
bufferInfo.offset = 0u;
bufferInfo.range = VK_WHOLE_SIZE;
const VkWriteDescriptorSet descriptorWrite =
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
DE_NULL,
*extraResourcesSet,
0u, // dstBinding
0u, // dstArrayElement
1u,
descType,
(const VkDescriptorImageInfo*)DE_NULL,
&bufferInfo,
(const VkBufferView*)DE_NULL
};
vkd.updateDescriptorSets(device, 1u, &descriptorWrite, 0u, DE_NULL);
// Storage for output varying data.
std::vector<deUint32> outputs (NUM_ELEMENTS);
std::vector<void*> outputPtr (NUM_ELEMENTS);
for (size_t i = 0; i < NUM_ELEMENTS; i++)
{
outputs[i] = 0xcdcdcdcd;
outputPtr[i] = &outputs[i];
}
const int numWorkGroups = ((m_shaderType.getMemoryType() == AtomicMemoryType::SHARED) ? 1 : static_cast<int>(NUM_ELEMENTS));
UniquePtr<ShaderExecutor> executor (createExecutor(m_context, m_shaderType.getType(), m_shaderSpec, *extraResourcesLayout));
executor->execute(numWorkGroups, DE_NULL, &outputPtr[0], *extraResourcesSet);
buffer.invalidate();
tcu::ResultCollector resultCollector(log);
// Check the results of the atomic operation
testBuffer->checkResults(resultCollector);
return tcu::TestStatus(resultCollector.getResult(), resultCollector.getMessage());
}
class AtomicOperationCase : public TestCase
{
public:
AtomicOperationCase (tcu::TestContext& testCtx,
const char* name,
const char* description,
AtomicShaderType type,
DataType dataType,
AtomicOperation atomicOp);
virtual ~AtomicOperationCase (void);
virtual TestInstance* createInstance (Context& ctx) const;
virtual void checkSupport (Context& ctx) const;
virtual void initPrograms (vk::SourceCollections& programCollection) const
{
generateSources(m_shaderType.getType(), m_shaderSpec, programCollection);
}
private:
void createShaderSpec();
ShaderSpec m_shaderSpec;
const AtomicShaderType m_shaderType;
const DataType m_dataType;
const AtomicOperation m_atomicOp;
};
AtomicOperationCase::AtomicOperationCase (tcu::TestContext& testCtx,
const char* name,
const char* description,
AtomicShaderType shaderType,
DataType dataType,
AtomicOperation atomicOp)
: TestCase (testCtx, name, description)
, m_shaderType (shaderType)
, m_dataType (dataType)
, m_atomicOp (atomicOp)
{
createShaderSpec();
init();
}
AtomicOperationCase::~AtomicOperationCase (void)
{
}
TestInstance* AtomicOperationCase::createInstance (Context& ctx) const
{
return new AtomicOperationCaseInstance(ctx, m_shaderSpec, m_shaderType, m_dataType, m_atomicOp);
}
void AtomicOperationCase::checkSupport (Context& ctx) const
{
if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
{
ctx.requireDeviceFunctionality("VK_KHR_shader_atomic_int64");
const auto atomicInt64Features = ctx.getShaderAtomicInt64Features();
const bool isSharedMemory = (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED);
if (!isSharedMemory && atomicInt64Features.shaderBufferInt64Atomics == VK_FALSE)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for buffers");
}
if (isSharedMemory && atomicInt64Features.shaderSharedInt64Atomics == VK_FALSE)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicInt64: 64-bit integer atomic operations not supported for shared memory");
}
}
if (m_dataType == DATA_TYPE_FLOAT16)
{
ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
if (m_atomicOp == ATOMIC_OP_ADD)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared add atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer add atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared min/max atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer min/max atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_EXCHANGE)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat16Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point shared atomic operations not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat16Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat16: 16-bit floating point buffer atomic operations not supported");
}
}
}
}
if (m_dataType == DATA_TYPE_FLOAT32)
{
ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
if (m_atomicOp == ATOMIC_OP_ADD)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared add atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer add atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
{
ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat32AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared min/max atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat32AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer min/max atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_EXCHANGE)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
}
}
}
}
if (m_dataType == DATA_TYPE_FLOAT64)
{
ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float");
if (m_atomicOp == ATOMIC_OP_ADD)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared add atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64AtomicAdd)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer add atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_MIN || m_atomicOp == ATOMIC_OP_MAX)
{
ctx.requireDeviceFunctionality("VK_EXT_shader_atomic_float2");
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderSharedFloat64AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared min/max atomic operation not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloat2FeaturesEXT().shaderBufferFloat64AtomicMinMax)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer min/max atomic operation not supported");
}
}
}
if (m_atomicOp == ATOMIC_OP_EXCHANGE)
{
if (m_shaderType.getMemoryType() == AtomicMemoryType::SHARED)
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
}
}
else
{
if (!ctx.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics)
{
TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
}
}
}
}
if (m_shaderType.getMemoryType() == AtomicMemoryType::REFERENCE)
{
ctx.requireDeviceFunctionality("VK_KHR_buffer_device_address");
}
// Check stores and atomic operation support.
switch (m_shaderType.getType())
{
case glu::SHADERTYPE_VERTEX:
case glu::SHADERTYPE_TESSELLATION_CONTROL:
case glu::SHADERTYPE_TESSELLATION_EVALUATION:
case glu::SHADERTYPE_GEOMETRY:
if (!ctx.getDeviceFeatures().vertexPipelineStoresAndAtomics)
TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in Vertex, Tessellation, and Geometry shader.");
break;
case glu::SHADERTYPE_FRAGMENT:
if (!ctx.getDeviceFeatures().fragmentStoresAndAtomics)
TCU_THROW(NotSupportedError, "Stores and atomic operations are not supported in fragment shader.");
break;
case glu::SHADERTYPE_COMPUTE:
break;
default:
DE_FATAL("Unsupported shader type");
}
checkSupportShader(ctx, m_shaderType.getType());
}
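// Builds the GLSL declarations and shader body from string templates, specialized for the
// data type, atomic operation, memory type and shader stage under test.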
void AtomicOperationCase::createShaderSpec (void)
{
const AtomicMemoryType memoryType = m_shaderType.getMemoryType();
// Global declarations.
std::ostringstream shaderTemplateGlobalStream;
// Structure in use for atomic operations.
shaderTemplateGlobalStream
<< "${EXTENSIONS}\n"
<< "\n"
<< "struct AtomicStruct\n"
<< "{\n"
<< " ${DATATYPE} inoutValues[${N}/2];\n"
<< " ${DATATYPE} inputValues[${N}];\n"
<< " ${DATATYPE} compareValues[${N}];\n"
<< " ${DATATYPE} outputValues[${N}];\n"
<< " int invocationHitCount[${N}];\n"
<< " int index;\n"
<< "};\n"
<< "\n"
;
// The name dance and declarations below will make sure the structure that will be used with atomic operations can be accessed
// as "buf.data", which is the name used in the atomic operation statements.
//
// * When using a buffer directly, RESULT_BUFFER_NAME will be "buf" and the inner struct will be "data".
// * When using a workgroup-shared global variable, the "data" struct will be nested in an auxiliary "buf" struct.
// * When using buffer references, the uniform buffer reference will be called "buf" and its contents "data".
//
if (memoryType != AtomicMemoryType::REFERENCE)
{
shaderTemplateGlobalStream
<< "layout (set = ${SETIDX}, binding = 0) buffer AtomicBuffer {\n"
<< " AtomicStruct data;\n"
<< "} ${RESULT_BUFFER_NAME};\n"
<< "\n"
;
// When using global shared memory in the compute variant, invocations will use a shared global structure instead of a
// descriptor set as the sources and results of each tested operation.
if (memoryType == AtomicMemoryType::SHARED)
{
shaderTemplateGlobalStream
<< "shared struct { AtomicStruct data; } buf;\n"
<< "\n"
;
}
}
else
{
shaderTemplateGlobalStream
<< "layout (buffer_reference) buffer AtomicBuffer {\n"
<< " AtomicStruct data;\n"
<< "};\n"
<< "\n"
<< "layout (set = ${SETIDX}, binding = 0) uniform References {\n"
<< " AtomicBuffer buf;\n"
<< "};\n"
<< "\n"
;
}
const auto shaderTemplateGlobalString = shaderTemplateGlobalStream.str();
const tcu::StringTemplate shaderTemplateGlobal (shaderTemplateGlobalString);
// Shader body for the non-vertex case.
std::ostringstream nonVertexShaderTemplateStream;
if (memoryType == AtomicMemoryType::SHARED)
{
// Invocation zero will initialize the shared structure from the descriptor set.
nonVertexShaderTemplateStream
<< "if (gl_LocalInvocationIndex == 0u)\n"
<< "{\n"
<< " buf.data = ${RESULT_BUFFER_NAME}.data;\n"
<< "}\n"
<< "barrier();\n"
;
}
if (m_shaderType.getType() == glu::SHADERTYPE_FRAGMENT)
{
nonVertexShaderTemplateStream
<< "if (!gl_HelperInvocation) {\n"
<< " int idx = atomicAdd(buf.data.index, 1);\n"
<< " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
<< "}\n"
;
}
else
{
nonVertexShaderTemplateStream
<< "if (atomicAdd(buf.data.invocationHitCount[0], 1) < ${N})\n"
<< "{\n"
<< " int idx = atomicAdd(buf.data.index, 1);\n"
<< " buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
<< "}\n"
;
}
if (memoryType == AtomicMemoryType::SHARED)
{
// Invocation zero will copy results back to the descriptor set.
nonVertexShaderTemplateStream
<< "barrier();\n"
<< "if (gl_LocalInvocationIndex == 0u)\n"
<< "{\n"
<< " ${RESULT_BUFFER_NAME}.data = buf.data;\n"
<< "}\n"
;
}
const auto nonVertexShaderTemplateStreamStr = nonVertexShaderTemplateStream.str();
const tcu::StringTemplate nonVertexShaderTemplateSrc (nonVertexShaderTemplateStreamStr);
// Shader body for the vertex case.
const tcu::StringTemplate vertexShaderTemplateSrc(
"int idx = gl_VertexIndex;\n"
"if (atomicAdd(buf.data.invocationHitCount[idx], 1) == 0)\n"
"{\n"
" buf.data.outputValues[idx] = ${ATOMICOP}(buf.data.inoutValues[idx % (${N}/2)], ${COMPARE_ARG}buf.data.inputValues[idx]);\n"
"}\n");
// Extensions.
std::ostringstream extensions;
if ((m_dataType == DATA_TYPE_INT64) || (m_dataType == DATA_TYPE_UINT64))
{
extensions
<< "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
<< "#extension GL_EXT_shader_atomic_int64 : enable\n"
;
}
else if ((m_dataType == DATA_TYPE_FLOAT16) || (m_dataType == DATA_TYPE_FLOAT32) || (m_dataType == DATA_TYPE_FLOAT64))
{
extensions
<< "#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable\n"
<< "#extension GL_EXT_shader_atomic_float : enable\n"
<< "#extension GL_EXT_shader_atomic_float2 : enable\n"
<< "#extension GL_KHR_memory_scope_semantics : enable\n"
;
}
if (memoryType == AtomicMemoryType::REFERENCE)
{
extensions << "#extension GL_EXT_buffer_reference : require\n";
}
// Specializations.
std::map<std::string, std::string> specializations;
specializations["EXTENSIONS"] = extensions.str();
specializations["DATATYPE"] = dataType2Str(m_dataType);
specializations["ATOMICOP"] = atomicOp2Str(m_atomicOp);
specializations["SETIDX"] = de::toString((int)EXTRA_RESOURCES_DESCRIPTOR_SET_INDEX);
specializations["N"] = de::toString((int)NUM_ELEMENTS);
specializations["COMPARE_ARG"] = ((m_atomicOp == ATOMIC_OP_COMP_SWAP) ? "buf.data.compareValues[idx], " : "");
specializations["RESULT_BUFFER_NAME"] = ((memoryType == AtomicMemoryType::SHARED) ? "result" : "buf");
// Shader spec.
m_shaderSpec.outputs.push_back(Symbol("outData", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
m_shaderSpec.glslVersion = glu::GLSL_VERSION_450;
m_shaderSpec.globalDeclarations = shaderTemplateGlobal.specialize(specializations);
m_shaderSpec.source = ((m_shaderType.getType() == glu::SHADERTYPE_VERTEX)
? vertexShaderTemplateSrc.specialize(specializations)
: nonVertexShaderTemplateSrc.specialize(specializations));
if (memoryType == AtomicMemoryType::SHARED)
{
// When using global shared memory, use a single workgroup and an appropriate number of local invocations.
m_shaderSpec.localSizeX = static_cast<int>(NUM_ELEMENTS);
}
}
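// Creates one test case per combination of atomic operation, data type, shader stage and
// memory type, skipping invalid combinations (e.g. bitwise operations on floating-point
// types, or shared memory outside compute shaders).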
void addAtomicOperationTests (tcu::TestCaseGroup* atomicOperationTestsGroup)
{
tcu::TestContext& testCtx = atomicOperationTestsGroup->getTestContext();
static const struct
{
glu::ShaderType type;
const char* name;
} shaderTypes[] =
{
{ glu::SHADERTYPE_VERTEX, "vertex" },
{ glu::SHADERTYPE_FRAGMENT, "fragment" },
{ glu::SHADERTYPE_GEOMETRY, "geometry" },
{ glu::SHADERTYPE_TESSELLATION_CONTROL, "tess_ctrl" },
{ glu::SHADERTYPE_TESSELLATION_EVALUATION, "tess_eval" },
{ glu::SHADERTYPE_COMPUTE, "compute" },
};
static const struct
{
AtomicMemoryType type;
const char* suffix;
} kMemoryTypes[] =
{
{ AtomicMemoryType::BUFFER, "" },
{ AtomicMemoryType::SHARED, "_shared" },
{ AtomicMemoryType::REFERENCE, "_reference" },
};
static const struct
{
DataType dataType;
const char* name;
const char* description;
} dataSign[] =
{
{ DATA_TYPE_FLOAT16,"float16", "Tests using 16-bit float data" },
{ DATA_TYPE_INT32, "signed", "Tests using signed data (int)" },
{ DATA_TYPE_UINT32, "unsigned", "Tests using unsigned data (uint)" },
{ DATA_TYPE_FLOAT32,"float32", "Tests using 32-bit float data" },
{ DATA_TYPE_INT64, "signed64bit", "Tests using 64 bit signed data (int64)" },
{ DATA_TYPE_UINT64, "unsigned64bit", "Tests using 64 bit unsigned data (uint64)" },
{ DATA_TYPE_FLOAT64,"float64", "Tests using 64-bit float data" }
};
static const struct
{
AtomicOperation value;
const char* name;
} atomicOp[] =
{
{ ATOMIC_OP_EXCHANGE, "exchange" },
{ ATOMIC_OP_COMP_SWAP, "comp_swap" },
{ ATOMIC_OP_ADD, "add" },
{ ATOMIC_OP_MIN, "min" },
{ ATOMIC_OP_MAX, "max" },
{ ATOMIC_OP_AND, "and" },
{ ATOMIC_OP_OR, "or" },
{ ATOMIC_OP_XOR, "xor" }
};
for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(atomicOp); opNdx++)
{
for (int signNdx = 0; signNdx < DE_LENGTH_OF_ARRAY(dataSign); signNdx++)
{
for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(shaderTypes); shaderTypeNdx++)
{
// Only ADD, MIN, MAX and EXCHANGE are supported on floating-point types
if (dataSign[signNdx].dataType == DATA_TYPE_FLOAT16 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT32 || dataSign[signNdx].dataType == DATA_TYPE_FLOAT64)
{
if (atomicOp[opNdx].value != ATOMIC_OP_ADD &&
atomicOp[opNdx].value != ATOMIC_OP_MIN &&
atomicOp[opNdx].value != ATOMIC_OP_MAX &&
atomicOp[opNdx].value != ATOMIC_OP_EXCHANGE)
{
continue;
}
}
for (int memoryTypeNdx = 0; memoryTypeNdx < DE_LENGTH_OF_ARRAY(kMemoryTypes); ++memoryTypeNdx)
{
// Shared memory only available in compute shaders.
if (kMemoryTypes[memoryTypeNdx].type == AtomicMemoryType::SHARED && shaderTypes[shaderTypeNdx].type != glu::SHADERTYPE_COMPUTE)
continue;
const std::string description = std::string("Tests atomic operation ") + atomicOp2Str(atomicOp[opNdx].value) + std::string(".");
const std::string name = std::string(atomicOp[opNdx].name) + "_" + std::string(dataSign[signNdx].name) + "_" + std::string(shaderTypes[shaderTypeNdx].name) + kMemoryTypes[memoryTypeNdx].suffix;
atomicOperationTestsGroup->addChild(new AtomicOperationCase(testCtx, name.c_str(), description.c_str(), AtomicShaderType(shaderTypes[shaderTypeNdx].type, kMemoryTypes[memoryTypeNdx].type), dataSign[signNdx].dataType, atomicOp[opNdx].value));
}
}
}
}
}
} // anonymous
tcu::TestCaseGroup* createAtomicOperationTests (tcu::TestContext& testCtx)
{
return createTestGroup(testCtx, "atomic_operations", "Atomic Operation Tests", addAtomicOperationTests);
}
} // shaderexecutor
} // vkt