// blob: cebb0d50ccc0bbe9bb40193a06b25dcaeaf5e00e [file] [log] [blame]
/*-------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2017 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
*//*--------------------------------------------------------------------*/
#include "vktSpvAsm16bitStorageTests.hpp"
#include "tcuFloat.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuTestLog.hpp"
#include "tcuVectorUtil.hpp"
#include "vkDefs.hpp"
#include "vkDeviceUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkQueryUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkStrUtil.hpp"
#include "vkTypeUtil.hpp"
#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deMath.h"
#include "vktSpvAsmComputeShaderCase.hpp"
#include "vktSpvAsmComputeShaderTestUtil.hpp"
#include "vktSpvAsmGraphicsShaderTestUtil.hpp"
#include "vktSpvAsmUtils.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include <limits>
#include <map>
#include <string>
#include <sstream>
#include <utility>
namespace vkt
{
namespace SpirVAssembly
{
using namespace vk;
using std::map;
using std::string;
using std::vector;
using tcu::Float16;
using tcu::IVec3;
using tcu::IVec4;
using tcu::RGBA;
using tcu::TestLog;
using tcu::TestStatus;
using tcu::Vec4;
using de::UniquePtr;
using tcu::StringTemplate;
using tcu::Vec4;
namespace
{
//! Identifies a SPIR-V text fragment / struct layout used by the struct tests.
enum ShaderTemplate
{
	SHADERTEMPLATE_TYPES				= 0,	//!< Shared type and pointer declarations.
	SHADERTEMPLATE_STRIDE32BIT_STD140	= 1,	//!< 32-bit float struct, std140 decorations.
	SHADERTEMPLATE_STRIDE32BIT_STD430	= 2,	//!< 32-bit float struct, std430 decorations.
	SHADERTEMPLATE_STRIDE16BIT_STD140	= 3,	//!< 16-bit float struct, std140 decorations.
	SHADERTEMPLATE_STRIDE16BIT_STD430	= 4,	//!< 16-bit float struct, std430 decorations.
	SHADERTEMPLATE_STRIDEMIX_STD140		= 5,	//!< Mixed 16/32-bit struct, std140 decorations.
	SHADERTEMPLATE_STRIDEMIX_STD430		= 6		//!< Mixed 16/32-bit struct, std430 decorations.
};
// float -> half comparison: forwards to compare16BitFloat() with the accepted rounding modes.
bool compare16Bit (float original, deUint16 returned, RoundingModeFlags flags, tcu::TestLog& log)
{
	const bool matches = compare16BitFloat(original, returned, flags, log);
	return matches;
}
// half -> float comparison: the rounding flags are not used by this overload.
bool compare16Bit (deUint16 original, float returned, RoundingModeFlags flags, tcu::TestLog& log)
{
	DE_UNREF(flags);

	const bool matches = compare16BitFloat(original, returned, log);
	return matches;
}
// 16-bit integer comparison: values must be identical; rounding flags and log are unused.
bool compare16Bit (deInt16 original, deInt16 returned, RoundingModeFlags flags, tcu::TestLog& log)
{
	DE_UNREF(log);
	DE_UNREF(flags);

	const bool identical = (original == returned);
	return identical;
}
// Array dimensions shared by the struct layout helpers in this file
// (getStructSize() and the info*() mask builders read them through structData).
struct StructTestData
{
const int structArraySize; //Size of Struct Array
const int nestedArraySize; //Max size of any nested arrays
};
// One storage-class variant exercised by the tests.
struct Capability
{
const char* name; // Short identifier; get16BitStorageFeatures() matches on this string.
const char* cap; // SPIR-V capability name -- presumably substituted into "OpCapability ${capability}"; confirm at use sites.
const char* decor; // Block decoration name ("BufferBlock" / "Block") -- presumably substituted into "${storage}"; confirm at use sites.
vk::VkDescriptorType dtype; // Descriptor type paired with this variant.
};
// Table of the storage variants: a BufferBlock-decorated storage buffer and a Block-decorated uniform buffer.
// Columns follow the Capability field order: name, cap, decor, dtype.
static const Capability CAPABILITIES[] =
{
{"uniform_buffer_block", "StorageUniformBufferBlock16", "BufferBlock", VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
{"uniform", "StorageUniform16", "Block", VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
};
// structArraySize = 7, nestedArraySize = 11; these match the "...Arr7" / "...arr11" type names
// used in the decoration strings returned by getStructShaderComponet().
static const StructTestData structData = {7, 11};
//! Data type selector stored in TestDefinition.
enum TestDefDataType
{
	DATATYPE_FLOAT	= 0,
	DATATYPE_VEC2	= 1,
	DATATYPE_INT	= 2,
	DATATYPE_UINT	= 3,
	DATATYPE_IVEC2	= 4,
	DATATYPE_UVEC2	= 5
};
// Bundles an InstanceContext with the data type a test case operates on.
struct TestDefinition
{
InstanceContext instanceContext; // Context object for the test instance (type declared elsewhere).
TestDefDataType dataType; // Which TestDefDataType the case uses.
};
// Builds the VulkanFeatures needed for a storage variant, selected by its Capability::name string.
// "uniform_buffer_block" enables storageBuffer16BitAccess, "uniform" enables
// uniformAndStorageBuffer16BitAccess; any other string trips a debug assertion and
// returns features with neither flag set by this function.
VulkanFeatures get16BitStorageFeatures (const char* cap)
{
	VulkanFeatures	features;
	const string	capName			(cap);
	const bool		isBufferBlock	= (capName == "uniform_buffer_block");
	const bool		isUniform		= (capName == "uniform");

	if (isBufferBlock)
	{
		features.ext16BitStorage.storageBuffer16BitAccess = true;
	}
	else if (isUniform)
	{
		features.ext16BitStorage.uniformAndStorageBuffer16BitAccess = true;
	}
	else
	{
		DE_ASSERT(false && "not supported");
	}

	return features;
}
// Returns the number of elements (including padding slots) occupied by the whole
// struct array for the given layout. The unit is one f16 for the 16-bit layouts,
// one f32 for the 32-bit layouts and one 16-bit word for the mixed layouts.
// Unsupported templates trip a debug assertion and yield 0.
int getStructSize(const ShaderTemplate shaderTemplate)
{
	int elementsPerStruct = 0;

	switch (shaderTemplate)
	{
		case SHADERTEMPLATE_STRIDE16BIT_STD140:	elementsPerStruct = 600;		break;	// f16 slots
		case SHADERTEMPLATE_STRIDE16BIT_STD430:	elementsPerStruct = 184;		break;	// f16 slots
		case SHADERTEMPLATE_STRIDE32BIT_STD140:	elementsPerStruct = 304;		break;	// f32 slots
		case SHADERTEMPLATE_STRIDE32BIT_STD430:	elementsPerStruct = 184;		break;	// f32 slots
		case SHADERTEMPLATE_STRIDEMIX_STD140:	elementsPerStruct = 4480 / 2;	break;	// byte stride -> 16-bit slots
		case SHADERTEMPLATE_STRIDEMIX_STD430:	elementsPerStruct = 1216 / 2;	break;	// byte stride -> 16-bit slots
		default:
			DE_ASSERT(0);
			return 0;
	}

	return elementsPerStruct * structData.structArraySize;
}
// Batch function to check arrays of 16-bit floats.
//
// For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
// the expected values here instead of get the expected values directly from the test case.
// Thus we need original floats here but not expected outputs.
template<RoundingModeFlags RoundingMode>
bool graphicsCheck16BitFloats (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const float* original = reinterpret_cast<const float*>(&originalBytes.front());
const deUint32 count = static_cast<deUint32>(expectedOutputs[outputNdx].getByteSize() / sizeof(deUint16));
const deUint32 inputStride = static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
return false;
}
return true;
}
template<RoundingModeFlags RoundingMode>
bool graphicsCheck16BitFloats64 (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& /* expectedOutputs */,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const double* original = reinterpret_cast<const double*>(&originalBytes.front());
const deUint32 count = static_cast<deUint32>(originalBytes.size() / sizeof(double));
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
return false;
}
return true;
}
bool computeCheckBuffersFloats (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& /*expectedOutputs*/,
tcu::TestLog& /*log*/)
{
std::vector<deUint8> result;
originalFloats.front().getBuffer()->getPackedBytes(result);
const deUint16 * results = reinterpret_cast<const deUint16 *>(&result[0]);
const deUint16 * expected = reinterpret_cast<const deUint16 *>(outputAllocs.front()->getHostPtr());
for (size_t i = 0; i < result.size() / sizeof (deUint16); ++i)
{
if (results[i] == expected[i])
continue;
if (Float16(results[i]).isNaN() && Float16(expected[i]).isNaN())
continue;
return false;
}
return true;
}
template<RoundingModeFlags RoundingMode>
bool computeCheck16BitFloats (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const float* original = reinterpret_cast<const float*>(&originalBytes.front());
const deUint32 count = static_cast<deUint32>(expectedOutputs[outputNdx].getByteSize() / sizeof(deUint16));
const deUint32 inputStride = static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
return false;
}
return true;
}
template<RoundingModeFlags RoundingMode>
bool computeCheck16BitFloats64 (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& /* expectedOutputs */,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const double* original = reinterpret_cast<const double*>(&originalBytes.front());
const deUint32 count = static_cast<deUint32>(originalBytes.size() / sizeof(double));
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
return false;
}
return true;
}
// Batch function to check arrays of 64-bit floats.
//
// For comparing 64-bit floats, we just need the expected value precomputed in the test case.
// So we need expected outputs here but not original floats.
bool check64BitFloats (const std::vector<Resource>& /* originalFloats */,
const std::vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != expectedOutputs.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> expectedBytes;
expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
const double* returnedAsDouble = static_cast<const double*>(outputAllocs[outputNdx]->getHostPtr());
const double* expectedAsDouble = reinterpret_cast<const double*>(&expectedBytes.front());
const deUint32 count = static_cast<deUint32>(expectedBytes.size() / sizeof(double));
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare64BitFloat(expectedAsDouble[numNdx], returnedAsDouble[numNdx], log))
return false;
}
return true;
}
// Batch function to check arrays of 32-bit floats.
//
// For comparing 32-bit floats, we just need the expected value precomputed in the test case.
// So we need expected outputs here but not original floats.
bool check32BitFloats (const std::vector<Resource>& /* originalFloats */,
const std::vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != expectedOutputs.size())
return false;
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> expectedBytes;
expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
const float* returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
const float* expectedAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
const deUint32 count = static_cast<deUint32>(expectedBytes.size() / sizeof(float));
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
return false;
}
return true;
}
// Writes isData into `count` consecutive slots of info starting at ndx, and advances
// ndx past them. A non-positive count leaves both info and ndx untouched.
void addInfo(vector<bool>& info, int& ndx, const int count, bool isData)
{
	for (int remaining = count; remaining > 0; --remaining)
	{
		info[ndx] = isData;
		++ndx;
	}
}
// Generates random f16 values covering every slot (data and padding) of the 16-bit std140 struct array.
vector<deFloat16> data16bitStd140 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140);
	return getFloat16s(rnd, numSlots);
}
vector<bool> info16bitStd140 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //f16
infoData[ndx++] = false; //offset
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
addInfo(infoData, ndx, 3, true); //v3f16
infoData[ndx++] = false; //offset
addInfo(infoData, ndx, 4, true); //v4f16
addInfo(infoData, ndx, 4, false); //offset
//f16[3];
for (int i = 0; i < 3; ++i)
{
infoData[ndx++] = true; //f16[0];
addInfo(infoData, ndx, 7, false); //offset
}
//struct {f16, v2f16[3]} [11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
//struct.f16
infoData[ndx++] = true; //f16
addInfo(infoData, ndx, 7, false); //offset
//struct.f16.v2f16[3]
for (int j = 0; j < 3; ++j)
{
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
addInfo(infoData, ndx, 6, false); //offset
}
}
//vec2[11];
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
addInfo(infoData, ndx, 6, false); //offset
}
//f16
infoData[ndx++] = true; //f16
addInfo(infoData, ndx, 7, false); //offset
//vec3[11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
addInfo(infoData, ndx, 3, true); //vec3
addInfo(infoData, ndx, 5, false); //offset
}
//vec4[3]
for (int i = 0; i < 3; ++i)
{
addInfo(infoData, ndx, 4, true); //vec4
addInfo(infoData, ndx, 4, false); //offset
}
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
// Generates random f16 values covering every slot (data and padding) of the 16-bit std430 struct array.
vector<deFloat16> data16bitStd430 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430);
	return getFloat16s(rnd, numSlots);
}
vector<bool> info16bitStd430 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //f16
infoData[ndx++] = false; //offset
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
addInfo(infoData, ndx, 3, true); //v3f16
infoData[ndx++] = false; //offset
addInfo(infoData, ndx, 4, true); //v4f16
//f16[3];
for (int i = 0; i < 3; ++i)
{
infoData[ndx++] = true; //f16;
}
addInfo(infoData, ndx, 1, false); //offset
//struct {f16, v2f16[3]} [11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
//struct.f16
infoData[ndx++] = true; //f16
infoData[ndx++] = false; //offset
//struct.f16.v2f16[3]
for (int j = 0; j < 3; ++j)
{
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
}
}
//vec2[11];
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //v2f16
infoData[ndx++] = true; //v2f16
}
//f16
infoData[ndx++] = true; //f16
infoData[ndx++] = false; //offset
//vec3[11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
addInfo(infoData, ndx, 3, true); //vec3
infoData[ndx++] = false; //offset
}
//vec4[3]
for (int i = 0; i < 3; ++i)
{
addInfo(infoData, ndx, 4, true); //vec4
}
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
// Generates random f32 values covering every slot (data and padding) of the 32-bit std140 struct array.
vector<float> data32bitStd140 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140);
	return getFloat32s(rnd, numSlots);
}
vector<bool> info32bitStd140 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //f32
infoData[ndx++] = false; //offset
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
addInfo(infoData, ndx, 3, true); //v3f32
infoData[ndx++] = false; //offset
addInfo(infoData, ndx, 4, true); //v4f16
//f32[3];
for (int i = 0; i < 3; ++i)
{
infoData[ndx++] = true; //f32;
addInfo(infoData, ndx, 3, false); //offset
}
//struct {f32, v2f32[3]} [11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
//struct.f32
infoData[ndx++] = true; //f32
addInfo(infoData, ndx, 3, false); //offset
//struct.f32.v2f16[3]
for (int j = 0; j < 3; ++j)
{
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
infoData[ndx++] = false; //offset
infoData[ndx++] = false; //offset
}
}
//v2f32[11];
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
infoData[ndx++] = false; //offset
infoData[ndx++] = false; //offset
}
//f16
infoData[ndx++] = true; //f16
addInfo(infoData, ndx, 3, false); //offset
//vec3[11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
addInfo(infoData, ndx, 3, true); //v3f32
infoData[ndx++] = false; //offset
}
//vec4[3]
for (int i = 0; i < 3; ++i)
{
addInfo(infoData, ndx, 4, true); //vec4
}
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
// Generates random f32 values covering every slot (data and padding) of the 32-bit std430 struct array.
vector<float> data32bitStd430 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430);
	return getFloat32s(rnd, numSlots);
}
vector<bool> info32bitStd430 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //f32
infoData[ndx++] = false; //offset
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
addInfo(infoData, ndx, 3, true); //v3f32
infoData[ndx++] = false; //offset
addInfo(infoData, ndx, 4, true); //v4f16
//f32[3];
for (int i = 0; i < 3; ++i)
{
infoData[ndx++] = true; //f32;
}
infoData[ndx++] = false; //offset
//struct {f32, v2f32[3]} [11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
//struct.f32
infoData[ndx++] = true; //f32
infoData[ndx++] = false; //offset
//struct.f32.v2f16[3]
for (int j = 0; j < 3; ++j)
{
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
}
}
//v2f32[11];
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //v2f32
infoData[ndx++] = true; //v2f32
}
//f32
infoData[ndx++] = true; //f32
infoData[ndx++] = false; //offset
//vec3[11]
for (int i = 0; i < structData.nestedArraySize; ++i)
{
addInfo(infoData, ndx, 3, true); //v3f32
infoData[ndx++] = false; //offset
}
//vec4[3]
for (int i = 0; i < 3; ++i)
{
addInfo(infoData, ndx, 4, true); //vec4
}
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
// Generates random 16-bit integers covering every 16-bit slot (data and padding) of the mixed std140 struct array.
vector<deInt16> dataMixStd140 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140);
	return getInt16s(rnd, numSlots);
}
vector<bool> infoMixStd140 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //16b
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 2, true); //32b
addInfo(infoData, ndx, 2, true); //v2b16
addInfo(infoData, ndx, 2, false); //offset
addInfo(infoData, ndx, 4, true); //v2b32
addInfo(infoData, ndx, 3, true); //v3b16
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 6, true); //v3b32
addInfo(infoData, ndx, 2, false); //offset
addInfo(infoData, ndx, 4, true); //v4b16
addInfo(infoData, ndx, 4, false); //offset
addInfo(infoData, ndx, 8, true); //v4b32
//strut {b16, b32, v2b16[11], b32[11]}
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //16b
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 2, true); //32b
addInfo(infoData, ndx, 4, false); //offset
for (int j = 0; j < structData.nestedArraySize; ++j)
{
addInfo(infoData, ndx, 2, true); //v2b16[11]
addInfo(infoData, ndx, 6, false); //offset
}
for (int j = 0; j < structData.nestedArraySize; ++j)
{
addInfo(infoData, ndx, 2, true); //b32[11]
addInfo(infoData, ndx, 6, false); //offset
}
}
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //16b[11]
addInfo(infoData, ndx, 7, false); //offset
}
for (int i = 0; i < structData.nestedArraySize; ++i)
{
addInfo(infoData, ndx, 2, true); //b32bIn[11]
addInfo(infoData, ndx, 6, false); //offset
}
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
// Generates random 16-bit integers covering every 16-bit slot (data and padding) of the mixed std430 struct array.
vector<deInt16> dataMixStd430 (de::Random& rnd)
{
	const int numSlots = getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430);
	return getInt16s(rnd, numSlots);
}
vector<bool> infoMixStd430 (void)
{
int ndx = 0u;
vector<bool> infoData (getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430));
for(int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
{
infoData[ndx++] = true; //16b
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 2, true); //32b
addInfo(infoData, ndx, 2, true); //v2b16
addInfo(infoData, ndx, 2, false); //offset
addInfo(infoData, ndx, 4, true); //v2b32
addInfo(infoData, ndx, 3, true); //v3b16
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 6, true); //v3b32
addInfo(infoData, ndx, 2, false); //offset
addInfo(infoData, ndx, 4, true); //v4b16
addInfo(infoData, ndx, 4, false); //offset
addInfo(infoData, ndx, 8, true); //v4b32
//strut {b16, b32, v2b16[11], b32[11]}
for (int i = 0; i < structData.nestedArraySize; ++i)
{
infoData[ndx++] = true; //16b
addInfo(infoData, ndx, 1, false); //offset
addInfo(infoData, ndx, 2, true); //32b
addInfo(infoData, ndx, 22, true); //v2b16[11]
addInfo(infoData, ndx, 22, true); //b32[11]
}
addInfo(infoData, ndx, 11, true); //16b[11]
infoData[ndx++] = false; //offset
addInfo(infoData, ndx, 22, true); //32b[11]
addInfo(infoData, ndx, 6, false); //offset
}
//Please check the data and offset
DE_ASSERT(ndx == static_cast<int>(infoData.size()));
return infoData;
}
template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
bool compareStruct(const resultType* returned, const originType* original, tcu::TestLog& log)
{
vector<bool> resultInfo;
vector<bool> originInfo;
vector<resultType> resultToCompare;
vector<originType> originToCompare;
switch(funcOrigin)
{
case SHADERTEMPLATE_STRIDE16BIT_STD140:
originInfo = info16bitStd140();
break;
case SHADERTEMPLATE_STRIDE16BIT_STD430:
originInfo = info16bitStd430();
break;
case SHADERTEMPLATE_STRIDE32BIT_STD140:
originInfo = info32bitStd140();
break;
case SHADERTEMPLATE_STRIDE32BIT_STD430:
originInfo = info32bitStd430();
break;
case SHADERTEMPLATE_STRIDEMIX_STD140:
originInfo = infoMixStd140();
break;
case SHADERTEMPLATE_STRIDEMIX_STD430:
originInfo = infoMixStd430();
break;
default:
DE_ASSERT(0);
}
switch(funcResult)
{
case SHADERTEMPLATE_STRIDE16BIT_STD140:
resultInfo = info16bitStd140();
break;
case SHADERTEMPLATE_STRIDE16BIT_STD430:
resultInfo = info16bitStd430();
break;
case SHADERTEMPLATE_STRIDE32BIT_STD140:
resultInfo = info32bitStd140();
break;
case SHADERTEMPLATE_STRIDE32BIT_STD430:
resultInfo = info32bitStd430();
break;
case SHADERTEMPLATE_STRIDEMIX_STD140:
resultInfo = infoMixStd140();
break;
case SHADERTEMPLATE_STRIDEMIX_STD430:
resultInfo = infoMixStd430();
break;
default:
DE_ASSERT(0);
}
for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(resultInfo.size()); ++ndx)
{
if (resultInfo[ndx])
resultToCompare.push_back(returned[ndx]);
}
for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originInfo.size()); ++ndx)
{
if (originInfo[ndx])
originToCompare.push_back(original[ndx]);
}
//Different offset but that same amount of data
DE_ASSERT(originToCompare.size() == resultToCompare.size());
for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originToCompare.size()); ++ndx)
{
if (!compare16Bit(originToCompare[ndx], resultToCompare[ndx], RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ), log))
return false;
}
return true;
}
template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
bool computeCheckStruct (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& /* expectedOutputs */,
tcu::TestLog& log)
{
for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const resultType* returned = static_cast<const resultType*>(outputAllocs[outputNdx]->getHostPtr());
const originType* original = reinterpret_cast<const originType*>(&originalBytes.front());
if(!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
return false;
}
return true;
}
template<typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
bool graphicsCheckStruct (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
const std::vector<Resource>& /* expectedOutputs */,
tcu::TestLog& log)
{
for (deUint32 outputNdx = 0; outputNdx < static_cast<deUint32>(outputAllocs.size()); ++outputNdx)
{
vector<deUint8> originalBytes;
originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
const resultType* returned = static_cast<const resultType*>(outputAllocs[outputNdx]->getHostPtr());
const originType* original = reinterpret_cast<const originType*>(&originalBytes.front());
if(!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
return false;
}
return true;
}
// Returns the SPIR-V assembly fragment associated with the given template:
//  - SHADERTEMPLATE_TYPES: 16-bit float scalar/vector types, plus Uniform pointer
//    types for both the 16-bit and the 32-bit float scalar/vector types.
//  - SHADERTEMPLATE_STRIDE16BIT_* / SHADERTEMPLATE_STRIDE32BIT_*: ArrayStride and
//    member Offset decorations (in bytes) for the f16 / f32 test struct.
//  - SHADERTEMPLATE_STRIDEMIX_*: the same kind of decorations for the mixed 16/32-bit
//    struct. These fragments contain ${InOut} placeholders and are returned
//    unspecialized -- presumably the caller runs them through a string template.
// Any other value yields an empty string.
// The byte offsets here correspond to the slot masks built by the info*() helpers
// and to the sizes returned by getStructSize().
string getStructShaderComponet (const ShaderTemplate component)
{
switch(component)
{
case SHADERTEMPLATE_TYPES:
return string(
"%f16 = OpTypeFloat 16\n"
"%v2f16 = OpTypeVector %f16 2\n"
"%v3f16 = OpTypeVector %f16 3\n"
"%v4f16 = OpTypeVector %f16 4\n"
"%f16ptr = OpTypePointer Uniform %f16\n"
"%v2f16ptr = OpTypePointer Uniform %v2f16\n"
"%v3f16ptr = OpTypePointer Uniform %v3f16\n"
"%v4f16ptr = OpTypePointer Uniform %v4f16\n"
"\n"
"%f32ptr = OpTypePointer Uniform %f32\n"
"%v2f32ptr = OpTypePointer Uniform %v2f32\n"
"%v3f32ptr = OpTypePointer Uniform %v3f32\n"
"%v4f32ptr = OpTypePointer Uniform %v4f32\n");
case SHADERTEMPLATE_STRIDE16BIT_STD140:
return string(
//struct {f16, v2f16[3]} [11]
"OpDecorate %v2f16arr3 ArrayStride 16\n"
"OpMemberDecorate %struct16 0 Offset 0\n"
"OpMemberDecorate %struct16 1 Offset 16\n"
"OpDecorate %struct16arr11 ArrayStride 64\n"
"OpDecorate %f16arr3 ArrayStride 16\n"
"OpDecorate %v2f16arr11 ArrayStride 16\n"
"OpDecorate %v3f16arr11 ArrayStride 16\n"
"OpDecorate %v4f16arr3 ArrayStride 16\n"
"OpDecorate %f16StructArr7 ArrayStride 1200\n"
"\n"
"OpMemberDecorate %f16Struct 0 Offset 0\n" //f16
"OpMemberDecorate %f16Struct 1 Offset 4\n" //v2f16
"OpMemberDecorate %f16Struct 2 Offset 8\n" //v3f16
"OpMemberDecorate %f16Struct 3 Offset 16\n" //v4f16
"OpMemberDecorate %f16Struct 4 Offset 32\n" //f16[3]
"OpMemberDecorate %f16Struct 5 Offset 80\n" //struct {f16, v2f16[3]} [11]
"OpMemberDecorate %f16Struct 6 Offset 784\n" //v2f16[11]
"OpMemberDecorate %f16Struct 7 Offset 960\n" //f16
"OpMemberDecorate %f16Struct 8 Offset 976\n" //v3f16[11]
"OpMemberDecorate %f16Struct 9 Offset 1152\n"); //v4f16[3]
case SHADERTEMPLATE_STRIDE16BIT_STD430:
return string(
//struct {f16, v2f16[3]} [11]
"OpDecorate %v2f16arr3 ArrayStride 4\n"
"OpMemberDecorate %struct16 0 Offset 0\n"
"OpMemberDecorate %struct16 1 Offset 4\n"
"OpDecorate %struct16arr11 ArrayStride 16\n"
"OpDecorate %f16arr3 ArrayStride 2\n"
"OpDecorate %v2f16arr11 ArrayStride 4\n"
"OpDecorate %v3f16arr11 ArrayStride 8\n"
"OpDecorate %v4f16arr3 ArrayStride 8\n"
"OpDecorate %f16StructArr7 ArrayStride 368\n"
"\n"
"OpMemberDecorate %f16Struct 0 Offset 0\n" //f16
"OpMemberDecorate %f16Struct 1 Offset 4\n" //v2f16
"OpMemberDecorate %f16Struct 2 Offset 8\n" //v3f16
"OpMemberDecorate %f16Struct 3 Offset 16\n" //v4f16
"OpMemberDecorate %f16Struct 4 Offset 24\n" //f16[3]
"OpMemberDecorate %f16Struct 5 Offset 32\n" //struct {f16, v2f16[3]} [11]
"OpMemberDecorate %f16Struct 6 Offset 208\n" //v2f16[11]
"OpMemberDecorate %f16Struct 7 Offset 252\n" //f16
"OpMemberDecorate %f16Struct 8 Offset 256\n" //v3f16[11]
"OpMemberDecorate %f16Struct 9 Offset 344\n"); //v4f16[3]
case SHADERTEMPLATE_STRIDE32BIT_STD140:
return string (
//struct {f32, v2f32[3]} [11]
"OpDecorate %v2f32arr3 ArrayStride 16\n"
"OpMemberDecorate %struct32 0 Offset 0\n"
"OpMemberDecorate %struct32 1 Offset 16\n"
"OpDecorate %struct32arr11 ArrayStride 64\n"
"OpDecorate %f32arr3 ArrayStride 16\n"
"OpDecorate %v2f32arr11 ArrayStride 16\n"
"OpDecorate %v3f32arr11 ArrayStride 16\n"
"OpDecorate %v4f32arr3 ArrayStride 16\n"
"OpDecorate %f32StructArr7 ArrayStride 1216\n"
"\n"
"OpMemberDecorate %f32Struct 0 Offset 0\n" //f32
"OpMemberDecorate %f32Struct 1 Offset 8\n" //v2f32
"OpMemberDecorate %f32Struct 2 Offset 16\n" //v3f32
"OpMemberDecorate %f32Struct 3 Offset 32\n" //v4f32
"OpMemberDecorate %f32Struct 4 Offset 48\n" //f32[3]
"OpMemberDecorate %f32Struct 5 Offset 96\n" //struct {f32, v2f32[3]} [11]
"OpMemberDecorate %f32Struct 6 Offset 800\n" //v2f32[11]
"OpMemberDecorate %f32Struct 7 Offset 976\n" //f32
"OpMemberDecorate %f32Struct 8 Offset 992\n" //v3f32[11]
"OpMemberDecorate %f32Struct 9 Offset 1168\n"); //v4f32[3]
case SHADERTEMPLATE_STRIDE32BIT_STD430:
return string(
//struct {f32, v2f32[3]} [11]
"OpDecorate %v2f32arr3 ArrayStride 8\n"
"OpMemberDecorate %struct32 0 Offset 0\n"
"OpMemberDecorate %struct32 1 Offset 8\n"
"OpDecorate %struct32arr11 ArrayStride 32\n"
"OpDecorate %f32arr3 ArrayStride 4\n"
"OpDecorate %v2f32arr11 ArrayStride 8\n"
"OpDecorate %v3f32arr11 ArrayStride 16\n"
"OpDecorate %v4f32arr3 ArrayStride 16\n"
"OpDecorate %f32StructArr7 ArrayStride 736\n"
"\n"
"OpMemberDecorate %f32Struct 0 Offset 0\n" //f32
"OpMemberDecorate %f32Struct 1 Offset 8\n" //v2f32
"OpMemberDecorate %f32Struct 2 Offset 16\n" //v3f32
"OpMemberDecorate %f32Struct 3 Offset 32\n" //v4f32
"OpMemberDecorate %f32Struct 4 Offset 48\n" //f32[3]
"OpMemberDecorate %f32Struct 5 Offset 64\n" //struct {f32, v2f32[3]}[11]
"OpMemberDecorate %f32Struct 6 Offset 416\n" //v2f32[11]
"OpMemberDecorate %f32Struct 7 Offset 504\n" //f32
"OpMemberDecorate %f32Struct 8 Offset 512\n" //v3f32[11]
"OpMemberDecorate %f32Struct 9 Offset 688\n"); //v4f32[3]
case SHADERTEMPLATE_STRIDEMIX_STD140:
return string(
"\n"//strutNestedIn {b16, b32, v2b16[11], b32[11]}
"OpDecorate %v2b16NestedArr11${InOut} ArrayStride 16\n" //v2b16[11]
"OpDecorate %b32NestedArr11${InOut} ArrayStride 16\n" //b32[11]
"OpMemberDecorate %sNested${InOut} 0 Offset 0\n" //b16
"OpMemberDecorate %sNested${InOut} 1 Offset 4\n" //b32
"OpMemberDecorate %sNested${InOut} 2 Offset 16\n" //v2b16[11]
"OpMemberDecorate %sNested${InOut} 3 Offset 192\n" //b32[11]
"OpDecorate %sNestedArr11${InOut} ArrayStride 368\n" //strutNestedIn[11]
"\n"//strutIn {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedIn[11], b16In[11], b32bIn[11]}
"OpDecorate %sb16Arr11${InOut} ArrayStride 16\n" //b16In[11]
"OpDecorate %sb32Arr11${InOut} ArrayStride 16\n" //b32bIn[11]
"OpMemberDecorate %struct${InOut} 0 Offset 0\n" //b16
"OpMemberDecorate %struct${InOut} 1 Offset 4\n" //b32
"OpMemberDecorate %struct${InOut} 2 Offset 8\n" //v2b16
"OpMemberDecorate %struct${InOut} 3 Offset 16\n" //v2b32
"OpMemberDecorate %struct${InOut} 4 Offset 24\n" //v3b16
"OpMemberDecorate %struct${InOut} 5 Offset 32\n" //v3b32
"OpMemberDecorate %struct${InOut} 6 Offset 48\n" //v4b16
"OpMemberDecorate %struct${InOut} 7 Offset 64\n" //v4b32
"OpMemberDecorate %struct${InOut} 8 Offset 80\n" //strutNestedIn[11]
"OpMemberDecorate %struct${InOut} 9 Offset 4128\n" //b16In[11]
"OpMemberDecorate %struct${InOut} 10 Offset 4304\n" //b32bIn[11]
"OpDecorate %structArr7${InOut} ArrayStride 4480\n"); //strutIn[7]
case SHADERTEMPLATE_STRIDEMIX_STD430:
return string(
"\n"//strutNestedOut {b16, b32, v2b16[11], b32[11]}
"OpDecorate %v2b16NestedArr11${InOut} ArrayStride 4\n" //v2b16[11]
"OpDecorate %b32NestedArr11${InOut} ArrayStride 4\n" //b32[11]
"OpMemberDecorate %sNested${InOut} 0 Offset 0\n" //b16
"OpMemberDecorate %sNested${InOut} 1 Offset 4\n" //b32
"OpMemberDecorate %sNested${InOut} 2 Offset 8\n" //v2b16[11]
"OpMemberDecorate %sNested${InOut} 3 Offset 52\n" //b32[11]
"OpDecorate %sNestedArr11${InOut} ArrayStride 96\n" //strutNestedOut[11]
"\n"//strutOut {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedOut[11], b16Out[11], b32bOut[11]}
"OpDecorate %sb16Arr11${InOut} ArrayStride 2\n" //b16Out[11]
"OpDecorate %sb32Arr11${InOut} ArrayStride 4\n" //b32bOut[11]
"OpMemberDecorate %struct${InOut} 0 Offset 0\n" //b16
"OpMemberDecorate %struct${InOut} 1 Offset 4\n" //b32
"OpMemberDecorate %struct${InOut} 2 Offset 8\n" //v2b16
"OpMemberDecorate %struct${InOut} 3 Offset 16\n" //v2b32
"OpMemberDecorate %struct${InOut} 4 Offset 24\n" //v3b16
"OpMemberDecorate %struct${InOut} 5 Offset 32\n" //v3b32
"OpMemberDecorate %struct${InOut} 6 Offset 48\n" //v4b16
"OpMemberDecorate %struct${InOut} 7 Offset 64\n" //v4b32
"OpMemberDecorate %struct${InOut} 8 Offset 80\n" //strutNestedOut[11]
"OpMemberDecorate %struct${InOut} 9 Offset 1136\n" //b16Out[11]
"OpMemberDecorate %struct${InOut} 10 Offset 1160\n" //b32bOut[11]
"OpDecorate %structArr7${InOut} ArrayStride 1216\n"); //strutOut[7]
default:
return string("");
}
}
/*Return string contains spirv loop begin.
the spec should contains "exeCount" - with name of const i32, it is number of executions
the spec should contains "loopName" - suffix for all local names
%Val${loopName} - index which can be used inside loop
"%ndxArr${loopName} = OpVariable %fp_i32 Function\n" - has to be defined outside
The function should be always use with endLoop function*/
std::string beginLoop(const std::map<std::string, std::string>& spec)
{
const tcu::StringTemplate loopBegin (
"OpStore %ndxArr${loopName} %zero\n"
"OpBranch %Loop${loopName}\n"
"%Loop${loopName} = OpLabel\n"
"OpLoopMerge %MergeLabel1${loopName} %MergeLabel2${loopName} None\n"
"OpBranch %Label1${loopName}\n"
"%Label1${loopName} = OpLabel\n"
"%Val${loopName} = OpLoad %i32 %ndxArr${loopName}\n"
"%LessThan${loopName} = OpSLessThan %bool %Val${loopName} %${exeCount}\n"
"OpBranchConditional %LessThan${loopName} %ifLabel${loopName} %MergeLabel1${loopName}\n"
"%ifLabel${loopName} = OpLabel\n");
return loopBegin.specialize(spec);
}
/*Return string contains spirv loop end.
the spec should contains "loopName" - suffix for all local names, suffix should be the same in beginLoop
The function should be always use with beginLoop function*/
std::string endLoop(const std::map<std::string, std::string>& spec)
{
const tcu::StringTemplate loopEnd (
"OpBranch %MergeLabel2${loopName}\n"
"%MergeLabel2${loopName} = OpLabel\n"
"%plusOne${loopName} = OpIAdd %i32 %Val${loopName} %c_i32_1\n"
"OpStore %ndxArr${loopName} %plusOne${loopName}\n"
"OpBranch %Loop${loopName}\n"
"%MergeLabel1${loopName} = OpLabel\n");
return loopEnd.specialize(spec);
}
// Adds compute tests that load 16-bit values from a buffer block (%ssbo16,
// binding 0), convert them to 32 bits in the shader and store the result in
// a 32-bit storage buffer (%ssbo32, binding 1).
//
// One SpvAsmComputeShaderCase is added to `group` for every combination of a
// CAPABILITIES entry and a cTypes entry: first the float cases (OpFConvert),
// then the signed/unsigned integer cases (OpSConvert / OpUConvert).
void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group)
{
tcu::TestContext& testCtx = group->getTestContext();
// The random seed is derived from the group name.
de::Random rnd (deStringHash(group->getName()));
// Number of 32-bit scalars in the expected output of every test case.
const int numElements = 128;
// Shader shared by the float and integer cases. Placeholders are filled in
// per test case from CAPABILITIES and cTypes below.
const StringTemplate shaderTemplate (
"OpCapability Shader\n"
"OpCapability ${capability}\n"
"OpExtension \"SPV_KHR_16bit_storage\"\n"
"OpMemoryModel Logical GLSL450\n"
"OpEntryPoint GLCompute %main \"main\" %id\n"
"OpExecutionMode %main LocalSize 1 1 1\n"
"OpDecorate %id BuiltIn GlobalInvocationId\n"
// ArrayStride decorations of the 16-bit and 32-bit arrays (CompositeType::stride).
"${stride}\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpDecorate %SSBO16 ${storage}\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo16 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 1\n"
"OpDecorate %ssbo16 Binding 0\n"
"${matrix_decor:opt}\n"
"%bool = OpTypeBool\n"
"%void = OpTypeVoid\n"
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%i32 = OpTypeInt 32 1\n"
"%f32 = OpTypeFloat 32\n"
"%v3u32 = OpTypeVector %u32 3\n"
"%uvec3ptr = OpTypePointer Input %v3u32\n"
"%i32ptr = OpTypePointer Uniform %i32\n"
"%f32ptr = OpTypePointer Uniform %f32\n"
"%zero = OpConstant %i32 0\n"
"%c_i32_1 = OpConstant %i32 1\n"
"%c_i32_2 = OpConstant %i32 2\n"
"%c_i32_3 = OpConstant %i32 3\n"
"%c_i32_16 = OpConstant %i32 16\n"
"%c_i32_32 = OpConstant %i32 32\n"
"%c_i32_64 = OpConstant %i32 64\n"
"%c_i32_128 = OpConstant %i32 128\n"
"%c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
"%i32arr = OpTypeArray %i32 %c_i32_128\n"
"%f32arr = OpTypeArray %f32 %c_i32_128\n"
"${types}\n"
"${matrix_types:opt}\n"
"%SSBO32 = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
"%SSBO16 = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
"%ssbo32 = OpVariable %up_SSBO32 Uniform\n"
"%ssbo16 = OpVariable %up_SSBO16 Uniform\n"
"%id = OpVariable %uvec3ptr Input\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
"%idval = OpLoad %v3u32 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
// The input is indexed with ${arrayindex} (either %x or %c_i32_ci); the
// output is always indexed with the invocation id %x.
"%inloc = OpAccessChain %${base16}ptr %ssbo16 %zero %${arrayindex} ${index0:opt}\n"
"%val16 = OpLoad %${base16} %inloc\n"
"%val32 = ${convert} %${base32} %val16\n"
"%outloc = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
" OpStore %outloc %val32\n"
"${matrix_store:opt}\n"
" OpReturn\n"
" OpFunctionEnd\n");
{ // floats
// Type declarations substituted for ${types} in every float case.
const char floatTypes[] =
"%f16 = OpTypeFloat 16\n"
"%f16ptr = OpTypePointer Uniform %f16\n"
"%f16arr = OpTypeArray %f16 %c_i32_128\n"
"%v2f16 = OpTypeVector %f16 2\n"
"%v2f32 = OpTypeVector %f32 2\n"
"%v2f16ptr = OpTypePointer Uniform %v2f16\n"
"%v2f32ptr = OpTypePointer Uniform %v2f32\n"
"%v2f16arr = OpTypeArray %v2f16 %c_i32_64\n"
"%v2f32arr = OpTypeArray %v2f32 %c_i32_64\n";
struct CompositeType
{
const char* name; // Test name component.
const char* base32; // 32-bit type name substituted for ${base32}.
const char* base16; // 16-bit type name substituted for ${base16}.
const char* stride; // Decorations substituted for ${stride}.
bool useConstantIndex; // Index the input with %c_i32_ci instead of %x.
unsigned constantIndex; // Value of %c_i32_ci.
unsigned count; // Number of work groups dispatched in x.
unsigned inputStride; // Distance between consecutive input items, in 16-bit elements.
};
// The first dimension is indexed with capIdx, so the rows have to stay in
// the same order as the entries of CAPABILITIES. The second row uses a
// 16-byte ArrayStride for the 16-bit scalar and vector arrays.
// NOTE(review): presumably the second row is the uniform-block capability,
// which needs the larger stride -- confirm against CAPABILITIES.
const CompositeType cTypes[2][5] =
{
{
{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements, 1},
{"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements, 1},
{"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements, 1},
{"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n", false, 0, numElements / 2, 2},
{"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8}
},
{
{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", false, 0, numElements, 8},
{"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 5, numElements, 8},
{"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 8, numElements, 8},
{"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 16\n", false, 0, numElements / 2, 8},
{"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8}
}
};
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float";
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
specs["stride"] = cTypes[capIdx][tyIdx].stride;
specs["base32"] = cTypes[capIdx][tyIdx].base32;
specs["base16"] = cTypes[capIdx][tyIdx].base16;
specs["types"] = floatTypes;
specs["convert"] = "OpFConvert";
specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
if (cTypes[capIdx][tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride;
const deUint32 count = cTypes[capIdx][tyIdx].count;
// Number of scalars converted by a single invocation.
const deUint32 scalarsPerItem = numElements / count;
// Fresh input data is drawn from rnd for every test case.
vector<deFloat16> float16Data = getFloat16s(rnd, numElements * inputStride);
vector<float> float32Data;
float32Data.reserve(numElements);
// Build the expected output: only the first scalarsPerItem values of every
// inputStride-sized slot of the input are picked up.
for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
float32Data.push_back(deFloat16To32(float16Data[numIdx * inputStride + scalarIdx]));
vector<float> float32DataConstIdx;
if (cTypes[capIdx][tyIdx].useConstantIndex)
{
// With a constant index every invocation reads the same input item, so the
// expected output repeats the converted item at constantIndex.
const deUint32 numFloats = numElements / cTypes[capIdx][tyIdx].count;
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
float32DataConstIdx.push_back(float32Data[cTypes[capIdx][tyIdx].constantIndex * numFloats + numIdx % numFloats]);
}
if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
{
// The base template converts column 0 (index0 = %zero); matrix_store
// appends the conversions of columns 1, 2 and 3.
specs["index0"] = "%zero";
specs["matrix_prefix"] = "m4";
specs["matrix_types"] =
"%m4v2f16 = OpTypeMatrix %v2f16 4\n"
"%m4v2f32 = OpTypeMatrix %v2f32 4\n"
"%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
"%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
specs["matrix_decor"] =
"OpMemberDecorate %SSBO32 0 ColMajor\n"
"OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
"OpMemberDecorate %SSBO16 0 ColMajor\n"
"OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
specs["matrix_store"] =
"%inloc_1 = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
"%val16_1 = OpLoad %v2f16 %inloc_1\n"
"%val32_1 = OpFConvert %v2f32 %val16_1\n"
"%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
" OpStore %outloc_1 %val32_1\n"
"%inloc_2 = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
"%val16_2 = OpLoad %v2f16 %inloc_2\n"
"%val32_2 = OpFConvert %v2f32 %val16_2\n"
"%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
" OpStore %outloc_2 %val32_2\n"
"%inloc_3 = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
"%val16_3 = OpLoad %v2f16 %inloc_3\n"
"%val32_3 = OpFConvert %v2f32 %val16_3\n"
"%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
" OpStore %outloc_3 %val32_3\n";
}
spec.assembly = shaderTemplate.specialize(specs);
// One work group (of local size 1) per input item.
spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.verifyIO = check32BitFloats;
spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), CAPABILITIES[capIdx].dtype));
spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(cTypes[capIdx][tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
spec.extensions.push_back("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
}
}
{ // Integers
// Type declarations substituted for ${types} in the signed cases.
const char sintTypes[] =
"%i16 = OpTypeInt 16 1\n"
"%i16ptr = OpTypePointer Uniform %i16\n"
"%i16arr = OpTypeArray %i16 %c_i32_128\n"
"%v4i16 = OpTypeVector %i16 4\n"
"%v4i32 = OpTypeVector %i32 4\n"
"%v4i16ptr = OpTypePointer Uniform %v4i16\n"
"%v4i32ptr = OpTypePointer Uniform %v4i32\n"
"%v4i16arr = OpTypeArray %v4i16 %c_i32_32\n"
"%v4i32arr = OpTypeArray %v4i32 %c_i32_32\n";
// Type declarations substituted for ${types} in the unsigned cases.
const char uintTypes[] =
"%u16 = OpTypeInt 16 0\n"
"%u16ptr = OpTypePointer Uniform %u16\n"
"%u32ptr = OpTypePointer Uniform %u32\n"
"%u16arr = OpTypeArray %u16 %c_i32_128\n"
"%u32arr = OpTypeArray %u32 %c_i32_128\n"
"%v4u16 = OpTypeVector %u16 4\n"
"%v4u32 = OpTypeVector %u32 4\n"
"%v4u16ptr = OpTypePointer Uniform %v4u16\n"
"%v4u32ptr = OpTypePointer Uniform %v4u32\n"
"%v4u16arr = OpTypeArray %v4u16 %c_i32_32\n"
"%v4u32arr = OpTypeArray %v4u32 %c_i32_32\n";
struct CompositeType
{
const char* name; // Test name component.
bool isSigned; // Selects the sign-extended or zero-extended reference data.
const char* types; // Type declarations substituted for ${types}.
const char* base32; // 32-bit type name substituted for ${base32}.
const char* base16; // 16-bit type name substituted for ${base16}.
const char* opcode; // Conversion instruction substituted for ${convert}.
const char* stride; // Decorations substituted for ${stride}.
bool useConstantIndex; // Index the input with %c_i32_ci instead of %x.
unsigned constantIndex; // Value of %c_i32_ci.
unsigned count; // Number of work groups dispatched in x.
unsigned inputStride; // Distance between consecutive input items, in 16-bit elements.
};
// As for the float cases, the first dimension is indexed with capIdx and
// therefore has to match the order of CAPABILITIES.
const CompositeType cTypes[2][8] =
{
{
{"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements, 1},
{"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements, 1},
{"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements, 1},
{"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements, 1},
{"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements, 1},
{"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements, 1},
{"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4, 4},
{"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4, 4}
},
{
{"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", false, 0, numElements, 8},
{"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 5, numElements, 8},
{"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 8, numElements, 8},
{"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", false, 0, numElements, 8},
{"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 5, numElements, 8},
{"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 8, numElements, 8},
{"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 16\n", false, 0, numElements / 4, 8},
{"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 16\n", false, 0, numElements / 4, 8}
}
};
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride;
// Fresh input data is drawn from rnd for every test case.
vector<deInt16> inputs = getInt16s(rnd, numElements * inputStride);
vector<deInt32> sOutputs;
vector<deInt32> uOutputs;
const deUint16 signBitMask = 0x8000;
const deUint32 signExtendMask = 0xffff0000;
const deUint32 count = cTypes[capIdx][tyIdx].count;
// Number of scalars converted by a single invocation.
const deUint32 scalarsPerItem = numElements / count;
sOutputs.reserve(numElements);
uOutputs.reserve(numElements);
// Build both reference conversions for the scalars the shader reads:
// uOutputs is zero-extended, sOutputs has the upper 16 bits set when the
// sign bit of the input is set.
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx)
{
const deInt16 input = inputs[numNdx * inputStride + scalarIdx];
uOutputs.push_back(static_cast<deUint16>(input));
if (input & signBitMask)
sOutputs.push_back(static_cast<deInt32>(input | signExtendMask));
else
sOutputs.push_back(static_cast<deInt32>(input));
}
vector<deInt32> intDataConstIdx;
if (cTypes[capIdx][tyIdx].useConstantIndex)
{
// With a constant index every invocation reads the same input item, so the
// expected output repeats the converted item at constantIndex.
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
{
const deInt32 idx = cTypes[capIdx][tyIdx].constantIndex * scalarsPerItem + numIdx % scalarsPerItem;
if (cTypes[capIdx][tyIdx].isSigned)
intDataConstIdx.push_back(sOutputs[idx]);
else
intDataConstIdx.push_back(uOutputs[idx]);
}
}
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
specs["stride"] = cTypes[capIdx][tyIdx].stride;
specs["base32"] = cTypes[capIdx][tyIdx].base32;
specs["base16"] = cTypes[capIdx][tyIdx].base16;
specs["types"] = cTypes[capIdx][tyIdx].types;
specs["convert"] = cTypes[capIdx][tyIdx].opcode;
specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
if (cTypes[capIdx][tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
spec.assembly = shaderTemplate.specialize(specs);
// One work group (of local size 1) per input item.
spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.inputs.push_back(Resource(BufferSp(new Int16Buffer(inputs)), CAPABILITIES[capIdx].dtype));
// Pick the reference buffer that matches the indexing mode and signedness.
if (cTypes[capIdx][tyIdx].useConstantIndex)
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
else if (cTypes[capIdx][tyIdx].isSigned)
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
else
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
spec.extensions.push_back("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
}
}
}
// Adds compute tests that read one 16-bit value through a multi-level
// OpAccessChain into an array of four structs, convert it to 32 bits and write
// it to a one-element output buffer.
//
// A SpvAsmComputeShaderCase is added to `group` for every generated index
// combination, every CAPABILITIES entry and every data type (float, int, uint).
void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* group)
{
tcu::TestContext& testCtx = group->getTestContext();
// The random seed is derived from the group name.
de::Random rnd (deStringHash(group->getName()));
const deUint32 structSize = 128; // In number of 16bit items. Includes padding.
// Input data for the whole array of four structs; shared by all test cases.
vector<deFloat16> inputDataFloat = getFloat16s(rnd, structSize * 4);
vector<deInt16> inputDataInt = getInt16s(rnd, structSize * 4);
// Expected results, one entry per element of `indices`.
vector<float> outputDataFloat;
vector<deInt32> outputDataSInt;
vector<deInt32> outputDataUInt;
// Access chain indices: (array element, struct member, index within member, second index within member).
vector<tcu::UVec4> indices;
// Input is an array of a struct that varies on 16bit data type being tested:
//
// Float:
//
// float16 scalars[3]
// mat4x3 matrix
// vec3 vector
//
// Int:
//
// int16 scalars[3]
// int16 array2D[4][3]
// ivec3 vector
//
// UInt:
//
// uint16 scalars[3]
// uint16 array2D[4][3]
// uvec3 vector
const StringTemplate shaderTemplate (
" OpCapability Shader\n"
" OpCapability ${capability}\n"
" OpExtension \"SPV_KHR_16bit_storage\"\n"
" %1 = OpExtInstImport \"GLSL.std.450\"\n"
" OpMemoryModel Logical GLSL450\n"
" OpEntryPoint GLCompute %main \"main\"\n"
" OpExecutionMode %main LocalSize 1 1 1\n"
" OpSource GLSL 430\n"
" OpDecorate %Output BufferBlock\n"
" OpDecorate %dataOutput DescriptorSet 0\n"
" OpDecorate %dataOutput Binding 1\n"
// The byte strides and offsets below have to agree with the offset tables
// (in 16-bit units) used further down to compute the expected results.
" OpDecorate %scalarArray ArrayStride 16\n"
" OpDecorate %scalarArray2D ArrayStride 48\n"
" OpMemberDecorate %S 0 Offset 0\n"
" OpMemberDecorate %S 1 Offset 48\n"
" ${decoration:opt}\n"
" OpMemberDecorate %S 2 Offset 240\n"
" OpDecorate %_arr_S_uint_4 ArrayStride 256\n"
" OpMemberDecorate %Input 0 Offset 0\n"
" OpMemberDecorate %Output 0 Offset 0\n"
" OpDecorate %Input ${storage}\n"
" OpDecorate %dataInput DescriptorSet 0\n"
" OpDecorate %dataInput Binding 0\n"
" %f16 = OpTypeFloat 16\n"
" %f32 = OpTypeFloat 32\n"
" %i16 = OpTypeInt 16 1\n"
" %i32 = OpTypeInt 32 1\n"
" %u16 = OpTypeInt 16 0\n"
" %u32 = OpTypeInt 32 0\n"
" %void = OpTypeVoid\n"
" %voidFunc = OpTypeFunction %void\n"
" %_ptr_Function_uint = OpTypePointer Function %u32\n"
" %v3u32 = OpTypeVector %u32 3\n"
" %_ptr_Input_v3u32 = OpTypePointer Input %v3u32\n"
" %int_0 = OpConstant %i32 0\n"
" %uint_3 = OpConstant %u32 3\n"
" %uint_4 = OpConstant %u32 4\n"
// The access chain indices are baked into the shader as constants.
" %s0 = OpConstant %u32 ${s0}\n"
" %s1 = OpConstant %u32 ${s1}\n"
" %s2 = OpConstant %u32 ${s2}\n"
" %s3 = OpConstant %u32 ${s3}\n"
" %Output = OpTypeStruct %${type}32\n"
" %_ptr_Uniform_Output = OpTypePointer Uniform %Output\n"
" %dataOutput = OpVariable %_ptr_Uniform_Output Uniform\n"
" %scalarArray = OpTypeArray %${type}16 %uint_3\n"
" %v3f16 = OpTypeVector %f16 3\n"
" %v3i16 = OpTypeVector %i16 3\n"
" %v3u16 = OpTypeVector %u16 3\n"
" %matrix = OpTypeMatrix %v3f16 4\n"
" %scalarArray2D = OpTypeArray %scalarArray %uint_4\n"
" %S = OpTypeStruct %scalarArray %${type2D} %v3${type}16\n"
" %_arr_S_uint_4 = OpTypeArray %S %uint_4\n"
" %Input = OpTypeStruct %_arr_S_uint_4\n"
" %_ptr_Uniform_Input = OpTypePointer Uniform %Input\n"
" %dataInput = OpVariable %_ptr_Uniform_Input Uniform\n"
" %_ptr_Uniform_16bit_data = OpTypePointer Uniform %${type}16\n"
" %_ptr_Uniform_32bit_data = OpTypePointer Uniform %${type}32\n"
" %main = OpFunction %void None %voidFunc\n"
" %entry = OpLabel\n"
" %dataPtr = ${accessChain}\n"
" %data = OpLoad %${type}16 %dataPtr\n"
" %converted = ${convert}\n"
" %outPtr = OpAccessChain %_ptr_Uniform_32bit_data %dataOutput %int_0\n"
" OpStore %outPtr %converted\n"
" OpReturn\n"
" OpFunctionEnd\n");
// Generate constant indices for OpAccessChain. We need to use constant values
// when indexing into structures. This loop generates all permutations.
// Member 1 (matrix / 2D array) takes two further indices (4 x 3); members 0
// and 2 take a single index below 3, for which idx3 is always 0.
for (deUint32 idx0 = 0; idx0 < 4; ++idx0)
for (deUint32 idx1 = 0; idx1 < 3; ++idx1)
for (deUint32 idx2 = 0; idx2 < (idx1 == 1u ? 4u : 3u); ++idx2)
for (deUint32 idx3 = 0; idx3 < (idx1 == 1u ? 3u : 1u); ++idx3)
indices.push_back(tcu::UVec4(idx0, idx1, idx2, idx3));
// Compute the expected float, signed and unsigned result for every index
// combination.
for (deUint32 numIdx = 0; numIdx < (deUint32)indices.size(); ++numIdx)
{
const deUint16 signBitMask = 0x8000;
const deUint32 signExtendMask = 0xffff0000;
// Determine the selected output float for the selected indices.
const tcu::UVec4 vec = indices[numIdx];
// Offsets are in multiples of 16bits. Floats are using matrix as the
// second field, which has different layout rules than 2D array.
// Therefore separate offset tables are needed.
// Rows are selected by the struct member index (vec.y()); each row holds
// {member offset, stride applied to vec.z(), stride applied to vec.w()}.
const deUint32 fieldOffsetsFloat[3][3] =
{
{0u, 8u, 0u},
{24, 24u, 1u},
{120u, 1u, 0u}
};
const deUint32 fieldOffsetsInt[3][3] =
{
{0u, 8u, 0u},
{24, 24u, 8u},
{120u, 1u, 0u}
};
const deUint32 offsetFloat = vec.x() * structSize + fieldOffsetsFloat[vec.y()][0] + fieldOffsetsFloat[vec.y()][1] * vec.z() + fieldOffsetsFloat[vec.y()][2] * vec.w();
const deUint32 offsetInt = vec.x() * structSize + fieldOffsetsInt[vec.y()][0] + fieldOffsetsInt[vec.y()][1] * vec.z() + fieldOffsetsInt[vec.y()][2] * vec.w();
const bool hasSign = inputDataInt[offsetInt] & signBitMask;
outputDataFloat.push_back(deFloat16To32(inputDataFloat[offsetFloat]));
// Unsigned reference is zero-extended; signed reference has the upper 16
// bits set when the sign bit of the input is set.
outputDataUInt.push_back((deUint16)inputDataInt[offsetInt]);
outputDataSInt.push_back((deInt32)(inputDataInt[offsetInt] | (hasSign ? signExtendMask : 0u)));
}
for (deUint32 indicesIdx = 0; indicesIdx < (deUint32)indices.size(); ++indicesIdx)
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
{
// The fourth index only appears in the test name when it is used by the
// access chain, i.e. for struct member 1.
string indexString = de::toString(indices[indicesIdx].x()) + "_" + de::toString(indices[indicesIdx].y()) + "_" + de::toString(indices[indicesIdx].z());
if (indices[indicesIdx].y() == 1)
indexString += string("_") + de::toString(indices[indicesIdx].w());
const string testNameBase = string(CAPABILITIES[capIdx].name) + "_" + indexString + "_";
struct DataType
{
string name; // Test name suffix.
string type; // Type prefix substituted for ${type} ("f", "i" or "u").
string convert; // Conversion instruction substituted for ${convert}.
string type2D; // Matrix when using floats. 2D array otherwise.
BufferSp inputs; // Full 16-bit input buffer.
BufferSp outputs; // Single expected 32-bit value.
};
// Element 0 must stay the float entry: the loop below relies on
// dataTypeIdx == 0 to select the float verifier and matrix decorations.
const DataType dataTypes[] =
{
{ "float", "f", "OpFConvert %f32 %data", "matrix", BufferSp(new Float16Buffer(inputDataFloat)), BufferSp(new Float32Buffer(vector<float>(1, outputDataFloat[indicesIdx]))) },
{ "int", "i", "OpSConvert %i32 %data", "scalarArray2D", BufferSp(new Int16Buffer(inputDataInt)), BufferSp(new Int32Buffer(vector<deInt32>(1, outputDataSInt[indicesIdx]))) },
{ "uint", "u", "OpUConvert %u32 %data", "scalarArray2D", BufferSp(new Int16Buffer(inputDataInt)), BufferSp(new Int32Buffer(vector<deInt32>(1, outputDataUInt[indicesIdx]))) }
};
for (deUint32 dataTypeIdx = 0; dataTypeIdx < DE_LENGTH_OF_ARRAY(dataTypes); ++dataTypeIdx)
{
const string testName = testNameBase + dataTypes[dataTypeIdx].name;
map<string, string> specs;
ComputeShaderSpec spec;
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
specs["s0"] = de::toString(indices[indicesIdx].x());
specs["s1"] = de::toString(indices[indicesIdx].y());
specs["s2"] = de::toString(indices[indicesIdx].z());
specs["s3"] = de::toString(indices[indicesIdx].w());
specs["type"] = dataTypes[dataTypeIdx].type;
specs["convert"] = dataTypes[dataTypeIdx].convert;
specs["type2D"] = dataTypes[dataTypeIdx].type2D;
// Member 1 needs one more index than the scalar array and the vector.
if (indices[indicesIdx].y() == 1)
specs["accessChain"] = "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2 %s3";
else
specs["accessChain"] = "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2";
if (dataTypeIdx == 0)
{
// Float case only: use the float comparison callback and add the matrix
// layout decorations for struct member 1.
spec.verifyIO = check32BitFloats;
specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 48\n";
}
spec.assembly = shaderTemplate.specialize(specs);
// A single invocation reads and converts the selected value.
spec.numWorkGroups = IVec3(1, 1, 1);
spec.extensions.push_back ("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
spec.inputs.push_back(Resource(dataTypes[dataTypeIdx].inputs, CAPABILITIES[capIdx].dtype));
spec.outputs.push_back(Resource(dataTypes[dataTypeIdx].outputs));
group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
}
}
}
// Adds compute tests that load 16-bit values from a push constant block
// (%pc16), convert them to 32 bits in the shader and store the result in a
// 32-bit storage buffer (%ssbo32, binding 0).
//
// One SpvAsmComputeShaderCase is added to `group` per cTypes entry: first the
// float cases (OpFConvert), then the signed/unsigned integer cases
// (OpSConvert / OpUConvert). Every case requests the storagePushConstant16
// feature.
void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group)
{
tcu::TestContext& testCtx = group->getTestContext();
// The random seed is derived from the group name.
de::Random rnd (deStringHash(group->getName()));
// Number of 16-bit scalars in the push constant data and of 32-bit scalars
// in the expected output.
const int numElements = 64;
// Shader shared by the float and integer cases. Placeholders are filled in
// per test case from cTypes below.
const StringTemplate shaderTemplate (
"OpCapability Shader\n"
"OpCapability StoragePushConstant16\n"
"OpExtension \"SPV_KHR_16bit_storage\"\n"
"OpMemoryModel Logical GLSL450\n"
"OpEntryPoint GLCompute %main \"main\" %id\n"
"OpExecutionMode %main LocalSize 1 1 1\n"
"OpDecorate %id BuiltIn GlobalInvocationId\n"
// ArrayStride decorations of the 16-bit and 32-bit arrays (CompositeType::stride).
"${stride}"
"OpDecorate %PC16 Block\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 0\n"
"${matrix_decor:opt}\n"
"%void = OpTypeVoid\n"
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%i32 = OpTypeInt 32 1\n"
"%f32 = OpTypeFloat 32\n"
"%v3u32 = OpTypeVector %u32 3\n"
"%uvec3ptr = OpTypePointer Input %v3u32\n"
"%i32ptr = OpTypePointer Uniform %i32\n"
"%f32ptr = OpTypePointer Uniform %f32\n"
"%zero = OpConstant %i32 0\n"
"%c_i32_1 = OpConstant %i32 1\n"
"%c_i32_8 = OpConstant %i32 8\n"
"%c_i32_16 = OpConstant %i32 16\n"
"%c_i32_32 = OpConstant %i32 32\n"
"%c_i32_64 = OpConstant %i32 64\n"
"%c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
"%i32arr = OpTypeArray %i32 %c_i32_64\n"
"%f32arr = OpTypeArray %f32 %c_i32_64\n"
"${types}\n"
"${matrix_types:opt}\n"
"%PC16 = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
"%pp_PC16 = OpTypePointer PushConstant %PC16\n"
"%pc16 = OpVariable %pp_PC16 PushConstant\n"
"%SSBO32 = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
"%ssbo32 = OpVariable %up_SSBO32 Uniform\n"
"%id = OpVariable %uvec3ptr Input\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
"%idval = OpLoad %v3u32 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
// The input is indexed with ${arrayindex} (either %x or %c_i32_ci); the
// output is always indexed with the invocation id %x.
"%inloc = OpAccessChain %${base16}ptr %pc16 %zero %${arrayindex} ${index0:opt}\n"
"%val16 = OpLoad %${base16} %inloc\n"
"%val32 = ${convert} %${base32} %val16\n"
"%outloc = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
" OpStore %outloc %val32\n"
"${matrix_store:opt}\n"
" OpReturn\n"
" OpFunctionEnd\n");
{ // floats
// Type declarations substituted for ${types} in every float case.
const char floatTypes[] =
"%f16 = OpTypeFloat 16\n"
"%f16ptr = OpTypePointer PushConstant %f16\n"
"%f16arr = OpTypeArray %f16 %c_i32_64\n"
"%v4f16 = OpTypeVector %f16 4\n"
"%v4f32 = OpTypeVector %f32 4\n"
"%v4f16ptr = OpTypePointer PushConstant %v4f16\n"
"%v4f32ptr = OpTypePointer Uniform %v4f32\n"
"%v4f16arr = OpTypeArray %v4f16 %c_i32_16\n"
"%v4f32arr = OpTypeArray %v4f32 %c_i32_16\n";
struct CompositeType
{
const char* name; // Test name component.
const char* base32; // 32-bit type name substituted for ${base32}.
const char* base16; // 16-bit type name substituted for ${base16}.
const char* stride; // Decorations substituted for ${stride}.
bool useConstantIndex; // Index the input with %c_i32_ci instead of %x.
unsigned constantIndex; // Value of %c_i32_ci.
unsigned count; // Number of work groups dispatched in x.
};
const CompositeType cTypes[] =
{
{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements},
{"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements},
{"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements},
{"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", false, 0, numElements / 4},
{"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", false, 0, numElements / 8},
};
// Input data and its reference conversion are generated once and shared by
// all float cases.
vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
vector<float> float32Data;
float32Data.reserve(numElements);
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
float32Data.push_back(deFloat16To32(float16Data[numIdx]));
for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
string testName = string(cTypes[tyIdx].name) + "_float";
vector<float> float32DataConstIdx;
if (cTypes[tyIdx].useConstantIndex)
{
// With a constant index every invocation reads the same input item, so the
// expected output repeats the converted item at constantIndex.
const deUint32 numFloats = numElements / cTypes[tyIdx].count;
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
float32DataConstIdx.push_back(float32Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
}
specs["stride"] = cTypes[tyIdx].stride;
specs["base32"] = cTypes[tyIdx].base32;
specs["base16"] = cTypes[tyIdx].base16;
specs["types"] = floatTypes;
specs["convert"] = "OpFConvert";
specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
if (cTypes[tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
{
// The base template converts column 0 (index0 = %zero); matrix_store
// appends the conversion of column 1.
specs["index0"] = "%zero";
specs["matrix_prefix"] = "m2";
specs["matrix_types"] =
"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
"%m2v4f32 = OpTypeMatrix %v4f32 2\n"
"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
"%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
specs["matrix_decor"] =
"OpMemberDecorate %SSBO32 0 ColMajor\n"
"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
"OpMemberDecorate %PC16 0 ColMajor\n"
"OpMemberDecorate %PC16 0 MatrixStride 8\n";
specs["matrix_store"] =
"%inloc_1 = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
"%val16_1 = OpLoad %v4f16 %inloc_1\n"
"%val32_1 = OpFConvert %v4f32 %val16_1\n"
"%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
" OpStore %outloc_1 %val32_1\n";
}
spec.assembly = shaderTemplate.specialize(specs);
// One work group (of local size 1) per input item.
spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
spec.verifyIO = check32BitFloats;
spec.pushConstants = BufferSp(new Float16Buffer(float16Data));
spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(cTypes[tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
spec.extensions.push_back("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures.ext16BitStorage.storagePushConstant16 = true;
group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
}
}
{// integers
// Type declarations substituted for ${types} in the signed cases.
const char sintTypes[] =
"%i16 = OpTypeInt 16 1\n"
"%i16ptr = OpTypePointer PushConstant %i16\n"
"%i16arr = OpTypeArray %i16 %c_i32_64\n"
"%v2i16 = OpTypeVector %i16 2\n"
"%v2i32 = OpTypeVector %i32 2\n"
"%v2i16ptr = OpTypePointer PushConstant %v2i16\n"
"%v2i32ptr = OpTypePointer Uniform %v2i32\n"
"%v2i16arr = OpTypeArray %v2i16 %c_i32_32\n"
"%v2i32arr = OpTypeArray %v2i32 %c_i32_32\n";
// Type declarations substituted for ${types} in the unsigned cases.
const char uintTypes[] =
"%u16 = OpTypeInt 16 0\n"
"%u16ptr = OpTypePointer PushConstant %u16\n"
"%u32ptr = OpTypePointer Uniform %u32\n"
"%u16arr = OpTypeArray %u16 %c_i32_64\n"
"%u32arr = OpTypeArray %u32 %c_i32_64\n"
"%v2u16 = OpTypeVector %u16 2\n"
"%v2u32 = OpTypeVector %u32 2\n"
"%v2u16ptr = OpTypePointer PushConstant %v2u16\n"
"%v2u32ptr = OpTypePointer Uniform %v2u32\n"
"%v2u16arr = OpTypeArray %v2u16 %c_i32_32\n"
"%v2u32arr = OpTypeArray %v2u32 %c_i32_32\n";
struct CompositeType
{
const char* name; // Test name.
bool isSigned; // Selects the sign-extended or zero-extended reference data.
const char* types; // Type declarations substituted for ${types}.
const char* base32; // 32-bit type name substituted for ${base32}.
const char* base16; // 16-bit type name substituted for ${base16}.
const char* opcode; // Conversion instruction substituted for ${convert}.
const char* stride; // Decorations substituted for ${stride}.
bool useConstantIndex; // Index the input with %c_i32_ci instead of %x.
unsigned constantIndex; // Value of %c_i32_ci.
unsigned count; // Number of work groups dispatched in x.
};
const CompositeType cTypes[] =
{
{"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements},
{"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements},
{"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements},
{"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements},
{"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements},
{"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements},
{"vector_sint", true, sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", false, 0, numElements / 2},
{"vector_uint", false, uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", false, 0, numElements / 2},
};
// Input data and both reference conversions are generated once and shared
// by all integer cases.
vector<deInt16> inputs = getInt16s(rnd, numElements);
vector<deInt32> sOutputs;
vector<deInt32> uOutputs;
const deUint16 signBitMask = 0x8000;
const deUint32 signExtendMask = 0xffff0000;
sOutputs.reserve(inputs.size());
uOutputs.reserve(inputs.size());
// uOutputs is zero-extended; sOutputs has the upper 16 bits set when the
// sign bit of the input is set.
for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
{
uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
if (inputs[numNdx] & signBitMask)
sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
else
sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
}
for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
const char* testName = cTypes[tyIdx].name;
vector<deInt32> intDataConstIdx;
if (cTypes[tyIdx].useConstantIndex)
{
// With a constant index every invocation reads the same input item, so the
// expected output repeats the converted item at constantIndex.
const deUint32 numInts = numElements / cTypes[tyIdx].count;
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
{
const deInt32 idx = cTypes[tyIdx].constantIndex * numInts + numIdx % numInts;
if (cTypes[tyIdx].isSigned)
intDataConstIdx.push_back(sOutputs[idx]);
else
intDataConstIdx.push_back(uOutputs[idx]);
}
}
specs["stride"] = cTypes[tyIdx].stride;
specs["base32"] = cTypes[tyIdx].base32;
specs["base16"] = cTypes[tyIdx].base16;
specs["types"] = cTypes[tyIdx].types;
specs["convert"] = cTypes[tyIdx].opcode;
specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
if (cTypes[tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
spec.assembly = shaderTemplate.specialize(specs);
// One work group (of local size 1) per input item.
spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
spec.pushConstants = BufferSp(new Int16Buffer(inputs));
// Pick the reference buffer that matches the indexing mode and signedness.
if (cTypes[tyIdx].useConstantIndex)
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
else if (cTypes[tyIdx].isSigned)
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
else
spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
spec.extensions.push_back("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures.ext16BitStorage.storagePushConstant16 = true;
group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, testName, spec));
}
}
}
// Adds graphics-pipeline tests that load 32-bit integers from binding 0,
// narrow them with OpSConvert / OpUConvert and store the 16-bit results to a
// BufferBlock-decorated struct at binding 1.
//
// One test set is registered (through createTestsForAllStages) for every
// combination of CAPABILITIES entry and signedness, first for a scalar array
// of 256 elements and then for an array of 64 four-component vectors.
void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup)
{
	// SPIR-V spellings that differ between the signed and the unsigned variant.
	struct IntegerFacts
	{
		const char*	name;
		const char*	type32;
		const char*	type16;
		const char*	opcode;
		const char*	isSigned;
	};

	const IntegerFacts		intFacts[]		=
	{
		{"sint", "%i32", "%i16", "OpSConvert", "1"},
		{"uint", "%u32", "%u16", "OpUConvert", "0"},
	};

	de::Random				rnd				(deStringHash(testGroup->getName()));
	const deUint32			numDataPoints	= 256;
	const StringTemplate	capabilities	("OpCapability ${cap}\n");
	map<string, string>		fragments;
	vector<string>			extensions;
	RGBA					defaultColors[4];

	// The host-side buffers always hold signed integers, even for the tests
	// whose shader treats them as unsigned: only the bit patterns matter, and
	// those are handed to the pipeline unchanged.
	vector<deInt32>			inputs			= getInt32s(rnd, numDataPoints);
	vector<deInt16>			outputs;

	// Expected result of the narrowing conversion: the low 16 bits of each input.
	outputs.reserve(inputs.size());
	for (vector<deInt32>::const_iterator it = inputs.begin(); it != inputs.end(); ++it)
		outputs.push_back(static_cast<deInt16>(*it & 0xffff));

	extensions.push_back("VK_KHR_16bit_storage");
	fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";

	getDefaultColors(defaultColors);

	// Scalar
	{
		const StringTemplate scalarPreMain(
			"${itype16} = OpTypeInt 16 ${signed}\n"
			"%c_i32_256 = OpConstant %i32 256\n"
			" %up_i32 = OpTypePointer Uniform ${itype32}\n"
			" %up_i16 = OpTypePointer Uniform ${itype16}\n"
			" %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
			" %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
			" %SSBO32 = OpTypeStruct %ra_i32\n"
			" %SSBO16 = OpTypeStruct %ra_i16\n"
			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
			" %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
			" %ssbo16 = OpVariable %up_SSBO16 Uniform\n");

		const StringTemplate scalarDecoration(
			"OpDecorate %ra_i32 ArrayStride ${arraystride}\n"
			"OpDecorate %ra_i16 ArrayStride 2\n"
			"OpMemberDecorate %SSBO32 0 Offset 0\n"
			"OpMemberDecorate %SSBO16 0 Offset 0\n"
			"OpDecorate %SSBO32 ${indecor}\n"
			"OpDecorate %SSBO16 BufferBlock\n"
			"OpDecorate %ssbo32 DescriptorSet 0\n"
			"OpDecorate %ssbo16 DescriptorSet 0\n"
			"OpDecorate %ssbo32 Binding 0\n"
			"OpDecorate %ssbo16 Binding 1\n");

		// Loops over all 256 elements, converting one scalar per iteration.
		const StringTemplate scalarTestFunc(
			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
			" %param = OpFunctionParameter %v4f32\n"
			"%entry = OpLabel\n"
			" %i = OpVariable %fp_i32 Function\n"
			" OpStore %i %c_i32_0\n"
			" OpBranch %loop\n"
			" %loop = OpLabel\n"
			" %15 = OpLoad %i32 %i\n"
			" %lt = OpSLessThan %bool %15 %c_i32_256\n"
			" OpLoopMerge %merge %inc None\n"
			" OpBranchConditional %lt %write %merge\n"
			"%write = OpLabel\n"
			" %30 = OpLoad %i32 %i\n"
			" %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
			"%val32 = OpLoad ${itype32} %src\n"
			"%val16 = ${convert} ${itype16} %val32\n"
			" %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
			" OpStore %dst %val16\n"
			" OpBranch %inc\n"
			" %inc = OpLabel\n"
			" %37 = OpLoad %i32 %i\n"
			" %39 = OpIAdd %i32 %37 %c_i32_1\n"
			" OpStore %i %39\n"
			" OpBranch %loop\n"
			"%merge = OpLabel\n"
			" OpReturnValue %param\n"
			"OpFunctionEnd\n");

		// ArrayStride (in bytes) of the 32-bit source array, indexed in step
		// with CAPABILITIES.
		const deUint32 arrayStrides[] = {4, 16};

		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
		{
			// Number of 32-bit words one source element occupies at this stride.
			const deUint32 wordsPerElement = arrayStrides[capIdx] / 4;

			for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
			{
				const IntegerFacts&	fact		= intFacts[factIdx];
				const string		testName	= string(CAPABILITIES[capIdx].name) + "_scalar_" + fact.name;
				map<string, string>	specs;

				specs["cap"]			= CAPABILITIES[capIdx].cap;
				specs["indecor"]		= CAPABILITIES[capIdx].decor;
				specs["itype32"]		= fact.type32;
				specs["v4itype32"]		= "%v4" + string(fact.type32).substr(1);
				specs["itype16"]		= fact.type16;
				specs["signed"]			= fact.isSigned;
				specs["convert"]		= fact.opcode;
				specs["arraystride"]	= de::toString(arrayStrides[capIdx]);

				fragments["pre_main"]	= scalarPreMain.specialize(specs);
				fragments["testfun"]	= scalarTestFunc.specialize(specs);
				fragments["capability"]	= capabilities.specialize(specs);
				fragments["decoration"]	= scalarDecoration.specialize(specs);

				// Spread the inputs so that each value sits at the start of
				// its stride-sized slot; the remaining words stay zero.
				vector<deInt32> inputsPadded (inputs.size() * wordsPerElement, 0);
				for (size_t dataIdx = 0; dataIdx < inputs.size(); ++dataIdx)
					inputsPadded[dataIdx * wordsPerElement] = inputs[dataIdx];

				GraphicsResources resources;
				resources.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputsPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
				resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);

				VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
				features.coreFeatures.fragmentStoresAndAtomics			= true;

				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
			}
		}
	}

	// Vector
	{
		const StringTemplate vecPreMain(
			"${itype16} = OpTypeInt 16 ${signed}\n"
			" %c_i32_64 = OpConstant %i32 64\n"
			"%v4itype16 = OpTypeVector ${itype16} 4\n"
			" %up_v4i32 = OpTypePointer Uniform ${v4itype32}\n"
			" %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
			" %ra_v4i32 = OpTypeArray ${v4itype32} %c_i32_64\n"
			" %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
			" %SSBO32 = OpTypeStruct %ra_v4i32\n"
			" %SSBO16 = OpTypeStruct %ra_v4i16\n"
			"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
			"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
			" %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
			" %ssbo16 = OpVariable %up_SSBO16 Uniform\n");

		const StringTemplate vecDecoration(
			"OpDecorate %ra_v4i32 ArrayStride 16\n"
			"OpDecorate %ra_v4i16 ArrayStride 8\n"
			"OpMemberDecorate %SSBO32 0 Offset 0\n"
			"OpMemberDecorate %SSBO16 0 Offset 0\n"
			"OpDecorate %SSBO32 ${indecor}\n"
			"OpDecorate %SSBO16 BufferBlock\n"
			"OpDecorate %ssbo32 DescriptorSet 0\n"
			"OpDecorate %ssbo16 DescriptorSet 0\n"
			"OpDecorate %ssbo32 Binding 0\n"
			"OpDecorate %ssbo16 Binding 1\n");

		// Loops over all 64 vectors, converting four components per iteration.
		const StringTemplate vecTestFunc(
			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
			" %param = OpFunctionParameter %v4f32\n"
			"%entry = OpLabel\n"
			" %i = OpVariable %fp_i32 Function\n"
			" OpStore %i %c_i32_0\n"
			" OpBranch %loop\n"
			" %loop = OpLabel\n"
			" %15 = OpLoad %i32 %i\n"
			" %lt = OpSLessThan %bool %15 %c_i32_64\n"
			" OpLoopMerge %merge %inc None\n"
			" OpBranchConditional %lt %write %merge\n"
			"%write = OpLabel\n"
			" %30 = OpLoad %i32 %i\n"
			" %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
			"%val32 = OpLoad ${v4itype32} %src\n"
			"%val16 = ${convert} %v4itype16 %val32\n"
			" %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
			" OpStore %dst %val16\n"
			" OpBranch %inc\n"
			" %inc = OpLabel\n"
			" %37 = OpLoad %i32 %i\n"
			" %39 = OpIAdd %i32 %37 %c_i32_1\n"
			" OpStore %i %39\n"
			" OpBranch %loop\n"
			"%merge = OpLabel\n"
			" OpReturnValue %param\n"
			"OpFunctionEnd\n");

		// The vector layout uses the unpadded inputs, so one resource set is
		// shared by every case; only the input's descriptor type is updated
		// per capability.
		GraphicsResources resources;
		resources.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
		resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));

		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
		{
			for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
			{
				const IntegerFacts&	fact		= intFacts[factIdx];
				const string		testName	= string(CAPABILITIES[capIdx].name) + "_vector_" + fact.name;
				map<string, string>	specs;

				specs["cap"]		= CAPABILITIES[capIdx].cap;
				specs["indecor"]	= CAPABILITIES[capIdx].decor;
				specs["itype32"]	= fact.type32;
				specs["v4itype32"]	= "%v4" + string(fact.type32).substr(1);
				specs["itype16"]	= fact.type16;
				specs["signed"]		= fact.isSigned;
				specs["convert"]	= fact.opcode;

				fragments["pre_main"]	= vecPreMain.specialize(specs);
				fragments["testfun"]	= vecTestFunc.specialize(specs);
				fragments["capability"]	= capabilities.specialize(specs);
				fragments["decoration"]	= vecDecoration.specialize(specs);

				resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);

				VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
				features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
				features.coreFeatures.fragmentStoresAndAtomics			= true;

				createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, features);
			}
		}
	}
}
// Adds the "stress_test" compute case to the group.
//
// The shader is dispatched as 128 x 128 x 1 workgroups of a single invocation.
// Each invocation copies two f16 elements from the input block to the output
// block: the ones indexed by components 0 and 1 of GlobalInvocationId. Many
// invocations therefore store to the same 16-bit location. Member 0 of both
// blocks is decorated Coherent and the spec requests coherent memory.
void addCompute16bitStorageUniform16To16Group (tcu::TestCaseGroup* group)
{
	const int				numElements		= 128;
	tcu::TestContext&		context			= group->getTestContext();
	de::Random				random			(deStringHash(group->getName()));
	const vector<deFloat16>	sourceValues	= getFloat16s(random, numElements);
	// Zero-filled buffer handed over as the expected output; checking is
	// delegated to computeCheckBuffersFloats (presumably it compares against
	// the inputs rather than these zeros - TODO confirm).
	const vector<deFloat16>	placeholderOut	(numElements, 0);

	// Assembly up to (but not including) the value of %c_size.
	const char				asmBeforeSize[]	=
		"OpCapability Shader\n"
		"OpCapability StorageUniformBufferBlock16\n"
		"OpExtension \"SPV_KHR_16bit_storage\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\" %id\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"
		"OpDecorate %id BuiltIn GlobalInvocationId\n"
		"OpDecorate %f16arr ArrayStride 2\n"
		"OpMemberDecorate %SSBO_IN 0 Coherent\n"
		"OpMemberDecorate %SSBO_OUT 0 Coherent\n"
		"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
		"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
		"OpDecorate %SSBO_IN BufferBlock\n"
		"OpDecorate %SSBO_OUT BufferBlock\n"
		"OpDecorate %ssboIN DescriptorSet 0\n"
		"OpDecorate %ssboOUT DescriptorSet 0\n"
		"OpDecorate %ssboIN Binding 0\n"
		"OpDecorate %ssboOUT Binding 1\n"
		"\n"
		"%bool = OpTypeBool\n"
		"%void = OpTypeVoid\n"
		"%voidf = OpTypeFunction %void\n"
		"%u32 = OpTypeInt 32 0\n"
		"%i32 = OpTypeInt 32 1\n"
		"%uvec3 = OpTypeVector %u32 3\n"
		"%uvec3ptr = OpTypePointer Input %uvec3\n"
		"%f16 = OpTypeFloat 16\n"
		"%f16ptr = OpTypePointer Uniform %f16\n"
		"\n"
		"%zero = OpConstant %i32 0\n"
		"%c_size = OpConstant %i32 ";

	// Remainder of the assembly, starting with the newline that ends %c_size.
	const char				asmAfterSize[]	=
		"\n"
		"\n"
		"%f16arr = OpTypeArray %f16 %c_size\n"
		"%SSBO_IN = OpTypeStruct %f16arr\n"
		"%SSBO_OUT = OpTypeStruct %f16arr\n"
		"%up_SSBOIN = OpTypePointer Uniform %SSBO_IN\n"
		"%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
		"%ssboIN = OpVariable %up_SSBOIN Uniform\n"
		"%ssboOUT = OpVariable %up_SSBOOUT Uniform\n"
		"\n"
		"%id = OpVariable %uvec3ptr Input\n"
		"%main = OpFunction %void None %voidf\n"
		"%label = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%y = OpCompositeExtract %u32 %idval 1\n"
		"\n"
		"%inlocx = OpAccessChain %f16ptr %ssboIN %zero %x \n"
		"%valx = OpLoad %f16 %inlocx\n"
		"%outlocx = OpAccessChain %f16ptr %ssboOUT %zero %x \n"
		" OpStore %outlocx %valx\n"
		"%inlocy = OpAccessChain %f16ptr %ssboIN %zero %y \n"
		"%valy = OpLoad %f16 %inlocy\n"
		"%outlocy = OpAccessChain %f16ptr %ssboOUT %zero %y \n"
		" OpStore %outlocy %valy\n"
		"\n"
		" OpReturn\n"
		" OpFunctionEnd\n";

	// The array length is the only generated part of the shader text.
	std::ostringstream		assembly;
	assembly << asmBeforeSize << numElements << asmAfterSize;

	ComputeShaderSpec		spec;

	spec.assembly					= assembly.str();
	spec.numWorkGroups				= IVec3(numElements, numElements, 1);
	spec.verifyIO					= computeCheckBuffersFloats;
	spec.coherentMemory				= true;
	spec.requestedVulkanFeatures	= get16BitStorageFeatures("uniform_buffer_block");

	spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(sourceValues))));
	spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(placeholderOut))));
	spec.extensions.push_back("VK_KHR_16bit_storage");

	group->addChild(new SpvAsmComputeShaderCase(context, "stress_test", "Granularity stress test", spec));
}
// Adds compute tests that load 32-bit values from binding 0, convert them to
// 16 bits and store them to a BufferBlock-decorated struct at binding 1.
//
// Float cases: every CAPABILITIES entry x {scalar, vector, matrix} x
// {rtz, rte, unspecified rounding}, converted with OpFConvert and checked by
// the matching computeCheck16BitFloats instantiation.
// Integer cases: every CAPABILITIES entry x {scalar, vector} x {sint, uint},
// converted with OpSConvert / OpUConvert; the expected output is the low
// 16 bits of each consumed input.
void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
{
	tcu::TestContext&		testCtx			= group->getTestContext();
	de::Random				rnd				(deStringHash(group->getName()));
	const int				numElements		= 128;

	// Shader skeleton shared by all cases. Placeholders marked ":opt" are
	// only supplied by the float matrix cases (and "rounding" by all float
	// cases).
	const StringTemplate	shaderTemplate	(
		"OpCapability Shader\n"
		"OpCapability ${capability}\n"
		"OpExtension \"SPV_KHR_16bit_storage\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\" %id\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"
		"OpDecorate %id BuiltIn GlobalInvocationId\n"
		"${stride}"
		"OpMemberDecorate %SSBO32 0 Offset 0\n"
		"OpMemberDecorate %SSBO16 0 Offset 0\n"
		"OpDecorate %SSBO32 ${storage}\n"
		"OpDecorate %SSBO16 BufferBlock\n"
		"OpDecorate %ssbo32 DescriptorSet 0\n"
		"OpDecorate %ssbo16 DescriptorSet 0\n"
		"OpDecorate %ssbo32 Binding 0\n"
		"OpDecorate %ssbo16 Binding 1\n"
		"${matrix_decor:opt}\n"
		"${rounding:opt}\n"
		"%bool = OpTypeBool\n"
		"%void = OpTypeVoid\n"
		"%voidf = OpTypeFunction %void\n"
		"%u32 = OpTypeInt 32 0\n"
		"%i32 = OpTypeInt 32 1\n"
		"%f32 = OpTypeFloat 32\n"
		"%uvec3 = OpTypeVector %u32 3\n"
		"%uvec3ptr = OpTypePointer Input %uvec3\n"
		"%i32ptr = OpTypePointer Uniform %i32\n"
		"%f32ptr = OpTypePointer Uniform %f32\n"
		"%zero = OpConstant %i32 0\n"
		"%c_i32_1 = OpConstant %i32 1\n"
		"%c_i32_16 = OpConstant %i32 16\n"
		"%c_i32_32 = OpConstant %i32 32\n"
		"%c_i32_64 = OpConstant %i32 64\n"
		"%c_i32_128 = OpConstant %i32 128\n"
		"%i32arr = OpTypeArray %i32 %c_i32_128\n"
		"%f32arr = OpTypeArray %f32 %c_i32_128\n"
		"${types}\n"
		"${matrix_types:opt}\n"
		"%SSBO32 = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
		"%SSBO16 = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
		"%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
		"%ssbo32 = OpVariable %up_SSBO32 Uniform\n"
		"%ssbo16 = OpVariable %up_SSBO16 Uniform\n"
		"%id = OpVariable %uvec3ptr Input\n"
		"%main = OpFunction %void None %voidf\n"
		"%label = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%inloc = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
		"%val32 = OpLoad %${base32} %inloc\n"
		"%val16 = ${convert} %${base16} %val32\n"
		"%outloc = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
		" OpStore %outloc %val16\n"
		"${matrix_store:opt}\n"
		" OpReturn\n"
		" OpFunctionEnd\n");

	{ // Floats
		const char floatTypes[] =
			"%f16 = OpTypeFloat 16\n"
			"%f16ptr = OpTypePointer Uniform %f16\n"
			"%f16arr = OpTypeArray %f16 %c_i32_128\n"
			"%v4f16 = OpTypeVector %f16 4\n"
			"%v4f32 = OpTypeVector %f32 4\n"
			"%v4f16ptr = OpTypePointer Uniform %v4f16\n"
			"%v4f32ptr = OpTypePointer Uniform %v4f32\n"
			"%v4f16arr = OpTypeArray %v4f16 %c_i32_32\n"
			"%v4f32arr = OpTypeArray %v4f32 %c_i32_32\n";

		// Rounding-mode decoration for %val16 plus the verifier that accepts it.
		struct RoundingCase
		{
			const char*		name;
			const char*		decor;
			VerifyIOFunc	func;
		};

		const RoundingCase roundingCases[] =
		{
			{"rtz", "OpDecorate %val16 FPRoundingMode RTZ", computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
			{"rte", "OpDecorate %val16 FPRoundingMode RTE", computeCheck16BitFloats<ROUNDINGMODE_RTE>},
			{"unspecified_rnd_mode", "", computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
		};

		// Layout of one float case: type names, stride decorations, dispatch
		// size and the multiplier applied to the generated input length.
		struct FloatLayout
		{
			const char*	name;
			const char*	base32;
			const char*	base16;
			const char*	stride;
			unsigned	count;
			unsigned	inputStride;
		};

		// First index follows CAPABILITIES.
		const FloatLayout floatLayouts[2][3] =
		{
			{ // BufferBlock
				{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", numElements, 1},
				{"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1},
				{"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}
			},
			{ // Block
				{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 16\nOpDecorate %f16arr ArrayStride 2\n", numElements, 4},
				{"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1},
				{"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}
			}
		};

		// The custom verifiers check the results against the inputs, so the
		// expected-output buffer only needs to have the right size.
		vector<deFloat16> float16UnusedData (numElements, 0);

		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
		{
			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(floatLayouts[capIdx]); ++tyIdx)
			{
				const FloatLayout&	layout		= floatLayouts[capIdx][tyIdx];
				const bool			isMatrix	= (strcmp(layout.name, "matrix") == 0);

				for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(roundingCases); ++rndModeIdx)
				{
					const RoundingCase&	mode		= roundingCases[rndModeIdx];
					const string		testName	= string(CAPABILITIES[capIdx].name) + "_" + layout.name + "_float_" + mode.name;
					// Fresh random inputs are drawn for every case.
					vector<float>		float32Data	= getFloat32s(rnd, numElements * layout.inputStride);
					string				rounding	= mode.decor;
					map<string, string>	specs;
					ComputeShaderSpec	spec;

					specs["capability"]	= CAPABILITIES[capIdx].cap;
					specs["storage"]	= CAPABILITIES[capIdx].decor;
					specs["stride"]		= layout.stride;
					specs["base32"]		= layout.base32;
					specs["base16"]		= layout.base16;
					specs["types"]		= floatTypes;
					specs["convert"]	= "OpFConvert";

					if (isMatrix)
					{
						// A matrix case converts two columns per invocation;
						// the second conversion result (%val16_1) needs its
						// own rounding decoration when a mode is specified.
						if (strcmp(mode.name, "rtz") == 0)
							rounding += "\nOpDecorate %val16_1 FPRoundingMode RTZ\n";
						else if (strcmp(mode.name, "rte") == 0)
							rounding += "\nOpDecorate %val16_1 FPRoundingMode RTE\n";

						specs["index0"]			= "%zero";
						specs["matrix_prefix"]	= "m2";
						specs["matrix_types"]	=
							"%m2v4f16 = OpTypeMatrix %v4f16 2\n"
							"%m2v4f32 = OpTypeMatrix %v4f32 2\n"
							"%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
							"%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
						specs["matrix_decor"]	=
							"OpMemberDecorate %SSBO32 0 ColMajor\n"
							"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
							"OpMemberDecorate %SSBO16 0 ColMajor\n"
							"OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
						specs["matrix_store"]	=
							"%inloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
							"%val32_1 = OpLoad %v4f32 %inloc_1\n"
							"%val16_1 = OpFConvert %v4f16 %val32_1\n"
							"%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
							" OpStore %outloc_1 %val16_1\n";
					}

					specs["rounding"]	= rounding;

					spec.assembly					= shaderTemplate.specialize(specs);
					spec.numWorkGroups				= IVec3(layout.count, 1, 1);
					spec.verifyIO					= mode.func;
					spec.requestedVulkanFeatures	= get16BitStorageFeatures(CAPABILITIES[capIdx].name);

					spec.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), CAPABILITIES[capIdx].dtype));
					spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedData))));
					spec.extensions.push_back("VK_KHR_16bit_storage");

					group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
				}
			}
		}
	}

	{ // Integers
		const char sintTypes[] =
			"%i16 = OpTypeInt 16 1\n"
			"%i16ptr = OpTypePointer Uniform %i16\n"
			"%i16arr = OpTypeArray %i16 %c_i32_128\n"
			"%v2i16 = OpTypeVector %i16 2\n"
			"%v2i32 = OpTypeVector %i32 2\n"
			"%v2i16ptr = OpTypePointer Uniform %v2i16\n"
			"%v2i32ptr = OpTypePointer Uniform %v2i32\n"
			"%v2i16arr = OpTypeArray %v2i16 %c_i32_64\n"
			"%v2i32arr = OpTypeArray %v2i32 %c_i32_64\n";

		const char uintTypes[] =
			"%u16 = OpTypeInt 16 0\n"
			"%u16ptr = OpTypePointer Uniform %u16\n"
			"%u32ptr = OpTypePointer Uniform %u32\n"
			"%u16arr = OpTypeArray %u16 %c_i32_128\n"
			"%u32arr = OpTypeArray %u32 %c_i32_128\n"
			"%v2u16 = OpTypeVector %u16 2\n"
			"%v2u32 = OpTypeVector %u32 2\n"
			"%v2u16ptr = OpTypePointer Uniform %v2u16\n"
			"%v2u32ptr = OpTypePointer Uniform %v2u32\n"
			"%v2u16arr = OpTypeArray %v2u16 %c_i32_64\n"
			"%v2u32arr = OpTypeArray %v2u32 %c_i32_64\n";

		// Layout of one integer case. inputStride is the distance, in 32-bit
		// words, between consecutive items of the host-side input data.
		struct IntLayout
		{
			const char*	name;
			const char*	types;
			const char*	base32;
			const char*	base16;
			const char*	opcode;
			const char*	stride;
			unsigned	count;
			unsigned	inputStride;
		};

		// First index follows CAPABILITIES.
		const IntLayout intLayouts[2][4] =
		{
			{
				{"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements, 1},
				{"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements, 1},
				{"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 2},
				{"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 2}
			},
			{
				{"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 16\nOpDecorate %i16arr ArrayStride 2\n", numElements, 4},
				{"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 16\nOpDecorate %u16arr ArrayStride 2\n", numElements, 4},
				{"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 16\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 4},
				{"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 16\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 4}
			}
		};

		for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
		{
			for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(intLayouts[capIdx]); ++tyIdx)
			{
				const IntLayout&	layout			= intLayouts[capIdx][tyIdx];
				const string		testName		= string(CAPABILITIES[capIdx].name) + "_" + layout.name;
				const deUint32		inputStride		= layout.inputStride;
				const deUint32		count			= layout.count;
				const deUint32		scalarsPerItem	= numElements / count;
				// Fresh random inputs are drawn for every case.
				vector<deInt32>		inputs			= getInt32s(rnd, numElements * inputStride);
				vector<deInt16>		outputs;
				map<string, string>	specs;
				ComputeShaderSpec	spec;

				// Expected output: the low 16 bits of every scalar the shader
				// reads, skipping whatever padding separates the items.
				outputs.reserve(numElements);
				for (deUint32 itemIdx = 0; itemIdx < count; ++itemIdx)
				{
					const deUint32 itemBase = itemIdx * inputStride;

					for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx)
						outputs.push_back(static_cast<deInt16>(inputs[itemBase + scalarIdx] & 0xffff));
				}

				specs["capability"]	= CAPABILITIES[capIdx].cap;
				specs["storage"]	= CAPABILITIES[capIdx].decor;
				specs["stride"]		= layout.stride;
				specs["base32"]		= layout.base32;
				specs["base16"]		= layout.base16;
				specs["types"]		= layout.types;
				specs["convert"]	= layout.opcode;

				spec.assembly					= shaderTemplate.specialize(specs);
				spec.numWorkGroups				= IVec3(layout.count, 1, 1);
				spec.requestedVulkanFeatures	= get16BitStorageFeatures(CAPABILITIES[capIdx].name);

				spec.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), CAPABILITIES[capIdx].dtype));
				spec.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs))));
				spec.extensions.push_back("VK_KHR_16bit_storage");

				group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
			}
		}
	}
}
void addCompute16bitStorageUniform16StructTo32StructGroup (tcu::TestCaseGroup* group)
{
tcu::TestContext& testCtx = group->getTestContext();
de::Random rnd (deStringHash(group->getName()));
const StringTemplate shaderTemplate (
"OpCapability Shader\n"
"OpCapability ${capability}\n"
"OpExtension \"SPV_KHR_16bit_storage\"\n"
"OpMemoryModel Logical GLSL450\n"
"OpEntryPoint GLCompute %main \"main\" %id\n"
"OpExecutionMode %main LocalSize 1 1 1\n"
"OpDecorate %id BuiltIn GlobalInvocationId\n"
"\n"
"${strideF16}"
"\n"
"${strideF32}"
"\n"
"OpMemberDecorate %SSBO_IN 0 Offset 0\n"
"OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
"OpDecorate %SSBO_IN ${storage}\n"
"OpDecorate %SSBO_OUT BufferBlock\n"
"OpDecorate %ssboIN DescriptorSet 0\n"
"OpDecorate %ssboOUT DescriptorSet 0\n"
"OpDecorate %ssboIN Binding 0\n"
"OpDecorate %ssboOUT Binding 1\n"
"\n"
"%bool = OpTypeBool\n"
"%void = OpTypeVoid\n"
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%uvec3 = OpTypeVector %u32 3\n"
"%uvec3ptr = OpTypePointer Input %uvec3\n"
"\n"
"%i32 = OpTypeInt 32 1\n"
"%v2i32 = OpTypeVector %i32 2\n"
"%v4i32 = OpTypeVector %i32 4\n"
"\n"
"%f32 = OpTypeFloat 32\n"
"%v2f32 = OpTypeVector %f32 2\n"
"%v3f32 = OpTypeVector %f32 3\n"
"%v4f32 = OpTypeVector %f32 4\n"
"${types}\n"
"\n"
"%zero = OpConstant %i32 0\n"
"%c_i32_1 = OpConstant %i32 1\n"
"%c_i32_2 = OpConstant %i32 2\n"
"%c_i32_3 = OpConstant %i32 3\n"
"%c_i32_4 = OpConstant %i32 4\n"
"%c_i32_5 = OpConstant %i32 5\n"
"%c_i32_6 = OpConstant %i32 6\n"
"%c_i32_7 = OpConstant %i32 7\n"
"%c_i32_8 = OpConstant %i32 8\n"
"%c_i32_9 = OpConstant %i32 9\n"
"\n"
"%c_u32_1 = OpConstant %u32 1\n"
"%c_u32_3 = OpConstant %u32 3\n"
"%c_u32_7 = OpConstant %u32 7\n"
"%c_u32_11 = OpConstant %u32 11\n"
"\n"
"%f16arr3 = OpTypeArray %f16 %c_u32_3\n"
"%v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
"%v2f16arr11 = OpTypeArray %v2f16 %c_u32_11\n"
"%v3f16arr11 = OpTypeArray %v3f16 %c_u32_11\n"
"%v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
"%struct16 = OpTypeStruct %f16 %v2f16arr3\n"
"%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
"%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
"\n"
"%f32arr3 = OpTypeArray %f32 %c_u32_3\n"
"%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
"%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
"%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
"%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
"%struct32 = OpTypeStruct %f32 %v2f32arr3\n"
"%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
"%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
"\n"
"%f16StructArr7 = OpTypeArray %f16Struct %c_u32_7\n"
"%f32StructArr7 = OpTypeArray %f32Struct %c_u32_7\n"
"%SSBO_IN = OpTypeStruct %f16StructArr7\n"
"%SSBO_OUT = OpTypeStruct %f32StructArr7\n"
"%up_SSBOIN = OpTypePointer Uniform %SSBO_IN\n"
"%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
"%ssboIN = OpVariable %up_SSBOIN Uniform\n"
"%ssboOUT = OpVariable %up_SSBOOUT Uniform\n"
"\n"
"%id = OpVariable %uvec3ptr Input\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
"\n"
"%idval = OpLoad %uvec3 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
"%y = OpCompositeExtract %u32 %idval 1\n"
"\n"
"%f16src = OpAccessChain %f16ptr %ssboIN %zero %x %zero\n"
"%val_f16 = OpLoad %f16 %f16src\n"
"%val_f32 = OpFConvert %f32 %val_f16\n"
"%f32dst = OpAccessChain %f32ptr %ssboOUT %zero %x %zero\n"
"OpStore %f32dst %val_f32\n"
"\n"
"%v2f16src = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_1\n"
"%val_v2f16 = OpLoad %v2f16 %v2f16src\n"
"%val_v2f32 = OpFConvert %v2f32 %val_v2f16\n"
"%v2f32dst = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_1\n"
"OpStore %v2f32dst %val_v2f32\n"
"\n"
"%v3f16src = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_2\n"
"%val_v3f16 = OpLoad %v3f16 %v3f16src\n"
"%val_v3f32 = OpFConvert %v3f32 %val_v3f16\n"
"%v3f32dst = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_2\n"
"OpStore %v3f32dst %val_v3f32\n"
"\n"
"%v4f16src = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_3\n"
"%val_v4f16 = OpLoad %v4f16 %v4f16src\n"
"%val_v4f32 = OpFConvert %v4f32 %val_v4f16\n"
"%v4f32dst = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_3\n"
"OpStore %v4f32dst %val_v4f32\n"
"\n"
//struct {f16, v2f16[3]}
"%Sf16src = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_5 %y %zero\n"
"%Sval_f16 = OpLoad %f16 %Sf16src\n"
"%Sval_f32 = OpFConvert %f32 %Sval_f16\n"
"%Sf32dst2 = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_5 %y %zero\n"
"OpStore %Sf32dst2 %Sval_f32\n"
"\n"
"%Sv2f16src0 = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
"%Sv2f16_0 = OpLoad %v2f16 %Sv2f16src0\n"
"%Sv2f32_0 = OpFConvert %v2f32 %Sv2f16_0\n"
"%Sv2f32dst_0 = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
"OpStore %Sv2f32dst_0 %Sv2f32_0\n"
"\n"
"%Sv2f16src1 = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
"%Sv2f16_1 = OpLoad %v2f16 %Sv2f16src1\n"
"%Sv2f32_1 = OpFConvert %v2f32 %Sv2f16_1\n"
"%Sv2f32dst_1 = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
"OpStore %Sv2f32dst_1 %Sv2f32_1\n"
"\n"
"%Sv2f16src2 = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
"%Sv2f16_2 = OpLoad %v2f16 %Sv2f16src2\n"
"%Sv2f32_2 = OpFConvert %v2f32 %Sv2f16_2\n"
"%Sv2f32dst_2 = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
"OpStore %Sv2f32dst_2 %Sv2f32_2\n"
"\n"
"%v2f16src2 = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_6 %y\n"
"%val2_v2f16 = OpLoad %v2f16 %v2f16src2\n"
"%val2_v2f32 = OpFConvert %v2f32 %val2_v2f16\n"
"%v2f32dst2 = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_6 %y\n"
"OpStore %v2f32dst2 %val2_v2f32\n"
"\n"
"%f16src2 = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_7\n"
"%val2_f16 = OpLoad %f16 %f16src2\n"
"%val2_f32 = OpFConvert %f32 %val2_f16\n"
"%f32dst2 = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_7\n"
"OpStore %f32dst2 %val2_f32\n"
"\n"
"%v3f16src2 = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_8 %y\n"
"%val2_v3f16 = OpLoad %v3f16 %v3f16src2\n"
"%val2_v3f32 = OpFConvert %v3f32 %val2_v3f16\n"
"%v3f32dst2 = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_8 %y\n"
"OpStore %v3f32dst2 %val2_v3f32\n"
"\n"
//Array with 3 elements
"%LessThan3 = OpSLessThan %bool %y %c_i32_3\n"
"OpSelectionMerge %BlockIf None\n"
"OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
"%LabelIf = OpLabel\n"
" %f16src3 = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_4 %y\n"
" %val3_f16 = OpLoad %f16 %f16src3\n"
" %val3_f32 = OpFConvert %f32 %val3_f16\n"
" %f32dst3 = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_4 %y\n"
" OpStore %f32dst3 %val3_f32\n"
"\n"
" %v4f16src2 = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_9 %y\n"
" %val2_v4f16 = OpLoad %v4f16 %v4f16src2\n"
" %val2_v4f32 = OpFConvert %v4f32 %val2_v4f16\n"
" %v4f32dst2 = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_9 %y\n"
" OpStore %v4f32dst2 %val2_v4f32\n"
"OpBranch %BlockIf\n"
"%BlockIf = OpLabel\n"
" OpReturn\n"
" OpFunctionEnd\n");
{ // Floats
vector<float> float32Data (getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430), 0.0f);
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
{
vector<deFloat16> float16DData = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? data16bitStd430(rnd) : data16bitStd140(rnd);
ComputeShaderSpec spec;
map<string, string> specs;
string testName = string(CAPABILITIES[capIdx].name);
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
specs["strideF16"] = getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE16BIT_STD430 : SHADERTEMPLATE_STRIDE16BIT_STD140);
specs["strideF32"] = getStructShaderComponet(SHADERTEMPLATE_STRIDE32BIT_STD430);
specs["types"] = getStructShaderComponet(SHADERTEMPLATE_TYPES);