/*-------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Test Case Skeleton Based on Compute Shaders
*//*--------------------------------------------------------------------*/
#include "vktSpvAsmComputeShaderCase.hpp"
#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"
#include "vktSpvAsmUtils.hpp"
#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"
namespace
{
using namespace vk;
using std::vector;
typedef vkt::SpirVAssembly::AllocationMp AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp AllocationSp;
typedef vk::Unique<VkBuffer> BufferHandleUp;
typedef vk::Unique<VkImage> ImageHandleUp;
typedef vk::Unique<VkImageView> ImageViewHandleUp;
typedef vk::Unique<VkSampler> SamplerHandleUp;
typedef de::SharedPtr<BufferHandleUp> BufferHandleSp;
typedef de::SharedPtr<ImageHandleUp> ImageHandleSp;
typedef de::SharedPtr<ImageViewHandleUp> ImageViewHandleSp;
typedef de::SharedPtr<SamplerHandleUp> SamplerHandleSp;
/*--------------------------------------------------------------------*//*!
* \brief Create a buffer, allocate and bind memory for the buffer
*
* The memory is created as host visible and passed back as a vk::Allocation
* instance via outMemory.
*//*--------------------------------------------------------------------*/
Move<VkBuffer> createBufferAndBindMemory (vkt::Context& context,
const DeviceInterface& vkdi,
const VkDevice& device,
VkDescriptorType dtype,
Allocator& allocator,
size_t numBytes,
AllocationMp* outMemory,
bool physStorageBuffer,
bool coherent = false)
{
VkBufferUsageFlags usageFlags = (VkBufferUsageFlags)0u;
if (physStorageBuffer)
usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
switch (dtype)
{
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: usageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; break;
default: DE_FATAL("Not implemented");
}
const VkBufferCreateInfo bufferCreateInfo =
{
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
DE_NULL, // pNext
0u, // flags
numBytes, // size
usageFlags, // usage
VK_SHARING_MODE_EXCLUSIVE, // sharingMode
0u, // queueFamilyIndexCount
DE_NULL, // pQueueFamilyIndices
};
Move<VkBuffer> buffer (createBuffer(vkdi, device, &bufferCreateInfo));
const VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *buffer);
AllocationMp bufferMemory = allocator.allocate(requirements,
(coherent ? MemoryRequirement::Coherent : MemoryRequirement::Any) |
(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && physStorageBuffer ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
MemoryRequirement::HostVisible);
VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
*outMemory = bufferMemory;
return buffer;
}
/*--------------------------------------------------------------------*//*!
* \brief Create an image, allocate and bind memory for the image
*
* The image is an 8x8 2D image with VK_FORMAT_R32G32B32A32_SFLOAT format and a
* single mip level, created with usage flags matching the given descriptor type.
*//*--------------------------------------------------------------------*/
Move<VkImage> createImageAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, deUint32 queueFamilyIndex, AllocationMp* outMemory)
{
VkImageUsageFlags usageBits = (VkImageUsageFlags)0;
switch (dtype)
{
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: usageBits = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; break;
default: DE_FATAL("Not implemented");
}
const VkImageCreateInfo resourceImageParams =
{
VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
0u, // VkImageCreateFlags flags;
VK_IMAGE_TYPE_2D, // VkImageType imageType;
VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
{ 8, 8, 1 }, // VkExtent3D extent;
1u, // deUint32 mipLevels;
1u, // deUint32 arrayLayers;
VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
usageBits, // VkImageUsageFlags usage;
VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1u, // deUint32 queueFamilyIndexCount;
&queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
};
// Create image
Move<VkImage> image = createImage(vkdi, device, &resourceImageParams);
const VkMemoryRequirements requirements = getImageMemoryRequirements(vkdi, device, *image);
de::MovePtr<Allocation> imageMemory = allocator.allocate(requirements, MemoryRequirement::Any);
VK_CHECK(vkdi.bindImageMemory(device, *image, imageMemory->getMemory(), imageMemory->getOffset()));
*outMemory = imageMemory;
return image;
}
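// Copy host data into a mapped allocation and flush it so the device sees the bytes (the flush is skipped for coherent memory).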
void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data, bool coherent = false)
{
void* const hostPtr = destAlloc->getHostPtr();
deMemcpy((deUint8*)hostPtr, data, numBytes);
if (!coherent)
flushAlloc(vkdi, device, *destAlloc);
}
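// Fill a mapped allocation with a constant byte value and flush it unless the memory is coherent.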
void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value, bool coherent = false)
{
void* const hostPtr = destAlloc->getHostPtr();
deMemset((deUint8*)hostPtr, value, numBytes);
if (!coherent)
flushAlloc(vkdi, device, *destAlloc);
}
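// Invalidate a mapped allocation before the host reads it back; a no-op for coherent memory.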
void invalidateMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* srcAlloc, bool coherent = false)
{
if (!coherent)
invalidateAlloc(vkdi, device, *srcAlloc);
}
/*--------------------------------------------------------------------*//*!
* \brief Create a descriptor set layout with the given descriptor types
*
* All descriptors are created for the compute shader stage.
*//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
DescriptorSetLayoutBuilder builder;
for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);
return builder.build(vkdi, device);
}
/*--------------------------------------------------------------------*//*!
* \brief Create a pipeline layout with one descriptor set
*//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorSetLayout descriptorSetLayout, const vkt::SpirVAssembly::BufferSp& pushConstants)
{
VkPipelineLayoutCreateInfo createInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
DE_NULL, // pNext
(VkPipelineLayoutCreateFlags)0, // flags
1u, // setLayoutCount
&descriptorSetLayout, // pSetLayouts
0u, // pushConstantRangeCount
DE_NULL, // pPushConstantRanges
};
VkPushConstantRange range =
{
VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags
0, // offset
0, // size
};
if (pushConstants != DE_NULL)
{
vector<deUint8> pushConstantsBytes;
pushConstants->getBytes(pushConstantsBytes);
range.size = static_cast<deUint32>(pushConstantsBytes.size());
createInfo.pushConstantRangeCount = 1;
createInfo.pPushConstantRanges = &range;
}
return createPipelineLayout(vkdi, device, &createInfo);
}
/*--------------------------------------------------------------------*//*!
* \brief Create a one-time descriptor pool for one descriptor set that
* supports the given descriptor types.
*//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
DescriptorPoolBuilder builder;
for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
builder.addType(dtypes[typeNdx], 1);
return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}
/*--------------------------------------------------------------------*//*!
* \brief Create a descriptor set
*
* The descriptor set's layout contains the given descriptor types,
* sequentially bound to binding points starting from 0.
*//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorPool pool, VkDescriptorSetLayout layout, const vector<VkDescriptorType>& dtypes, const vector<VkDescriptorBufferInfo>& descriptorInfos, const vector<VkDescriptorImageInfo>& descriptorImageInfos)
{
DE_ASSERT(dtypes.size() == descriptorInfos.size() + descriptorImageInfos.size());
const VkDescriptorSetAllocateInfo allocInfo =
{
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
DE_NULL,
pool,
1u,
&layout
};
Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vkdi, device, &allocInfo);
DescriptorSetUpdateBuilder builder;
deUint32 bufferNdx = 0u;
deUint32 imageNdx = 0u;
for (deUint32 descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
{
switch (dtypes[descriptorNdx])
{
// Write buffer descriptor
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorInfos[bufferNdx++]);
break;
// Write image/sampler descriptor
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorImageInfos[imageNdx++]);
break;
default:
DE_FATAL("Not implemented");
}
}
builder.update(vkdi, device);
return descriptorSet;
}
/*--------------------------------------------------------------------*//*!
* \brief Create a compute pipeline based on the given shader
*//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline (const DeviceInterface& vkdi, const VkDevice& device, VkPipelineLayout pipelineLayout, VkShaderModule shader, const char* entryPoint, const vkt::SpirVAssembly::SpecConstants& specConstants)
{
const deUint32 numSpecConstants = (deUint32)specConstants.getValuesCount();
vector<VkSpecializationMapEntry> entries;
VkSpecializationInfo specInfo;
size_t offset = 0;
if (numSpecConstants != 0)
{
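// Pack all specialization constants into one contiguous data blob; constant IDs are assigned sequentially from 0.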
entries.resize(numSpecConstants);
for (deUint32 ndx = 0; ndx < numSpecConstants; ++ndx)
{
const size_t valueSize = specConstants.getValueSize(ndx);
entries[ndx].constantID = ndx;
entries[ndx].offset = static_cast<deUint32>(offset);
entries[ndx].size = valueSize;
offset += valueSize;
}
specInfo.mapEntryCount = numSpecConstants;
specInfo.pMapEntries = &entries[0];
specInfo.dataSize = offset;
specInfo.pData = specConstants.getValuesBuffer();
}
const VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo =
{
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
DE_NULL, // pNext
(VkPipelineShaderStageCreateFlags)0, // flags
VK_SHADER_STAGE_COMPUTE_BIT, // stage
shader, // module
entryPoint, // pName
(numSpecConstants == 0) ? DE_NULL : &specInfo, // pSpecializationInfo
};
const VkComputePipelineCreateInfo pipelineCreateInfo =
{
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
DE_NULL, // pNext
(VkPipelineCreateFlags)0, // flags
pipelineShaderStageCreateInfo, // stage
pipelineLayout, // layout
(VkPipeline)0, // basePipelineHandle
0u, // basePipelineIndex
};
return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}
} // anonymous
namespace vkt
{
namespace SpirVAssembly
{
// ComputeShaderTestCase implementations
SpvAsmComputeShaderCase::SpvAsmComputeShaderCase (tcu::TestContext& testCtx, const char* name, const char* description, const ComputeShaderSpec& spec)
: TestCase (testCtx, name, description)
, m_shaderSpec (spec)
{
}
void SpvAsmComputeShaderCase::initPrograms (SourceCollections& programCollection) const
{
programCollection.spirvAsmSources.add("compute") << m_shaderSpec.assembly.c_str() << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, m_shaderSpec.spirvVersion);
}
TestInstance* SpvAsmComputeShaderCase::createInstance (Context& ctx) const
{
if (getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion) > ctx.getUsedApiVersion())
{
TCU_THROW(NotSupportedError, std::string("Vulkan higher than or equal to " + getVulkanName(getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion)) + " is required for this test to run").c_str());
}
return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec);
}
// ComputeShaderTestInstance implementations
SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec)
: TestInstance (ctx)
, m_shaderSpec (spec)
{
}
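// Map a descriptor type to the image usage flags the compute tests need: storage or sampled usage, plus transfer destination for the initial upload.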
VkImageUsageFlags getMatchingComputeImageUsageFlags (VkDescriptorType dType)
{
switch (dType)
{
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: return VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
default: DE_FATAL("Not implemented");
}
return (VkImageUsageFlags)0;
}
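// Main test body: check feature support, build input/output resources and descriptors,
// record and submit the compute dispatch, then verify the output buffers on the host.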
tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
{
const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
const VkDevice& device = m_context.getDevice();
const DeviceInterface& vkdi = m_context.getDeviceInterface();
Allocator& allocator = m_context.getDefaultAllocator();
const VkQueue queue = m_context.getUniversalQueue();
vector<AllocationSp> inputAllocs;
vector<AllocationSp> outputAllocs;
vector<BufferHandleSp> inputBuffers;
vector<ImageHandleSp> inputImages;
vector<ImageViewHandleSp> inputImageViews;
vector<SamplerHandleSp> inputSamplers;
vector<BufferHandleSp> outputBuffers;
vector<VkDescriptorBufferInfo> descriptorInfos;
vector<VkDescriptorImageInfo> descriptorImageInfos;
vector<VkDescriptorType> descriptorTypes;
// Check all required extensions are supported
for (const auto& ext : m_shaderSpec.extensions)
m_context.requireDeviceFunctionality(ext);
// Core features
{
const char* unsupportedFeature = DE_NULL;
vk::VkPhysicalDeviceFeatures localRequiredCoreFeatures = m_shaderSpec.requestedVulkanFeatures.coreFeatures;
// Skip checking features that are not relevant to compute shaders
localRequiredCoreFeatures.fullDrawIndexUint32 = DE_FALSE;
localRequiredCoreFeatures.independentBlend = DE_FALSE;
localRequiredCoreFeatures.geometryShader = DE_FALSE;
localRequiredCoreFeatures.tessellationShader = DE_FALSE;
localRequiredCoreFeatures.sampleRateShading = DE_FALSE;
localRequiredCoreFeatures.dualSrcBlend = DE_FALSE;
localRequiredCoreFeatures.logicOp = DE_FALSE;
localRequiredCoreFeatures.multiDrawIndirect = DE_FALSE;
localRequiredCoreFeatures.drawIndirectFirstInstance = DE_FALSE;
localRequiredCoreFeatures.depthClamp = DE_FALSE;
localRequiredCoreFeatures.depthBiasClamp = DE_FALSE;
localRequiredCoreFeatures.fillModeNonSolid = DE_FALSE;
localRequiredCoreFeatures.depthBounds = DE_FALSE;
localRequiredCoreFeatures.wideLines = DE_FALSE;
localRequiredCoreFeatures.largePoints = DE_FALSE;
localRequiredCoreFeatures.alphaToOne = DE_FALSE;
localRequiredCoreFeatures.multiViewport = DE_FALSE;
localRequiredCoreFeatures.occlusionQueryPrecise = DE_FALSE;
localRequiredCoreFeatures.vertexPipelineStoresAndAtomics = DE_FALSE;
localRequiredCoreFeatures.fragmentStoresAndAtomics = DE_FALSE;
localRequiredCoreFeatures.shaderTessellationAndGeometryPointSize = DE_FALSE;
localRequiredCoreFeatures.shaderClipDistance = DE_FALSE;
localRequiredCoreFeatures.shaderCullDistance = DE_FALSE;
localRequiredCoreFeatures.sparseBinding = DE_FALSE;
localRequiredCoreFeatures.variableMultisampleRate = DE_FALSE;
if (!isCoreFeaturesSupported(m_context, localRequiredCoreFeatures, &unsupportedFeature))
TCU_THROW(NotSupportedError, std::string("At least following requested core feature is not supported: ") + unsupportedFeature);
}
// Extension features
{
// 8bit storage features
{
if (!is8BitStorageFeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.ext8BitStorage))
TCU_THROW(NotSupportedError, "Requested 8bit storage features not supported");
}
// 16bit storage features
{
if (!is16BitStorageFeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.ext16BitStorage))
TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
}
// VariablePointers features
{
if (!isVariablePointersFeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.extVariablePointers))
TCU_THROW(NotSupportedError, "Requested Variable Pointer feature not supported");
}
// Float16/Int8 shader features
{
if (!isFloat16Int8FeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.extFloat16Int8))
TCU_THROW(NotSupportedError, "Requested 16bit float or 8bit int feature not supported");
}
// Vulkan Memory Model features
{
if (!isVulkanMemoryModelFeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.extVulkanMemoryModel))
TCU_THROW(NotSupportedError, "Requested Vulkan Memory Model feature not supported");
}
// FloatControls features
if (!isFloatControlsFeaturesSupported(m_context, m_shaderSpec.requestedVulkanFeatures.floatControlsProperties))
TCU_THROW(NotSupportedError, "Requested Float Controls features not supported");
if (m_shaderSpec.usesPhysStorageBuffer && !m_context.isBufferDeviceAddressSupported())
TCU_THROW(NotSupportedError, "Request physical storage buffer feature not supported");
}
DE_ASSERT(!m_shaderSpec.outputs.empty());
// Create command pool and command buffer
const Unique<VkCommandPool> cmdPool (createCommandPool(vkdi, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
// Create buffer and image objects, allocate storage, and create views for all input/output buffers and images.
for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
{
const VkDescriptorType descType = m_shaderSpec.inputs[inputNdx].getDescriptorType();
const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
(descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
descriptorTypes.push_back(descType);
// Buffer
if (!hasImage && !hasSampler)
{
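// Plain buffer input: upload the spec-provided bytes into a host-visible buffer that is bound directly to the descriptor.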
const BufferSp& input = m_shaderSpec.inputs[inputNdx].getBuffer();
vector<deUint8> inputBytes;
input->getBytes(inputBytes);
const size_t numBytes = inputBytes.size();
AllocationMp bufferAlloc;
BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));
setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front(), m_shaderSpec.coherentMemory);
inputBuffers.push_back(BufferHandleSp(buffer));
inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
}
// Image
else if (hasImage)
{
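// Image input: stage the bytes in a host-visible buffer, then copy them into a freshly created 8x8 image.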
const BufferSp& input = m_shaderSpec.inputs[inputNdx].getBuffer();
vector<deUint8> inputBytes;
input->getBytes(inputBytes);
const size_t numBytes = inputBytes.size();
AllocationMp bufferAlloc;
BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer));
AllocationMp imageAlloc;
ImageHandleUp* image = new ImageHandleUp(createImageAndBindMemory(vkdi, device, descType, allocator, queueFamilyIndex, &imageAlloc));
setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front());
inputBuffers.push_back(BufferHandleSp(buffer));
inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
inputImages.push_back(ImageHandleSp(image));
inputAllocs.push_back(de::SharedPtr<Allocation>(imageAlloc.release()));
const VkImageLayout imageLayout = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
const VkBufferImageCopy copyRegion =
{
0u, // VkDeviceSize bufferOffset;
0u, // deUint32 bufferRowLength;
0u, // deUint32 bufferImageHeight;
{
VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspect;
0u, // deUint32 mipLevel;
0u, // deUint32 baseArrayLayer;
1u, // deUint32 layerCount;
}, // VkImageSubresourceLayers imageSubresource;
{ 0, 0, 0 }, // VkOffset3D imageOffset;
{ 8, 8, 1 } // VkExtent3D imageExtent;
};
vector<VkBufferImageCopy> copyRegions;
copyRegions.push_back(copyRegion);
copyBufferToImage(vkdi, device, queue, queueFamilyIndex, buffer->get(), (deUint32)numBytes, copyRegions, DE_NULL, VK_IMAGE_ASPECT_COLOR_BIT, 1u, 1u, image->get(), imageLayout);
}
}
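// Second pass over the inputs: create image views and samplers where needed and record the descriptor buffer/image infos in binding order.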
deUint32 imageNdx = 0u;
deUint32 bufferNdx = 0u;
for (deUint32 inputNdx = 0; inputNdx < descriptorTypes.size(); ++inputNdx)
{
const VkDescriptorType descType = descriptorTypes[inputNdx];
const bool hasImage = (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
const bool hasSampler = (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
(descType == VK_DESCRIPTOR_TYPE_SAMPLER) ||
(descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
// Create image view and sampler
if (hasImage || hasSampler)
{
if (descType != VK_DESCRIPTOR_TYPE_SAMPLER)
{
const VkImageViewCreateInfo imgViewParams =
{
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
0u, // VkImageViewCreateFlags flags;
**inputImages[imageNdx++], // VkImage image;
VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
{
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
}, // VkComponentMapping components;
{
VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
0u, // deUint32 baseMipLevel;
1u, // deUint32 levelCount;
0u, // deUint32 baseArrayLayer;
1u, // deUint32 layerCount;
}, // VkImageSubresourceRange subresourceRange;
};
Move<VkImageView> imgView (createImageView(vkdi, device, &imgViewParams));
inputImageViews.push_back(ImageViewHandleSp(new ImageViewHandleUp(imgView)));
}
if (hasSampler)
{
const VkSamplerCreateInfo samplerParams =
{
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
0, // VkSamplerCreateFlags flags;
VK_FILTER_NEAREST, // VkFilter magFilter;
VK_FILTER_NEAREST, // VkFilter minFilter;
VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode;
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
0.0f, // float mipLodBias;
VK_FALSE, // VkBool32 anisotropyEnable;
1.0f, // float maxAnisotropy;
VK_FALSE, // VkBool32 compareEnable;
VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp;
0.0f, // float minLod;
0.0f, // float maxLod;
VK_BORDER_COLOR_INT_OPAQUE_BLACK, // VkBorderColor borderColor;
VK_FALSE // VkBool32 unnormalizedCoordinates;
};
Move<VkSampler> sampler (createSampler(vkdi, device, &samplerParams));
inputSamplers.push_back(SamplerHandleSp(new SamplerHandleUp(sampler)));
}
}
// Create descriptor buffer and image infos
switch (descType)
{
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
{
const VkDescriptorBufferInfo bufInfo =
{
**inputBuffers[bufferNdx++], // VkBuffer buffer;
0, // VkDeviceSize offset;
VK_WHOLE_SIZE, // VkDeviceSize size;
};
descriptorInfos.push_back(bufInfo);
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
{
const VkDescriptorImageInfo imgInfo =
{
DE_NULL, // VkSampler sampler;
**inputImageViews.back(), // VkImageView imageView;
VK_IMAGE_LAYOUT_GENERAL // VkImageLayout imageLayout;
};
descriptorImageInfos.push_back(imgInfo);
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
{
const VkDescriptorImageInfo imgInfo =
{
DE_NULL, // VkSampler sampler;
**inputImageViews.back(), // VkImageView imageView;
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
};
descriptorImageInfos.push_back(imgInfo);
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLER:
{
const VkDescriptorImageInfo imgInfo =
{
**inputSamplers.back(), // VkSampler sampler;
DE_NULL, // VkImageView imageView;
VK_IMAGE_LAYOUT_GENERAL // VkImageLayout imageLayout;
};
descriptorImageInfos.push_back(imgInfo);
break;
}
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
{
const VkDescriptorImageInfo imgInfo =
{
**inputSamplers.back(), // VkSampler sampler;
**inputImageViews.back(), // VkImageView imageView;
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL // VkImageLayout imageLayout;
};
descriptorImageInfos.push_back(imgInfo);
break;
}
default:
DE_FATAL("Not implemented");
}
}
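// Create a host-visible storage buffer for each expected output and pre-fill it with 0xff so untouched bytes are easy to spot.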
for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
{
DE_ASSERT(m_shaderSpec.outputs[outputNdx].getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
descriptorTypes.push_back(m_shaderSpec.outputs[outputNdx].getDescriptorType());
AllocationMp alloc;
const BufferSp& output = m_shaderSpec.outputs[outputNdx].getBuffer();
vector<deUint8> outputBytes;
output->getBytes(outputBytes);
const size_t numBytes = outputBytes.size();
BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc, m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));
fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff, m_shaderSpec.coherentMemory);
descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
outputBuffers.push_back(BufferHandleSp(buffer));
outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
}
std::vector<VkDeviceAddress> gpuAddrs;
// Query the buffer device addresses, write them into a new buffer, and replace
// all the descriptors with just a descriptor to this new buffer.
if (m_shaderSpec.usesPhysStorageBuffer)
{
const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
VkBufferDeviceAddressInfo info =
{
VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
0, // VkBuffer buffer
};
for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
{
info.buffer = **inputBuffers[inputNdx];
VkDeviceAddress addr;
if (useKHR)
addr = vkdi.getBufferDeviceAddress(device, &info);
else
addr = vkdi.getBufferDeviceAddressEXT(device, &info);
gpuAddrs.push_back(addr);
}
for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
{
info.buffer = **outputBuffers[outputNdx];
VkDeviceAddress addr;
if (useKHR)
addr = vkdi.getBufferDeviceAddress(device, &info);
else
addr = vkdi.getBufferDeviceAddressEXT(device, &info);
gpuAddrs.push_back(addr);
}
descriptorInfos.clear();
descriptorTypes.clear();
descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
const size_t numBytes = gpuAddrs.size() * sizeof(VkDeviceAddress);
AllocationMp bufferAlloc;
BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
allocator, numBytes, &bufferAlloc, false, m_shaderSpec.coherentMemory));
setMemory(vkdi, device, &*bufferAlloc, numBytes, &gpuAddrs.front(), m_shaderSpec.coherentMemory);
inputBuffers.push_back(BufferHandleSp(buffer));
inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
}
// Create layouts and descriptor set.
Unique<VkDescriptorSetLayout> descriptorSetLayout (createDescriptorSetLayout(vkdi, device, descriptorTypes));
Unique<VkPipelineLayout> pipelineLayout (createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
Unique<VkDescriptorPool> descriptorPool (createDescriptorPool(vkdi, device, descriptorTypes));
Unique<VkDescriptorSet> descriptorSet (createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout, descriptorTypes, descriptorInfos, descriptorImageInfos));
// Create compute shader and pipeline.
const ProgramBinary& binary = m_context.getBinaryCollection().get("compute");
if (m_shaderSpec.verifyBinary && !m_shaderSpec.verifyBinary(binary))
{
return tcu::TestStatus::fail("Binary verification of SPIR-V in the test failed");
}
Unique<VkShaderModule> module (createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));
Unique<VkPipeline> computePipeline (createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));
// Create command buffer and record commands
const tcu::IVec3& numWorkGroups = m_shaderSpec.numWorkGroups;
beginCommandBuffer(vkdi, *cmdBuffer);
vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
if (m_shaderSpec.pushConstants != DE_NULL)
{
vector<deUint8> pushConstantsBytes;
m_shaderSpec.pushConstants->getBytes(pushConstantsBytes);
const deUint32 size = static_cast<deUint32>(pushConstantsBytes.size());
const void* data = &pushConstantsBytes.front();
vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0, /* size = */ size, data);
}
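// Dispatch the workgroup counts requested by the test spec.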
vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());
// Insert a barrier so data written by the shader is available to the host
for (deUint32 outputBufferNdx = 0; outputBufferNdx < outputBuffers.size(); ++outputBufferNdx)
{
const VkBufferMemoryBarrier buf_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
DE_NULL, // const void* pNext;
VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
**outputBuffers[outputBufferNdx], // VkBuffer buffer;
0, // VkDeviceSize offset;
VK_WHOLE_SIZE // VkDeviceSize size;
};
vkdi.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, DE_NULL, 1, &buf_barrier, 0, DE_NULL);
}
endCommandBuffer(vkdi, *cmdBuffer);
submitCommandsAndWait(vkdi, device, queue, *cmdBuffer);
// Invalidate output memory ranges before checking on host.
for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
{
invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.coherentMemory);
}
// Check output.
if (m_shaderSpec.verifyIO)
{
if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs, m_context.getTestContext().getLog()))
return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
}
else
{
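// No custom verifier provided: compare each output buffer byte-for-byte against the expected bytes and log the first mismatches.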
for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
{
const BufferSp& expectedOutput = m_shaderSpec.outputs[outputNdx].getBuffer();;
vector<deUint8> expectedBytes;
expectedOutput->getBytes(expectedBytes);
if (deMemCmp(&expectedBytes.front(), outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size()))
{
const size_t errorsMax = 16u;
const deUint8* ptrHost = static_cast<deUint8*>(outputAllocs[outputNdx]->getHostPtr());
const deUint8* ptrExpected = static_cast<deUint8*>(&expectedBytes.front());
size_t errors = 0u;
size_t ndx = 0u;
for (; ndx < expectedBytes.size(); ++ndx)
{
if (ptrHost[ndx] != ptrExpected[ndx])
break;
}
for (; ndx < expectedBytes.size(); ++ndx)
{
if (ptrHost[ndx] != ptrExpected[ndx])
{
m_context.getTestContext().getLog() << tcu::TestLog::Message
<< "OutputBuffer:" << outputNdx
<< " got:" << ((deUint32)ptrHost[ndx])
<< " expected:" << ((deUint32)ptrExpected[ndx])
<< " at byte " << ndx << tcu::TestLog::EndMessage;
errors++;
if (errors >= errorsMax)
{
m_context.getTestContext().getLog() << tcu::TestLog::Message << "Maximum error count reached (" << errors << "). Stop output."
<< tcu::TestLog::EndMessage;
break;
}
}
}
return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
}
}
}
return tcu::TestStatus::pass("Output match with expected");
}
} // SpirVAssembly
} // vkt