blob: 03679c35274dd62519d02c915e46f082a818d7d9 [file] [log] [blame]
/*------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2019 The Khronos Group Inc.
* Copyright (c) 2019 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Concurrent draw tests
* Tests that create queue for rendering as well as queue for
* compute, and trigger work on both pipelines at the same time,
* and finally verify that the results are as expected.
*//*--------------------------------------------------------------------*/
#include "vktDrawConcurrentTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktDrawTestCaseUtil.hpp"
#include "../compute/vktComputeTestsUtil.hpp"
#include "vktDrawBaseClass.hpp"
#include "tcuTestLog.hpp"
#include "tcuResource.hpp"
#include "tcuImageCompare.hpp"
#include "tcuTextureUtil.hpp"
#include "tcuRGBA.hpp"
#include "vkDefs.hpp"
#include "vkCmdUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "deRandom.hpp"
using namespace vk;
namespace vkt
{
namespace Draw
{
namespace
{
// Test instance that records a draw on the universal queue while a separately
// created device executes a compute dispatch, submits both, and then verifies
// the compute output buffer and the rendered image independently.
class ConcurrentDraw : public DrawTestsBaseClass
{
public:
// Reuses the shared draw-test spec (shader map + primitive topology).
typedef TestSpecBase TestSpec;
ConcurrentDraw (Context &context, TestSpec testSpec);
// Runs one iteration: set up compute device/pipeline, submit compute and
// draw work concurrently, wait for both fences, validate both results.
virtual tcu::TestStatus iterate (void);
};
// Builds the vertex stream for the draw: two guard vertices (reference index
// -1), 1000 copies of a centered quad expressed as two triangles with a
// running per-vertex reference index starting at 2, and one trailing guard
// vertex. All vertices are blue.
ConcurrentDraw::ConcurrentDraw (Context &context, TestSpec testSpec)
	: DrawTestsBaseClass(context, testSpec.shaders[glu::SHADERTYPE_VERTEX], testSpec.shaders[glu::SHADERTYPE_FRAGMENT], testSpec.topology)
{
	const tcu::Vec4 blue = tcu::RGBA::blue().toVec();

	// Leading guard vertices.
	m_data.push_back(VertexElementData(tcu::Vec4( 1.0f, -1.0f, 1.0f, 1.0f), blue, -1));
	m_data.push_back(VertexElementData(tcu::Vec4(-1.0f,  1.0f, 1.0f, 1.0f), blue, -1));

	// One quad = two triangles = six vertices, in the same order as before.
	const tcu::Vec4 quadVertices[6] =
	{
		tcu::Vec4(-0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4(-0.3f,  0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f,  0.3f, 1.0f, 1.0f),
		tcu::Vec4(-0.3f,  0.3f, 1.0f, 1.0f),
	};

	int nextRefIndex = 2;
	for (int quadNdx = 0; quadNdx < 1000; ++quadNdx)
		for (int vertexNdx = 0; vertexNdx < 6; ++vertexNdx)
			m_data.push_back(VertexElementData(quadVertices[vertexNdx], blue, nextRefIndex++));

	// Trailing guard vertex.
	m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), blue, -1));

	initialize();
}
// Runs one iteration of the concurrent-draw test:
//  1. Creates a second logical device with a compute-capable queue.
//  2. Records a compute dispatch that transforms a 1024-element buffer.
//  3. Records a draw of the vertex data prepared in the constructor.
//  4. Submits both command buffers, waits on both fences, then validates
//     the compute output (expected to be the bitwise inverse of the input)
//     and the rendered image against a software reference.
// Returns pass only if both the compute and draw results match.
tcu::TestStatus ConcurrentDraw::iterate (void)
{
	// Error codes are accumulated instead of returning early so that we
	// always wait on *both* fences before reporting a failure; bailing out
	// early could leave a fence (and its submitted work) hanging.
	enum
	{
		NO_MATCH_FOUND = ~((deUint32)0),
		ERROR_NONE = 0,
		ERROR_WAIT_COMPUTE = 1,
		ERROR_WAIT_DRAW = 2
	};
	struct Queue
	{
		VkQueue queue;
		deUint32 queueFamilyIndex;
	};
	const DeviceInterface& vk = m_context.getDeviceInterface();
	const deUint32 numValues = 1024;
	const InstanceInterface& instance = m_context.getInstanceInterface();
	const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
	tcu::TestLog& log = m_context.getTestContext().getLog();
	Move<VkDevice> computeDevice;
	std::vector<VkQueueFamilyProperties> queueFamilyProperties;
	VkDeviceCreateInfo deviceInfo;
	VkPhysicalDeviceFeatures deviceFeatures;
	const float queuePriority = 1.0f;
	VkDeviceQueueCreateInfo queueInfos;
	Queue computeQueue = { DE_NULL, (deUint32)NO_MATCH_FOUND };
	// Set up compute: pick the first queue family with compute support.
	queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instance, physicalDevice);
	for (deUint32 queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
	{
		if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
		{
			if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
				computeQueue.queueFamilyIndex = queueNdx;
		}
	}
	if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
		TCU_THROW(NotSupportedError, "Compute queue couldn't be created");
	// Request a single queue from the selected family.
	VkDeviceQueueCreateInfo queueInfo;
	deMemset(&queueInfo, 0, sizeof(queueInfo));
	queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	queueInfo.pNext = DE_NULL;
	queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
	queueInfo.queueFamilyIndex = computeQueue.queueFamilyIndex;
	queueInfo.queueCount = 1;
	queueInfo.pQueuePriorities = &queuePriority;
	queueInfos = queueInfo;
	// Create a separate logical device for the compute work, enabling the
	// same feature set the physical device reports.
	deMemset(&deviceInfo, 0, sizeof(deviceInfo));
	instance.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
	deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	deviceInfo.pNext = DE_NULL;
	deviceInfo.enabledExtensionCount = 0u;
	deviceInfo.ppEnabledExtensionNames = DE_NULL;
	deviceInfo.enabledLayerCount = 0u;
	deviceInfo.ppEnabledLayerNames = DE_NULL;
	deviceInfo.pEnabledFeatures = &deviceFeatures;
	deviceInfo.queueCreateInfoCount = 1;
	deviceInfo.pQueueCreateInfos = &queueInfos;
	computeDevice = createDevice(m_context.getPlatformInterface(), m_context.getInstance(), instance, physicalDevice, &deviceInfo);
	// NOTE(review): the context's DeviceInterface is reused for the newly
	// created compute device here and below — confirm the loaded entry
	// points are valid for a second device from the same physical device.
	vk.getDeviceQueue(*computeDevice, computeQueue.queueFamilyIndex, 0, &computeQueue.queue);
	// Create an input/output buffer.
	const VkPhysicalDeviceMemoryProperties memoryProperties = getPhysicalDeviceMemoryProperties(instance, physicalDevice);
	// Fix: previously this was a raw 'new SimpleAllocator(...)' that was
	// never deleted, leaking on every iteration. A stack instance is
	// destroyed when iterate() returns; it is declared before 'buffer' so
	// it outlives the allocation it backs.
	SimpleAllocator allocator(vk, *computeDevice, memoryProperties);
	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * numValues;
	const vkt::compute::Buffer buffer(vk, *computeDevice, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
	// Fill the buffer with random data, keeping a host-side copy for the
	// later comparison.
	typedef std::vector<deUint32> data_vector_t;
	data_vector_t inputData(numValues);
	{
		de::Random rnd(0x82ce7f);
		const Allocation& bufferAllocation = buffer.getAllocation();
		deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
		for (deUint32 i = 0; i < numValues; ++i)
		{
			deUint32 val = rnd.getUint32();
			inputData[i] = val;
			*bufferPtr++ = val;
		}
		flushAlloc(vk, *computeDevice, bufferAllocation);
	}
	// Create descriptor set: a single storage buffer visible to the compute stage.
	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, *computeDevice));
	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, *computeDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, *computeDevice, *descriptorPool, *descriptorSetLayout));
	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
		.update(vk, *computeDevice);
	// Perform the computation: build the compute pipeline and the barriers
	// guarding host write -> shader read and shader write -> host read.
	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, *computeDevice, m_context.getBinaryCollection().get("vulkan/draw/ConcurrentPayload.comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, *computeDevice, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(vkt::compute::makeComputePipeline(vk, *computeDevice, *pipelineLayout, *shaderModule));
	const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, *computeDevice, computeQueue.queueFamilyIndex));
	const Unique<VkCommandBuffer> computeCommandBuffer(allocateCommandBuffer(vk, *computeDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
	// Compute command buffer: barrier, single 1x1x1 dispatch, barrier.
	beginCommandBuffer(vk, *computeCommandBuffer);
	vk.cmdBindPipeline(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
	vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
	vk.cmdDispatch(*computeCommandBuffer, 1, 1, 1);
	vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
	endCommandBuffer(vk, *computeCommandBuffer);
	const VkSubmitInfo submitInfo =
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
		DE_NULL, // pNext
		0u, // waitSemaphoreCount
		DE_NULL, // pWaitSemaphores
		(const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
		1u, // commandBufferCount
		&computeCommandBuffer.get(), // pCommandBuffers
		0u, // signalSemaphoreCount
		DE_NULL // pSignalSemaphores
	};
	// Set up draw: bind the vertex buffer and pipeline prepared by the base
	// class and draw 6 vertices starting at index 2 (one quad; the first
	// two entries in m_data are guard vertices).
	const VkQueue drawQueue = m_context.getUniversalQueue();
	const VkDevice drawDevice = m_context.getDevice();
	beginRenderPass();
	const VkDeviceSize vertexBufferOffset = 0;
	const VkBuffer vertexBuffer = m_vertexBuffer->object();
	m_vk.cmdBindVertexBuffers(*m_cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
	m_vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
	m_vk.cmdDraw(*m_cmdBuffer, 6, 1, 2, 0);
	endRenderPass(m_vk, *m_cmdBuffer);
	endCommandBuffer(m_vk, *m_cmdBuffer);
	const VkCommandBuffer drawCommandBuffer = m_cmdBuffer.get();
	const bool useDeviceGroups = false;
	const deUint32 deviceMask = 1u;
	const Unique<VkFence> drawFence(createFence(vk, drawDevice));
	VkDeviceGroupSubmitInfo deviceGroupSubmitInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHR, // VkStructureType sType;
		DE_NULL, // const void* pNext;
		0u, // deUint32 waitSemaphoreCount;
		DE_NULL, // const deUint32* pWaitSemaphoreDeviceIndices;
		1u, // deUint32 commandBufferCount;
		&deviceMask, // const deUint32* pCommandBufferDeviceMasks;
		0u, // deUint32 signalSemaphoreCount;
		DE_NULL, // const deUint32* pSignalSemaphoreDeviceIndices;
	};
	const VkSubmitInfo drawSubmitInfo =
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
		useDeviceGroups ? &deviceGroupSubmitInfo : DE_NULL, // const void* pNext;
		0u, // deUint32 waitSemaphoreCount;
		DE_NULL, // const VkSemaphore* pWaitSemaphores;
		(const VkPipelineStageFlags*)DE_NULL, // const VkPipelineStageFlags* pWaitDstStageMask;
		1u, // deUint32 commandBufferCount;
		&drawCommandBuffer, // const VkCommandBuffer* pCommandBuffers;
		0u, // deUint32 signalSemaphoreCount;
		DE_NULL, // const VkSemaphore* pSignalSemaphores;
	};
	const Unique<VkFence> computeFence(createFence(vk, *computeDevice));
	// Submit both compute and draw queues back to back so the two workloads
	// are in flight at the same time.
	VK_CHECK(vk.queueSubmit(computeQueue.queue, 1u, &submitInfo, *computeFence));
	VK_CHECK(vk.queueSubmit(drawQueue, 1u, &drawSubmitInfo, *drawFence));
	int err = ERROR_NONE;
	if (VK_SUCCESS != vk.waitForFences(*computeDevice, 1u, &computeFence.get(), DE_TRUE, ~0ull))
		err = ERROR_WAIT_COMPUTE;
	if (VK_SUCCESS != vk.waitForFences(drawDevice, 1u, &drawFence.get(), DE_TRUE, ~0ull))
		err = ERROR_WAIT_DRAW;
	// Have to wait for all fences before calling fail, or some fence may be left hanging.
	if (err == ERROR_WAIT_COMPUTE)
		return tcu::TestStatus::fail("Failed waiting for compute queue fence.");
	if (err == ERROR_WAIT_DRAW)
		return tcu::TestStatus::fail("Failed waiting for draw queue fence.");
	// Validation - compute: every output word must be the bitwise inverse
	// of the corresponding input word.
	const Allocation& bufferAllocation = buffer.getAllocation();
	invalidateAlloc(vk, *computeDevice, bufferAllocation);
	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
	for (deUint32 ndx = 0; ndx < numValues; ++ndx)
	{
		const deUint32 res = bufferPtr[ndx];
		const deUint32 inp = inputData[ndx];
		const deUint32 ref = ~inp;
		if (res != ref)
		{
			std::ostringstream msg;
			msg << "Comparison failed (compute) for InOut.values[" << ndx << "] ref:" << ref << " res:" << res << " inp:" << inp;
			return tcu::TestStatus::fail(msg.str());
		}
	}
	// Validation - draw: build a reference image (black background, blue
	// quad inside the reference rectangle) and fuzzy-compare it against the
	// rendered color target.
	tcu::Texture2D referenceFrame(mapVkFormat(m_colorAttachmentFormat), (int)(0.5f + static_cast<float>(WIDTH)), (int)(0.5f + static_cast<float>(HEIGHT)));
	referenceFrame.allocLevel(0);
	const deInt32 frameWidth = referenceFrame.getWidth();
	const deInt32 frameHeight = referenceFrame.getHeight();
	tcu::clear(referenceFrame.getLevel(0), tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
	ReferenceImageCoordinates refCoords;
	for (int y = 0; y < frameHeight; y++)
	{
		// Map pixel centers to NDC [-1, 1].
		const float yCoord = (float)(y / (0.5 * frameHeight)) - 1.0f;
		for (int x = 0; x < frameWidth; x++)
		{
			const float xCoord = (float)(x / (0.5 * frameWidth)) - 1.0f;
			if ((yCoord >= refCoords.bottom &&
				 yCoord <= refCoords.top &&
				 xCoord >= refCoords.left &&
				 xCoord <= refCoords.right))
				referenceFrame.getLevel(0).setPixel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), x, y);
		}
	}
	const VkOffset3D zeroOffset = { 0, 0, 0 };
	const tcu::ConstPixelBufferAccess renderedFrame = m_colorTargetImage->readSurface(
		drawQueue, m_context.getDefaultAllocator(), VK_IMAGE_LAYOUT_GENERAL, zeroOffset, WIDTH, HEIGHT, VK_IMAGE_ASPECT_COLOR_BIT);
	qpTestResult res = QP_TEST_RESULT_PASS;
	if (!tcu::fuzzyCompare(log, "Result", "Image comparison result",
						   referenceFrame.getLevel(0), renderedFrame, 0.05f,
						   tcu::COMPARE_LOG_RESULT))
	{
		res = QP_TEST_RESULT_FAIL;
	}
	return tcu::TestStatus(res, qpGetTestResultName(res));
}
} // anonymous
// Group constructor; the actual test cases are added in init().
ConcurrentDrawTests::ConcurrentDrawTests (tcu::TestContext &testCtx)
: TestCaseGroup (testCtx, "concurrent", "concurrent drawing")
{
/* Left blank on purpose */
}
ConcurrentDrawTests::~ConcurrentDrawTests (void) {}
void ConcurrentDrawTests::init (void)
{
{
ConcurrentDraw::TestSpec testSpec;
testSpec.shaders[glu::SHADERTYPE_VERTEX] = "vulkan/draw/VertexFetch.vert";
testSpec.shaders[glu::SHADERTYPE_FRAGMENT] = "vulkan/draw/VertexFetch.frag";
testSpec.shaders[glu::SHADERTYPE_COMPUTE] = "vulkan/draw/ConcurrentPayload.comp";
testSpec.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
addChild(new InstanceFactory<ConcurrentDraw>(m_testCtx, "compute_and_triangle_list", "Draws triangle list while running a compute shader", testSpec));
}
}
} // DrawTests
} // vkt