Add tests for VK_NV_mesh_shader

Tests that verify multiple aspects of mesh shader pipelines work as
expected. This includes:

* Some basic smoke tests.
* API tests to check the new draw calls and different rules about them.
* Synchronization tests.
* Test a wide range of affected built-in variables.
* Miscellaneous tests:
  * Producing small and large amounts of different types of primitives.
  * Barriers.
  * Push constants.
  * Custom per-primitive attributes.
  * Complex structures in the task->mesh interface.

New tests:
dEQP-VK.mesh_shader.*

Components: Vulkan, Framework
VK-GL-CTS issue: 2991

Change-Id: I510296e4da326a2f0a3891d55a1116848d874c3f
diff --git a/ b/
index 986dea5..a4da5f2 100644
--- a/
+++ b/
@@ -209,6 +209,13 @@
 	external/vulkancts/modules/vulkan/memory_model/vktMemoryModelPadding.cpp \
 	external/vulkancts/modules/vulkan/memory_model/vktMemoryModelSharedLayout.cpp \
 	external/vulkancts/modules/vulkan/memory_model/vktMemoryModelSharedLayoutCase.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.cpp \
+	external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.cpp \
 	external/vulkancts/modules/vulkan/modifiers/vktModifiersTests.cpp \
 	external/vulkancts/modules/vulkan/multiview/vktMultiViewRenderPassUtil.cpp \
 	external/vulkancts/modules/vulkan/multiview/vktMultiViewRenderTests.cpp \
@@ -1250,6 +1257,7 @@
 	$(deqp_dir)/external/vulkancts/modules/vulkan/imageless_framebuffer \
 	$(deqp_dir)/external/vulkancts/modules/vulkan/memory \
 	$(deqp_dir)/external/vulkancts/modules/vulkan/memory_model \
+	$(deqp_dir)/external/vulkancts/modules/vulkan/mesh_shader \
 	$(deqp_dir)/external/vulkancts/modules/vulkan/modifiers \
 	$(deqp_dir)/external/vulkancts/modules/vulkan/multiview \
 	$(deqp_dir)/external/vulkancts/modules/vulkan \
diff --git a/android/cts/master/vk-master-2021-03-01.txt b/android/cts/master/vk-master-2021-03-01.txt
index 0b1d07b..6b3ea18 100644
--- a/android/cts/master/vk-master-2021-03-01.txt
+++ b/android/cts/master/vk-master-2021-03-01.txt
@@ -14,6 +14,7 @@
diff --git a/android/cts/master/vk-master-2021-03-01/mesh-shader.txt b/android/cts/master/vk-master-2021-03-01/mesh-shader.txt
new file mode 100644
index 0000000..0d171e7
--- /dev/null
+++ b/android/cts/master/vk-master-2021-03-01/mesh-shader.txt
@@ -0,0 +1,544 @@
diff --git a/android/cts/master/vk-master.txt b/android/cts/master/vk-master.txt
index cbaabe9..e07f469 100644
--- a/android/cts/master/vk-master.txt
+++ b/android/cts/master/vk-master.txt
@@ -19,6 +19,7 @@
diff --git a/android/cts/master/vk-master/mesh-shader.txt b/android/cts/master/vk-master/mesh-shader.txt
new file mode 100644
index 0000000..0d171e7
--- /dev/null
+++ b/android/cts/master/vk-master/mesh-shader.txt
@@ -0,0 +1,544 @@
diff --git a/executor/tools/xeExtractShaderPrograms.cpp b/executor/tools/xeExtractShaderPrograms.cpp
index 2d5378c..727ba87 100644
--- a/executor/tools/xeExtractShaderPrograms.cpp
+++ b/executor/tools/xeExtractShaderPrograms.cpp
@@ -66,6 +66,8 @@
 		case xe::ri::Shader::SHADERTYPE_MISS:				return "miss";
 		case xe::ri::Shader::SHADERTYPE_INTERSECTION:		return "sect";
 		case xe::ri::Shader::SHADERTYPE_CALLABLE:			return "call";
+		case xe::ri::Shader::SHADERTYPE_TASK:				return "task";
+		case xe::ri::Shader::SHADERTYPE_MESH:				return "mesh";
 			throw xe::Error("Invalid shader type");
diff --git a/executor/xeTestCaseResult.hpp b/executor/xeTestCaseResult.hpp
index 79f0465..6fbf792 100644
--- a/executor/xeTestCaseResult.hpp
+++ b/executor/xeTestCaseResult.hpp
@@ -326,6 +326,8 @@
diff --git a/executor/xeTestLogWriter.cpp b/executor/xeTestLogWriter.cpp
index c7e389a..f4cce6e 100644
--- a/executor/xeTestLogWriter.cpp
+++ b/executor/xeTestLogWriter.cpp
@@ -308,6 +308,8 @@
 				case ri::Shader::SHADERTYPE_MISS:				tagName = "MissShader";				break;
 				case ri::Shader::SHADERTYPE_INTERSECTION:		tagName = "IntersectionShader";		break;
 				case ri::Shader::SHADERTYPE_CALLABLE:			tagName = "CallableShader";			break;
+				case ri::Shader::SHADERTYPE_TASK:				tagName = "TaskShader";				break;
+				case ri::Shader::SHADERTYPE_MESH:				tagName = "MeshShader";				break;
 					throw Error("Unknown shader type");
diff --git a/executor/xeTestResultParser.cpp b/executor/xeTestResultParser.cpp
index c21ddd5..2f31601 100644
--- a/executor/xeTestResultParser.cpp
+++ b/executor/xeTestResultParser.cpp
@@ -161,7 +161,9 @@
 	{ 0x8c64a6be,	"ClosestHitShader",		ri::Shader::SHADERTYPE_CLOSEST_HIT		},
 	{ 0xb30ed398,	"MissShader",			ri::Shader::SHADERTYPE_MISS				},
 	{ 0x26150e53,	"IntersectionShader",	ri::Shader::SHADERTYPE_INTERSECTION		},
-	{ 0x7e50944c,	"CallableShader",		ri::Shader::SHADERTYPE_CALLABLE			}
+	{ 0x7e50944c,	"CallableShader",		ri::Shader::SHADERTYPE_CALLABLE			},
+	{ 0xc3a35d6f,	"TaskShader",			ri::Shader::SHADERTYPE_TASK				},
+	{ 0x925c7349,	"MeshShader",			ri::Shader::SHADERTYPE_MESH				},
 static const EnumMapEntry s_testTypeMap[] =
diff --git a/external/openglcts/modules/common/glcSpirvUtils.cpp b/external/openglcts/modules/common/glcSpirvUtils.cpp
index aff5e73..dc41878 100644
--- a/external/openglcts/modules/common/glcSpirvUtils.cpp
+++ b/external/openglcts/modules/common/glcSpirvUtils.cpp
@@ -59,7 +59,8 @@
 	static const EShLanguage stageMap[] = {
 		EShLangVertex, EShLangFragment, EShLangGeometry, EShLangTessControl, EShLangTessEvaluation, EShLangCompute,
-		EShLangRayGen, EShLangAnyHit, EShLangClosestHit, EShLangMiss, EShLangIntersect, EShLangCallable
+		EShLangRayGen, EShLangAnyHit, EShLangClosestHit, EShLangMiss, EShLangIntersect, EShLangCallable, EShLangTaskNV,
+		EShLangMeshNV
 	return de::getSizedArrayElement<glu::SHADERTYPE_LAST>(stageMap, type);
diff --git a/external/vulkancts/framework/vulkan/vkDeviceExtensions.inl b/external/vulkancts/framework/vulkan/vkDeviceExtensions.inl
index 4fb39e7..5ce6155 100644
--- a/external/vulkancts/framework/vulkan/vkDeviceExtensions.inl
+++ b/external/vulkancts/framework/vulkan/vkDeviceExtensions.inl
@@ -83,5 +83,6 @@
+	"VK_NV_mesh_shader",
diff --git a/external/vulkancts/framework/vulkan/vkObjUtil.cpp b/external/vulkancts/framework/vulkan/vkObjUtil.cpp
index f178c12..8e21fb2 100644
--- a/external/vulkancts/framework/vulkan/vkObjUtil.cpp
+++ b/external/vulkancts/framework/vulkan/vkObjUtil.cpp
@@ -26,6 +26,7 @@
 #include "vkImageUtil.hpp"
 #include "vkObjUtil.hpp"
 #include "vkTypeUtil.hpp"
+#include "vkQueryUtil.hpp"
 #include "tcuVector.hpp"
@@ -411,6 +412,120 @@
 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
+// Builds a graphics pipeline for the mesh shading path: an optional task stage, a mandatory mesh stage and an optional
+// fragment stage. No vertex input, input assembly or tessellation state is provided.
+//
+// - taskShaderModule: the task stage is skipped when this is DE_NULL.
+// - fragmentShaderModule: the fragment stage is skipped when this is DE_NULL; the default rasterization state then
+//   enables rasterizer discard.
+// - viewports/scissors: when empty, the corresponding state is listed as dynamic in the default dynamic state.
+// - The trailing *CreateInfo pointers replace the defaults built below when they are not null.
+Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk,
+									   const VkDevice									device,
+									   const VkPipelineLayout							pipelineLayout,
+									   const VkShaderModule								taskShaderModule,
+									   const VkShaderModule								meshShaderModule,
+									   const VkShaderModule								fragmentShaderModule,
+									   const VkRenderPass								renderPass,
+									   const std::vector<VkViewport>&					viewports,
+									   const std::vector<VkRect2D>&						scissors,
+									   const deUint32									subpass,
+									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
+									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo,
+									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo,
+									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo,
+									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo)
+	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
+	// Template stage description; the stage and module members are overwritten for each stage added below.
+	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
+	{
+		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
+		nullptr,												// const void*                         pNext
+		0u,														// VkPipelineShaderStageCreateFlags    flags
+		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
+		DE_NULL,												// VkShaderModule                      module
+		"main",													// const char*                         pName
+		nullptr													// const VkSpecializationInfo*         pSpecializationInfo
+	};
+	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
+	if (taskShaderModule != DE_NULL)
+	{
+		stageCreateInfo.stage	= VK_SHADER_STAGE_TASK_BIT_NV;
+		stageCreateInfo.module	= taskShaderModule;
+		pipelineShaderStageParams.push_back(stageCreateInfo);
+	}
+	// The mesh stage is always present.
+	{
+		stageCreateInfo.stage	= VK_SHADER_STAGE_MESH_BIT_NV;
+		stageCreateInfo.module	= meshShaderModule;
+		pipelineShaderStageParams.push_back(stageCreateInfo);
+	}
+	if (fragmentShaderModule != DE_NULL)
+	{
+		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
+		stageCreateInfo.module	= fragmentShaderModule;
+		pipelineShaderStageParams.push_back(stageCreateInfo);
+	}
+	// NOTE(review): with empty vectors the counts below are 0 while the state is made dynamic; confirm callers that
+	// rely on dynamic viewports/scissors expect this.
+	VkPipelineViewportStateCreateInfo viewportStateCreateInfo = initVulkanStructure();
+	viewportStateCreateInfo.viewportCount	= static_cast<uint32_t>(viewports.size());
+	viewportStateCreateInfo.pViewports		= de::dataOrNull(viewports);
+	viewportStateCreateInfo.scissorCount	= static_cast<uint32_t>(scissors.size());
+	viewportStateCreateInfo.pScissors		= de::dataOrNull(scissors);
+	VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault = initVulkanStructure();
+	rasterizationStateCreateInfoDefault.rasterizerDiscardEnable	= disableRasterization;
+	rasterizationStateCreateInfoDefault.lineWidth				= 1.0f;
+	VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault = initVulkanStructure();
+	multisampleStateCreateInfoDefault.rasterizationSamples	= VK_SAMPLE_COUNT_1_BIT;
+	multisampleStateCreateInfoDefault.minSampleShading		= 1.0f;
+	VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault = initVulkanStructure();
+	depthStencilStateCreateInfoDefault.maxDepthBounds = 1.0f;
+	VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {};
+	// A zero write mask would silently drop every color write made through the default blend state.
+	colorBlendAttachmentState.colorWriteMask = (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
+	VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault = initVulkanStructure();
+	colorBlendStateCreateInfoDefault.attachmentCount	= 1u;
+	colorBlendStateCreateInfoDefault.pAttachments		= &colorBlendAttachmentState;
+	// Viewports and scissors that were not supplied statically become dynamic state.
+	std::vector<VkDynamicState> dynamicStates;
+	if (viewports.empty())
+		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
+	if (scissors.empty())
+		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
+	VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault = initVulkanStructure();
+	dynamicStateCreateInfoDefault.dynamicStateCount	= static_cast<uint32_t>(dynamicStates.size());
+	dynamicStateCreateInfoDefault.pDynamicStates	= de::dataOrNull(dynamicStates);
+	// No default dynamic state structure is passed at all when nothing is dynamic.
+	const VkPipelineDynamicStateCreateInfo*	dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? nullptr : &dynamicStateCreateInfoDefault;
+	const VkGraphicsPipelineCreateInfo		pipelineCreateInfo					=
+	{
+		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
+		nullptr,																								// const void*                                      pNext
+		0u,																										// VkPipelineCreateFlags                            flags
+		static_cast<uint32_t>(pipelineShaderStageParams.size()),												// deUint32                                         stageCount
+		de::dataOrNull(pipelineShaderStageParams),																// const VkPipelineShaderStageCreateInfo*           pStages
+		nullptr,																								// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
+		nullptr,																								// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
+		nullptr,																								// const VkPipelineTessellationStateCreateInfo*     pTessellationState
+		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
+		rasterizationStateCreateInfo	? rasterizationStateCreateInfo	: &rasterizationStateCreateInfoDefault,	// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
+		multisampleStateCreateInfo		? multisampleStateCreateInfo	: &multisampleStateCreateInfoDefault,	// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
+		depthStencilStateCreateInfo		? depthStencilStateCreateInfo	: &depthStencilStateCreateInfoDefault,	// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
+		colorBlendStateCreateInfo		? colorBlendStateCreateInfo		: &colorBlendStateCreateInfoDefault,	// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
+		dynamicStateCreateInfo			? dynamicStateCreateInfo		: dynamicStateCreateInfoDefaultPtr,		// const VkPipelineDynamicStateCreateInfo*          pDynamicState
+		pipelineLayout,																							// VkPipelineLayout                                 layout
+		renderPass,																								// VkRenderPass                                     renderPass
+		subpass,																								// deUint32                                         subpass
+		DE_NULL,																								// VkPipeline                                       basePipelineHandle
+		0																										// deInt32                                          basePipelineIndex;
+	};
+	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
 Move<VkRenderPass> makeRenderPass (const DeviceInterface&				vk,
 								   const VkDevice						device,
 								   const VkFormat						colorFormat,
diff --git a/external/vulkancts/framework/vulkan/vkObjUtil.hpp b/external/vulkancts/framework/vulkan/vkObjUtil.hpp
index 19b51ef..c8286fa 100644
--- a/external/vulkancts/framework/vulkan/vkObjUtil.hpp
+++ b/external/vulkancts/framework/vulkan/vkObjUtil.hpp
@@ -78,6 +78,22 @@
 									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo = DE_NULL,
 									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo = DE_NULL);
+Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk, // Mesh shading variant: task (optional), mesh and fragment (optional) stages.
+									   const VkDevice									device,
+									   const VkPipelineLayout							pipelineLayout,
+									   const VkShaderModule								taskShaderModule, // DE_NULL omits the task stage.
+									   const VkShaderModule								meshShaderModule,
+									   const VkShaderModule								fragmentShaderModule, // DE_NULL omits the fragment stage.
+									   const VkRenderPass								renderPass,
+									   const std::vector<VkViewport>&					viewports, // Empty: viewport becomes dynamic state by default.
+									   const std::vector<VkRect2D>&						scissors, // Empty: scissor becomes dynamic state by default.
+									   const deUint32									subpass = 0u,
+									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo = nullptr, // nullptr selects a built-in default, as for the pointers below.
+									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo = nullptr,
+									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo = nullptr,
+									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo = nullptr,
+									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo = nullptr);
 Move<VkRenderPass> makeRenderPass (const DeviceInterface&				vk,
 								   const VkDevice						device,
 								   const VkFormat						colorFormat					= VK_FORMAT_UNDEFINED,
diff --git a/external/vulkancts/framework/vulkan/vkPrograms.cpp b/external/vulkancts/framework/vulkan/vkPrograms.cpp
index 2e41282..9ef8222 100644
--- a/external/vulkancts/framework/vulkan/vkPrograms.cpp
+++ b/external/vulkancts/framework/vulkan/vkPrograms.cpp
@@ -738,6 +738,8 @@
 	return de::getSizedArrayElement<glu::SHADERTYPE_LAST>(s_shaderStages, shaderType);
diff --git a/external/vulkancts/framework/vulkan/vkPrograms.hpp b/external/vulkancts/framework/vulkan/vkPrograms.hpp
index 9b8f344..deaf13e 100644
--- a/external/vulkancts/framework/vulkan/vkPrograms.hpp
+++ b/external/vulkancts/framework/vulkan/vkPrograms.hpp
@@ -217,7 +217,7 @@
 void					disassembleProgram	(const ProgramBinary& program, std::ostream* dst);
 bool					validateProgram		(const ProgramBinary& program, std::ostream* dst, const SpirvValidatorOptions&);
-Move<VkShaderModule>	createShaderModule	(const DeviceInterface& deviceInterface, VkDevice device, const ProgramBinary& binary, VkShaderModuleCreateFlags flags);
+Move<VkShaderModule>	createShaderModule	(const DeviceInterface& deviceInterface, VkDevice device, const ProgramBinary& binary, VkShaderModuleCreateFlags flags = 0u);
 glu::ShaderType			getGluShaderType	(VkShaderStageFlagBits shaderStage);
 VkShaderStageFlagBits	getVkShaderStage	(glu::ShaderType shaderType);
diff --git a/external/vulkancts/framework/vulkan/vkShaderToSpirV.cpp b/external/vulkancts/framework/vulkan/vkShaderToSpirV.cpp
index 469c2f9..26e7681 100644
--- a/external/vulkancts/framework/vulkan/vkShaderToSpirV.cpp
+++ b/external/vulkancts/framework/vulkan/vkShaderToSpirV.cpp
@@ -62,6 +62,8 @@
+		EShLangTaskNV,
+		EShLangMeshNV,
 	return de::getSizedArrayElement<glu::SHADERTYPE_LAST>(stageMap, type);
diff --git a/external/vulkancts/modules/vulkan/CMakeLists.txt b/external/vulkancts/modules/vulkan/CMakeLists.txt
index c000f5c..9015d71 100644
--- a/external/vulkancts/modules/vulkan/CMakeLists.txt
+++ b/external/vulkancts/modules/vulkan/CMakeLists.txt
@@ -44,6 +44,7 @@
@@ -91,6 +92,7 @@
+	mesh_shader
@@ -158,6 +160,7 @@
+	deqp-vk-mesh-shader
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/CMakeLists.txt b/external/vulkancts/modules/vulkan/mesh_shader/CMakeLists.txt
new file mode 100644
index 0000000..0468d8a
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/CMakeLists.txt
@@ -0,0 +1,28 @@
+	vktMeshShaderTests.cpp
+	vktMeshShaderTests.hpp
+	vktMeshShaderSyncTests.cpp
+	vktMeshShaderSyncTests.hpp
+	vktMeshShaderSmokeTests.cpp
+	vktMeshShaderSmokeTests.hpp
+	vktMeshShaderApiTests.cpp
+	vktMeshShaderApiTests.hpp
+	vktMeshShaderPropertyTests.cpp
+	vktMeshShaderPropertyTests.hpp
+	vktMeshShaderBuiltinTests.cpp
+	vktMeshShaderBuiltinTests.hpp
+	vktMeshShaderMiscTests.cpp
+	vktMeshShaderMiscTests.hpp
+	)
+	tcutil
+	vkutil
+	)
+add_library(deqp-vk-mesh-shader STATIC ${DEQP_VK_MESH_SHADER_SRCS})
+target_link_libraries(deqp-vk-mesh-shader ${DEQP_VK_MESH_SHADER_LIBS})
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.cpp
new file mode 100644
index 0000000..62f160b
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.cpp
@@ -0,0 +1,816 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader API Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderApiTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkImageWithMemory.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkObjUtil.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "tcuMaybe.hpp"
+#include "tcuTestLog.hpp"
+#include "tcuImageCompare.hpp"
+#include "deRandom.hpp"
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+#include <iterator>
+#include <limits>
+namespace vkt
+namespace MeshShader
+using namespace vk;
+using GroupPtr				= de::MovePtr<tcu::TestCaseGroup>;
+using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
+using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
+enum class DrawType // Flavor of mesh tasks draw call exercised by a test variant.
+	DRAW = 0,				// Direct draw.
+	DRAW_INDIRECT,			// Indirect draw (vkCmdDrawMeshTasksIndirectNV).
+	DRAW_INDIRECT_COUNT,	// Indirect draw with a count buffer (vkCmdDrawMeshTasksIndirectCountNV).
+std::ostream& operator<< (std::ostream& stream, DrawType drawType) // Writes the lowercase name of the draw type and returns the stream.
+	switch (drawType)
+	{
+	case DrawType::DRAW:				stream << "draw";					break;
+	case DrawType::DRAW_INDIRECT:		stream << "draw_indirect";			break;
+	case DrawType::DRAW_INDIRECT_COUNT:	stream << "draw_indirect_count";	break;
+	default: DE_ASSERT(false); break; // Unexpected enumerator; nothing is written.
+	}
+	return stream;
+// This helps test the maxDrawCount rule for the DRAW_INDIRECT_COUNT case.
+enum class IndirectCountLimitType // Which of the two values ends up determining the number of draws.
+	BUFFER_VALUE = 0,		// The actual count will be given by the count buffer.
+	MAX_COUNT,				// The actual count will be given by the maxDrawCount argument passed to the draw command.
+struct IndirectArgs // Extra arguments for the indirect draw types.
+	uint32_t offset; // presumably the byte offset of the first argument structure in the indirect buffer; confirm against iterate()
+	uint32_t stride; // presumably the byte stride between argument structures; confirm against iterate()
+struct TestParams // Parameters describing one test variant.
+	DrawType							drawType;				// Draw call flavor under test.
+	uint32_t							seed;					// Seed for the pseudorandom generator used by the test instance.
+	uint32_t							drawCount;				// Equivalent to taskCount or drawCount.
+	uint32_t							firstTask;				// Equivalent to firstTask in every call.
+	tcu::Maybe<IndirectArgs>			indirectArgs;			// Only used for DRAW_INDIRECT*.
+	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;		// Only used for DRAW_INDIRECT_COUNT.
+	tcu::Maybe<uint32_t>				indirectCountOffset;	// Only used for DRAW_INDIRECT_COUNT.
+	bool								useTask;				// Whether a task shader is generated and required from the device.
+// The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single
+// framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel.
+// Note: the total framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each workgroup will
+// generate a single mesh workgroup using a push constant instead of a compile-time constant.
+// When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer.
+// When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in
+// the case of drawCount==0. Each draw will spawn the needed number of tasks to fill the whole framebuffer. In addition, in order to
+// make all argument structures different, the number of tasks in each draw count will be slightly different and assigned
+// pseudorandomly.
+// DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full
+// DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full.
+//  * With offset 0 and pseudorandom (multiples of 4).
+//  * With stride adding a padding of 0 and pseudorandom (multiples of 4).
+// DRAW_INDIRECT_COUNT: same as indirect in two variants:
+//  1. Passing the count in a buffer with a large maximum.
+//  2. Passing a large value in the buffer and limiting it with the maximum.
+class MeshApiCase : public vkt::TestCase // Test case: generates the shaders, checks device support and creates the MeshApiInstance.
+					MeshApiCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
+						: vkt::TestCase	(testCtx, name, description)
+						, m_params		(params)
+						{}
+	virtual			~MeshApiCase	(void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override;
+	void			checkSupport	(Context& context) const override;
+	TestInstance*	createInstance	(Context& context) const override;
+	TestParams		m_params; // Copy of the variant parameters, forwarded to the instance.
+class MeshApiInstance : public vkt::TestInstance // Test instance; the test itself runs in iterate().
+						MeshApiInstance		(Context& context, const TestParams& params)
+							: vkt::TestInstance	(context)
+							, m_params			(params)
+							{}
+	virtual				~MeshApiInstance	(void) {}
+	tcu::TestStatus		iterate				(void) override;
+	TestParams			m_params; // Copy of the variant parameters.
+TestInstance* MeshApiCase::createInstance (Context& context) const // Returns a newly allocated MeshApiInstance carrying the same parameters.
+	return new MeshApiInstance(context, m_params);
+struct PushConstantData // Push constant layout: the first three members form the mesh range, the last two the task range.
+	uint32_t width;
+	uint32_t height;
+	uint32_t firstTaskMesh; // Declared as "firstTask" in the mesh shader push constant block.
+	uint32_t one; // First member of the task range; the task shader declares it at offset 12.
+	uint32_t firstTaskTask; // Declared as "firstTask" (offset 16) in the task shader push constant block.
+	std::vector<VkPushConstantRange> getRanges (bool includeTask) const // Returns the mesh range, followed by the task range when includeTask is true.
+	{
+		constexpr uint32_t offsetMesh = 0u;
+		constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one));
+		constexpr uint32_t sizeMesh = offsetTask; // The mesh range ends where the task range starts.
+		constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask;
+		const VkPushConstantRange meshRange =
+		{
+			VK_SHADER_STAGE_MESH_BIT_NV,	//	VkShaderStageFlags	stageFlags;
+			offsetMesh,						//	uint32_t			offset;
+			sizeMesh,						//	uint32_t			size;
+		};
+		const VkPushConstantRange taskRange =
+		{
+			VK_SHADER_STAGE_TASK_BIT_NV,	//	VkShaderStageFlags	stageFlags;
+			offsetTask,						//	uint32_t			offset;
+			sizeTask,						//	uint32_t			size;
+		};
+		std::vector<VkPushConstantRange> ranges (1u, meshRange); // The mesh range is always present.
+		if (includeTask)
+			ranges.push_back(taskRange);
+		return ranges;
+	}
+// Generates the GLSL sources for the test: a task shader (only when m_params.useTask is set), the mesh shader and the
+// fragment shader, registered as "task", "mesh" and "frag".
+void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Task data block shared by the task shader ("out") and the mesh shader ("in").
+	const std::string taskDataDecl =
+		"taskNV TaskData {\n"
+		"    uint blockNumber;\n"
+		"    uint blockRow;\n"
+		"} td;\n"
+		;
+	// Task shader if needed.
+	if (m_params.useTask)
+	{
+		std::ostringstream task;
+		task
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "\n"
+			<< "layout (push_constant, std430) uniform TaskPushConstantBlock {\n"
+			<< "    layout (offset=12) uint one;\n"
+			<< "    layout (offset=16) uint firstTask;\n"
+			<< "} pc;\n"
+			<< "\n"
+			<< "out " << taskDataDecl
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			// Each task workgroup launches a single mesh workgroup; the count is read from the "one" push constant
+			// instead of being a compile-time constant.
+			<< "    gl_TaskCountNV  = pc.one;\n"
+			<< "    td.blockNumber  = uint(gl_DrawID);\n"
+			<< "    td.blockRow     = gl_WorkGroupID.x - pc.firstTask;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Mesh shader.
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=32) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=96, max_primitives=32) out;\n"
+			<< "\n"
+			<< "layout (push_constant, std430) uniform MeshPushConstantBlock {\n"
+			<< "    uint width;\n"
+			<< "    uint height;\n"
+			<< "    uint firstTask;\n"
+			<< "} pc;\n"
+			<< "\n"
+			<< "layout (location=0) perprimitiveNV out vec4 primitiveColor[];\n"
+			<< "\n"
+			<< (m_params.useTask ? ("in " + taskDataDecl): "")
+			<< "\n"
+			<< "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n"
+			<< "    uint blockSize[];\n"
+			<< "} bsz;\n"
+			<< "\n"
+			<< "uint startOfBlock (uint blockNumber)\n"
+			<< "{\n"
+			<< "    uint start = 0;\n"
+			<< "    for (uint i = 0; i < blockNumber; i++)\n"
+			<< "        start += bsz.blockSize[i];\n"
+			<< "    return start;\n"
+			<< "}\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			// Without a task shader the block number and row come straight from the mesh shader built-ins.
+			<< "    const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n"
+			<< "    const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "(gl_WorkGroupID.x - pc.firstTask)") << ";\n"
+			<< "\n"
+			<< "    // Each workgroup will fill one row, and each invocation will generate a\n"
+			<< "    // triangle around the pixel center in each column.\n"
+			<< "    const uint row = startOfBlock(blockNumber) + blockRow;\n"
+			<< "    const uint col = gl_LocalInvocationID.x;\n"
+			<< "\n"
+			<< "    const float fHeight = float(pc.height);\n"
+			<< "    const float fWidth = float(pc.width);\n"
+			<< "\n"
+			<< "    // Pixel coordinates, normalized.\n"
+			<< "    const float rowNorm = (float(row) + 0.5) / fHeight;\n"
+			<< "    const float colNorm = (float(col) + 0.5) / fWidth;\n"
+			<< "\n"
+			<< "    // Framebuffer coordinates.\n"
+			<< "    const float coordX = (colNorm * 2.0) - 1.0;\n"
+			<< "    const float coordY = (rowNorm * 2.0) - 1.0;\n"
+			<< "\n"
+			<< "    const float pixelWidth = 2.0 / fWidth;\n"
+			<< "    const float pixelHeight = 2.0 / fHeight;\n"
+			<< "\n"
+			<< "    const float offsetX = pixelWidth / 2.0;\n"
+			<< "    const float offsetY = pixelHeight / 2.0;\n"
+			<< "\n"
+			<< "    const uint baseIndex = col*3;\n"
+			<< "    const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n"
+			<< "\n"
+			<< "    gl_PrimitiveCountNV = 32u;\n"
+			<< "    primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[indices.x] = indices.x;\n"
+			<< "    gl_PrimitiveIndicesNV[indices.y] = indices.y;\n"
+			<< "    gl_PrimitiveIndicesNV[indices.z] = indices.z;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Frag shader.
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) perprimitiveNV in vec4 primitiveColor;\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    outColor = primitiveColor;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+void MeshApiCase::checkSupport (Context& context) const // Rejects the variant with NotSupportedError when a feature it needs is missing.
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	const auto& meshFeatures = context.getMeshShaderFeatures();
+	if (!meshFeatures.meshShader)
+		TCU_THROW(NotSupportedError, "Mesh shaders not supported");
+	if (m_params.useTask && !meshFeatures.taskShader) // Task shader support only matters for variants that use one.
+		TCU_THROW(NotSupportedError, "Task shaders not supported");
+	// VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02718
+	if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u) // More than one indirect draw needs multiDrawIndirect.
+	{
+		const auto& features = context.getDeviceFeatures();
+		if (!features.multiDrawIndirect)
+			TCU_THROW(NotSupportedError, "Indirect multi-draws not supported");
+	}
+	// VUID-vkCmdDrawMeshTasksIndirectCountNV-None-04445
+	if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
+		context.requireDeviceFunctionality("VK_KHR_draw_indirect_count");
+template <typename T> // Creates a host-visible buffer with "elements" copied every max(sizeof(T), stride) bytes from "offset", followed by "endPadding" bytes of 0xFF.
+BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding)
+	const auto elementSize	= static_cast<uint32_t>(sizeof(T));
+	const auto actualStride	= std::max(elementSize, stride); // Never place elements closer together than their own size.
+	const auto bufferSize	= static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding);
+	const auto bufferInfo	= makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage);
+	BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible));
+	auto& bufferAlloc	= buffer->getAllocation();
+	char* bufferDataPtr	= reinterpret_cast<char*>(bufferAlloc.getHostPtr());
+	char* itr = bufferDataPtr + offset; // Bytes before "offset" are not written here.
+	for (const auto& elem : elements)
+	{
+		deMemcpy(itr, &elem, sizeof(elem));
+		itr += actualStride;
+	}
+	if (endPadding > 0u)
+		deMemset(itr, 0xFF, endPadding); // itr now points right after the last element slot.
+	flushAlloc(vkd, device, bufferAlloc);
+	return buffer;
+VkExtent3D getExtent () // Fixed framebuffer size: 32 columns by 64 rows, depth 1.
+	return makeExtent3D(32u, 64u, 1u);
+tcu::TestStatus MeshApiInstance::iterate (void) // Draws with the call selected by m_params.drawType, copies the color buffer to a host buffer and compares it against a generated reference image.
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto		extent		= getExtent();
+	const auto		iExtent3D	= tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth));
+	const auto		iExtent2D	= tcu::IVec2(iExtent3D.x(), iExtent3D.y());
+	const auto		format		= VK_FORMAT_R8G8B8A8_UNORM;
+	const auto		tcuFormat	= mapVkFormat(format);
+	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 1.0f);
+	const float		colorThres	= 0.005f; // 1/255 < 0.005 < 2/255
+	const tcu::Vec4	threshold	(colorThres, colorThres, 0.0f, 0.0f); // Nonzero threshold only for the first two components, the ones that vary in the reference image below.
+	ImageWithMemoryPtr	colorBuffer;
+	Move<VkImageView>	colorBufferView;
+	{
+		const VkImageCreateInfo colorBufferInfo =
+		{
+			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
+			nullptr,								//	const void*				pNext;
+			0u,										//	VkImageCreateFlags		flags;
+			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+			format,									//	VkFormat				format;
+			extent,									//	VkExtent3D				extent;
+			1u,										//	uint32_t				mipLevels;
+			1u,										//	uint32_t				arrayLayers;
+			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+			colorUsage,								//	VkImageUsageFlags		usage;
+			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+			0u,										//	uint32_t				queueFamilyIndexCount;
+			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+		};
+		colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
+		colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR);
+	}
+	// Prepare buffer containing the array of block sizes: max(1, drawCount) entries that add up to extent.height.
+	de::Random				rnd				(m_params.seed);
+	std::vector<uint32_t>	blockSizes;
+	const uint32_t			vectorSize		= std::max(1u, m_params.drawCount); // At least one block, even when drawCount is 0.
+	const uint32_t			largeDrawCount	= vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below.
+	const uint32_t			evenBlockSize	= extent.height / vectorSize;
+	uint32_t				remainingRows	= extent.height;
+	blockSizes.reserve(vectorSize);
+	for (uint32_t i = 0; i < vectorSize - 1u; ++i) // Every block except the last gets a size from rnd.getInt(1, evenBlockSize).
+	{
+		const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize));
+		remainingRows -= blockSize;
+		blockSizes.push_back(blockSize);
+	}
+	blockSizes.push_back(remainingRows); // The last block takes all the rows that remain.
+	const auto			blockSizesBufferSize	= static_cast<VkDeviceSize>(de::dataSize(blockSizes));
+	BufferWithMemoryPtr	blockSizesBuffer		= makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u); // Offset 0, stride 0 (tightly packed), no end padding.
+	// Descriptor set layout, pool and set.
+	DescriptorSetLayoutBuilder layoutBuilder;
+	const auto setLayout =, device);
+	DescriptorPoolBuilder poolBuilder;
+	const auto descriptorPool =, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
+	// Update descriptor set: binding 0 is the block sizes storage buffer.
+	{
+		DescriptorSetUpdateBuilder updateBuilder;
+		const auto location				= DescriptorSetUpdateBuilder::Location::binding(0u);
+		const auto descriptorBufferInfo	= makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize);
+		updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo);
+		updateBuilder.update(vkd, device);
+	}
+	// Pipeline layout.
+	PushConstantData	pcData;
+	const auto			pcRanges		= pcData.getRanges(m_params.useTask); // Push constant ranges depend on whether a task shader is used.
+	const auto			pipelineLayout	= makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
+	// Push constants.
+	pcData.width			= extent.width;
+	pcData.height			= extent.height;
+	pcData.firstTaskMesh	= m_params.firstTask;
+				= 1u;
+	pcData.firstTaskTask	= m_params.firstTask;
+	// Render pass and framebuffer.
+	const auto renderPass	= makeRenderPass(vkd, device, format);
+	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height);
+	// Pipeline.
+	Move<VkShaderModule> taskModule;
+	Move<VkShaderModule> meshModule;
+	Move<VkShaderModule> fragModule;
+	const auto& binaries = m_context.getBinaryCollection();
+	if (m_params.useTask) // The task module stays empty unless the test uses a task shader.
+		taskModule = createShaderModule(vkd, device, binaries.get("task"));
+	meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
+	fragModule = createShaderModule(vkd, device, binaries.get("frag"));
+	const std::vector<VkViewport>	viewports	(1u, makeViewport(extent));
+	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(extent));
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskModule.get(), meshModule.get(), fragModule.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	// Indirect and count buffers if needed.
+	BufferWithMemoryPtr indirectBuffer;
+	BufferWithMemoryPtr countBuffer;
+	if (m_params.drawType != DrawType::DRAW) // Every draw type other than DRAW reads its arguments from a buffer.
+	{
+		// Indirect draws.
+		DE_ASSERT(static_cast<bool>(m_params.indirectArgs));
+		const auto& indirectArgs = m_params.indirectArgs.get();
+		// Check stride and offset validity.
+		DE_ASSERT(indirectArgs.offset % 4u == 0u);
+		DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV))));
+		// Prepare struct vector, which will be converted to a buffer with the proper stride and offset later.
+		std::vector<VkDrawMeshTasksIndirectCommandNV> commands;
+		commands.reserve(blockSizes.size());
+		std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands),
+			[this](uint32_t blockSize) { return VkDrawMeshTasksIndirectCommandNV{blockSize, this->m_params.firstTask}; }); // One command per block: the block size and the test's firstTask.
+		const auto padding	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV)); // One extra command worth of bytes after the last command.
+		indirectBuffer		= makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding);
+		// Prepare count buffer if needed.
+		if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
+		{
+			DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit));
+			DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset));
+			const auto countBufferValue	= ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE)
+										? m_params.drawCount
+										: largeDrawCount); // With BUFFER_VALUE the count buffer holds drawCount; otherwise it holds the larger value.
+			const std::vector<uint32_t> singleCount (1u, countBufferValue);
+			countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u);
+		}
+	}
+	// Submit commands.
+	beginCommandBuffer(vkd, cmdBuffer);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(),, clearColor);
+	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
+	{
+		const char* pcDataPtr = reinterpret_cast<const char*>(&pcData);
+		for (const auto& range : pcRanges) // Push each range separately, from its own offset inside pcData.
+			vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset);
+	}
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	if (m_params.drawType == DrawType::DRAW)
+	{
+		vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.drawCount, m_params.firstTask); // For direct draws, drawCount is passed as the task count.
+	}
+	else if (m_params.drawType == DrawType::DRAW_INDIRECT)
+	{
+		const auto& indirectArgs = m_params.indirectArgs.get();
+		vkd.cmdDrawMeshTasksIndirectNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride);
+	}
+	else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT)
+	{
+		const auto& indirectArgs		= m_params.indirectArgs.get();
+		const auto& indirectCountOffset	= m_params.indirectCountOffset.get();
+		const auto& indirectCountLimit	= m_params.indirectCountLimit.get();
+		const auto maxCount	= ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT)
+							? m_params.drawCount
+							: largeDrawCount); // With MAX_COUNT the max count argument holds drawCount; otherwise it holds the larger value.
+		vkd.cmdDrawMeshTasksIndirectCountNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride);
+	}
+	else
+		DE_ASSERT(false); // Unknown draw type.
+	endRenderPass(vkd, cmdBuffer);
+	// Output buffer to extract the color buffer.
+	BufferWithMemoryPtr	outBuffer;
+	void*				outBufferData = nullptr;
+	{
+		const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
+		const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+		const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
+		outBuffer					= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
+		outBufferData				= outBuffer->getAllocation().getHostPtr();
+	}
+	copyImageToBuffer(vkd, cmdBuffer, colorBuffer->get(), outBuffer->get(), iExtent2D);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Generate reference image and compare.
+	{
+		auto&						log				= m_context.getTestContext().getLog();
+		auto&						outBufferAlloc	= outBuffer->getAllocation();
+		tcu::ConstPixelBufferAccess	result			(tcuFormat, iExtent3D, outBufferData);
+		tcu::TextureLevel			referenceLevel	(tcuFormat, iExtent3D.x(), iExtent3D.y());
+		const auto					reference		= referenceLevel.getAccess();
+		const auto					setName			= de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task");
+		const auto					fHeight			= static_cast<float>(extent.height);
+		const auto					fWidth			= static_cast<float>(extent.width);
+		invalidateAlloc(vkd, device, outBufferAlloc); // Invalidate before the host reads the copied pixels.
+		for (int y = 0; y < iExtent3D.y(); ++y)
+		for (int x = 0; x < iExtent3D.x(); ++x)
+		{
+			const tcu::Vec4 refColor	= ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount)))
+										? clearColor // Clear color when drawCount is 0 or, for DRAW, on rows with index >= drawCount.
+										: tcu::Vec4(
+											// These match the per-primitive color set by the mesh shader.
+											(static_cast<float>(y) + 0.5f) / fHeight,
+											(static_cast<float>(x) + 0.5f) / fWidth,
+											0.0f,
+											1.0f));
+			reference.setPixel(refColor, x, y);
+		}
+		if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR))
+			return tcu::TestStatus::fail("Image comparison failed; check log for details");
+	}
+	return tcu::TestStatus::pass("Pass");
+} // anonymous
+tcu::TestCaseGroup* createMeshShaderApiTests (tcu::TestContext& testCtx) // Builds the "api" group: nested subgroups per draw type, draw count, indirect args, count limit, count offset and task usage, with one case per firstTask value.
+	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api", "Mesh Shader API tests"));
+	const DrawType drawCases[] =
+	{
+		DrawType::DRAW, // NOTE(review): only DRAW is listed in this view although the loops below also handle the indirect draw types - confirm the list is complete.
+	};
+	const auto		extent				= getExtent();
+	const uint32_t	drawCountCases[]	= { 0u, 1u, 2u, extent.height / 2u, extent.height };
+	const uint32_t normalStride	= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV)); // Stride equal to the command struct size.
+	const uint32_t largeStride	= 2u * normalStride + 4u; // Stride larger than the command struct size.
+	const uint32_t altOffset	= 20u; // Nonzero offset, used both for indirect args and for the count buffer.
+	const struct
+	{
+		tcu::Maybe<IndirectArgs>	indirectArgs;
+		const char*					name;
+	} indirectArgsCases[] =
+	{
+		{ tcu::nothing<IndirectArgs>(),							"no_indirect_args"			},
+		// Offset 0, varying strides.
+		{ tcu::just(IndirectArgs{ 0u, 0u }),					"offset_0_stride_0"			},
+		{ tcu::just(IndirectArgs{ 0u, normalStride }),			"offset_0_stride_normal"	},
+		{ tcu::just(IndirectArgs{ 0u, largeStride }),			"offset_0_stride_large"		},
+		// Nonzero offset, varying strides.
+		{ tcu::just(IndirectArgs{ altOffset, 0u }),				"offset_alt_stride_0"		},
+		{ tcu::just(IndirectArgs{ altOffset, normalStride }),	"offset_alt_stride_normal"	},
+		{ tcu::just(IndirectArgs{ altOffset, largeStride }),	"offset_alt_stride_large"	},
+	};
+	const struct
+	{
+		tcu::Maybe<IndirectCountLimitType>	limitType;
+		const char*							name;
+	} countLimitCases[] =
+	{
+		{ tcu::nothing<IndirectCountLimitType>(),			"no_count_limit"		},
+		{ tcu::just(IndirectCountLimitType::BUFFER_VALUE),	"count_limit_buffer"	},
+		{ tcu::just(IndirectCountLimitType::MAX_COUNT),		"count_limit_max_count"	},
+	};
+	const struct
+	{
+		tcu::Maybe<uint32_t>	countOffset;
+		const char*				name;
+	} countOffsetCases[] =
+	{
+		{ tcu::nothing<uint32_t>(),	"no_count_offset"	},
+		{ tcu::just(uint32_t{0u}),	"count_offset_0"	},
+		{ tcu::just(altOffset),		"count_offset_alt"	},
+	};
+	const struct
+	{
+		bool		useTask;
+		const char*	name;
+	} taskCases[] =
+	{
+		{ false,	"no_task_shader"	},
+		{ true,		"with_task_shader"	},
+	};
+	const struct
+	{
+		uint32_t	firstTask;
+		const char*	name;
+	} firstTaskCases[] =
+	{
+		{ 0u,		"first_task_zero"		},
+		{ 1001u,	"first_task_nonzero"	},
+	};
+	uint32_t seed = 1628678795u; // Base seed; incremented once for every generated case.
+	for (const auto& drawCase : drawCases)
+	{
+		const auto drawCaseName			= de::toString(drawCase);
+		const bool isIndirect			= (drawCase != DrawType::DRAW);
+		const bool isIndirectNoCount	= (drawCase == DrawType::DRAW_INDIRECT);
+		const bool isIndirectCount		= (drawCase == DrawType::DRAW_INDIRECT_COUNT);
+		GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str(), ""));
+		for (const auto& drawCountCase : drawCountCases)
+		{
+			const auto drawCountName = "draw_count_" + de::toString(drawCountCase);
+			GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str(), ""));
+			for (const auto& indirectArgsCase : indirectArgsCases)
+			{
+				const bool hasIndirectArgs	= static_cast<bool>(indirectArgsCase.indirectArgs);
+				const bool strideZero		= (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u);
+				if (isIndirect != hasIndirectArgs) // Indirect args are present if and only if the draw is indirect.
+					continue;
+				// VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02146 and VUID-vkCmdDrawMeshTasksIndirectCountNV-stride-02182.
+				if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero) // Skip stride 0 for indirect-count draws and for indirect draws with more than one draw.
+					continue;
+				GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx,, ""));
+				for (const auto& countLimitCase : countLimitCases)
+				{
+					const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType);
+					if (isIndirectCount != hasCountLimit) // A count limit type is present if and only if the draw is an indirect-count draw.
+						continue;
+					GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx,, ""));
+					for (const auto& countOffsetCase : countOffsetCases)
+					{
+						const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset);
+						if (isIndirectCount != hasCountOffsetType) // Same rule for the count buffer offset.
+							continue;
+						GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx,, ""));
+						for (const auto& taskCase : taskCases)
+						{
+							GroupPtr taskCaseGrp(new tcu::TestCaseGroup(testCtx,, ""));
+							for (const auto& firstTaskCase : firstTaskCases)
+							{
+								const TestParams params =
+								{
+									drawCase,						//	DrawType							drawType;
+									seed++,							//	uint32_t							seed;
+									drawCountCase,					//	uint32_t							drawCount;
+									firstTaskCase.firstTask,		//	uint32_t							firstTask;
+									indirectArgsCase.indirectArgs,	//	tcu::Maybe<IndirectArgs>			indirectArgs;
+									countLimitCase.limitType,		//	tcu::Maybe<IndirectCountLimitType>	indirectCountLimit;
+									countOffsetCase.countOffset,	//	tcu::Maybe<uint32_t>				indirectCountOffset;
+									taskCase.useTask,				//	bool								useTask;
+								};
+								taskCaseGrp->addChild(new MeshApiCase(testCtx,, "", params));
+							}
+							countOffsetGroup->addChild(taskCaseGrp.release()); // release() hands the group over to its parent.
+						}
+						countLimitGroup->addChild(countOffsetGroup.release());
+					}
+					indirectArgsGroup->addChild(countLimitGroup.release());
+				}
+				drawCountGroup->addChild(indirectArgsGroup.release());
+			}
+			drawGroup->addChild(drawCountGroup.release());
+		}
+		mainGroup->addChild(drawGroup.release());
+	}
+	return mainGroup.release(); // The returned raw pointer is no longer managed by mainGroup.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.hpp
new file mode 100644
index 0000000..cdd4c07
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderApiTests.hpp
@@ -0,0 +1,38 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader API Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+namespace MeshShader
+tcu::TestCaseGroup* createMeshShaderApiTests (tcu::TestContext& testCtx); // Creates and returns the mesh shader API test group.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.cpp
new file mode 100644
index 0000000..19c7eee
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.cpp
@@ -0,0 +1,1765 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Builtin Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderBuiltinTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "vkObjUtil.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkImageWithMemory.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkBarrierUtil.hpp"
+#include "tcuTexture.hpp"
+#include "tcuTestLog.hpp"
+#include <vector>
+#include <algorithm>
+#include <sstream>
+#include <map>
+#include <utility>
+#include <sstream>
+namespace tcu
+// Needed for PixelMap below, which is a std::map keyed by tcu::IVec2.
+bool operator<(const IVec2& a, const IVec2& b)
+	return (a.x() < b.x() || (a.x() == b.x() && a.y() < b.y())); // Lexicographic order: x first, then y.
+namespace vkt
+namespace MeshShader
+using namespace vk;
+using GroupPtr				= de::MovePtr<tcu::TestCaseGroup>;
+using DrawCommandVec		= std::vector<VkDrawMeshTasksIndirectCommandNV>; // Draw arguments, used for both direct and indirect draws.
+using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
+using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
+using ViewportVec			= std::vector<VkViewport>;
+using ColorVec				= std::vector<tcu::Vec4>;
+using PixelMap				= std::map<tcu::IVec2, tcu::Vec4>; // Coordinates to color.
+VkExtent2D getDefaultExtent () // Default 8x8 extent.
+	return makeExtent2D(8u, 8u);
+VkExtent2D getLinearExtent () // Extent built from the values 8 and 1.
+	return makeExtent2D(8u, 1u);
+struct JobSize // Pair of sizes describing a job.
+	uint32_t numTasks; // Number of tasks.
+	uint32_t localSize; // Presumably the local workgroup size used by the shaders - confirm against the users of this struct.
+JobSize getLargeJobSize () // Job size with numTasks = 8 and localSize = 8.
+	return JobSize{8u, 8u};
+// Single draw command with the given number of tasks (1 by default) and firstTask set to 0.
+DrawCommandVec getDefaultDrawCommands (uint32_t taskCount = 1u)
+	return DrawCommandVec(1u, makeDrawMeshTasksIndirectCommandNV(taskCount, 0u)); // Vector holding exactly one command.
+// Basic fragment shader that writes opaque blue (0, 0, 1, 1) to the color output at location 0.
+std::string getBasicFragShader ()
+	return // GLSL source built from adjacent string literals.
+		"#version 460\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (location=0) out vec4 outColor;\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		"}\n"
+		;
+struct IterationParams // Parameters consumed by MeshShaderBuiltinInstance::iterate().
+	VkExtent2D		colorExtent;	// Width and height of the color buffer.
+	uint32_t		numLayers;	// Array layers of the color buffer and framebuffer.
+	DrawCommandVec	drawArgs;	// Draw arguments; must not be empty.
+	bool			indirect;	// If true, drawArgs are copied to a buffer and a single indirect draw call is used.
+	ViewportVec		viewports;	// If empty, a single default viewport is used.
+class MeshShaderBuiltinInstance : public vkt::TestInstance // Base instance: iterate() renders and subclasses check the resulting image in verifyResults().
+						MeshShaderBuiltinInstance	(Context& context, const IterationParams& params)
+							: vkt::TestInstance	(context)
+							, m_params			(params)
+							{}
+	virtual				~MeshShaderBuiltinInstance	(void) {}
+	tcu::TestStatus		iterate						() override;
+	virtual void		verifyResults				(const tcu::ConstPixelBufferAccess& result) = 0; // Called by iterate() with the color buffer contents.
+	IterationParams		m_params; // Copy of the parameters passed to the constructor.
+tcu::TestStatus MeshShaderBuiltinInstance::iterate () // Renders with the "mesh" binary (plus "task"/"frag" if present), copies the color image to a host-visible buffer and passes it to verifyResults().
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto&		binaries	= m_context.getBinaryCollection();
+	const auto		useTask		= binaries.contains("task"); // Task stage is used only if a "task" binary exists.
+	const auto		useFrag		= binaries.contains("frag"); // Fragment stage is used only if a "frag" binary exists.
+	const auto		extent		= makeExtent3D(m_params.colorExtent.width, m_params.colorExtent.height, 1u);
+	const auto		iExtent3D	= tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(m_params.numLayers)); // The third component is the layer count, not extent.depth.
+	const auto		format		= VK_FORMAT_R8G8B8A8_UNORM;
+	const auto		tcuFormat	= mapVkFormat(format);
+	const auto		viewType	= ((m_params.numLayers > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
+	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, m_params.numLayers);
+	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, m_params.numLayers);
+	const tcu::Vec4	clearColor	(0.0f, 0.0f, 0.0f, 1.0f);
+	ImageWithMemoryPtr	colorBuffer;
+	Move<VkImageView>	colorBufferView;
+	{
+		const VkImageCreateInfo colorBufferInfo =
+		{
+			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
+			nullptr,								//	const void*				pNext;
+			0u,										//	VkImageCreateFlags		flags;
+			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+			format,									//	VkFormat				format;
+			extent,									//	VkExtent3D				extent;
+			1u,										//	uint32_t				mipLevels;
+			m_params.numLayers,						//	uint32_t				arrayLayers;
+			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+			colorUsage,								//	VkImageUsageFlags		usage;
+			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+			0u,										//	uint32_t				queueFamilyIndexCount;
+			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+		};
+		colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any));
+		colorBufferView = makeImageView(vkd, device, colorBuffer->get(), viewType, format, colorSRR);
+	}
+	// Empty descriptor set layout.
+	DescriptorSetLayoutBuilder layoutBuilder;
+	const auto setLayout =, device);
+	// Pipeline layout.
+	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
+	// Render pass and framebuffer.
+	const auto renderPass	= makeRenderPass(vkd, device, format);
+	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height, m_params.numLayers);
+	// Pipeline.
+	Move<VkShaderModule> taskModule;
+	Move<VkShaderModule> meshModule;
+	Move<VkShaderModule> fragModule;
+	if (useTask)
+		taskModule = createShaderModule(vkd, device, binaries.get("task"));
+	if (useFrag)
+		fragModule = createShaderModule(vkd, device, binaries.get("frag"));
+	meshModule = createShaderModule(vkd, device, binaries.get("mesh")); // The mesh module is always created.
+	std::vector<VkViewport>	viewports;
+	std::vector<VkRect2D>	scissors;
+	if (m_params.viewports.empty())
+	{
+		// Default ones.
+		viewports.push_back(makeViewport(extent));
+		scissors.push_back(makeRect2D(extent));
+	}
+	else
+	{
+		// The desired viewports and the same number of default scissors.
+		viewports.reserve(m_params.viewports.size());
+		std::copy(begin(m_params.viewports), end(m_params.viewports), std::back_inserter(viewports));
+		scissors.resize(viewports.size(), makeRect2D(extent));
+	}
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskModule.get(), meshModule.get(), fragModule.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	// Indirect buffer if needed.
+	BufferWithMemoryPtr indirectBuffer;
+	DE_ASSERT(!m_params.drawArgs.empty()); // At least one draw command is required in both modes.
+	if (m_params.indirect)
+	{
+		// Indirect draws.
+		const auto indirectBufferSize	= static_cast<VkDeviceSize>(de::dataSize(m_params.drawArgs));
+		const auto indirectBufferUsage	= (VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT);
+		const auto indirectBufferInfo	= makeBufferCreateInfo(indirectBufferSize, indirectBufferUsage);
+		indirectBuffer					= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, indirectBufferInfo, MemoryRequirement::HostVisible));
+		auto& indirectBufferAlloc		= indirectBuffer->getAllocation();
+		void* indirectBufferData		= indirectBufferAlloc.getHostPtr();
+		deMemcpy(indirectBufferData,, static_cast<size_t>(indirectBufferSize));
+		flushAlloc(vkd, device, indirectBufferAlloc); // Flush the allocation after the host copy above.
+	}
+	// Submit commands.
+	beginCommandBuffer(vkd, cmdBuffer);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(),, clearColor);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	if (!m_params.indirect)
+	{
+		for (const auto& command : m_params.drawArgs) // Direct mode: one draw call per command.
+			vkd.cmdDrawMeshTasksNV(cmdBuffer, command.taskCount, command.firstTask);
+	}
+	else
+	{
+		const auto numDraws	= static_cast<uint32_t>(m_params.drawArgs.size());
+		const auto stride	= static_cast<uint32_t>(sizeof(decltype(m_params.drawArgs)::value_type)); // Stride is the size of one command struct.
+		vkd.cmdDrawMeshTasksIndirectNV(cmdBuffer, indirectBuffer->get(), 0ull, numDraws, stride); // Indirect mode: a single call covering all commands.
+	}
+	endRenderPass(vkd, cmdBuffer);
+	// Output buffer to extract the color buffer contents.
+	BufferWithMemoryPtr	outBuffer;
+	void*				outBufferData	= nullptr;
+	{
+		const auto	layerSize			= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height);
+		const auto	outBufferSize		= layerSize * m_params.numLayers; // Room for every layer.
+		const auto	outBufferUsage		= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+		const auto	outBufferInfo		= makeBufferCreateInfo(outBufferSize, outBufferUsage);
+		outBuffer						= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible));
+		outBufferData					= outBuffer->getAllocation().getHostPtr();
+	}
+	// Transition image layout.
+	const auto preTransferBarrier = makeImageMemoryBarrier(
+		colorBuffer->get(), colorSRR);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preTransferBarrier);
+	// Copy image to output buffer.
+	const std::vector<VkBufferImageCopy> regions (1u, makeBufferImageCopy(extent, colorSRL)); // A single region built from the full extent and colorSRL.
+	vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, outBuffer->get(), static_cast<uint32_t>(regions.size()), de::dataOrNull(regions));
+	// Transfer to host barrier.
+	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postTransferBarrier, 0u, nullptr, 0u, nullptr);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Invalidate alloc and verify result.
+	{
+		auto& outBufferAlloc = outBuffer->getAllocation();
+		invalidateAlloc(vkd, device, outBufferAlloc);
+		tcu::ConstPixelBufferAccess	result (tcuFormat, iExtent3D, outBufferData);
+		verifyResults(result);
+	}
+	return tcu::TestStatus::pass("Pass"); // verifyResults() returns void; the overrides visible in this file report failures with TCU_FAIL.
+// Abstract case that implements the generic checkSupport method; taskNeeded tells it to also require task shader support.
+class MeshShaderBuiltinCase : public vkt::TestCase
+					MeshShaderBuiltinCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool taskNeeded)
+						: vkt::TestCase	(testCtx, name, description)
+						, m_taskNeeded	(taskNeeded)
+						{}
+	virtual			~MeshShaderBuiltinCase	(void) {}
+	void			checkSupport			(Context& context) const override;
+	const bool		m_taskNeeded; // Checked in checkSupport() against the taskShader feature.
+void MeshShaderBuiltinCase::checkSupport (Context& context) const // Requires VK_NV_mesh_shader, the meshShader feature and, if m_taskNeeded, the taskShader feature.
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	const auto& meshFeatures = context.getMeshShaderFeatures();
+	if (!meshFeatures.meshShader)
+		TCU_THROW(NotSupportedError, "Mesh shader not supported");
+	if (m_taskNeeded && !meshFeatures.taskShader) // Task shader support is only required by cases that ask for it.
+		TCU_THROW(NotSupportedError, "Task shader not supported");
+// Instance that verifies color layers: every pixel in a layer must have that layer's expected color.
+class FullScreenColorInstance : public MeshShaderBuiltinInstance
+				FullScreenColorInstance		(Context& context, const IterationParams& params, const ColorVec& expectedColors)
+					: MeshShaderBuiltinInstance (context, params)
+					, m_expectedColors			(expectedColors)
+					{}
+	virtual		~FullScreenColorInstance	(void) {}
+	void		verifyResults				(const tcu::ConstPixelBufferAccess& result) override;
+	const ColorVec m_expectedColors; // Copy of the expected colors passed to the constructor.
+void FullScreenColorInstance::verifyResults (const tcu::ConstPixelBufferAccess& result) // Compares every pixel of every layer exactly against the per-layer expected color, logs all mismatches and fails once at the end.
+	auto&		log		= m_context.getTestContext().getLog();
+	bool		fail	= false;
+	const auto	width	= result.getWidth();
+	const auto	height	= result.getHeight();
+	const auto	depth	= result.getDepth();
+	for (int z = 0; z < depth; ++z) // One iteration per layer.
+	{
+		const auto& expected =;
+		for (int y = 0; y < height; ++y)
+		for (int x = 0; x < width; ++x)
+		{
+			const auto resultColor = result.getPixel(x, y, z);
+			if (resultColor != expected) // Exact comparison, no threshold.
+			{
+				std::ostringstream msg;
+				msg << "Pixel (" << x << ", " << y << ", " << z << ") failed: expected " << expected << " and found " << resultColor;
+				log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
+				fail = true; // Keep going so every bad pixel gets logged.
+			}
+		}
+	}
+	if (fail)
+	{
+		log << tcu::TestLog::Image("Result", "", result);
+		TCU_FAIL("Check log for details");
+	}
+// Instance that verifies single-layer framebuffers divided into 4 quadrants, each with its own expected color.
+class QuadrantsInstance : public MeshShaderBuiltinInstance
+				QuadrantsInstance	(Context& context, const IterationParams& params,
+									 const tcu::Vec4 topLeft,
+									 const tcu::Vec4 topRight,
+									 const tcu::Vec4 bottomLeft,
+									 const tcu::Vec4 bottomRight)
+					: MeshShaderBuiltinInstance (context, params)
+					, m_topLeft					(topLeft)
+					, m_topRight				(topRight)
+					, m_bottomLeft				(bottomLeft)
+					, m_bottomRight				(bottomRight)
+					{}
+	virtual		~QuadrantsInstance	(void) {}
+	void		verifyResults		(const tcu::ConstPixelBufferAccess& result) override;
+	const tcu::Vec4 m_topLeft; // Expected color for x < width/2, y < height/2.
+	const tcu::Vec4 m_topRight; // Expected color for x >= width/2, y < height/2.
+	const tcu::Vec4 m_bottomLeft; // Expected color for x < width/2, y >= height/2.
+	const tcu::Vec4 m_bottomRight; // Expected color for x >= width/2, y >= height/2.
+void QuadrantsInstance::verifyResults (const tcu::ConstPixelBufferAccess& result) // Compares each pixel exactly against the color of its quadrant; calls TCU_FAIL on the first mismatch found.
+	const auto width	= result.getWidth();
+	const auto height	= result.getHeight();
+	const auto depth	= result.getDepth();
+	DE_ASSERT(depth == 1);
+	DE_ASSERT(width > 0 && width % 2 == 0); // Even dimensions are needed to split the image into equal halves.
+	DE_ASSERT(height > 0 && height % 2 == 0);
+	DE_UNREF(depth); // For release builds.
+	const auto	halfWidth	= width / 2;
+	const auto	halfHeight	= height / 2;
+	tcu::Vec4	expected;
+	for (int y = 0; y < height; ++y)
+	for (int x = 0; x < width; ++x)
+	{
+		// Choose the right quadrant: rows below halfHeight are "top", columns below halfWidth are "left".
+		if (y < halfHeight)
+			expected = ((x < halfWidth) ? m_topLeft : m_topRight);
+		else
+			expected = ((x < halfWidth) ? m_bottomLeft : m_bottomRight);
+		const auto resultColor = result.getPixel(x, y);
+		if (resultColor != expected) // Exact comparison, no threshold.
+		{
+			std::ostringstream msg;
+			msg << "Pixel (" << x << ", " << y  << ") failed: expected " << expected << " and found " << resultColor;
+			TCU_FAIL(msg.str());
+		}
+	}
+// Parameters for PixelsInstance, which verifies single-layer framebuffers with specific pixels set to some color.
+struct PixelVerifierParams
+	const tcu::Vec4		background; // Expected color for every pixel that has no entry in pixelMap.
+	const PixelMap		pixelMap; // Expected color for specific pixel coordinates; looked up with tcu::IVec2(x, y).
+class PixelsInstance : public MeshShaderBuiltinInstance // Verifies results pixel by pixel using a PixelVerifierParams description.
+				PixelsInstance	(Context& context, const IterationParams& params, const PixelVerifierParams& pixelParams)
+					: MeshShaderBuiltinInstance	(context, params)
+					, m_pixelParams				(pixelParams)
+					{}
+	virtual		~PixelsInstance	(void) {}
+	void		verifyResults	(const tcu::ConstPixelBufferAccess& result) override; // Checks each pixel against the map or the background color.
+	const PixelVerifierParams m_pixelParams; // Copy of the expected background and per-pixel colors.
+void PixelsInstance::verifyResults (const tcu::ConstPixelBufferAccess& result) // Checks every pixel against its pixelMap entry, or the background color if it has none.
+	const auto width	= result.getWidth();
+	const auto height	= result.getHeight();
+	const auto depth	= result.getDepth();
+	DE_ASSERT(depth == 1); // Only single-layer results are handled here.
+	DE_UNREF(depth); // For release builds.
+	for (int y = 0; y < height; ++y)
+	for (int x = 0; x < width; ++x)
+	{
+		const tcu::IVec2	coords		(x, y);
+		const auto			iter		= m_pixelParams.pixelMap.find(coords);
+		const auto			expected	= ((iter == m_pixelParams.pixelMap.end()) ? m_pixelParams.background : iter->second); // Pixels missing from the map must have the background color.
+		const auto			resultColor	= result.getPixel(x, y);
+		if (resultColor != expected)
+		{
+			std::ostringstream msg;
+			msg << "Pixel (" << x << ", " << y << ") failed: expected " << expected << " and found " << resultColor;
+			TCU_FAIL(msg.str()); // Reported with the pixel coordinates and both colors.
+		}
+	}
+// Primitive ID cases: the mesh shader writes gl_PrimitiveID and the fragment shader checks the value it reads.
+class PrimitiveIdCase : public MeshShaderBuiltinCase
+					PrimitiveIdCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool glslFrag)
+						: MeshShaderBuiltinCase (testCtx, name, description, false/*taskNeeded*/)
+						, m_glslFrag			(glslFrag)
+						{}
+	virtual			~PrimitiveIdCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	void			checkSupport		(Context& context) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	// Fragment shader in GLSL means glslang will use the Geometry capability due to gl_PrimitiveID.
+	const bool		m_glslFrag; // When false, a hand-written SPIR-V assembly fragment shader is used instead.
+void PrimitiveIdCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	// Mesh shader: a single triangle covering the whole framebuffer, tagged with a fixed primitive id.
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< "perprimitiveNV out gl_MeshPerPrimitiveNV {\n"
+			<< "   int gl_PrimitiveID;\n"
+			<< "} gl_MeshPrimitivesNV[];\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
+			<< "\n"
+			// Sets an arbitrary primitive id; both fragment shader variants compare against this same value.
+			<< "    gl_MeshPrimitivesNV[0].gl_PrimitiveID = 1629198956;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Frag shader: GLSL or SPIR-V assembly variant, depending on m_glslFrag.
+	if (m_glslFrag)
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			// Checks the primitive id matches: blue if it does, black otherwise.
+			<< "    outColor = ((gl_PrimitiveID == 1629198956) ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+	else
+	{
+		// This is the same shader as above, but OpCapability Geometry has been replaced by OpCapability MeshShadingNV in order to
+		// access gl_PrimitiveID. This also needs the SPV_NV_mesh_shader extension.
+		std::ostringstream frag;
+		frag
+			<< "; Version: 1.0\n"
+			<< "; Generator: Khronos Glslang Reference Front End; 10\n"
+			<< "; Bound: 24\n"
+			<< "; Schema: 0\n"
+			<< "      OpCapability Shader\n"
+			// Manual change in these lines.
+			//<< "      OpCapability Geometry\n"
+			<< "      OpCapability MeshShadingNV\n"
+			<< "      OpExtension \"SPV_NV_mesh_shader\"\n"
+			<< " %1 = OpExtInstImport \"GLSL.std.450\"\n"
+			<< "      OpMemoryModel Logical GLSL450\n"
+			<< "      OpEntryPoint Fragment %4 \"main\" %9 %12\n"
+			<< "      OpExecutionMode %4 OriginUpperLeft\n"
+			<< "      OpDecorate %9 Location 0\n"
+			<< "      OpDecorate %12 Flat\n"
+			<< "      OpDecorate %12 BuiltIn PrimitiveId\n"
+			<< " %2 = OpTypeVoid\n"
+			<< " %3 = OpTypeFunction %2\n"
+			<< " %6 = OpTypeFloat 32\n"
+			<< " %7 = OpTypeVector %6 4\n"
+			<< " %8 = OpTypePointer Output %7\n"
+			<< " %9 = OpVariable %8 Output\n"
+			<< "%10 = OpTypeInt 32 1\n"
+			<< "%11 = OpTypePointer Input %10\n"
+			<< "%12 = OpVariable %11 Input\n"
+			<< "%14 = OpConstant %10 1629198956\n" // Same primitive id the mesh shader writes.
+			<< "%15 = OpTypeBool\n"
+			<< "%17 = OpConstant %6 0\n"
+			<< "%18 = OpConstant %6 1\n"
+			<< "%19 = OpConstantComposite %7 %17 %17 %18 %18\n" // Blue: selected when the id matches.
+			<< "%20 = OpConstantComposite %7 %17 %17 %17 %18\n" // Black: selected when the id does not match.
+			<< "%21 = OpTypeVector %15 4\n"
+			<< " %4 = OpFunction %2 None %3\n"
+			<< " %5 = OpLabel\n"
+			<< "%13 = OpLoad %10 %12\n"
+			<< "%16 = OpIEqual %15 %13 %14\n"
+			<< "%22 = OpCompositeConstruct %21 %16 %16 %16 %16\n"
+			<< "%23 = OpSelect %7 %22 %19 %20\n"
+			<< "      OpStore %9 %23\n"
+			<< "      OpReturn\n"
+			<< "      OpFunctionEnd\n"
+			;
+		programCollection.spirvAsmSources.add("frag") << frag.str();
+	}
+void PrimitiveIdCase::checkSupport (Context& context) const // Base class checks plus geometry shader support for the GLSL fragment variant.
+	MeshShaderBuiltinCase::checkSupport(context);
+	// Fragment shader in GLSL means glslang will use the Geometry capability due to gl_PrimitiveID.
+	if (m_glslFrag)
+		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
+TestInstance* PrimitiveIdCase::createInstance (Context& context) const // Single-layer draw whose whole framebuffer is expected to be blue.
+	const ColorVec			expectedColors	(1u, tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f)); // Must match the color the fragment shaders output when the id matches.
+	const IterationParams	iterationParams	=
+	{
+		getDefaultExtent(),			//	VkExtent2D		colorExtent;
+		1u,							//	uint32_t		numLayers;
+		getDefaultDrawCommands(),	//	DrawCommandVec	drawArgs;
+		false,						//	bool			indirect;
+		{},							//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+// Layer builtin case: the mesh shader routes one full-screen triangle to each framebuffer layer through gl_Layer.
+class LayerCase : public MeshShaderBuiltinCase
+					LayerCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool shareVertices)
+						: MeshShaderBuiltinCase	(testCtx, name, description, false/*taskNeeded*/)
+						, m_shareVertices		(shareVertices)
+						{}
+	virtual			~LayerCase	(void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override;
+	void			checkSupport	(Context& context) const override;
+	TestInstance*	createInstance	(Context& context) const override;
+	static constexpr uint32_t kNumLayers = 4u; // Number of framebuffer layers and of colors in the fragment shader.
+	const bool m_shareVertices; // If true, one work group emits a primitive per layer reusing the same 3 vertices; otherwise one work group per layer.
+void LayerCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	const auto localSize		= (m_shareVertices ? kNumLayers : 1u); // Invocations per work group.
+	const auto numPrimitives	= (m_shareVertices ? kNumLayers : 1u); // Primitives emitted per work group.
+	const auto layerNumber		= (m_shareVertices ? "gl_LocalInvocationIndex" : "gl_WorkGroupID.x"); // GLSL expression providing the target layer.
+	// One layer per local invocation or work group (shared vertices or not, respectively).
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << localSize << ") in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=" << numPrimitives << ") out;\n"
+			<< "\n"
+			<< "perprimitiveNV out gl_MeshPerPrimitiveNV {\n"
+			<< "   int gl_Layer;\n"
+			<< "} gl_MeshPrimitivesNV[];\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = " << numPrimitives << ";\n"
+			<< "\n"
+			<< "    if (gl_LocalInvocationIndex == 0u)\n" // Only the first invocation writes the 3 vertices used by every primitive.
+			<< "    {\n"
+			<< "        gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+			<< "        gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
+			<< "        gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    const uint baseIndex = gl_LocalInvocationIndex * 3u;\n" // Each invocation fills the index triplet of its own primitive.
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 2] = 2;\n"
+			<< "\n"
+			<< "    gl_MeshPrimitivesNV[gl_LocalInvocationIndex].gl_Layer = int(" << layerNumber << ");\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Fragment shader chooses one color per layer. The colors must match the expected colors used in createInstance.
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "vec4 colors[" << kNumLayers << "] = vec4[](\n"
+			<< "    vec4(0.0, 0.0, 1.0, 1.0),\n"
+			<< "    vec4(1.0, 0.0, 1.0, 1.0),\n"
+			<< "    vec4(0.0, 1.0, 1.0, 1.0),\n"
+			<< "    vec4(1.0, 1.0, 0.0, 1.0)\n"
+			<< ");\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    outColor = colors[gl_Layer];\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+void LayerCase::checkSupport (Context& context) const // Base class checks plus support for writing the layer from the mesh shader.
+	MeshShaderBuiltinCase::checkSupport(context);
+	if (!context.contextSupports(vk::ApiVersion(1u, 2u, 0u))) // Without Vulkan 1.2 the extension is required.
+		context.requireDeviceFunctionality("VK_EXT_shader_viewport_index_layer");
+	else
+	{
+		const auto& features = context.getDeviceVulkan12Features(); // With Vulkan 1.2 the corresponding feature bit is checked instead.
+		if (!features.shaderOutputLayer)
+			TCU_THROW(NotSupportedError, "shaderOutputLayer feature not supported");
+	}
+TestInstance* LayerCase::createInstance (Context& context) const // Multi-layer draw with one expected full-screen color per layer.
+	ColorVec expectedColors; // Same colors, in the same order, as the colors array in the fragment shader.
+	expectedColors.reserve(kNumLayers);
+	expectedColors.push_back(tcu::Vec4(0.0, 0.0, 1.0, 1.0));
+	expectedColors.push_back(tcu::Vec4(1.0, 0.0, 1.0, 1.0));
+	expectedColors.push_back(tcu::Vec4(0.0, 1.0, 1.0, 1.0));
+	expectedColors.push_back(tcu::Vec4(1.0, 1.0, 0.0, 1.0));
+	const auto numWorkGroups = (m_shareVertices ? 1u : kNumLayers); // When not sharing vertices, each work group handles one layer.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),						//	VkExtent2D		colorExtent;
+		kNumLayers,								//	uint32_t		numLayers;
+		getDefaultDrawCommands(numWorkGroups),	//	DrawCommandVec	drawArgs;
+		false,									//	bool			indirect;
+		{},										//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+// ViewportIndex builtin case: the mesh shader routes one triangle to each of 4 viewports through gl_ViewportIndex.
+class ViewportIndexCase : public MeshShaderBuiltinCase
+					ViewportIndexCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool shareVertices)
+						: MeshShaderBuiltinCase	(testCtx, name, description, false/*taskNeeded*/)
+						, m_shareVertices		(shareVertices)
+						{}
+	virtual			~ViewportIndexCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	void			checkSupport		(Context& context) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	static constexpr uint32_t kQuadrants = 4u; // Number of viewports and of colors in the fragment shader.
+	const bool m_shareVertices; // If true, one work group emits a primitive per viewport reusing the same 3 vertices; otherwise one work group per viewport.
+void ViewportIndexCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	const auto localSize		= (m_shareVertices ? kQuadrants : 1u); // Invocations per work group.
+	const auto numPrimitives	= (m_shareVertices ? kQuadrants : 1u); // Primitives emitted per work group.
+	const auto viewportIndex	= (m_shareVertices ? "gl_LocalInvocationIndex" : "gl_WorkGroupID.x"); // GLSL expression providing the target viewport.
+	// One viewport per local invocation or work group (sharing vertices or not, respectively).
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << localSize << ") in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=" << numPrimitives << ") out;\n"
+			<< "\n"
+			<< "perprimitiveNV out gl_MeshPerPrimitiveNV {\n"
+			<< "   int gl_ViewportIndex;\n"
+			<< "} gl_MeshPrimitivesNV[];\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = " << numPrimitives << ";\n"
+			<< "\n"
+			<< "    if (gl_LocalInvocationIndex == 0u)\n" // Only the first invocation writes the 3 vertices used by every primitive.
+			<< "    {\n"
+			<< "        gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+			<< "        gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
+			<< "        gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    const uint baseIndex = gl_LocalInvocationIndex * 3u;\n" // Each invocation fills the index triplet of its own primitive.
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndex + 2] = 2;\n"
+			<< "\n"
+			<< "    gl_MeshPrimitivesNV[gl_LocalInvocationIndex].gl_ViewportIndex = int(" << viewportIndex << ");\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Fragment shader chooses one color per viewport. The colors must match the quadrant colors used in createInstance.
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "vec4 colors[" << kQuadrants << "] = vec4[](\n"
+			<< "    vec4(0.0, 0.0, 1.0, 1.0),\n"
+			<< "    vec4(1.0, 0.0, 1.0, 1.0),\n"
+			<< "    vec4(0.0, 1.0, 1.0, 1.0),\n"
+			<< "    vec4(1.0, 1.0, 0.0, 1.0)\n"
+			<< ");\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    outColor = colors[gl_ViewportIndex];\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+void ViewportIndexCase::checkSupport (Context& context) const // Base class checks plus multiple viewports and writing the viewport index from the mesh shader.
+	MeshShaderBuiltinCase::checkSupport(context);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
+	if (!context.contextSupports(vk::ApiVersion(1u, 2u, 0u))) // Without Vulkan 1.2 the extension is required.
+		context.requireDeviceFunctionality("VK_EXT_shader_viewport_index_layer");
+	else
+	{
+		const auto& features = context.getDeviceVulkan12Features(); // With Vulkan 1.2 the corresponding feature bit is checked instead.
+		if (!features.shaderOutputViewportIndex)
+			TCU_THROW(NotSupportedError, "shaderOutputViewportIndex feature not supported");
+	}
+TestInstance* ViewportIndexCase::createInstance (Context& context) const // Single-layer draw using 4 viewports, one per framebuffer quadrant.
+	const auto extent = getDefaultExtent();
+	DE_ASSERT(extent.width > 0u && extent.width % 2u == 0u); // Even dimensions are needed to split the framebuffer into equal quadrants.
+	DE_ASSERT(extent.height > 0u && extent.height % 2u == 0u);
+	const auto halfWidth	= static_cast<float>(extent.width / 2u);
+	const auto halfHeight	= static_cast<float>(extent.height / 2u);
+	const auto topLeft		= tcu::Vec4(0.0, 0.0, 1.0, 1.0); // Expected quadrant colors: same values and order as the fragment shader colors array.
+	const auto topRight		= tcu::Vec4(1.0, 0.0, 1.0, 1.0);
+	const auto bottomLeft	= tcu::Vec4(0.0, 1.0, 1.0, 1.0);
+	const auto bottomRight	= tcu::Vec4(1.0, 1.0, 0.0, 1.0);
+	ViewportVec viewports; // Viewport i covers the quadrant expected to show color i: top left, top right, bottom left, bottom right.
+	viewports.reserve(kQuadrants);
+	viewports.emplace_back(makeViewport(0.0f,		0.0f,		halfWidth, halfHeight, 0.0f, 1.0f));
+	viewports.emplace_back(makeViewport(halfWidth,	0.0f,		halfWidth, halfHeight, 0.0f, 1.0f));
+	viewports.emplace_back(makeViewport(0.0f,		halfHeight,	halfWidth, halfHeight, 0.0f, 1.0f));
+	viewports.emplace_back(makeViewport(halfWidth,	halfHeight,	halfWidth, halfHeight, 0.0f, 1.0f));
+	const auto numWorkGroups = (m_shareVertices ? 1u : kQuadrants); // When not sharing vertices, each work group handles one viewport.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),						//	VkExtent2D		colorExtent;
+		1u,										//	uint32_t		numLayers;
+		getDefaultDrawCommands(numWorkGroups),	//	DrawCommandVec	drawArgs;
+		false,									//	bool			indirect;
+		std::move(viewports),					//	ViewportVec		viewports;
+	};
+	return new QuadrantsInstance(context, iterationParams, topLeft, topRight, bottomLeft, bottomRight);
+// Position builtin case: a small triangle is positioned so that only the top left pixel is expected to be covered.
+class PositionCase : public MeshShaderBuiltinCase
+					PositionCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: MeshShaderBuiltinCase (testCtx, name, description, false/*taskNeeded*/)
+						{}
+	virtual			~PositionCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+void PositionCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	// Mesh shader: emit single triangle around the center of the top left pixel.
+	{
+		const auto extent	= getDefaultExtent();
+		const auto fWidth	= static_cast<float>(extent.width);
+		const auto fHeight	= static_cast<float>(extent.height);
+		const auto pxWidth	= 2.0f / fWidth; // Pixel size in the [-1, 1] coordinate range used by gl_Position below.
+		const auto pxHeight = 2.0f / fHeight;
+		const auto halfXPix	= pxWidth / 2.0f;
+		const auto halfYPix	= pxHeight / 2.0f;
+		// Center of top left pixel.
+		const auto x		= -1.0f + halfXPix;
+		const auto y		= -1.0f + halfYPix;
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(" << (x - halfXPix) << ", " << (y + halfYPix) << ", 0.0, 1.0);\n" // Half a pixel left of and below the center.
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(" << (x + halfXPix) << ", " << (y + halfYPix) << ", 0.0, 1.0);\n" // Half a pixel right of and below the center.
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4(" << x << ", " << (y - halfYPix) << ", 0.0, 1.0);\n" // Half a pixel above the center.
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Basic fragment shader, as provided by getBasicFragShader().
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* PositionCase::createInstance (Context& context) const // Single-layer draw expecting only pixel (0, 0) to differ from the background.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),			//	VkExtent2D		colorExtent;
+		1u,							//	uint32_t		numLayers;
+		getDefaultDrawCommands(),	//	DrawCommandVec	drawArgs;
+		false,						//	bool			indirect;
+		{},							//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	// Must match the shader: the triangle surrounds the center of the top left pixel only.
+	PixelMap pixelMap;
+	pixelMap[tcu::IVec2(0, 0)] = tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
+	const PixelVerifierParams verifierParams =
+	{
+		tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f),	//	const tcu::Vec4		background;
+		std::move(pixelMap),				//	const PixelMap		pixelMap;
+	};
+	return new PixelsInstance(context, iterationParams, verifierParams);
+// PointSize builtin case: a single point with a size set from the mesh shader is expected to fill the top left quadrant.
+class PointSizeCase : public MeshShaderBuiltinCase
+					PointSizeCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: MeshShaderBuiltinCase (testCtx, name, description, false/*taskNeeded*/)
+						{}
+	virtual			~PointSizeCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	void			checkSupport		(Context& context) const override;
+	static constexpr float kPointSize = 4.0f; // Value written to gl_PointSize and checked against the device point size range.
+void PointSizeCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	// Mesh shader: large point covering the top left quadrant.
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (points) out;\n"
+			<< "layout (max_vertices=1, max_primitives=1) out;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-0.5, -0.5, 0.0, 1.0);\n" // Point centered in the top left quadrant.
+			<< "    gl_MeshVerticesNV[0].gl_PointSize = " << kPointSize << ";\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Basic fragment shader, as provided by getBasicFragShader().
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* PointSizeCase::createInstance (Context& context) const // Single-layer draw expecting a blue top left quadrant and black everywhere else.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),			//	VkExtent2D		colorExtent;
+		1u,							//	uint32_t		numLayers;
+		getDefaultDrawCommands(),	//	DrawCommandVec	drawArgs;
+		false,						//	bool			indirect;
+		{},							//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	// Must match the shader: the point only covers the top left quadrant.
+	const tcu::Vec4 black	(0.0f, 0.0f, 0.0f, 1.0f);
+	const tcu::Vec4 blue	(0.0f, 0.0f, 1.0f, 1.0f);
+	return new QuadrantsInstance(context, iterationParams, blue, black, black, black);
+void PointSizeCase::checkSupport (Context& context) const // Base class checks plus large points support and a point size range containing kPointSize.
+	MeshShaderBuiltinCase::checkSupport(context);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_LARGE_POINTS);
+	const auto& properties = context.getDeviceProperties();
+	if (kPointSize < properties.limits.pointSizeRange[0] || kPointSize > properties.limits.pointSizeRange[1]) // pointSizeRange holds the minimum and maximum sizes.
+		TCU_THROW(NotSupportedError, "Required point size outside point size range");
+// ClipDistance builtin case: two clip distances written by the mesh shader are expected to leave only the top left quadrant drawn.
+class ClipDistanceCase : public MeshShaderBuiltinCase
+					ClipDistanceCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: MeshShaderBuiltinCase (testCtx, name, description, false/*taskNeeded*/)
+						{}
+	virtual			~ClipDistanceCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	void			checkSupport		(Context& context) const override;
+void ClipDistanceCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	// Mesh shader: full-screen quad using different clip distances.
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=4, max_primitives=2) out;\n"
+			<< "\n"
+			<< "out gl_MeshPerVertexNV {\n"
+			<< "    vec4  gl_Position;\n"
+			<< "    float gl_ClipDistance[2];\n"
+			<< "} gl_MeshVerticesNV[];\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 2u;\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+			<< "    gl_PrimitiveIndicesNV[3] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[4] = 3;\n"
+			<< "    gl_PrimitiveIndicesNV[5] = 2;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4( 1.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[3].gl_Position = vec4( 1.0,  1.0, 0.0, 1.0);\n"
+			<< "\n"
+			// The first clip plane keeps the left half of the frame buffer: positive on the x=-1 vertices, negative on the x=1 ones.
+			<< "    gl_MeshVerticesNV[0].gl_ClipDistance[0] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[1].gl_ClipDistance[0] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[2].gl_ClipDistance[0] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[3].gl_ClipDistance[0] = -1.0;\n"
+			<< "\n"
+			// The second clip plane keeps the top half of the frame buffer: positive on the y=-1 vertices, negative on the y=1 ones.
+			<< "    gl_MeshVerticesNV[0].gl_ClipDistance[1] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[1].gl_ClipDistance[1] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[2].gl_ClipDistance[1] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[3].gl_ClipDistance[1] = -1.0;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Fragment shader chooses a constant color based on the clip distances it reads.
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			// White color should not actually be used, as those fragments are supposed to be discarded.
+			<< "    outColor = ((gl_ClipDistance[0] >= 0.0 && gl_ClipDistance[1] >= 0.0) ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(1.0, 1.0, 1.0, 1.0));\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+TestInstance* ClipDistanceCase::createInstance (Context& context) const // Single-layer draw expecting a blue top left quadrant and black everywhere else.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),			//	VkExtent2D		colorExtent;
+		1u,							//	uint32_t		numLayers;
+		getDefaultDrawCommands(),	//	DrawCommandVec	drawArgs;
+		false,						//	bool			indirect;
+		{},							//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	// Must match the shader: only the left half and top half intersection survives clipping.
+	const tcu::Vec4 black	(0.0f, 0.0f, 0.0f, 1.0f);
+	const tcu::Vec4 blue	(0.0f, 0.0f, 1.0f, 1.0f);
+	return new QuadrantsInstance(context, iterationParams, blue, black, black, black);
+void ClipDistanceCase::checkSupport (Context& context) const // Base class checks plus the shader clip distance core feature.
+	MeshShaderBuiltinCase::checkSupport(context);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
+// CullDistance builtin case: cull distances written by the mesh shader discard the bottom half and color the top half.
+class CullDistanceCase : public MeshShaderBuiltinCase
+					CullDistanceCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: MeshShaderBuiltinCase (testCtx, name, description, false/*taskNeeded*/)
+						{}
+	virtual			~CullDistanceCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	void			checkSupport		(Context& context) const override;
+void CullDistanceCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" and "frag" programs for this case.
+	// Mesh shader: two quads covering the whole screen, one on top of the other.
+	// Use cull distances to discard the bottom quad.
+	// Use cull distances to paint the top one in two colors: blue on the left, white on the right.
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=6, max_primitives=4) out;\n"
+			<< "\n"
+			<< "out gl_MeshPerVertexNV {\n"
+			<< "    vec4  gl_Position;\n"
+			<< "    float gl_CullDistance[2];\n"
+			<< "} gl_MeshVerticesNV[];\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 4u;\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[0]  = 0;\n" // Triangles 0 and 1 form the top quad (vertices 0, 1, 3, 4).
+			<< "    gl_PrimitiveIndicesNV[1]  = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2]  = 3;\n"
+			<< "    gl_PrimitiveIndicesNV[3]  = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[4]  = 4;\n"
+			<< "    gl_PrimitiveIndicesNV[5]  = 3;\n"
+			<< "    gl_PrimitiveIndicesNV[6]  = 1;\n" // Triangles 2 and 3 form the bottom quad (vertices 1, 2, 4, 5).
+			<< "    gl_PrimitiveIndicesNV[7]  = 2;\n"
+			<< "    gl_PrimitiveIndicesNV[8]  = 4;\n"
+			<< "    gl_PrimitiveIndicesNV[9]  = 2;\n"
+			<< "    gl_PrimitiveIndicesNV[10] = 5;\n"
+			<< "    gl_PrimitiveIndicesNV[11] = 4;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  0.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4(-1.0,  1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[3].gl_Position = vec4( 1.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[4].gl_Position = vec4( 1.0,  0.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[5].gl_Position = vec4( 1.0,  1.0, 0.0, 1.0);\n"
+			<< "\n"
+			// The first cull plane discards the bottom quad: all of its vertices (1, 2, 4 and 5) get negative distances.
+			<< "    gl_MeshVerticesNV[0].gl_CullDistance[0] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[1].gl_CullDistance[0] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[2].gl_CullDistance[0] = -2.0;\n"
+			<< "    gl_MeshVerticesNV[3].gl_CullDistance[0] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[4].gl_CullDistance[0] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[5].gl_CullDistance[0] = -2.0;\n"
+			<< "\n"
+			// The second cull plane helps paint left and right different: positive on the x=-1 vertices, negative on the x=1 ones.
+			<< "    gl_MeshVerticesNV[0].gl_CullDistance[1] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[1].gl_CullDistance[1] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[2].gl_CullDistance[1] =  1.0;\n"
+			<< "    gl_MeshVerticesNV[3].gl_CullDistance[1] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[4].gl_CullDistance[1] = -1.0;\n"
+			<< "    gl_MeshVerticesNV[5].gl_CullDistance[1] = -1.0;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Fragment shader chooses color based on the second cull distance: blue when non-negative, white otherwise.
+	{
+		std::ostringstream frag;
+		frag
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) out vec4 outColor;\n"
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    outColor = ((gl_CullDistance[1] >= 0.0) ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(1.0, 1.0, 1.0, 1.0));\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+TestInstance* CullDistanceCase::createInstance (Context& context) const // Single-layer draw expecting blue and white top quadrants and black bottom quadrants.
+	const IterationParams iterationParams =
+	{
+		getDefaultExtent(),			//	VkExtent2D		colorExtent;
+		1u,							//	uint32_t		numLayers;
+		getDefaultDrawCommands(),	//	DrawCommandVec	drawArgs;
+		false,						//	bool			indirect;
+		{},							//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	// Must match the shader: top half drawn (blue on the left, white on the right), bottom half culled.
+	const tcu::Vec4 black	(0.0f, 0.0f, 0.0f, 1.0f);
+	const tcu::Vec4 blue	(0.0f, 0.0f, 1.0f, 1.0f);
+	const tcu::Vec4 white	(1.0f, 1.0f, 1.0f, 1.0f);
+	return new QuadrantsInstance(context, iterationParams, blue, white, black, black);
+void CullDistanceCase::checkSupport (Context& context) const // Base class checks plus the shader cull distance core feature.
+	MeshShaderBuiltinCase::checkSupport(context);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CULL_DISTANCE);
+// Generates GLSL statements to draw a triangle around the given pixel number, knowing the framebuffer width (len). The pixel, len and baseIndex arguments are GLSL expressions pasted verbatim into the generated code.
+// Supposes the height of the framebuffer is 1. The statements declare local constants, so they can only be pasted once per GLSL scope.
+std::string triangleForPixel(const std::string& pixel, const std::string& len, const std::string& baseIndex)
+	std::ostringstream statements;
+	statements
+		<< "    const float imgWidth = float(" << len << ");\n"
+		<< "    const float pixWidth = (2.0 / imgWidth);\n"
+		<< "    const float halfPix  = (pixWidth / 2.0);\n"
+		<< "    const float xCenter  = (((float(" << pixel << ") + 0.5) / imgWidth) * 2.0 - 1.0);\n" // Pixel center mapped to the [-1, 1] range.
+		<< "    const float xLeft    = (xCenter - halfPix);\n"
+		<< "    const float xRight   = (xCenter + halfPix);\n"
+		<< "    const uvec3 indices  = uvec3(" << baseIndex << " + 0, " << baseIndex << " + 1, " << baseIndex << " + 2);\n" // Three consecutive indices starting at baseIndex.
+		<< "\n"
+		<< "    gl_PrimitiveIndicesNV[indices.x] = indices.x;\n" // Index slot N refers to vertex N, so the same numbers address both arrays.
+		<< "    gl_PrimitiveIndicesNV[indices.y] = indices.y;\n"
+		<< "    gl_PrimitiveIndicesNV[indices.z] = indices.z;\n"
+		<< "\n"
+		<< "    gl_MeshVerticesNV[indices.x].gl_Position = vec4(xLeft,    0.5, 0.0, 1.0);\n"
+		<< "    gl_MeshVerticesNV[indices.y].gl_Position = vec4(xRight,   0.5, 0.0, 1.0);\n"
+		<< "    gl_MeshVerticesNV[indices.z].gl_Position = vec4(xCenter, -0.5, 0.0, 1.0);\n"
+		;
+	return statements.str();
+// WorkGroupID builtin case: gl_WorkGroupID.x is read in the mesh shader or, when taskNeeded is true, in the task shader.
+class WorkGroupIdCase : public MeshShaderBuiltinCase
+					WorkGroupIdCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool taskNeeded)
+						: MeshShaderBuiltinCase (testCtx, name, description, taskNeeded)
+						, m_extent				(getLinearExtent())
+						{}
+	virtual			~WorkGroupIdCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	const VkExtent2D m_extent; // Framebuffer extent, taken from getLinearExtent().
+// Adds the mesh, optional task and fragment shaders for the WorkGroupID test.
+// When a task shader is used, it reads gl_WorkGroupID.x and forwards it to the mesh shader as td.id,
+// together with the framebuffer width as td.size. Otherwise the mesh shader reads gl_WorkGroupID.x itself.
+void WorkGroupIdCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Task data block; prefixed with "in" or "out" depending on the stage that declares it.
+	const std::string taskDataDecl =
+		"taskNV TaskData {\n"
+		"    uint id;\n"
+		"    uint size;\n"
+		"} td;\n"
+		;
+	// Mesh shader: each work group fills one pixel.
+	{
+		// GLSL expressions for the pixel number and the framebuffer width.
+		const std::string pixel = (m_taskNeeded ? "td.id"   : "gl_WorkGroupID.x"   );
+		const std::string len   = (m_taskNeeded ? "td.size" : de::toString(m_extent.width) );
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< (m_taskNeeded ? ("in " + taskDataDecl) : "")
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< "\n"
+			<< triangleForPixel(pixel, len, "0")
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Task shader: launches one mesh work group and passes the task work group id through the task data.
+	if (m_taskNeeded)
+	{
+		std::ostringstream task;
+		task
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "\n"
+			<< "out " << taskDataDecl
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = 1u;\n"
+			<< "    td.id          = gl_WorkGroupID.x;\n"
+			<< "    td.size        = " << m_extent.width << ";\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Basic fragment shader.
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* WorkGroupIdCase::createInstance (Context& context) const // Creates a FullScreenColorInstance that draws m_extent.width work groups.
+	// Must match the shader.
+	const ColorVec			expectedColors	(1u, tcu::Vec4(0.0, 0.0, 1.0, 1.0)); // A single expected color: blue.
+	const IterationParams	iterationParams	=
+	{
+		m_extent,								//	VkExtent2D		colorExtent;
+		1u,										//	uint32_t		numLayers;
+		getDefaultDrawCommands(m_extent.width),	//	DrawCommandVec	drawArgs;
+		false,									//	bool			indirect;
+		{},										//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+// Variable to use in LocalInvocationCase: ID selects gl_LocalInvocationID.x and INDEX selects gl_LocalInvocationIndex.
+enum class LocalInvocation { ID=0, INDEX };
+// LocalInvocationId and LocalInvocationIndex builtin cases. These are also used to test WorkGroupSize.
+class LocalInvocationCase : public MeshShaderBuiltinCase
+					LocalInvocationCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool taskNeeded, LocalInvocation variable)
+						: MeshShaderBuiltinCase (testCtx, name, description, taskNeeded)
+						, m_extent				(getLinearExtent())
+						, m_variable			(variable)
+						{}
+	virtual			~LocalInvocationCase	(void) {}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance			(Context& context) const override;
+	const VkExtent2D      m_extent; // Framebuffer extent, taken from getLinearExtent().
+	const LocalInvocation m_variable; // Built-in variable exercised by this case.
+void LocalInvocationCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the mesh, optional task and fragment shaders for the local invocation tests.
+	// Built-in used as the invocation index, selected by m_variable.
+	const std::string localIndex = ((m_variable == LocalInvocation::ID) ? "gl_LocalInvocationID.x" : "gl_LocalInvocationIndex");
+	// Task data block; prefixed with "in" or "out" depending on the stage that declares it.
+	std::ostringstream taskDataDecl;
+	taskDataDecl
+		<< "taskNV TaskData {\n"
+		// indexNumber[x] == x (each task shader invocation stores its own index).
+		<< "    uint indexNumber[" << m_extent.width << "];\n"
+		<< "    uint size;\n"
+		<< "} td;\n"
+		;
+	const auto taskDataDeclStr = taskDataDecl.str();
+	// Mesh shader: one pixel per work group when a task shader is used; one pixel per invocation of a single work group otherwise.
+	{
+		const std::string pixel     = (m_taskNeeded ? "td.indexNumber[gl_WorkGroupID.x]" : localIndex);
+		const std::string len       = (m_taskNeeded ? "td.size" : "gl_WorkGroupSize.x"); // The width comes from gl_WorkGroupSize.x in either stage.
+		const auto        localSize = (m_taskNeeded ? 1u : m_extent.width);
+		const auto        maxVert   = localSize * 3u; // Three vertices per triangle, vertices are not shared.
+		const std::string baseIndex = (m_taskNeeded ? "0" : "(" + localIndex + " * 3u)");
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << localSize << ") in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=" << maxVert << ", max_primitives=" << localSize << ") out;\n"
+			<< "\n"
+			<< (m_taskNeeded ? ("in " + taskDataDeclStr) : "")
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = " << localSize << ";\n"
+			<< "\n"
+			<< triangleForPixel(pixel, len, baseIndex)
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	if (m_taskNeeded) // Task shader: one work group of m_extent.width invocations that launches m_extent.width mesh work groups.
+	{
+		std::ostringstream task;
+		task
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << m_extent.width << ") in;\n"
+			<< "\n"
+			<< "out " << taskDataDeclStr
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = " << m_extent.width << ";\n"
+			<< "    td.indexNumber[" << localIndex << "] = " << localIndex << ";\n"
+			<< "    td.size = gl_WorkGroupSize.x;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Basic fragment shader.
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* LocalInvocationCase::createInstance (Context& context) const // Creates a FullScreenColorInstance using the default draw commands.
+	// Must match the shader.
+	const ColorVec			expectedColors	(1u, tcu::Vec4(0.0, 0.0, 1.0, 1.0)); // A single expected color: blue.
+	const IterationParams	iterationParams	=
+	{
+		m_extent,								//	VkExtent2D		colorExtent;
+		1u,										//	uint32_t		numLayers;
+		getDefaultDrawCommands(),				//	DrawCommandVec	drawArgs;
+		false,									//	bool			indirect;
+		{},										//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+// GlobalInvocationId builtin case: gl_GlobalInvocationID.x is read in the mesh shader or, when taskNeeded is true, in the task shader.
+class GlobalInvocationIdCase : public MeshShaderBuiltinCase
+					GlobalInvocationIdCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool taskNeeded)
+						: MeshShaderBuiltinCase (testCtx, name, description, taskNeeded)
+						, m_jobSize				(getLargeJobSize())
+						, m_extent				{m_jobSize.numTasks * m_jobSize.localSize, 1u}
+						{}
+	virtual			~GlobalInvocationIdCase		(void) {}
+	void			initPrograms				(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance				(Context& context) const override;
+	const JobSize    m_jobSize; // Declared before m_extent because m_extent is initialized from it.
+	const VkExtent2D m_extent; // One pixel per invocation: numTasks * localSize wide and 1 pixel high.
+void GlobalInvocationIdCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the mesh, optional task and fragment shaders for the global invocation id test.
+	const auto& localSize = m_jobSize.localSize;
+	// Task data block; prefixed with "in" or "out" depending on the stage that declares it.
+	std::ostringstream taskDataDecl;
+	taskDataDecl
+		<< "taskNV TaskData {\n"
+		<< "    uint pixelId[" << localSize << "];\n"
+		<< "    uint size;\n"
+		<< "} td;\n"
+		;
+	const auto taskDataDeclStr = taskDataDecl.str();
+	// Mesh shader: each invocation fills one pixel.
+	{
+		const std::string pixel     = (m_taskNeeded ? "td.pixelId[gl_LocalInvocationIndex]" : "gl_GlobalInvocationID.x");
+		const std::string len       = (m_taskNeeded ? "td.size" : de::toString(m_extent.width));
+		const std::string baseIndex = "(gl_LocalInvocationIndex * 3u)";
+		const auto        maxVert   = localSize * 3u; // Three vertices per triangle, vertices are not shared.
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << localSize << ") in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=" << maxVert << ", max_primitives=" << localSize << ") out;\n"
+			<< "\n"
+			<< (m_taskNeeded ? ("in " + taskDataDeclStr) : "")
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = " << localSize << ";\n"
+			<< "\n"
+			<< triangleForPixel(pixel, len, baseIndex)
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	if (m_taskNeeded) // Task shader: each invocation stores its global id; each task work group launches one mesh work group.
+	{
+		std::ostringstream task;
+		task
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << localSize << ") in;\n"
+			<< "\n"
+			<< "out " << taskDataDeclStr
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = 1;\n"
+			<< "    td.pixelId[gl_LocalInvocationIndex] = gl_GlobalInvocationID.x;\n"
+			<< "    td.size = " << m_extent.width << ";\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Basic fragment shader.
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* GlobalInvocationIdCase::createInstance (Context& context) const // Creates a FullScreenColorInstance that draws m_jobSize.numTasks work groups.
+	// Must match the shader.
+	const ColorVec			expectedColors	(1u, tcu::Vec4(0.0, 0.0, 1.0, 1.0)); // A single expected color: blue.
+	const IterationParams	iterationParams	=
+	{
+		m_extent,									//	VkExtent2D		colorExtent;
+		1u,											//	uint32_t		numLayers;
+		getDefaultDrawCommands(m_jobSize.numTasks),	//	DrawCommandVec	drawArgs;
+		false,										//	bool			indirect;
+		{},											//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+// DrawIndex builtin case: gl_DrawID is read in the mesh shader or, when taskNeeded is true, in the task shader.
+class DrawIndexCase : public MeshShaderBuiltinCase
+					DrawIndexCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, bool taskNeeded)
+						: MeshShaderBuiltinCase (testCtx, name, description, taskNeeded)
+						, m_extent				(getLinearExtent())
+						{}
+	virtual			~DrawIndexCase	(void) {}
+	void			initPrograms		(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance		(Context& context) const override;
+	const VkExtent2D m_extent; // Framebuffer extent, taken from getLinearExtent().
+// Adds the mesh, optional task and fragment shaders for the DrawIndex test.
+// When a task shader is used, it reads gl_DrawID and forwards it to the mesh shader as td.id,
+// together with the framebuffer width as td.size. Otherwise the mesh shader reads gl_DrawID itself.
+void DrawIndexCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Task data block; prefixed with "in" or "out" depending on the stage that declares it.
+	const std::string taskDataDecl =
+		"taskNV TaskData {\n"
+		"    uint id;\n"
+		"    uint size;\n"
+		"} td;\n"
+		;
+	// GLSL expression for the draw index, converted to uint to match the task data member.
+	const auto drawIndex = "uint(gl_DrawID)";
+	// Mesh shader: each work group fills one pixel.
+	{
+		// GLSL expressions for the pixel number and the framebuffer width.
+		const std::string pixel = (m_taskNeeded ? "td.id"   : drawIndex);
+		const std::string len   = (m_taskNeeded ? "td.size" : de::toString(m_extent.width));
+		std::ostringstream mesh;
+		mesh
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "layout (triangles) out;\n"
+			<< "layout (max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< (m_taskNeeded ? ("in " + taskDataDecl) : "")
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< "\n"
+			<< triangleForPixel(pixel, len, "0")
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Task shader: launches one mesh work group and passes the draw index through the task data.
+	if (m_taskNeeded)
+	{
+		std::ostringstream task;
+		task
+			<< "#version 460\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "\n"
+			<< "out " << taskDataDecl
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = 1u;\n"
+			<< "    td.id          = " << drawIndex << ";\n"
+			<< "    td.size        = " << m_extent.width << ";\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Basic fragment shader.
+	{
+		const auto frag = getBasicFragShader();
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+	}
+TestInstance* DrawIndexCase::createInstance (Context& context) const // Creates a FullScreenColorInstance that issues m_extent.width indirect draws.
+	// Must match the shader.
+	const ColorVec			expectedColors	(1u, tcu::Vec4(0.0, 0.0, 1.0, 1.0)); // A single expected color: blue.
+	const DrawCommandVec	commands		(m_extent.width, makeDrawMeshTasksIndirectCommandNV(1u, 0u)); // m_extent.width identical commands, built with arguments (1u, 0u).
+	const IterationParams	iterationParams	=
+	{
+		m_extent,	//	VkExtent2D		colorExtent;
+		1u,			//	uint32_t		numLayers;
+		commands,	//	DrawCommandVec	drawArgs;
+		true,		//	bool			indirect;
+		{},			//	ViewportVec		viewports;	// If empty, a single default viewport is used.
+	};
+	return new FullScreenColorInstance(context, iterationParams, expectedColors);
+} // anonymous
+tcu::TestCaseGroup* createMeshShaderBuiltinTests (tcu::TestContext& testCtx) // Builds the "builtin" group with one child per built-in variable and variant.
+	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "builtin", "Mesh Shader Builtin Tests"));
+	mainGroup->addChild(new PositionCase				(testCtx, "position", ""));
+	mainGroup->addChild(new PointSizeCase				(testCtx, "point_size", ""));
+	mainGroup->addChild(new ClipDistanceCase			(testCtx, "clip_distance", ""));
+	mainGroup->addChild(new CullDistanceCase			(testCtx, "cull_distance", ""));
+	mainGroup->addChild(new PrimitiveIdCase				(testCtx, "primitive_id_glsl", "", true/*glslFrag*/));
+	mainGroup->addChild(new PrimitiveIdCase				(testCtx, "primitive_id_spirv", "", false/*glslFrag*/));
+	mainGroup->addChild(new LayerCase					(testCtx, "layer", "", false/*shareVertices*/));
+	mainGroup->addChild(new LayerCase					(testCtx, "layer_shared", "", true/*shareVertices*/));
+	mainGroup->addChild(new ViewportIndexCase			(testCtx, "viewport_index", "", false/*shareVertices*/));
+	mainGroup->addChild(new ViewportIndexCase			(testCtx, "viewport_index_shared", "", true/*shareVertices*/));
+	mainGroup->addChild(new WorkGroupIdCase				(testCtx, "work_group_id_in_mesh", "", false/*taskNeeded*/));
+	mainGroup->addChild(new WorkGroupIdCase				(testCtx, "work_group_id_in_task", "", true/*taskNeeded*/));
+	mainGroup->addChild(new LocalInvocationCase			(testCtx, "local_invocation_id_in_mesh", "", false/*taskNeeded*/, LocalInvocation::ID));
+	mainGroup->addChild(new LocalInvocationCase			(testCtx, "local_invocation_id_in_task", "", true/*taskNeeded*/, LocalInvocation::ID));
+	mainGroup->addChild(new LocalInvocationCase			(testCtx, "local_invocation_index_in_task", "", true/*taskNeeded*/, LocalInvocation::INDEX));
+	mainGroup->addChild(new LocalInvocationCase			(testCtx, "local_invocation_index_in_mesh", "", false/*taskNeeded*/, LocalInvocation::INDEX));
+	mainGroup->addChild(new GlobalInvocationIdCase		(testCtx, "global_invocation_id_in_mesh", "", false/*taskNeeded*/));
+	mainGroup->addChild(new GlobalInvocationIdCase		(testCtx, "global_invocation_id_in_task", "", true/*taskNeeded*/));
+	mainGroup->addChild(new DrawIndexCase				(testCtx, "draw_index_in_mesh", "", false/*taskNeeded*/));
+	mainGroup->addChild(new DrawIndexCase				(testCtx, "draw_index_in_task", "", true/*taskNeeded*/));
+	return mainGroup.release(); // Ownership of the group passes to the caller.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.hpp
new file mode 100644
index 0000000..f389c6f
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderBuiltinTests.hpp
@@ -0,0 +1,38 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Builtin Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+namespace MeshShader
+tcu::TestCaseGroup* createMeshShaderBuiltinTests (tcu::TestContext& testCtx); // Creates the "builtin" mesh shader test group; the returned group is released to the caller.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.cpp
new file mode 100644
index 0000000..05efdf1
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.cpp
@@ -0,0 +1,2788 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Misc Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderMiscTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkImageWithMemory.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkObjUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "vkBarrierUtil.hpp"
+#include "tcuImageCompare.hpp"
+#include "tcuTexture.hpp"
+#include "tcuTextureUtil.hpp"
+#include "tcuMaybe.hpp"
+#include "tcuStringTemplate.hpp"
+#include "tcuTestLog.hpp"
+#include <memory>
+#include <utility>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <map>
+namespace vkt
+namespace MeshShader
+using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
+using namespace vk;
+// Output images and reference levels will use this format.
+VkFormat getOutputFormat ()
+	return VK_FORMAT_R8G8B8A8_UNORM;
+// Per-channel comparison threshold that's reasonable for the format returned by getOutputFormat().
+float getCompareThreshold ()
+	return 0.005f; // 1/256 < 0.005 < 2/256
+// Check mesh shader support. Throws NotSupportedError when a requirement is not met.
+// requireTaskShader: also require the taskShader feature.
+// requireVertexStores: also require the vertexPipelineStoresAndAtomics feature.
+void genericCheckSupport (Context& context, bool requireTaskShader, bool requireVertexStores)
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	const auto& meshFeatures = context.getMeshShaderFeatures();
+	if (!meshFeatures.meshShader)
+		TCU_THROW(NotSupportedError, "Mesh shader not supported");
+	if (requireTaskShader && !meshFeatures.taskShader)
+		TCU_THROW(NotSupportedError, "Task shader not supported");
+	if (requireVertexStores)
+	{
+		const auto& features = context.getDeviceFeatures();
+		if (!features.vertexPipelineStoresAndAtomics)
+			TCU_THROW(NotSupportedError, "Vertex pipeline stores and atomics not supported");
+	}
+struct MiscTestParams // Common parameters for the miscellaneous mesh shader tests.
+	tcu::Maybe<uint32_t>	taskCount; // Task count for the draw call; when it holds a value, a task shader is needed.
+	uint32_t				meshCount; // Task count for the draw call when no task shader is used.
+	uint32_t				width; // Width of the output image.
+	uint32_t				height; // Height of the output image.
+	// Makes the class polymorphic and allows the right destructor to be used for subclasses.
+	virtual ~MiscTestParams () {}
+	bool needsTaskShader () const // True when taskCount holds a value.
+	{
+		return static_cast<bool>(taskCount);
+	}
+	uint32_t drawCount () const // Count passed to the draw call: taskCount if present, meshCount otherwise.
+	{
+		if (needsTaskShader())
+			return taskCount.get();
+		return meshCount;
+	}
+using ParamsPtr = std::unique_ptr<MiscTestParams>;
+class MeshShaderMiscCase : public vkt::TestCase // Base test case for the misc tests; owns the test parameters.
+					MeshShaderMiscCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params);
+	virtual			~MeshShaderMiscCase		(void) {}
+	void			checkSupport			(Context& context) const override;
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	std::unique_ptr<MiscTestParams> m_params; // Test parameters, owned by the case.
+MeshShaderMiscCase::MeshShaderMiscCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params) // Stores the parameters for later use.
+	: vkt::TestCase	(testCtx, name, description)
+	, m_params		(params.release()) // Ownership moves from the argument to the member.
+void MeshShaderMiscCase::checkSupport (Context& context) const // Requires mesh shaders, plus task shaders when the parameters need one.
+	genericCheckSupport(context, m_params->needsTaskShader(), /*requireVertexStores*/false);
+// Adds the generic fragment shader under the name "frag". To be called by subclasses.
+void MeshShaderMiscCase::initPrograms (vk::SourceCollections& programCollection) const
+	std::string frag =
+		"#version 450\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (location=0) in perprimitiveNV vec4 primitiveColor;\n" // Per-primitive color input at location 0.
+		"layout (location=0) out vec4 outColor;\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    outColor = primitiveColor;\n" // The input color is written out unchanged.
+		"}\n"
+		;
+	programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
+class MeshShaderMiscInstance : public vkt::TestInstance // Base instance: renders in iterate() and compares against a subclass-provided reference.
+					MeshShaderMiscInstance	(Context& context, const MiscTestParams* params)
+						: vkt::TestInstance	(context)
+						, m_params			(params)
+						, m_referenceLevel	()
+					{
+					}
+	void			generateSolidRefLevel	(const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output);
+	virtual void	generateReferenceLevel	() = 0; // Called by iterate() before the result is verified.
+	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const;
+	virtual bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const;
+	tcu::TestStatus	iterate					() override;
+	const MiscTestParams*				m_params; // Not owned by the instance.
+	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel; // Starts empty.
+void MeshShaderMiscInstance::generateSolidRefLevel (const tcu::Vec4& color, std::unique_ptr<tcu::TextureLevel>& output) // Replaces output with a width x height level filled with color.
+	const auto format		= getOutputFormat();
+	const auto tcuFormat	= mapVkFormat(format);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto iHeight		= static_cast<int>(m_params->height);
+	output.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight)); // Any level previously held by output is destroyed.
+	const auto access		= output->getAccess();
+	// Fill with solid color.
+	tcu::clear(access, color);
+bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const // Compares the result against m_referenceLevel, which must have been generated already.
+	return verifyResult(resultAccess, *m_referenceLevel);
+// Compares the result image with the given reference level using a per-channel float threshold.
+// Returns true when the images match; differences are logged on error.
+// In debug builds, asserts that both images have the same size and the expected output format.
+bool MeshShaderMiscInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess, const tcu::TextureLevel& referenceLevel) const
+	const auto referenceAccess = referenceLevel.getAccess();
+	const auto refWidth		= referenceAccess.getWidth();
+	const auto refHeight	= referenceAccess.getHeight();
+	const auto refDepth		= referenceAccess.getDepth();
+	const auto resWidth		= resultAccess.getWidth();
+	const auto resHeight	= resultAccess.getHeight();
+	const auto resDepth		= resultAccess.getDepth();
+	// Every dimension must match, not just one of them.
+	DE_ASSERT(resWidth == refWidth && resHeight == refHeight && resDepth == refDepth);
+	// For release builds.
+	DE_UNREF(refWidth);
+	DE_UNREF(refHeight);
+	DE_UNREF(refDepth);
+	DE_UNREF(resWidth);
+	DE_UNREF(resHeight);
+	DE_UNREF(resDepth);
+	const auto outputFormat		= getOutputFormat();
+	const auto expectedFormat	= mapVkFormat(outputFormat);
+	const auto resFormat		= resultAccess.getFormat();
+	const auto refFormat		= referenceAccess.getFormat();
+	DE_ASSERT(resFormat == expectedFormat && refFormat == expectedFormat);
+	// For release builds
+	DE_UNREF(expectedFormat);
+	DE_UNREF(resFormat);
+	DE_UNREF(refFormat);
+	auto&			log				= m_context.getTestContext().getLog();
+	const auto		threshold		= getCompareThreshold();
+	const tcu::Vec4	thresholdVec	(threshold, threshold, threshold, threshold);
+	return tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, thresholdVec, tcu::COMPARE_LOG_ON_ERROR);
+// Renders one mesh shader draw call into a color image, copies the image to a host-visible buffer
+// and compares it with the reference level generated by the subclass.
+// Returns a passing status on success and fails through TCU_FAIL when the images differ.
+tcu::TestStatus MeshShaderMiscInstance::iterate ()
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto		imageFormat	= getOutputFormat();
+	const auto		tcuFormat	= mapVkFormat(imageFormat);
+	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
+	// The image is rendered to as a color attachment and later read as the source of the copy.
+	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+	const VkImageCreateInfo colorBufferInfo =
+	{
+		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
+		nullptr,								//	const void*				pNext;
+		0u,										//	VkImageCreateFlags		flags;
+		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+		imageFormat,							//	VkFormat				format;
+		imageExtent,							//	VkExtent3D				extent;
+		1u,										//	uint32_t				mipLevels;
+		1u,										//	uint32_t				arrayLayers;
+		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+		imageUsage,								//	VkImageUsageFlags		usage;
+		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+		0u,										//	uint32_t				queueFamilyIndexCount;
+		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+	};
+	// Create color image and view.
+	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
+	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
+	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
+	// Create a memory buffer for verification.
+	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
+	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
+	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
+	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
+	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
+	// Pipeline layout.
+	const auto pipelineLayout = makePipelineLayout(vkd, device);
+	// Shader modules. The task shader is optional and only created when the binary exists.
+	const auto&	binaries	= m_context.getBinaryCollection();
+	const auto	hasTask		= binaries.contains("task");
+	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
+	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
+	Move<VkShaderModule> taskShader;
+	if (hasTask)
+		taskShader = createShaderModule(vkd, device, binaries.get("task"));
+	// Render pass.
+	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
+	// Framebuffer.
+	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
+	// Viewport and scissor.
+	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
+	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskShader.get(), meshShader.get(), fragShader.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	beginCommandBuffer(vkd, cmdBuffer);
+	// Run pipeline. The render area is the single full-image scissor rectangle.
+	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// Copy color buffer to verification buffer.
+	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
+	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
+	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
+	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
+	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
+	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
+	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
+	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Generate reference image and compare results.
+	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
+	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
+	generateReferenceLevel();
+	// Invalidate the allocation before the host reads the copied pixels.
+	invalidateAlloc(vkd, device, verificationBufferAlloc);
+	if (!verifyResult(verificationAccess))
+		TCU_FAIL("Result does not match reference; check log for details");
+	return tcu::TestStatus::pass("Pass");
+// Verify passing more complex data (nested structures, arrays and vectors) between the task and mesh shaders.
+class ComplexTaskDataCase : public MeshShaderMiscCase
+					ComplexTaskDataCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance			(Context& context) const override;
+class ComplexTaskDataInstance : public MeshShaderMiscInstance // Instance for ComplexTaskDataCase; only provides the reference image.
+	ComplexTaskDataInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	void	generateReferenceLevel	() override;
+void ComplexTaskDataInstance::generateReferenceLevel () // Fills m_referenceLevel with a four-quadrant image.
+	const auto format		= getOutputFormat();
+	const auto tcuFormat	= mapVkFormat(format);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto iHeight		= static_cast<int>(m_params->height);
+	const auto halfWidth	= iWidth / 2;
+	const auto halfHeight	= iHeight / 2;
+	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
+	const auto access		= m_referenceLevel->getAccess();
+	// Each image quadrant gets a different color; blue and alpha are always 1.0.
+	for (int y = 0; y < iHeight; ++y)
+	for (int x = 0; x < iWidth; ++x)
+	{
+		const float	red			= ((y < halfHeight) ? 0.0f : 1.0f); // Red is set for rows at or past halfHeight.
+		const float	green		= ((x < halfWidth)  ? 0.0f : 1.0f); // Green is set for columns at or past halfWidth.
+		const auto	refColor	= tcu::Vec4(red, green, 1.0f, 1.0f);
+		access.setPixel(refColor, x, y);
+	}
+void ComplexTaskDataCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the shaders for the complex task data test:
+	//  * the generic fragment shader (through the base class),
+	//  * a task shader that fills a nested taskNV block and launches two mesh work groups,
+	//  * a mesh shader that validates every field of that block and, if everything matches,
+	//    draws a quad colored with (rowId, columnId, 1, 1).
+	//
+	// Add the generic fragment shader.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// Shared declaration of the task data block. ${INOUT} is replaced with "out" in the task
+	// shader and with "in" in the mesh shader so both stages agree on the layout.
+	const std::string taskDataDeclTemplate =
+		"struct RowId {\n"
+		"    uint id;\n"
+		"};\n"
+		"\n"
+		"struct WorkGroupData {\n"
+		"    float WorkGroupIdPlusOnex1000Iota[10];\n"
+		"    RowId rowId;\n"
+		"    uvec3 WorkGroupIdPlusOnex2000Iota;\n"
+		"    vec2  WorkGroupIdPlusOnex3000Iota;\n"
+		"};\n"
+		"\n"
+		"struct ExternalData {\n"
+		"    float OneMillion;\n"
+		"    uint  TwoMillion;\n"
+		"    WorkGroupData workGroupData;\n"
+		"};\n"
+		"\n"
+		"${INOUT} taskNV TaskData {\n"
+		"    uint yes;\n"
+		"    ExternalData externalData;\n"
+		"} td;\n"
+		;
+	const tcu::StringTemplate taskDataDecl(taskDataDeclTemplate);
+	{
+		// Task shader: writes every member of the task data block, including the row id.
+		std::map<std::string, std::string> taskMap;
+		taskMap["INOUT"] = "out";
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=1) in;\n"
+			<< "\n"
+			<< taskDataDecl.specialize(taskMap)
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = 2u;\n"
+			<< "    td.yes = 1u;\n"
+			<< "    td.externalData.OneMillion = 1000000.0;\n"
+			<< "    td.externalData.TwoMillion = 2000000u;\n"
+			<< "    for (uint i = 0; i < 10; i++) {\n"
+			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] = float((gl_WorkGroupID.x + 1u) * 1000 + i);\n"
+			<< "    }\n"
+			<< "    {\n"
+			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 2000;\n"
+			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
+			<< "    }\n"
+			<< "    {\n"
+			<< "        uint baseVal = (gl_WorkGroupID.x + 1u) * 3000;\n"
+			<< "        td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota = vec2(baseVal, baseVal + 1);\n"
+			<< "    }\n"
+			// The row id is the task work group index; the mesh shader derives every expected
+			// value from it, so it must be stored in the nested RowId member.
+			<< "    td.externalData.workGroupData.rowId.id = gl_WorkGroupID.x;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	{
+		// Mesh shader: checks the received task data and emits one quad (two triangles).
+		std::map<std::string, std::string> meshMap;
+		meshMap["INOUT"] = "in";
+		std::ostringstream mesh;
+		mesh
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout(local_size_x=2) in;\n"
+			<< "layout(triangles) out;\n"
+			<< "layout(max_vertices=4, max_primitives=2) out;\n"
+			<< "\n"
+			<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+			<< "\n"
+			<< taskDataDecl.specialize(meshMap)
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    bool dataOK = true;\n"
+			<< "    dataOK = (dataOK && (td.yes == 1u));\n"
+			<< "    dataOK = (dataOK && (td.externalData.OneMillion == 1000000.0 && td.externalData.TwoMillion == 2000000u));\n"
+			// Read back the row id written by the task shader.
+			<< "    uint rowId = td.externalData.workGroupData.rowId.id;\n"
+			<< "    dataOK = (dataOK && (rowId == 0u || rowId == 1u));\n"
+			<< "\n"
+			<< "    {\n"
+			<< "        uint baseVal = (rowId + 1u) * 1000u;\n"
+			<< "        for (uint i = 0; i < 10; i++) {\n"
+			<< "            if (td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] != float(baseVal + i)) {\n"
+			<< "                dataOK = false;\n"
+			<< "                break;\n"
+			<< "            }\n"
+			<< "        }\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    {\n"
+			<< "        uint baseVal = (rowId + 1u) * 2000;\n"
+			<< "        uvec3 expected = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
+			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota != expected) {\n"
+			<< "            dataOK = false;\n"
+			<< "        }\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    {\n"
+			<< "        uint baseVal = (rowId + 1u) * 3000;\n"
+			<< "        vec2 expected = vec2(baseVal, baseVal + 1);\n"
+			<< "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota != expected) {\n"
+			<< "            dataOK = false;\n"
+			<< "        }\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    uint columnId = gl_WorkGroupID.x;\n"
+			<< "\n"
+			<< "    if (dataOK) {\n"
+			<< "        gl_PrimitiveCountNV = 2u;\n"
+			<< "    }\n"
+			<< "    else {\n"
+			<< "        gl_PrimitiveCountNV = 0u;\n"
+			<< "        return;\n"
+			<< "    }\n"
+			<< "\n"
+			<< "    const vec4 outColor = vec4(rowId, columnId, 1.0f, 1.0f);\n"
+			<< "    triangleColor[0] = outColor;\n"
+			<< "    triangleColor[1] = outColor;\n"
+			<< "\n"
+			<< "    // Each local invocation will generate two points and one triangle from the quad.\n"
+			<< "    // The first local invocation will generate the top quad vertices.\n"
+			<< "    // The second invocation will generate the two bottom vertices.\n"
+			<< "    vec4 left  = vec4(0.0, 0.0, 0.0, 1.0);\n"
+			<< "    vec4 right = vec4(1.0, 0.0, 0.0, 1.0);\n"
+			<< "\n"
+			<< "    float localInvocationOffsetY = float(gl_LocalInvocationID.x);\n"
+			<< "    left.y  += localInvocationOffsetY;\n"
+			<< "    right.y += localInvocationOffsetY;\n"
+			<< "\n"
+			<< "    // The code above creates a quad from (0, 0) to (1, 1) but we need to offset it\n"
+			<< "    // in X and/or Y depending on the row and column, to place it in other quadrants.\n"
+			<< "    float quadrantOffsetX = float(int(columnId) - 1);\n"
+			<< "    float quadrantOffsetY = float(int(rowId) - 1);\n"
+			<< "\n"
+			<< "    left.x  += quadrantOffsetX;\n"
+			<< "    right.x += quadrantOffsetX;\n"
+			<< "\n"
+			<< "    left.y  += quadrantOffsetY;\n"
+			<< "    right.y += quadrantOffsetY;\n"
+			<< "\n"
+			<< "    uint baseVertexId = 2*gl_LocalInvocationID.x;\n"
+			<< "    gl_MeshVerticesNV[baseVertexId + 0].gl_Position = left;\n"
+			<< "    gl_MeshVerticesNV[baseVertexId + 1].gl_Position = right;\n"
+			<< "\n"
+			<< "    uint baseIndexId = 3*gl_LocalInvocationID.x;\n"
+			<< "    // 0,1,2 or 1,2,3 (note: triangles alternate front face this way)\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndexId + 0] = 0 + gl_LocalInvocationID.x;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndexId + 1] = 1 + gl_LocalInvocationID.x;\n"
+			<< "    gl_PrimitiveIndicesNV[baseIndexId + 2] = 2 + gl_LocalInvocationID.x;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+TestInstance* ComplexTaskDataCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new ComplexTaskDataInstance(context, caseParams);
+// Verify drawing a single point.
+class SinglePointCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					SinglePointCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits the single point.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new SinglePointInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for SinglePointCase; it only customizes the reference image.
+class SinglePointInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	SinglePointInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: cleared background with one colored pixel.
+	void	generateReferenceLevel	() override;
+TestInstance* SinglePointCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new SinglePointInstance(context, caseParams);
+void SinglePointCase::initPrograms (vk::SourceCollections& programCollection) const
+	// This test is mesh-only: the parameters must not ask for a task shader.
+	DE_ASSERT(!m_params->needsTaskShader());
+	// Let the base class register its programs first.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// A single invocation emits one point of size 1.0 at the clip-space origin,
+	// colored (0, 1, 1, 1) through a per-primitive output.
+	const std::string meshGlsl =
+		"#version 450\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout(local_size_x=1) in;\n"
+		"layout(points) out;\n"
+		"layout(max_vertices=256, max_primitives=256) out;\n"
+		"\n"
+		"layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    gl_PrimitiveCountNV = 1u;\n"
+		"    pointColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
+		"    gl_MeshVerticesNV[0].gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n"
+		"    gl_MeshVerticesNV[0].gl_PointSize = 1.0f;\n"
+		"    gl_PrimitiveIndicesNV[0] = 0;\n"
+		"}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshGlsl);
+void SinglePointInstance::generateReferenceLevel ()
+	// Start from a fully transparent black image and mark the pixel at (width/2, height/2).
+	const tcu::Vec4	backgroundColor	(0.0f, 0.0f, 0.0f, 0.0f);
+	const tcu::Vec4	pointColor		(0.0f, 1.0f, 1.0f, 1.0f);
+	generateSolidRefLevel(backgroundColor, m_referenceLevel);
+	const int		centerX			= static_cast<int>(m_params->width / 2u);
+	const int		centerY			= static_cast<int>(m_params->height / 2u);
+	m_referenceLevel->getAccess().setPixel(pointColor, centerX, centerY);
+// Verify drawing a single line.
+class SingleLineCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					SingleLineCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits the single line.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new SingleLineInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for SingleLineCase; it only customizes the reference image.
+class SingleLineInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	SingleLineInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: cleared background with one colored row.
+	void	generateReferenceLevel	() override;
+TestInstance* SingleLineCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new SingleLineInstance(context, caseParams);
+void SingleLineCase::initPrograms (vk::SourceCollections& programCollection) const
+	// This test is mesh-only: the parameters must not ask for a task shader.
+	DE_ASSERT(!m_params->needsTaskShader());
+	// Let the base class register its programs first.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// A single invocation emits one line from x = -1 to x = 1 at y = 0,
+	// colored (0, 1, 1, 1) through a per-primitive output.
+	const std::string meshGlsl =
+		"#version 450\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout(local_size_x=1) in;\n"
+		"layout(lines) out;\n"
+		"layout(max_vertices=256, max_primitives=256) out;\n"
+		"\n"
+		"layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    gl_PrimitiveCountNV = 1u;\n"
+		"    lineColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
+		"    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0f, 0.0f, 0.0f, 1.0f);\n"
+		"    gl_MeshVerticesNV[1].gl_Position = vec4( 1.0f, 0.0f, 0.0f, 1.0f);\n"
+		"    gl_PrimitiveIndicesNV[0] = 0;\n"
+		"    gl_PrimitiveIndicesNV[1] = 1;\n"
+		"}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshGlsl);
+void SingleLineInstance::generateReferenceLevel ()
+	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto halfHeight	= static_cast<int>(m_params->height / 2u);
+	const auto access		= m_referenceLevel->getAccess();
+	// Center row.
+	for (int x = 0; x < iWidth; ++x)
+		access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), x, halfHeight);
+// Verify drawing a single triangle.
+class SingleTriangleCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					SingleTriangleCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits the single triangle.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new SingleTriangleInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for SingleTriangleCase; it only customizes the reference image.
+class SingleTriangleInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	SingleTriangleInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: cleared background with one colored pixel in the center.
+	void	generateReferenceLevel	() override;
+TestInstance* SingleTriangleCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new SingleTriangleInstance(context, caseParams);
+void SingleTriangleCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the base class programs plus a mesh shader that emits one small triangle
+	// around the clip-space origin, colored (0, 1, 1, 1) through a per-primitive output.
+	// The triangle size is derived from the framebuffer dimensions and baked into the
+	// shader text as literal coordinates.
+	DE_ASSERT(!m_params->needsTaskShader());
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// NOTE(review): halfPixelX is computed from the width but is written to the Y component
+	// below, and halfPixelY (from the height) to the X component. Also, assuming the usual
+	// [-1, 1] clip-space range, 2/size is the extent of a whole pixel rather than half of one.
+	// Coverage depends on these exact values, so confirm the intent before "fixing" either.
+	const float halfPixelX = 2.0f / static_cast<float>(m_params->width);
+	const float halfPixelY = 2.0f / static_cast<float>(m_params->height);
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout(local_size_x=1) in;\n"
+		<< "layout(triangles) out;\n"
+		<< "layout(max_vertices=256, max_primitives=256) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    gl_PrimitiveCountNV = 1u;\n"
+		<< "    triangleColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
+		<< "    gl_MeshVerticesNV[0].gl_Position = vec4(" <<  halfPixelY << ", " << -halfPixelX << ", 0.0f, 1.0f);\n"
+		<< "    gl_MeshVerticesNV[1].gl_Position = vec4(" <<  halfPixelY << ", " <<  halfPixelX << ", 0.0f, 1.0f);\n"
+		<< "    gl_MeshVerticesNV[2].gl_Position = vec4(" << -halfPixelY << ", 0.0f, 0.0f, 1.0f);\n"
+		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+		<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+		<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+void SingleTriangleInstance::generateReferenceLevel ()
+	// Transparent black everywhere except for a single pixel in the center.
+	const tcu::Vec4	backgroundColor	(0.0f, 0.0f, 0.0f, 0.0f);
+	const tcu::Vec4	triangleColor	(0.0f, 1.0f, 1.0f, 1.0f);
+	generateSolidRefLevel(backgroundColor, m_referenceLevel);
+	const int		centerX			= static_cast<int>(m_params->width / 2u);
+	const int		centerY			= static_cast<int>(m_params->height / 2u);
+	m_referenceLevel->getAccess().setPixel(triangleColor, centerX, centerY);
+// Verify drawing the maximum number of points.
+class MaxPointsCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					MaxPointsCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits 256 points.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new MaxPointsInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for MaxPointsCase; it only customizes the reference image.
+class MaxPointsInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	MaxPointsInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: a red/green gradient over the whole image.
+	void	generateReferenceLevel	() override;
+TestInstance* MaxPointsCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new MaxPointsInstance(context, caseParams);
+void MaxPointsCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the base class programs plus a mesh shader that emits 256 points, one per
+	// pixel of a 16x16 framebuffer. Each point is placed at its pixel center and colored
+	// with its normalized (x, y) position in the red and green channels.
+	DE_ASSERT(!m_params->needsTaskShader());
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// Fill a 16x16 image with 256 points. Each of the 32 local invocations will handle a segment of 8 pixels. Two segments per row.
+	// The 16x16 size is hard-coded in the shader text below, hence the assertion.
+	DE_ASSERT(m_params->width == 16u && m_params->height == 16u);
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout(local_size_x=32) in;\n"
+		<< "layout(points) out;\n"
+		<< "layout(max_vertices=256, max_primitives=256) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    gl_PrimitiveCountNV = 256u;\n"
+		<< "    uint firstPixel = 8u * gl_LocalInvocationID.x;\n"
+		<< "    uint row = firstPixel / 16u;\n"
+		<< "    uint col = firstPixel % 16u;\n"
+		<< "    float pixSize = 2.0f / 16.0f;\n"
+		<< "    float yCoord = pixSize * (float(row) + 0.5f) - 1.0f;\n"
+		<< "    float baseXCoord = pixSize * (float(col) + 0.5f) - 1.0f;\n"
+		<< "    for (uint i = 0; i < 8u; i++) {\n"
+		<< "        float xCoord = baseXCoord + pixSize * float(i);\n"
+		<< "        uint pixId = firstPixel + i;\n"
+		<< "        gl_MeshVerticesNV[pixId].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
+		<< "        gl_MeshVerticesNV[pixId].gl_PointSize = 1.0f;\n"
+		<< "        gl_PrimitiveIndicesNV[pixId] = pixId;\n"
+		<< "        pointColor[pixId] = vec4(((xCoord + 1.0f) / 2.0f), ((yCoord + 1.0f) / 2.0f), 0.0f, 1.0f);\n"
+		<< "    }\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+void MaxPointsInstance::generateReferenceLevel ()
+	const auto format		= getOutputFormat();
+	const auto tcuFormat	= mapVkFormat(format);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto iHeight		= static_cast<int>(m_params->height);
+	const auto fWidth		= static_cast<float>(m_params->width);
+	const auto fHeight		= static_cast<float>(m_params->height);
+	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
+	const auto access		= m_referenceLevel->getAccess();
+	// Fill with gradient like the shader does.
+	for (int y = 0; y < iHeight; ++y)
+	for (int x = 0; x < iWidth; ++x)
+	{
+		const tcu::Vec4 color (
+			((static_cast<float>(x) + 0.5f) / fWidth),
+			((static_cast<float>(y) + 0.5f) / fHeight),
+			0.0f, 1.0f);
+		access.setPixel(color, x, y);
+	}
+// Verify drawing the maximum number of lines.
+class MaxLinesCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					MaxLinesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits 255 lines.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new MaxLinesInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for MaxLinesCase; it only customizes the reference image.
+class MaxLinesInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	MaxLinesInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: 255 stacked segments, each 4 pixels tall with its own color.
+	void	generateReferenceLevel	() override;
+TestInstance* MaxLinesCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new MaxLinesInstance(context, caseParams);
+void MaxLinesCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the base class programs plus a mesh shader that emits 255 connected vertical
+	// lines sharing 256 vertices. Line N uses vertices N and N+1 and gets a blue component
+	// of N/255, so every line has a distinct color.
+	DE_ASSERT(!m_params->needsTaskShader());
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// Fill a 1x1020 image with 255 lines, each line being 4 pixels tall. Each invocation will generate ~8 lines.
+	// The 1x1020 size is hard-coded in the shader text below, hence the assertion.
+	DE_ASSERT(m_params->width == 1u && m_params->height == 1020u);
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout(local_size_x=32) in;\n"
+		<< "layout(lines) out;\n"
+		<< "layout(max_vertices=256, max_primitives=255) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 lineColor[];\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    gl_PrimitiveCountNV = 255u;\n"
+		<< "    uint firstLine = 8u * gl_LocalInvocationID.x;\n"
+		<< "    for (uint i = 0u; i < 8u; i++) {\n"
+		<< "        uint lineId = firstLine + i;\n"
+		<< "        uint topPixel = 4u * lineId;\n"
+		<< "        uint bottomPixel = 3u + topPixel;\n"
+		<< "        if (bottomPixel < 1020u) {\n"
+		<< "            float bottomCoord = ((float(bottomPixel) + 1.0f) / 1020.0) * 2.0 - 1.0;\n"
+		<< "            gl_MeshVerticesNV[lineId + 1u].gl_Position = vec4(0.0, bottomCoord, 0.0f, 1.0f);\n"
+		<< "            gl_PrimitiveIndicesNV[lineId * 2u] = lineId;\n"
+		<< "            gl_PrimitiveIndicesNV[lineId * 2u + 1u] = lineId + 1u;\n"
+		<< "            lineColor[lineId] = vec4(0.0f, 1.0f, float(lineId) / 255.0f, 1.0f);\n"
+		<< "        } else {\n"
+		<< "            // The last iteration of the last invocation emits the first point\n"
+		<< "            gl_MeshVerticesNV[0].gl_Position = vec4(0.0, -1.0, 0.0f, 1.0f);\n"
+		<< "        }\n"
+		<< "    }\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+void MaxLinesInstance::generateReferenceLevel ()
+	const auto format		= getOutputFormat();
+	const auto tcuFormat	= mapVkFormat(format);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto iHeight		= static_cast<int>(m_params->height);
+	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
+	const auto access		= m_referenceLevel->getAccess();
+	// Fill lines, 4 pixels per line.
+	const uint32_t kNumLines = 255u;
+	const uint32_t kLineHeight = 4u;
+	for (uint32_t i = 0u; i < kNumLines; ++i)
+	{
+		const tcu::Vec4 color (0.0f, 1.0f, static_cast<float>(i) / static_cast<float>(kNumLines), 1.0f);
+		for (uint32_t j = 0u; j < kLineHeight; ++j)
+			access.setPixel(color, 0, i*kLineHeight + j);
+	}
+// Verify drawing the maximum number of triangles.
+class MaxTrianglesCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					MaxTrianglesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader that emits 254 triangles.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new MaxTrianglesInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for MaxTrianglesCase; it only customizes the reference image.
+class MaxTrianglesInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	MaxTrianglesInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: a single solid color.
+	void	generateReferenceLevel	() override;
+TestInstance* MaxTrianglesCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new MaxTrianglesInstance(context, caseParams);
+void MaxTrianglesCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the base class programs plus a mesh shader that emits 254 triangles as a fan
+	// around vertex 0 at (-1, -1), using 256 vertices in total. Every triangle is colored
+	// (0, 0, 1, 1) through a per-primitive output.
+	DE_ASSERT(!m_params->needsTaskShader());
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// Fill a sufficiently large image with solid color. Generate a quarter of a circle with the center in the top left corner,
+	// using a triangle fan that advances from top to bottom. Each invocation will generate ~8 triangles.
+	// NOTE(review): vertex 1 is placed at x = RADIUS, while the fan formula at angle 0 would give
+	// RADIUS - 1.0. Both appear to lie outside the visible range, but confirm this is intended.
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout(local_size_x=32) in;\n"
+		<< "layout(triangles) out;\n"
+		<< "layout(max_vertices=256, max_primitives=254) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+		<< "\n"
+		<< "const float PI_2 = 1.57079632679489661923;\n"
+		<< "const float RADIUS = 4.5;\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    gl_PrimitiveCountNV = 254u;\n"
+		<< "    uint firstTriangle = 8u * gl_LocalInvocationID.x;\n"
+		<< "    for (uint i = 0u; i < 8u; i++) {\n"
+		<< "        uint triangleId = firstTriangle + i;\n"
+		<< "        if (triangleId < 254u) {\n"
+		<< "            uint vertexId = triangleId + 2u;\n"
+		<< "            float angleProportion = float(vertexId - 1u) / 254.0f;\n"
+		<< "            float angle = PI_2 * angleProportion;\n"
+		<< "            float xCoord = cos(angle) * RADIUS - 1.0;\n"
+		<< "            float yCoord = sin(angle) * RADIUS - 1.0;\n"
+		<< "            gl_MeshVerticesNV[vertexId].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
+		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 0u] = 0u;\n"
+		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 1u] = triangleId + 1u;\n"
+		<< "            gl_PrimitiveIndicesNV[triangleId * 3u + 2u] = triangleId + 2u;\n"
+		<< "            triangleColor[triangleId] = vec4(0.0f, 0.0f, 1.0f, 1.0f);\n"
+		<< "        } else {\n"
+		<< "            // The last iterations of the last invocation emit the first two vertices\n"
+		<< "            uint vertexId = triangleId - 254u;\n"
+		<< "            if (vertexId == 0u) {\n"
+		<< "                gl_MeshVerticesNV[0u].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
+		<< "            } else {\n"
+		<< "                gl_MeshVerticesNV[1u].gl_Position = vec4(RADIUS, -1.0, 0.0, 1.0);\n"
+		<< "            }\n"
+		<< "        }\n"
+		<< "    }\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+void MaxTrianglesInstance::generateReferenceLevel ()
+	// The whole image is expected to be opaque blue.
+	const tcu::Vec4 solidBlue (0.0f, 0.0f, 1.0f, 1.0f);
+	generateSolidRefLevel(solidBlue, m_referenceLevel);
+// Large work groups with many threads.
+class LargeWorkGroupCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					LargeWorkGroupCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader and, when the parameters require it, the task shader.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new LargeWorkGroupInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+	// Work group size written into the generated task and mesh shaders.
+	static constexpr uint32_t kLocalInvocations = 32u;
+// Test instance for LargeWorkGroupCase; it only customizes the reference image.
+class LargeWorkGroupInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	LargeWorkGroupInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: a single solid color.
+	void	generateReferenceLevel	() override;
+TestInstance* LargeWorkGroupCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new LargeWorkGroupInstance(context, caseParams);
+void LargeWorkGroupInstance::generateReferenceLevel ()
+	// Every pixel is expected to be covered by an opaque blue point.
+	const tcu::Vec4 solidBlue (0.0f, 0.0f, 1.0f, 1.0f);
+	generateSolidRefLevel(solidBlue, m_referenceLevel);
+void LargeWorkGroupCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Registers the fragment shader (through the base class), an optional task shader and a
+	// mesh shader in which every invocation draws one blue point at its own pixel.
+	// When a task shader is used, each of its invocations records the task work group index
+	// in the task data, and the mesh shader uses that index to compute a global job id.
+	const auto useTaskShader	= m_params->needsTaskShader();
+	// Number of task work groups when a task shader is used; 1 otherwise.
+	const auto taskMultiplier	= (useTaskShader ? m_params->taskCount.get() : 1u);
+	// Add the frag shader.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// Task data block shared by both stages; the "out"/"in" qualifier is prepended per stage.
+	std::ostringstream taskData;
+	taskData
+		<< "taskNV TaskData {\n"
+		<< "    uint parentTask[" << kLocalInvocations << "];\n"
+		<< "} td;\n"
+		;
+	const auto taskDataStr = taskData.str();
+	if (useTaskShader)
+	{
+		// Each task work group launches meshCount mesh work groups.
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
+			<< "\n"
+			<< "out " << taskDataStr
+			<< "\n"
+			<< "void main () {\n"
+			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
+			<< "    td.parentTask[gl_LocalInvocationID.x] = gl_WorkGroupID.x;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	// Needed for the code below to work.
+	DE_ASSERT(m_params->width * m_params->height == taskMultiplier * m_params->meshCount * kLocalInvocations);
+	DE_UNREF(taskMultiplier); // For release builds.
+	// Emit one point per framebuffer pixel. The number of jobs (kLocalInvocations in each mesh shader work group, multiplied by the
+	// number of mesh work groups emitted by each task work group) must be the same as the total framebuffer size. Calculate a job
+	// ID corresponding to the current mesh shader invocation, and assign a pixel position to it. Draw a point at that position.
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
+		<< "layout (points) out;\n"
+		<< "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << kLocalInvocations << ") out;\n"
+		<< "\n"
+		<< (useTaskShader ? "in " + taskDataStr : "")
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 pointColor[];\n"
+		<< "\n"
+		<< "void main () {\n"
+		;
+	if (useTaskShader)
+	{
+		// All entries of parentTask must agree; an invocation that sees a mismatch draws
+		// nothing, which leaves its pixel uncovered.
+		mesh
+			<< "    uint parentTask = td.parentTask[0];\n"
+			<< "    if (td.parentTask[gl_LocalInvocationID.x] != parentTask) {\n"
+			<< "        return;\n"
+			<< "    }\n"
+			;
+	}
+	else
+	{
+		// Without a task shader the parent task index is fixed to zero.
+		mesh << "    uint parentTask = 0;\n";
+	}
+	mesh
+		<< "    gl_PrimitiveCountNV = " << kLocalInvocations << ";\n"
+		<< "    uint jobId = ((parentTask * " << m_params->meshCount << ") + gl_WorkGroupID.x) * " << kLocalInvocations << " + gl_LocalInvocationID.x;\n"
+		<< "    uint row = jobId / " << m_params->width << ";\n"
+		<< "    uint col = jobId % " << m_params->width << ";\n"
+		<< "    float yCoord = (float(row + 0.5) / " << m_params->height << ".0) * 2.0 - 1.0;\n"
+		<< "    float xCoord = (float(col + 0.5) / " << m_params->width << ".0) * 2.0 - 1.0;\n"
+		<< "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
+		<< "    gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n"
+		<< "    gl_PrimitiveIndicesNV[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
+		<< "    pointColor[gl_LocalInvocationID.x] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+// Tests that generate no primitives of a given type.
+// Output primitive kind declared by the mesh shaders of those tests.
+enum class PrimitiveType { POINTS=0, LINES, TRIANGLES };
+std::string primitiveTypeName (PrimitiveType primitiveType)
+	// Maps a primitive type to the keyword used in the mesh shader output layout.
+	// Unknown values trigger an assertion and yield an empty string.
+	switch (primitiveType)
+	{
+	case PrimitiveType::POINTS:		return "points";
+	case PrimitiveType::LINES:		return "lines";
+	case PrimitiveType::TRIANGLES:	return "triangles";
+	default:						break;
+	}
+	DE_ASSERT(false);
+	return std::string();
+// Parameters for the no-primitives tests: the common misc parameters plus the primitive type.
+struct NoPrimitivesParams : public MiscTestParams
+	// Primitive kind the mesh shader declares as its output.
+	PrimitiveType primitiveType;
+// Test case whose mesh shader sets the primitive count to zero.
+class NoPrimitivesCase : public MeshShaderMiscCase
+					// Forwards every argument to MeshShaderMiscCase; params is moved into the base class.
+					NoPrimitivesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the mesh shader; the parameters are expected to be NoPrimitivesParams.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Returns a new NoPrimitivesInstance built from the context and the stored parameters.
+	TestInstance*	createInstance			(Context& context) const override;
+// Test instance for the no-primitives cases; it only customizes the reference image.
+class NoPrimitivesInstance : public MeshShaderMiscInstance
+	// Passes the context and test parameters straight to MeshShaderMiscInstance.
+	NoPrimitivesInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	// Builds the expected image: the untouched clear color.
+	void	generateReferenceLevel	() override;
+void NoPrimitivesInstance::generateReferenceLevel ()
+	// Nothing is drawn, so the expected image is the clear color everywhere.
+	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
+	generateSolidRefLevel(clearColor, m_referenceLevel);
+TestInstance* NoPrimitivesCase::createInstance (Context& context) const
+	// The new instance receives the raw parameter pointer held by m_params.
+	const auto* const caseParams = m_params.get();
+	return new NoPrimitivesInstance(context, caseParams);
+void NoPrimitivesCase::initPrograms (vk::SourceCollections& programCollection) const
+	// The stored parameters must be NoPrimitivesParams and must not request a task shader.
+	const auto noPrimParams = dynamic_cast<NoPrimitivesParams*>(m_params.get());
+	DE_ASSERT(noPrimParams);
+	DE_ASSERT(!noPrimParams->needsTaskShader());
+	// Mesh shader that declares the requested output primitive type but reports zero primitives.
+	std::ostringstream meshSrc;
+	meshSrc
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=32) in;\n"
+		<< "layout (" << primitiveTypeName(noPrimParams->primitiveType) << ") out;\n"
+		<< "layout (max_vertices=256, max_primitives=256) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
+		<< "\n"
+		<< "void main () {\n"
+		<< "    gl_PrimitiveCountNV = 0u;\n"
+		<< "}\n"
+		;
+	// Base class programs are registered before the mesh shader, as in the other cases.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshSrc.str());
+// Variant of NoPrimitivesCase that replaces the generated programs with its own initPrograms().
+class NoPrimitivesExtraWritesCase : public NoPrimitivesCase
+					// Forwards every argument to NoPrimitivesCase; params is moved into the base class.
+					NoPrimitivesExtraWritesCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: NoPrimitivesCase (testCtx, name, description, std::move(params))
+					{}
+	// Generates the task and mesh shaders for this variant.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;
+	// Work group size written into the generated task and mesh shaders.
+	static constexpr uint32_t kLocalInvocations = 32u;
+void NoPrimitivesExtraWritesCase::initPrograms (vk::SourceCollections& programCollection) const
+	const auto params = dynamic_cast<NoPrimitivesParams*>(m_params.get());
+	DE_ASSERT(params);
+	DE_ASSERT(m_params->needsTaskShader());
+	std::ostringstream taskData;
+	taskData
+		<< "taskNV TaskData {\n"
+		<< "    uint localInvocations[" << kLocalInvocations << "];\n"
+		<< "} td;\n"
+		;
+	const auto taskDataStr = taskData.str();
+	std::ostringstream task;
+	task
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
+		<< "\n"
+		<< "out " << taskDataStr
+		<< "\n"
+		<< "void main () {\n"
+		<< "    gl_TaskCountNV = " << params->meshCount << ";\n"
+		<< "    td.localInvocations[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	const auto primitiveName = primitiveTypeName(params->primitiveType);
+	// Otherwise the shader would be illegal.
+	DE_ASSERT(kLocalInvocations > 2u);
+	uint32_t maxPrimitives = 0u;
+	switch (params->primitiveType)
+	{
+	case PrimitiveType::POINTS:		maxPrimitives = kLocalInvocations - 0u;	break;
+	case PrimitiveType::LINES:		maxPrimitives = kLocalInvocations - 1u;	break;
+	case PrimitiveType::TRIANGLES:	maxPrimitives = kLocalInvocations - 2u;	break;
+	default: DE_ASSERT(false); break;
+	}
+	const std::string pointSizeDecl	= ((params->primitiveType == PrimitiveType::POINTS)
+									? "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_PointSize = 1.0;\n"
+									: "");
+	std::ostringstream mesh;
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
+		<< "layout (" << primitiveName << ") out;\n"
+		<< "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << maxPrimitives << ") out;\n"
+		<< "\n"
+		<< "in " << taskDataStr
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
+		<< "\n"
+		<< "shared uint sumOfIds;\n"
+		<< "\n"
+		<< "const float PI_2 = 1.57079632679489661923;\n"
+		<< "const float RADIUS = 1.0f;\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    sumOfIds = 0u;\n"
+		<< "    barrier();\n"
+		<< "    atomicAdd(sumOfIds, td.localInvocations[gl_LocalInvocationID.x]);\n"
+		<< "    barrier();\n"
+		<< "    // This should dynamically give 0\n"
+		<< "    gl_PrimitiveCountNV = sumOfIds - (" << kLocalInvocations * (kLocalInvocations - 1u) / 2u << ");\n"
+		<< "\n"
+		<< "    // Emit points and primitives to the arrays in any case\n"
+		<< "    if (gl_LocalInvocationID.x > 0u) {\n"
+		<< "        float proportion = (float(gl_LocalInvocationID.x - 1u) + 0.5f) / float(" << kLocalInvocations << " - 1u);\n"
+		<< "        float angle = PI_2 * proportion;\n"
+		<< "        float xCoord = cos(angle) * RADIUS - 1.0;\n"
+		<< "        float yCoord = sin(angle) * RADIUS - 1.0;\n"
+		<< "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
+		<< pointSizeDecl
+		<< "    } else {\n"
+		<< "        gl_MeshVerticesNV[gl_LocalInvocationID.x].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
+		<< pointSizeDecl
+		<< "    }\n"
+		<< "    uint primitiveId = max(gl_LocalInvocationID.x, " << (maxPrimitives - 1u) << ");\n"
+		<< "    primitiveColor[primitiveId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		;
+	if (params->primitiveType == PrimitiveType::POINTS)
+	{
+		mesh
+			<< "    gl_PrimitiveIndicesNV[primitiveId] = primitiveId;\n"
+			;
+	}
+	else if (params->primitiveType == PrimitiveType::LINES)
+	{
+		mesh
+			<< "    gl_PrimitiveIndicesNV[primitiveId * 2u + 0u] = primitiveId + 0u;\n"
+			<< "    gl_PrimitiveIndicesNV[primitiveId * 2u + 1u] = primitiveId + 1u;\n"
+			;
+	}
+	else if (params->primitiveType == PrimitiveType::TRIANGLES)
+	{
+		mesh
+			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 0u] = 0u;\n"
+			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 1u] = primitiveId + 1u;\n"
+			<< "    gl_PrimitiveIndicesNV[primitiveId * 3u + 2u] = primitiveId + 3u;\n"
+			;
+	}
+	else
+		DE_ASSERT(false);
+	mesh
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	MeshShaderMiscCase::initPrograms(programCollection);
+// Case testing barrier(): the invocations of a workgroup increment a shared counter between two barrier() calls and the final count is checked.
+class SimpleBarrierCase : public MeshShaderMiscCase
+					SimpleBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;	// Generates the shaders running the counter check.
+	TestInstance*	createInstance			(Context& context) const override;	// Returns a new SimpleBarrierInstance.
+	static constexpr uint32_t kLocalInvocations = 32u;	// Workgroup size of the stage running the check; also the expected counter value.
+class SimpleBarrierInstance : public MeshShaderMiscInstance	// Instance for SimpleBarrierCase; it only customizes the reference image.
+	SimpleBarrierInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	void	generateReferenceLevel	() override;	// Produces a solid blue reference image.
+// Creates the test instance, handing it a non-owning pointer to the case parameters.
+TestInstance* SimpleBarrierCase::createInstance (Context& context) const
+	const auto rawParams = m_params.get();
+	return new SimpleBarrierInstance(context, rawParams);
+// The expected result is a framebuffer fully covered in opaque blue.
+void SimpleBarrierInstance::generateReferenceLevel ()
+	const tcu::Vec4 solidBlue (0.0f, 0.0f, 1.0f, 1.0f);
+	generateSolidRefLevel(solidBlue, m_referenceLevel);
+// Generates the shaders for the barrier() test. The counter check runs in the task shader when one is
+// needed and in the mesh shader otherwise.
+void SimpleBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
+	// The base class contributes the fragment shader.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// The shaders below assume one mesh workgroup rendering to a 1x1 framebuffer.
+	DE_ASSERT(m_params->meshCount == 1u);
+	DE_ASSERT(m_params->width == 1u && m_params->height == 1u);
+	const bool	useTask		= m_params->needsTaskShader();
+	const auto	groupSize	= std::to_string(kLocalInvocations);
+	// Mesh statements that output a single blue point.
+	const std::string emitPoint =
+		"gl_PrimitiveCountNV = 1u;\n"
+		"gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
+		"gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
+		"primitiveColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		"gl_PrimitiveIndicesNV[0] = 0;\n";
+	// Verdict statements: a task shader launches one or zero mesh workgroups, while a lone mesh shader
+	// emits the point or no primitives at all.
+	const std::string onSuccess	= (useTask ? std::string("gl_TaskCountNV = 1u;\n") : emitPoint);
+	const std::string onFailure	= (useTask ? "gl_TaskCountNV = 0u;\n" : "gl_PrimitiveCountNV = 0u;\n");
+	const std::string counterDecl = "shared uint counter;\n\n";
+	// Every invocation bumps the shared counter between two barriers; invocation 0 then checks the total.
+	const std::string counterCheck =
+		"counter = 0;\n"
+		"barrier();\n"
+		"atomicAdd(counter, 1u);\n"
+		"barrier();\n"
+		"if (gl_LocalInvocationID.x == 0u) {\n"
+		"    if (counter == " + groupSize + ") {\n"
+		"\n"
+		+ onSuccess +
+		"\n"
+		"    } else {\n"
+		"\n"
+		+ onFailure +
+		"\n"
+		"    }\n"
+		"}\n";
+	// Both variants of the mesh shader share this skeleton.
+	const std::string meshTemplateText =
+		"#version 450\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (local_size_x=${LOCAL_SIZE}) in;\n"
+		"layout (points) out;\n"
+		"layout (max_vertices=1, max_primitives=1) out;\n"
+		"\n"
+		"layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
+		"\n"
+		"${GLOBALS:opt}"
+		"void main ()\n"
+		"{\n"
+		"${BODY}"
+		"}\n";
+	const tcu::StringTemplate meshTemplate (meshTemplateText);
+	std::map<std::string, std::string> replacements;
+	if (useTask)
+	{
+		// The task shader runs the check with the full workgroup; the mesh shader just draws the point.
+		const std::string taskSource =
+			"#version 450\n"
+			"#extension GL_NV_mesh_shader : enable\n"
+			"\n"
+			"layout (local_size_x=" + groupSize + ") in;\n"
+			"\n"
+			+ counterDecl +
+			"void main ()\n"
+			"{\n"
+			+ counterCheck +
+			"}\n";
+		programCollection.glslSources.add("task") << glu::TaskSource(taskSource);
+		replacements["LOCAL_SIZE"]	= "1";
+		replacements["BODY"]		= emitPoint;
+	}
+	else
+	{
+		// No task shader: the mesh shader runs the check itself.
+		replacements["LOCAL_SIZE"]	= groupSize;
+		replacements["BODY"]		= counterCheck;
+		replacements["GLOBALS"]		= counterDecl;
+	}
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshTemplate.specialize(replacements));
+// Case testing memoryBarrierShared() and groupMemoryBarrier().
+enum class MemoryBarrierType { SHARED = 0, GROUP };	// Selects the GLSL memory barrier function called by the shaders.
+// Parameters for the memory barrier tests: the common ones plus the barrier function to exercise.
+struct MemoryBarrierParams : public MiscTestParams
+	MemoryBarrierType memBarrierType;
+	// Name of the GLSL built-in matching memBarrierType. Unknown values trigger an assertion and yield an
+	// empty string.
+	std::string glslFunc () const
+	{
+		switch (memBarrierType)
+		{
+		case MemoryBarrierType::SHARED:		return "memoryBarrierShared";
+		case MemoryBarrierType::GROUP:		return "groupMemoryBarrier";
+		default:							break;
+		}
+		DE_ASSERT(false);
+		return std::string();
+	}
+class MemoryBarrierCase : public MeshShaderMiscCase	// Test case exercising a GLSL memory barrier function with two invocations and shared memory flags.
+					MemoryBarrierCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override;	// Expects m_params to hold a MemoryBarrierParams.
+	TestInstance*	createInstance			(Context& context) const override;	// Returns a new MemoryBarrierInstance.
+	static constexpr uint32_t kLocalInvocations = 2u;	// The shaders declare flags[2] and address the other invocation as 1 - gl_LocalInvocationID.x.
+class MemoryBarrierInstance : public MeshShaderMiscInstance	// Instance for MemoryBarrierCase; accepts either of two reference images.
+	MemoryBarrierInstance (Context& context, const MiscTestParams* params)
+		: MeshShaderMiscInstance (context, params)
+	{}
+	void	generateReferenceLevel	() override;	// Creates both references: solid blue and solid black.
+	bool	verifyResult			(const tcu::ConstPixelBufferAccess& resultAccess) const override;	// True if the result matches any of the references.
+	// Allow two possible outcomes.
+	std::unique_ptr<tcu::TextureLevel>	m_referenceLevel2;
+// Creates the test instance, handing it a non-owning pointer to the case parameters.
+TestInstance* MemoryBarrierCase::createInstance (Context& context) const
+	const auto rawParams = m_params.get();
+	return new MemoryBarrierInstance(context, rawParams);
+// Two results are acceptable: opaque blue (first reference) and opaque black (second reference).
+void MemoryBarrierInstance::generateReferenceLevel ()
+	const tcu::Vec4 opaqueBlue	(0.0f, 0.0f, 1.0f, 1.0f);
+	const tcu::Vec4 opaqueBlack	(0.0f, 0.0f, 0.0f, 1.0f);
+	generateSolidRefLevel(opaqueBlue, m_referenceLevel);
+	generateSolidRefLevel(opaqueBlack, m_referenceLevel2);
+// Compares the result against each reference in turn and succeeds on the first match. Every attempt is
+// announced in the log so that the output of a failed comparison is not mistaken for the final verdict.
+bool MemoryBarrierInstance::verifyResult (const tcu::ConstPixelBufferAccess& resultAccess) const
+	tcu::TestLog& testLog = m_context.getTestContext().getLog();
+	tcu::TextureLevel* const candidates[] = { m_referenceLevel.get(), m_referenceLevel2.get() };
+	size_t refIdx = 0u;
+	for (const auto candidate : candidates)
+	{
+		testLog << tcu::TestLog::Message << "Comparing result with reference " << refIdx << "..." << tcu::TestLog::EndMessage;
+		if (MeshShaderMiscInstance::verifyResult(resultAccess, *candidate))
+		{
+			testLog << tcu::TestLog::Message << "Match! The test has passed" << tcu::TestLog::EndMessage;
+			return true;
+		}
+		++refIdx;
+	}
+	// None of the references matched.
+	return false;
+// Generates the shaders for the memory barrier tests.
+//
+// Two invocations share a "flags" array: each one raises its own flag, calls the barrier function under
+// test and then spins until it sees the flag raised by the other invocation. The blue component of the
+// output color is the parity of the number of iterations spent waiting, which is why the instance accepts
+// both blue and black as results.
+//
+// When a task shader is needed, the check runs in the task stage and the value reaches the mesh shader
+// through the "blue" member of the TaskData block; otherwise the mesh shader runs the check itself.
+void MemoryBarrierCase::initPrograms (vk::SourceCollections& programCollection) const
+	const auto params = dynamic_cast<MemoryBarrierParams*>(m_params.get());
+	DE_ASSERT(params);
+	// Generate frag shader.
+	MeshShaderMiscCase::initPrograms(programCollection);
+	// The shaders below assume a single mesh workgroup and a 1x1 framebuffer.
+	DE_ASSERT(params->meshCount == 1u);
+	DE_ASSERT(params->width == 1u && params->height == 1u);
+	const bool taskShader = params->needsTaskShader();
+	const std::string	taskDataDecl	= "taskNV TaskData { float blue; } td;\n\n";
+	const std::string	inTaskData		= "in " + taskDataDecl;
+	const std::string	outTaskData		= "out " + taskDataDecl;
+	const auto			barrierFunc		= params->glslFunc();
+	// Statements emitting a single point. With a task shader the blue component is read from the task data;
+	// without it, the mesh shader uses the "iterations" variable declared by the verification code.
+	std::ostringstream meshPrimData;
+	meshPrimData
+			<< "gl_PrimitiveCountNV = 1u;\n"
+			<< "gl_MeshVerticesNV[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
+			<< "gl_MeshVerticesNV[0].gl_PointSize = 1.0;\n"
+			<< "primitiveColor[0] = vec4(0.0, 0.0, " << (taskShader ? "td.blue" : "float(iterations % 2u)") << ", 1.0);\n"
+			<< "gl_PrimitiveIndicesNV[0] = 0;\n"
+			;
+	const std::string meshPrimStr	= meshPrimData.str();
+	// The task shader launches one mesh workgroup and stores the value to be used as the blue component.
+	const std::string taskAction	= "gl_TaskCountNV = 1u;\ntd.blue = float(iterations % 2u);\n";
+	const std::string meshAction	= meshPrimStr;
+	const std::string action		= (taskShader ? taskAction : meshAction);
+	const std::string	sharedDecl = "shared uint flags[2];\n\n";
+	std::ostringstream	verification;
+	verification
+		<< "flags[gl_LocalInvocationID.x] = 0u;\n"
+		<< "barrier();\n"
+		<< "flags[gl_LocalInvocationID.x] = 1u;\n"
+		<<  barrierFunc << "();\n"
+		<< "uint otherInvocation = 1u - gl_LocalInvocationID.x;\n"
+		<< "uint iterations = 0u;\n"
+		<< "while (flags[otherInvocation] != 1u) {\n"
+		<< "    iterations++;\n"
+		<< "}\n"
+		<< "if (gl_LocalInvocationID.x == 0u) {\n"
+		<< "\n"
+		<< action
+		<< "\n"
+		<< "}\n"
+		;
+	// The mesh shader is very similar in both cases, so we use a template.
+	std::ostringstream meshTemplateStr;
+	meshTemplateStr
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=${LOCAL_SIZE}) in;\n"
+		<< "layout (points) out;\n"
+		<< "layout (max_vertices=1, max_primitives=1) out;\n"
+		<< "\n"
+		<< "layout (location=0) out perprimitiveNV vec4 primitiveColor[];\n"
+		<< "\n"
+		<< "${GLOBALS}"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "${BODY}"
+		<< "}\n"
+		;
+	const tcu::StringTemplate meshTemplate = meshTemplateStr.str();
+	if (taskShader)
+	{
+		// The task shader runs the check with two invocations; the mesh shader only draws the point.
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (local_size_x=" << kLocalInvocations << ") in;\n"
+			<< "\n"
+			<< sharedDecl
+			<< outTaskData
+			<< "void main ()\n"
+			<< "{\n"
+			<< verification.str()
+			<< "}\n"
+			;
+		std::map<std::string, std::string> replacements;
+		replacements["LOCAL_SIZE"]	= "1";
+		replacements["BODY"]		= meshPrimStr;
+		replacements["GLOBALS"]		= inTaskData;
+		const auto meshStr = meshTemplate.specialize(replacements);
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
+	}
+	else
+	{
+		// No task shader: the mesh shader runs the check itself.
+		std::map<std::string, std::string> replacements;
+		replacements["LOCAL_SIZE"]	= std::to_string(kLocalInvocations);
+		replacements["BODY"]		= verification.str();
+		replacements["GLOBALS"]		= sharedDecl;
+		const auto meshStr = meshTemplate.specialize(replacements);
+		programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr);
+	}
+class CustomAttributesCase : public MeshShaderMiscCase	// Test case passing custom per-vertex and per-primitive attributes from the mesh shader to the fragment shader.
+					CustomAttributesCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params)
+						: MeshShaderMiscCase(testCtx, name, description, std::move(params)) {}
+	virtual			~CustomAttributesCase		(void) {}
+	TestInstance*	createInstance				(Context& context) const override;	// Returns a new CustomAttributesInstance.
+	void			checkSupport				(Context& context) const override;	// Adds the multi-viewport and clip distance feature requirements.
+	void			initPrograms				(vk::SourceCollections& programCollection) const override;	// Generates the frag, mesh and optional task shaders.
+class CustomAttributesInstance : public MeshShaderMiscInstance	// Instance for CustomAttributesCase; provides its own reference image and iterate().
+						CustomAttributesInstance	(Context& context, const MiscTestParams* params)
+							: MeshShaderMiscInstance(context, params) {}
+	virtual				~CustomAttributesInstance	(void) {}
+	void				generateReferenceLevel		() override;	// Clears the reference and paints a blue top-left region.
+	tcu::TestStatus		iterate						(void) override;	// Feeds the attribute values to the shaders through buffers and draws.
+// Creates the test instance, handing it a non-owning pointer to the case parameters.
+TestInstance* CustomAttributesCase::createInstance (Context& context) const
+	const auto rawParams = m_params.get();
+	return new CustomAttributesInstance(context, rawParams);
+void CustomAttributesCase::checkSupport (Context& context) const
+	MeshShaderMiscCase::checkSupport(context);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
+	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
+void CustomAttributesCase::initPrograms (vk::SourceCollections& programCollection) const	// Adds the "frag" and "mesh" GLSL sources, plus "task" when a task shader is needed.
+	std::ostringstream frag;	// Fragment shader: outputs blue only if the built-ins and every custom attribute have the expected values, black otherwise.
+	frag
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (location=0) in vec4 customAttribute1;\n"
+		<< "layout (location=1) in flat float customAttribute2;\n"
+		<< "layout (location=2) in flat int customAttribute3;\n"
+		<< "\n"
+		<< "layout (location=3) in perprimitiveNV flat uvec4 customAttribute4;\n"
+		<< "layout (location=4) in perprimitiveNV float customAttribute5;\n"
+		<< "\n"
+		<< "layout (location=0) out vec4 outColor;\n"
+		<< "\n"
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    bool goodPrimitiveID = (gl_PrimitiveID == 1000 || gl_PrimitiveID == 1001);\n"
+		<< "    bool goodViewportIndex = (gl_ViewportIndex == 1);\n"
+		<< "    bool goodCustom1 = (customAttribute1.x >= 0.25 && customAttribute1.x <= 0.5 &&\n"
+		<< "                        customAttribute1.y >= 0.5  && customAttribute1.y <= 1.0 &&\n"
+		<< "                        customAttribute1.z >= 10.0 && customAttribute1.z <= 20.0 &&\n"
+		<< "                        customAttribute1.w == 3.0);\n"
+		<< "    bool goodCustom2 = (customAttribute2 == 1.0 || customAttribute2 == 2.0);\n"
+		<< "    bool goodCustom3 = (customAttribute3 == 3 || customAttribute3 == 4);\n"
+		<< "    bool goodCustom4 = ((gl_PrimitiveID == 1000 && customAttribute4 == uvec4(100, 101, 102, 103)) ||\n"
+		<< "                        (gl_PrimitiveID == 1001 && customAttribute4 == uvec4(200, 201, 202, 203)));\n"
+		<< "    bool goodCustom5 = ((gl_PrimitiveID == 1000 && customAttribute5 == 6.0) ||\n"
+		<< "                        (gl_PrimitiveID == 1001 && customAttribute5 == 7.0));\n"
+		<< "    \n"
+		<< "    if (goodPrimitiveID && goodViewportIndex && goodCustom1 && goodCustom2 && goodCustom3 && goodCustom4 && goodCustom5) {\n"
+		<< "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		<< "    } else {\n"
+		<< "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
+		<< "    }\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	std::ostringstream pvdDataDeclStream;	// Per-vertex members, reused by the storage buffer block and the task data block.
+	pvdDataDeclStream
+		<< "    vec4 positions[4];\n"
+		<< "    float pointSizes[4];\n"
+		<< "    float clipDistances[4];\n"
+		<< "    vec4 custom1[4];\n"
+		<< "    float custom2[4];\n"
+		<< "    int custom3[4];\n"
+		;
+	const auto pvdDataDecl = pvdDataDeclStream.str();
+	std::ostringstream ppdDataDeclStream;	// Per-primitive members, reused by the uniform buffer block and the task data block.
+	ppdDataDeclStream
+		<< "    int primitiveIds[2];\n"
+		<< "    int viewportIndices[2];\n"
+		<< "    uvec4 custom4[2];\n"
+		<< "    float custom5[2];\n"
+		;
+	const auto ppdDataDecl = ppdDataDeclStream.str();
+	std::ostringstream bindingsDeclStream;	// Buffer bindings with the input data; declared in the task shader if present, in the mesh shader otherwise.
+	bindingsDeclStream
+		<< "layout (set=0, binding=0, std430) buffer PerVertexData {\n"
+		<< pvdDataDecl
+		<< "} pvd;\n"
+		<< "layout (set=0, binding=1) uniform PerPrimitiveData {\n"
+		<< ppdDataDecl
+		<< "} ppd;\n"
+		<< "\n"
+		;
+	const auto bindingsDecl = bindingsDeclStream.str();
+	std::ostringstream taskDataStream;	// Task->mesh interface block carrying both the per-vertex and the per-primitive members.
+	taskDataStream
+		<< "taskNV TaskData {\n"
+		<< pvdDataDecl
+		<< ppdDataDecl
+		<< "} td;\n"
+		<< "\n"
+		;
+	const auto taskDataDecl = taskDataStream.str();
+	const auto taskShader = m_params->needsTaskShader();
+	const auto meshPvdPrefix = (taskShader ? "td" : "pvd");	// The mesh shader reads from the task data when there is a task shader and from the buffers otherwise.
+	const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
+	std::ostringstream mesh;	// Mesh shader: emits two triangles and copies every input value to the matching built-in or custom output.
+	mesh
+		<< "#version 450\n"
+		<< "#extension GL_NV_mesh_shader : enable\n"
+		<< "\n"
+		<< "layout (local_size_x=1) in;\n"
+		<< "layout (max_primitives=2, max_vertices=4) out;\n"
+		<< "layout (triangles) out;\n"
+		<< "\n"
+		<< "out gl_MeshPerVertexNV {\n"
+		<< "    vec4  gl_Position;\n"
+		<< "    float gl_PointSize;\n"
+		<< "    float gl_ClipDistance[1];\n"
+		<< "} gl_MeshVerticesNV[];\n"
+		<< "\n"
+		<< "layout (location=0) out vec4 customAttribute1[];\n"
+		<< "layout (location=1) out flat float customAttribute2[];\n"
+		<< "layout (location=2) out int customAttribute3[];\n"
+		<< "\n"
+		<< "layout (location=3) out perprimitiveNV uvec4 customAttribute4[];\n"
+		<< "layout (location=4) out perprimitiveNV float customAttribute5[];\n"
+		<< "\n"
+		<< "out perprimitiveNV gl_MeshPerPrimitiveNV {\n"
+		<< "  int gl_PrimitiveID;\n"
+		<< "  int gl_ViewportIndex;\n"
+		<< "} gl_MeshPrimitivesNV[];\n"
+		<< "\n"
+		<< (taskShader ? "in " + taskDataDecl : bindingsDecl)
+		<< "void main ()\n"
+		<< "{\n"
+		<< "    gl_PrimitiveCountNV = 2u;\n"
+		<< "\n"
+		<< "    gl_MeshVerticesNV[0].gl_Position = " << meshPvdPrefix << ".positions[0]; //vec4(-1.0, -1.0, 0.0, 1.0)\n"
+		<< "    gl_MeshVerticesNV[1].gl_Position = " << meshPvdPrefix << ".positions[1]; //vec4( 1.0, -1.0, 0.0, 1.0)\n"
+		<< "    gl_MeshVerticesNV[2].gl_Position = " << meshPvdPrefix << ".positions[2]; //vec4(-1.0,  1.0, 0.0, 1.0)\n"
+		<< "    gl_MeshVerticesNV[3].gl_Position = " << meshPvdPrefix << ".positions[3]; //vec4( 1.0,  1.0, 0.0, 1.0)\n"
+		<< "\n"
+		<< "    gl_MeshVerticesNV[0].gl_PointSize = " << meshPvdPrefix << ".pointSizes[0]; //1.0\n"
+		<< "    gl_MeshVerticesNV[1].gl_PointSize = " << meshPvdPrefix << ".pointSizes[1]; //1.0\n"
+		<< "    gl_MeshVerticesNV[2].gl_PointSize = " << meshPvdPrefix << ".pointSizes[2]; //1.0\n"
+		<< "    gl_MeshVerticesNV[3].gl_PointSize = " << meshPvdPrefix << ".pointSizes[3]; //1.0\n"
+		<< "\n"
+		<< "    // Remove geometry on the right side.\n"
+		<< "    gl_MeshVerticesNV[0].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[0]; // 1.0\n"
+		<< "    gl_MeshVerticesNV[1].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[1]; //-1.0\n"
+		<< "    gl_MeshVerticesNV[2].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[2]; // 1.0\n"
+		<< "    gl_MeshVerticesNV[3].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[3]; //-1.0\n"
+		<< "    \n"
+		<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+		<< "    gl_PrimitiveIndicesNV[1] = 2;\n"
+		<< "    gl_PrimitiveIndicesNV[2] = 1;\n"
+		<< "\n"
+		<< "    gl_PrimitiveIndicesNV[3] = 2;\n"
+		<< "    gl_PrimitiveIndicesNV[4] = 3;\n"
+		<< "    gl_PrimitiveIndicesNV[5] = 1;\n"
+		<< "\n"
+		<< "    gl_MeshPrimitivesNV[0].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[0]; //1000\n"
+		<< "    gl_MeshPrimitivesNV[1].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[1]; //1001\n"
+		<< "\n"
+		<< "    gl_MeshPrimitivesNV[0].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[0]; //1\n"
+		<< "    gl_MeshPrimitivesNV[1].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[1]; //1\n"
+		<< "\n"
+		<< "    // Custom per-vertex attributes\n"
+		<< "    customAttribute1[0] = " << meshPvdPrefix << ".custom1[0]; //vec4(0.25, 0.5, 10.0, 3.0)\n"
+		<< "    customAttribute1[1] = " << meshPvdPrefix << ".custom1[1]; //vec4(0.25, 1.0, 20.0, 3.0)\n"
+		<< "    customAttribute1[2] = " << meshPvdPrefix << ".custom1[2]; //vec4( 0.5, 0.5, 20.0, 3.0)\n"
+		<< "    customAttribute1[3] = " << meshPvdPrefix << ".custom1[3]; //vec4( 0.5, 1.0, 10.0, 3.0)\n"
+		<< "\n"
+		<< "    customAttribute2[0] = " << meshPvdPrefix << ".custom2[0]; //1.0f\n"
+		<< "    customAttribute2[1] = " << meshPvdPrefix << ".custom2[1]; //1.0f\n"
+		<< "    customAttribute2[2] = " << meshPvdPrefix << ".custom2[2]; //2.0f\n"
+		<< "    customAttribute2[3] = " << meshPvdPrefix << ".custom2[3]; //2.0f\n"
+		<< "\n"
+		<< "    customAttribute3[0] = " << meshPvdPrefix << ".custom3[0]; //3\n"
+		<< "    customAttribute3[1] = " << meshPvdPrefix << ".custom3[1]; //3\n"
+		<< "    customAttribute3[2] = " << meshPvdPrefix << ".custom3[2]; //4\n"
+		<< "    customAttribute3[3] = " << meshPvdPrefix << ".custom3[3]; //4\n"
+		<< "\n"
+		<< "    // Custom per-primitive attributes.\n"
+		<< "    customAttribute4[0] = " << meshPpdPrefix << ".custom4[0]; //uvec4(100, 101, 102, 103)\n"
+		<< "    customAttribute4[1] = " << meshPpdPrefix << ".custom4[1]; //uvec4(200, 201, 202, 203)\n"
+		<< "\n"
+		<< "    customAttribute5[0] = " << meshPpdPrefix << ".custom5[0]; //6.0\n"
+		<< "    customAttribute5[1] = " << meshPpdPrefix << ".custom5[1]; //7.0\n"
+		<< "}\n"
+		;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	if (taskShader)	// Optional task shader: copies the buffer contents, member by member, into the task data block.
+	{
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "out " << taskDataDecl
+			<< bindingsDecl
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
+			<< "\n"
+			<< "    td.positions[0] = pvd.positions[0];\n"
+			<< "    td.positions[1] = pvd.positions[1];\n"
+			<< "    td.positions[2] = pvd.positions[2];\n"
+			<< "    td.positions[3] = pvd.positions[3];\n"
+			<< "\n"
+			<< "    td.pointSizes[0] = pvd.pointSizes[0];\n"
+			<< "    td.pointSizes[1] = pvd.pointSizes[1];\n"
+			<< "    td.pointSizes[2] = pvd.pointSizes[2];\n"
+			<< "    td.pointSizes[3] = pvd.pointSizes[3];\n"
+			<< "\n"
+			<< "    td.clipDistances[0] = pvd.clipDistances[0];\n"
+			<< "    td.clipDistances[1] = pvd.clipDistances[1];\n"
+			<< "    td.clipDistances[2] = pvd.clipDistances[2];\n"
+			<< "    td.clipDistances[3] = pvd.clipDistances[3];\n"
+			<< "\n"
+			<< "    td.custom1[0] = pvd.custom1[0];\n"
+			<< "    td.custom1[1] = pvd.custom1[1];\n"
+			<< "    td.custom1[2] = pvd.custom1[2];\n"
+			<< "    td.custom1[3] = pvd.custom1[3];\n"
+			<< "\n"
+			<< "    td.custom2[0] = pvd.custom2[0];\n"
+			<< "    td.custom2[1] = pvd.custom2[1];\n"
+			<< "    td.custom2[2] = pvd.custom2[2];\n"
+			<< "    td.custom2[3] = pvd.custom2[3];\n"
+			<< "\n"
+			<< "    td.custom3[0] = pvd.custom3[0];\n"
+			<< "    td.custom3[1] = pvd.custom3[1];\n"
+			<< "    td.custom3[2] = pvd.custom3[2];\n"
+			<< "    td.custom3[3] = pvd.custom3[3];\n"
+			<< "\n"
+			<< "    td.primitiveIds[0] = ppd.primitiveIds[0];\n"
+			<< "    td.primitiveIds[1] = ppd.primitiveIds[1];\n"
+			<< "\n"
+			<< "    td.viewportIndices[0] = ppd.viewportIndices[0];\n"
+			<< "    td.viewportIndices[1] = ppd.viewportIndices[1];\n"
+			<< "\n"
+			<< "    td.custom4[0] = ppd.custom4[0];\n"
+			<< "    td.custom4[1] = ppd.custom4[1];\n"
+			<< "\n"
+			<< "    td.custom5[0] = ppd.custom5[0];\n"
+			<< "    td.custom5[1] = ppd.custom5[1];\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+// Builds the expected image: the clear color (transparent black) everywhere except for the top-left
+// quarter, which is blue.
+void CustomAttributesInstance::generateReferenceLevel ()
+	const auto format		= getOutputFormat();
+	const auto tcuFormat	= mapVkFormat(format);
+	const auto iWidth		= static_cast<int>(m_params->width);
+	const auto iHeight		= static_cast<int>(m_params->height);
+	const auto halfWidth	= iWidth / 2;
+	const auto halfHeight	= iHeight / 2;
+	m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
+	const auto access		= m_referenceLevel->getAccess();
+	const auto clearColor	= tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
+	const auto blueColor	= tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
+	tcu::clear(access, clearColor);
+	// Fill the top left quarter. Rows (y) are bounded by half the height and columns (x) by half the width,
+	// so the region is also correct for non-square images.
+	for (int y = 0; y < halfHeight; ++y)
+	for (int x = 0; x < halfWidth; ++x)
+	{
+		access.setPixel(blueColor, x, y);
+	}
+tcu::TestStatus CustomAttributesInstance::iterate ()
+	struct PerVertexData
+	{
+		tcu::Vec4	positions[4];
+		float		pointSizes[4];
+		float		clipDistances[4];
+		tcu::Vec4	custom1[4];
+		float		custom2[4];
+		int32_t		custom3[4];
+	};
+	struct PerPrimitiveData
+	{
+		// Note some of these are declared as vectors to match the std140 layout.
+		tcu::IVec4	primitiveIds[2];
+		tcu::IVec4	viewportIndices[2];
+		tcu::UVec4	custom4[2];
+		tcu::Vec4	custom5[2];
+	};
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto		imageFormat	= getOutputFormat();
+	const auto		tcuFormat	= mapVkFormat(imageFormat);
+	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
+	const auto&		binaries	= m_context.getBinaryCollection();
+	const auto		hasTask		= binaries.contains("task");
+	const auto		bufStages	= (hasTask ? VK_SHADER_STAGE_TASK_BIT_NV : VK_SHADER_STAGE_MESH_BIT_NV);
+	const VkImageCreateInfo colorBufferInfo =
+	{
+		nullptr,								//	const void*				pNext;
+		0u,										//	VkImageCreateFlags		flags;
+		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+		imageFormat,							//	VkFormat				format;
+		imageExtent,							//	VkExtent3D				extent;
+		1u,										//	uint32_t				mipLevels;
+		1u,										//	uint32_t				arrayLayers;
+		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+		imageUsage,								//	VkImageUsageFlags		usage;
+		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+		0u,										//	uint32_t				queueFamilyIndexCount;
+		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+	};
+	// Create color image and view.
+	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
+	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
+	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
+	// Create a memory buffer for verification.
+	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
+	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
+	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
+	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
+	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
+	// This needs to match what the fragment shader will expect.
+	const PerVertexData perVertexData =
+	{
+		//	tcu::Vec4	positions[4];
+		{
+			tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
+			tcu::Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
+			tcu::Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
+			tcu::Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
+		},
+		//	float		pointSizes[4];
+		{ 1.0f, 1.0f, 1.0f, 1.0f, },
+		//	float		clipDistances[4];
+		{
+			1.0f,
+			-1.0f,
+			1.0f,
+			-1.0f,
+		},
+		//	tcu::Vec4	custom1[4];
+		{
+			tcu::Vec4(0.25, 0.5, 10.0, 3.0),
+			tcu::Vec4(0.25, 1.0, 20.0, 3.0),
+			tcu::Vec4( 0.5, 0.5, 20.0, 3.0),
+			tcu::Vec4( 0.5, 1.0, 10.0, 3.0),
+		},
+		//	float		custom2[4];
+		{ 1.0f, 1.0f, 2.0f, 2.0f, },
+		//	int32_t		custom3[4];
+		{ 3, 3, 4, 4 },
+	};
+	// This needs to match what the fragment shader will expect. Reminder: some of these are declared as gvec4 to match the std140
+	// layout, but only the first component is actually used.
+	const PerPrimitiveData perPrimitiveData =
+	{
+		//	int			primitiveIds[2];
+		{
+			tcu::IVec4(1000, 0, 0, 0),
+			tcu::IVec4(1001, 0, 0, 0),
+		},
+		//	int			viewportIndices[2];
+		{
+			tcu::IVec4(1, 0, 0, 0),
+			tcu::IVec4(1, 0, 0, 0),
+		},
+		//	uvec4		custom4[2];
+		{
+			tcu::UVec4(100u, 101u, 102u, 103u),
+			tcu::UVec4(200u, 201u, 202u, 203u),
+		},
+		//	float		custom5[2];
+		{
+			tcu::Vec4(6.0f, 0.0f, 0.0f, 0.0f),
+			tcu::Vec4(7.0f, 0.0f, 0.0f, 0.0f),
+		},
+	};
+	// Create and fill buffers with this data.
+	const auto			pvdSize		= static_cast<VkDeviceSize>(sizeof(perVertexData));
+	const auto			pvdInfo		= makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
+	BufferWithMemory	pvdData		(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
+	auto&				pvdAlloc	= pvdData.getAllocation();
+	void*				pvdPtr		= pvdAlloc.getHostPtr();
+	const auto			ppdSize		= static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
+	const auto			ppdInfo		= makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
+	BufferWithMemory	ppdData		(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
+	auto&				ppdAlloc	= ppdData.getAllocation();
+	void*				ppdPtr		= ppdAlloc.getHostPtr();
+	deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
+	deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
+	flushAlloc(vkd, device, pvdAlloc);
+	flushAlloc(vkd, device, ppdAlloc);
+	// Descriptor set layout.
+	DescriptorSetLayoutBuilder setLayoutBuilder;
+	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
+	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufStages);
+	const auto setLayout =, device);
+	// Create and update descriptor set.
+	DescriptorPoolBuilder descriptorPoolBuilder;
+	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
+	const auto descriptorPool	=, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
+	DescriptorSetUpdateBuilder updateBuilder;
+	const auto storageBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
+	const auto uniformBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
+	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageBufferInfo);
+	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferInfo);
+	updateBuilder.update(vkd, device);
+	// Pipeline layout.
+	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
+	// Shader modules.
+	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
+	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
+	Move<VkShaderModule> taskShader;
+	if (hasTask)
+		taskShader = createShaderModule(vkd, device, binaries.get("task"));
+	// Render pass.
+	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
+	// Framebuffer.
+	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
+	// Viewport and scissor.
+	const auto						topHalf		= makeViewport(imageExtent.width, imageExtent.height / 2u);
+	const std::vector<VkViewport>	viewports	{ makeViewport(imageExtent), topHalf };
+	const std::vector<VkRect2D>		scissors	(2u, makeRect2D(imageExtent));
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskShader.get(), meshShader.get(), fragShader.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	beginCommandBuffer(vkd, cmdBuffer);
+	// Run pipeline.
+	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(),, clearColor);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// Copy color buffer to verification buffer.
+	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
+	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
+	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
+	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
+	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
+	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
+	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Generate reference image and compare results.
+	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
+	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
+	generateReferenceLevel();
+	invalidateAlloc(vkd, device, verificationBufferAlloc);
+	if (!verifyResult(verificationAccess))
+		TCU_FAIL("Result does not match reference; check log for details");
+	return tcu::TestStatus::pass("Pass");
+// Tests that use push constants in the new stages: always in the mesh shader and, when the parameters need a task shader, in the task shader too.
+class PushConstantCase : public MeshShaderMiscCase
+					PushConstantCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, ParamsPtr params) // Moves params into the base case.
+						: MeshShaderMiscCase (testCtx, name, description, std::move(params))
+					{}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override; // Adds the GLSL sources used by the test.
+	TestInstance*	createInstance			(Context& context) const override; // Returns a new PushConstantInstance.
+class PushConstantInstance : public MeshShaderMiscInstance // Runtime part of PushConstantCase.
+	PushConstantInstance (Context& context, const MiscTestParams* params) // Forwards both arguments to the base instance.
+		: MeshShaderMiscInstance (context, params)
+	{}
+	void			generateReferenceLevel	() override; // Prepares the expected image.
+	tcu::TestStatus	iterate					() override; // Draws using push constants and verifies the result.
+TestInstance* PushConstantCase::createInstance (Context& context) const // Creates the test instance, handing it the raw parameter pointer held by m_params.
+	return new PushConstantInstance(context, m_params.get());
+void PushConstantInstance::generateReferenceLevel () // Expected output uses the single color (0, 0, 1, 1); presumably generateSolidRefLevel fills m_referenceLevel with it.
+	generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
+void PushConstantCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the optional "task" source and the "mesh" source, then calls the base-class initPrograms.
+	const auto useTaskShader	= m_params->needsTaskShader();
+	const auto pcNumFloats		= (useTaskShader ? 2u : 4u); // Each stage declares 2 floats when a task shader is used; a lone mesh shader declares all 4.
+	std::ostringstream pushConstantStream;
+	pushConstantStream
+		<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
+		<< "    layout (offset=${PCOFFSET}) float values[" << pcNumFloats << "];\n" // ${PCOFFSET} is specialized per stage below.
+		<< "} pc;\n"
+		<< "\n"
+		;
+	const tcu::StringTemplate pushConstantsTemplate (pushConstantStream.str());
+	using TemplateMap = std::map<std::string, std::string>;
+	std::ostringstream taskDataStream; // Task data block declaration; "out " or "in " is prepended depending on the stage.
+	taskDataStream
+		<< "taskNV TaskData {\n"
+		<< "    float values[2];\n"
+		<< "} td;\n"
+		<< "\n"
+		;
+	const auto taskDataDecl = taskDataStream.str();
+	if (useTaskShader)
+	{
+		TemplateMap taskMap;
+		taskMap["PCOFFSET"] = std::to_string(2u * sizeof(float)); // The task block starts right after the two floats declared by the mesh stage at offset 0.
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout(local_size_x=1) in;\n"
+			<< "\n"
+			<< "out " << taskDataDecl
+			<< pushConstantsTemplate.specialize(taskMap)
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = " << m_params->meshCount << ";\n"
+			<< "\n"
+			<< "    td.values[0] = pc.values[0];\n" // The task shader forwards its two push constant values to the mesh shader.
+			<< "    td.values[1] = pc.values[1];\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	{
+		const std::string blue	= (useTaskShader ? "td.values[0] + pc.values[0]" : "pc.values[0] + pc.values[2]"); // Blue and alpha are each the sum of two push constant values, one of them arriving through the task data when a task shader is used.
+		const std::string alpha	= (useTaskShader ? "td.values[1] + pc.values[1]" : "pc.values[1] + pc.values[3]");
+		TemplateMap meshMap;
+		meshMap["PCOFFSET"] = "0"; // The mesh stage block always starts at offset 0.
+		std::ostringstream mesh;
+		mesh
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout(local_size_x=1) in;\n"
+			<< "layout(triangles) out;\n"
+			<< "layout(max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+			<< "\n"
+			<< pushConstantsTemplate.specialize(meshMap)
+			<< (useTaskShader ? "in " + taskDataDecl : "")
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1;\n"
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n" // Single triangle whose vertices extend beyond the [-1, 1] clip range on two corners.
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
+			<< "\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+			<< "\n"
+			<< "    triangleColor[0] = vec4(0.0, 0.0, " << blue << ", " << alpha << ");\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	// Add default fragment shader through the base class.
+	MeshShaderMiscCase::initPrograms(programCollection);
+// Records a mesh-tasks draw that reads push constants, copies the color image to a host-visible
+// buffer and compares it against the reference level.
+//
+// The push constant data is { 0.25, 0.25, 0.75, 0.75 }. When the binary collection contains a
+// task shader, the first half of the data is made visible to the mesh stage and the second half
+// to the task stage; otherwise the mesh stage sees the whole range.
+//
+// Returns a passing status; fails through TCU_FAIL when the result does not match the reference.
+tcu::TestStatus PushConstantInstance::iterate ()
+{
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto		imageFormat	= getOutputFormat();
+	const auto		tcuFormat	= mapVkFormat(imageFormat);
+	const auto		imageExtent	= makeExtent3D(m_params->width, m_params->height, 1u);
+	// The image is rendered to as a color attachment and later used as the source of the copy to the verification buffer.
+	const auto		imageUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+	const auto&		binaries	= m_context.getBinaryCollection();
+	const auto		hasTask		= binaries.contains("task");
+	const VkImageCreateInfo colorBufferInfo =
+	{
+		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
+		nullptr,								//	const void*				pNext;
+		0u,										//	VkImageCreateFlags		flags;
+		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+		imageFormat,							//	VkFormat				format;
+		imageExtent,							//	VkExtent3D				extent;
+		1u,										//	uint32_t				mipLevels;
+		1u,										//	uint32_t				arrayLayers;
+		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+		imageUsage,								//	VkImageUsageFlags		usage;
+		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+		0u,										//	uint32_t				queueFamilyIndexCount;
+		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+	};
+	// Create color image and view.
+	ImageWithMemory	colorImage	(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
+	const auto		colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const auto		colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
+	const auto		colorView	= makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
+	// Create a memory buffer for verification, large enough to hold every pixel of the color image.
+	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
+	const auto			verificationBufferUsage	= (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
+	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
+	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
+	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
+	// Push constant ranges: split in two halves (mesh first, task second) when a task shader is present.
+	std::vector<float> pcData { 0.25f, 0.25f, 0.75f, 0.75f };
+	const auto pcSize		= static_cast<uint32_t>(de::dataSize(pcData));
+	const auto pcHalfSize	= pcSize / 2u;
+	std::vector<VkPushConstantRange> pcRanges;
+	if (hasTask)
+	{
+		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcHalfSize));
+		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_NV, pcHalfSize, pcHalfSize));
+	}
+	else
+	{
+		pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_NV, 0u, pcSize));
+	}
+	// Pipeline layout: no descriptor sets, only the push constant ranges.
+	const auto pipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
+	// Shader modules. The task module stays null when there is no task shader.
+	const auto	meshShader	= createShaderModule(vkd, device, binaries.get("mesh"));
+	const auto	fragShader	= createShaderModule(vkd, device, binaries.get("frag"));
+	Move<VkShaderModule> taskShader;
+	if (hasTask)
+		taskShader = createShaderModule(vkd, device, binaries.get("task"));
+	// Render pass.
+	const auto renderPass = makeRenderPass(vkd, device, imageFormat);
+	// Framebuffer.
+	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
+	// Viewport and scissor.
+	const std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
+	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(imageExtent));
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskShader.get(), meshShader.get(), fragShader.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	beginCommandBuffer(vkd, cmdBuffer);
+	// Run pipeline. The render area is the single scissor rectangle, which covers the whole image.
+	const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 0.0f);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	// Push each range separately, taking the bytes for it from the matching offset in pcData.
+	for (const auto& range : pcRanges)
+		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, reinterpret_cast<const char*>(pcData.data()) + range.offset);
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params->drawCount(), 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// Copy color buffer to verification buffer.
+	const auto colorAccess		= (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
+	const auto transferRead		= VK_ACCESS_TRANSFER_READ_BIT;
+	const auto transferWrite	= VK_ACCESS_TRANSFER_WRITE_BIT;
+	const auto hostRead			= VK_ACCESS_HOST_READ_BIT;
+	const auto preCopyBarrier	= makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
+	const auto postCopyBarrier	= makeMemoryBarrier(transferWrite, hostRead);
+	const auto copyRegion		= makeBufferImageCopy(imageExtent, colorSRL);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
+	vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postCopyBarrier, 0u, nullptr, 0u, nullptr);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Generate reference image and compare results.
+	const tcu::IVec3					iExtent				(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
+	const tcu::ConstPixelBufferAccess	verificationAccess	(tcuFormat, iExtent, verificationBufferData);
+	generateReferenceLevel();
+	// Make the device writes visible to the host before reading the buffer.
+	invalidateAlloc(vkd, device, verificationBufferAlloc);
+	if (!verifyResult(verificationAccess))
+		TCU_FAIL("Result does not match reference; check log for details");
+	return tcu::TestStatus::pass("Pass");
+}
+tcu::TestCaseGroup* createMeshShaderMiscTests (tcu::TestContext& testCtx) // Builds the "misc" test group; ownership of the returned group is released to the caller.
+	GroupPtr miscTests (new tcu::TestCaseGroup(testCtx, "misc", "Mesh Shader Misc Tests"));
+	{ // Task-to-mesh data passing.
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::just(2u);
+		paramsPtr->meshCount	= 2u;
+		paramsPtr->width		= 8u;
+		paramsPtr->height		= 8u;
+		miscTests->addChild(new ComplexTaskDataCase(testCtx, "complex_task_data", "Pass a complex structure from the task to the mesh shader", std::move(paramsPtr)));
+	}
+	{ // Single primitive cases; none of them sets a task count.
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 5u;	// Use an odd value so there's a pixel in the exact center.
+		paramsPtr->height		= 7u;	// Same reason as the width.
+		miscTests->addChild(new SinglePointCase(testCtx, "single_point", "Draw a single point", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 8u;
+		paramsPtr->height		= 5u;	// Use an odd value so there's a center line.
+		miscTests->addChild(new SingleLineCase(testCtx, "single_line", "Draw a single line", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 5u;	// Use an odd value so there's a pixel in the exact center.
+		paramsPtr->height		= 7u;	// Same reason as the width.
+		miscTests->addChild(new SingleTriangleCase(testCtx, "single_triangle", "Draw a single triangle", std::move(paramsPtr)));
+	}
+	{ // Maximum primitive count cases.
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 16u;
+		paramsPtr->height		= 16u;
+		miscTests->addChild(new MaxPointsCase(testCtx, "max_points", "Draw the maximum number of points", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 1u;
+		paramsPtr->height		= 1020u;
+		miscTests->addChild(new MaxLinesCase(testCtx, "max_lines", "Draw the maximum number of lines", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 512u;
+		paramsPtr->height		= 512u;
+		miscTests->addChild(new MaxTrianglesCase(testCtx, "max_triangles", "Draw the maximum number of triangles", std::move(paramsPtr)));
+	}
+	{ // Large work group count cases: many task groups, many mesh groups, and both at once.
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::just(65535u);
+		paramsPtr->meshCount	= 1u;
+		paramsPtr->width		= 1360u;
+		paramsPtr->height		= 1542u;
+		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_work_groups", "Generate a large number of task work groups", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::nothing<uint32_t>();
+		paramsPtr->meshCount	= 65535u;
+		paramsPtr->width		= 1360u;
+		paramsPtr->height		= 1542u;
+		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_mesh_work_groups", "Generate a large number of mesh work groups", std::move(paramsPtr)));
+	}
+	{
+		ParamsPtr paramsPtr (new MiscTestParams);
+		paramsPtr->taskCount	= tcu::just(512u);
+		paramsPtr->meshCount	= 512u;
+		paramsPtr->width		= 4096u;
+		paramsPtr->height		= 2048u;
+		miscTests->addChild(new LargeWorkGroupCase(testCtx, "many_task_mesh_work_groups", "Generate a large number of task and mesh work groups", std::move(paramsPtr)));
+	}
+	{ // No-primitive cases for every primitive type, without and with extra writes.
+		const PrimitiveType types[] = {
+			PrimitiveType::POINTS,
+			PrimitiveType::LINES,
+			PrimitiveType::TRIANGLES,
+		};
+		for (int i = 0; i < 2; ++i)
+		{
+			const bool extraWrites = (i > 0); // Second pass creates the "_extra_writes" variants.
+			for (const auto primType : types)
+			{
+				std::unique_ptr<NoPrimitivesParams> params (new NoPrimitivesParams);
+				params->taskCount		= (extraWrites ? tcu::just(1u) : tcu::nothing<uint32_t>()); // Only the extra-writes variants get a task count.
+				params->meshCount		= 1u;
+				params->width			= 16u;
+				params->height			= 16u;
+				params->primitiveType	= primType;
+				ParamsPtr			paramsPtr	(params.release()); // Transfer ownership to the pointer type taken by the case constructors.
+				const auto			primName	= primitiveTypeName(primType);
+				const std::string	name		= "no_" + primName + (extraWrites ? "_extra_writes" : "");
+				const std::string	desc		= "Run a pipeline that generates no " + primName + (extraWrites ? " but generates primitive data" : "");
+				miscTests->addChild(extraWrites
+					? (new NoPrimitivesExtraWritesCase(testCtx, name, desc, std::move(paramsPtr)))
+					: (new NoPrimitivesCase(testCtx, name, desc, std::move(paramsPtr))));
+			}
+		}
+	}
+	{ // Control barrier cases: the first iteration targets the task shader, the second the mesh shader.
+		for (int i = 0; i < 2; ++i)
+		{
+			const bool useTaskShader = (i == 0);
+			ParamsPtr paramsPtr (new MiscTestParams);
+			paramsPtr->taskCount		= (useTaskShader ? tcu::just(1u) : tcu::nothing<uint32_t>());
+			paramsPtr->meshCount		= 1u;
+			paramsPtr->width			= 1u;
+			paramsPtr->height			= 1u;
+			const std::string shader	= (useTaskShader ? "task" : "mesh");
+			const std::string name		= "barrier_in_" + shader;
+			const std::string desc		= "Use a control barrier in the " + shader + " shader";
+			miscTests->addChild(new SimpleBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
+		}
+	}
+	{ // Memory barrier cases: every barrier type, in the task shader and in the mesh shader.
+		const struct
+		{
+			MemoryBarrierType	memBarrierType;
+			std::string			caseName;
+		} barrierTypes[] =
+		{
+			{ MemoryBarrierType::SHARED,	"memory_barrier_shared"	},
+			{ MemoryBarrierType::GROUP,		"group_memory_barrier"	},
+		};
+		for (const auto& barrierCase : barrierTypes)
+		{
+			for (int i = 0; i < 2; ++i)
+			{
+				const bool useTaskShader = (i == 0);
+				std::unique_ptr<MemoryBarrierParams> paramsPtr (new MemoryBarrierParams);
+				paramsPtr->taskCount		= (useTaskShader ? tcu::just(1u) : tcu::nothing<uint32_t>());
+				paramsPtr->meshCount		= 1u;
+				paramsPtr->width			= 1u;
+				paramsPtr->height			= 1u;
+				paramsPtr->memBarrierType	= barrierCase.memBarrierType;
+				const std::string shader	= (useTaskShader ? "task" : "mesh");
+				const std::string name		= barrierCase.caseName + "_in_" + shader;
+				const std::string desc		= "Use " + paramsPtr->glslFunc() + "() in the " + shader + " shader"; // Built before paramsPtr is moved from below.
+				miscTests->addChild(new MemoryBarrierCase(testCtx, name, desc, std::move(paramsPtr)));
+			}
+		}
+	}
+	{ // Custom attribute cases: first without a task shader, then with one.
+		for (int i = 0; i < 2; ++i)
+		{
+			const bool useTaskShader	= (i > 0);
+			const auto name				= std::string("custom_attributes") + (useTaskShader ? "_and_task_shader" : "");
+			const auto desc				= std::string("Use several custom vertex and primitive attributes") + (useTaskShader ? " and also a task shader" : "");
+			ParamsPtr paramsPtr (new MiscTestParams);
+			paramsPtr->taskCount		= (useTaskShader ? tcu::just(1u) : tcu::nothing<uint32_t>());
+			paramsPtr->meshCount		= 1u;
+			paramsPtr->width			= 32u;
+			paramsPtr->height			= 32u;
+			miscTests->addChild(new CustomAttributesCase(testCtx, name, desc, std::move(paramsPtr)));
+		}
+	}
+	{ // Push constant cases: first without a task shader, then with one.
+		for (int i = 0; i < 2; ++i)
+		{
+			const bool useTaskShader	= (i > 0);
+			const auto name				= std::string("push_constant") + (useTaskShader ? "_and_task_shader" : "");
+			const auto desc				= std::string("Use push constants in the mesh shader stage") + (useTaskShader ? " and also in the task shader stage" : "");
+			ParamsPtr paramsPtr (new MiscTestParams);
+			paramsPtr->taskCount		= (useTaskShader ? tcu::just(1u) : tcu::nothing<uint32_t>());
+			paramsPtr->meshCount		= 1u;
+			paramsPtr->width			= 16u;
+			paramsPtr->height			= 16u;
+			miscTests->addChild(new PushConstantCase(testCtx, name, desc, std::move(paramsPtr)));
+		}
+	}
+	return miscTests.release();
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.hpp
new file mode 100644
index 0000000..b141a90
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTests.hpp
@@ -0,0 +1,38 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Misc Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+namespace MeshShader
+tcu::TestCaseGroup* createMeshShaderMiscTests (tcu::TestContext& testCtx); // Creates the "misc" mesh shader test group; the caller takes ownership of the returned pointer.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.cpp
new file mode 100644
index 0000000..f4350be
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.cpp
@@ -0,0 +1,675 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Property Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderPropertyTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkObjUtil.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkBarrierUtil.hpp"
+#include "tcuStringTemplate.hpp"
+#include <vector>
+#include <string>
+#include <map>
+#include <sstream>
+namespace vkt
+namespace MeshShader
+using GroupPtr			= de::MovePtr<tcu::TestCaseGroup>; // Owning pointer to a test group.
+using ReplacementsMap	= std::map<std::string, std::string>; // Placeholder name -> replacement text, used to specialize the shader templates.
+using namespace vk;
+tcu::StringTemplate getTaskShaderTemplate () // Returns the task shader skeleton; placeholders: TASK_LOCAL_SIZE_X (default 1), TASK_GLOBAL_DECL (optional), TASK_TASK_COUNT (default 0), TASK_BODY (optional).
+	return tcu::StringTemplate(
+		"#version 460\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (local_size_x=${TASK_LOCAL_SIZE_X:default=1}) in;\n"
+		"\n"
+		"${TASK_GLOBAL_DECL:opt}"
+		"\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    gl_TaskCountNV = ${TASK_TASK_COUNT:default=0};\n"
+		"${TASK_BODY:opt}"
+		"}\n");
+tcu::StringTemplate getMeshShaderTemplate () // Returns the mesh shader skeleton; placeholders: MESH_LOCAL_SIZE_X (default 1), MESH_GLOBAL_DECL (optional), MESH_BODY (optional).
+	return tcu::StringTemplate(
+		"#version 460\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (local_size_x=${MESH_LOCAL_SIZE_X:default=1}) in;\n"
+		"layout (triangles) out;\n"
+		"layout (max_vertices=3, max_primitives=1) out;\n"
+		"\n"
+		"${MESH_GLOBAL_DECL:opt}"
+		"\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"    gl_PrimitiveCountNV = 0u;\n" // The skeleton itself emits no primitives.
+		"${MESH_BODY:opt}"
+		"}\n");
+std::string getCommonStorageBufferDecl () // Returns the GLSL declaration of the output storage buffer (set 0, binding 0) holding an unsized uint array.
+	return "layout (set=0, binding=0) buffer OutputBlock { uint values[]; } ov;\n";
+void genericCheckSupport (Context& context, bool taskShaderNeeded) // Throws NotSupportedError unless the features shared by all property tests are available; taskShaderNeeded additionally requires the taskShader feature.
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	const auto& meshFeatures = context.getMeshShaderFeatures();
+	if (!meshFeatures.meshShader)
+		TCU_THROW(NotSupportedError, "Mesh shaders not supported");
+	if (taskShaderNeeded && !meshFeatures.taskShader)
+		TCU_THROW(NotSupportedError, "Task shaders not supported");
+	const auto& features = context.getDeviceFeatures();
+	if (!features.vertexPipelineStoresAndAtomics) // The test shaders write to a storage buffer from the task or mesh stage.
+		TCU_THROW(NotSupportedError, "Vertex pipeline stores and atomics not supported");
+struct InstanceParams // Parameters consumed by MeshShaderPropertyInstance.
+	uint32_t	bufferElements; // Number of uint32_t elements in the storage buffer that is verified.
+	uint32_t	taskCount; // Task count passed to cmdDrawMeshTasksNV.
+class MeshShaderPropertyInstance : public vkt::TestInstance // Runs a mesh shading pipeline and checks the contents of a storage buffer written by the shaders.
+					MeshShaderPropertyInstance	(Context& context, const InstanceParams& params) // Stores a copy of params.
+						: vkt::TestInstance	(context)
+						, m_params			(params)
+						{}
+	virtual			~MeshShaderPropertyInstance	(void) {}
+	tcu::TestStatus	iterate						() override; // Executes the draw and verifies the buffer.
+	InstanceParams	m_params;
+// Runs the mesh shading pipeline once with m_params.taskCount tasks and verifies the storage buffer.
+//
+// The buffer holds m_params.bufferElements uint32_t values and is pre-filled with 0xFF bytes. After
+// the draw, element i is expected to contain the value i; any other value fails the test through
+// TCU_FAIL. The pipeline uses an empty render pass (no attachments) and no fragment shader, and a
+// task shader only when the binary collection contains one.
+tcu::TestStatus MeshShaderPropertyInstance::iterate ()
+{
+	const auto&		vkd			= m_context.getDeviceInterface();
+	const auto		device		= m_context.getDevice();
+	auto&			alloc		= m_context.getDefaultAllocator();
+	const auto		queueIndex	= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue		= m_context.getUniversalQueue();
+	const auto&		binaries	= m_context.getBinaryCollection();
+	const auto		extent		= makeExtent3D(1u, 1u, 1u);
+	const auto		bindPoint	= VK_PIPELINE_BIND_POINT_GRAPHICS;
+	const auto		useTask		= binaries.contains("task");
+	const auto		storageBufferSize	= static_cast<VkDeviceSize>(m_params.bufferElements) * static_cast<VkDeviceSize>(sizeof(uint32_t));
+	const auto		storageBufferUsage	= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	const auto		storageBufferType	= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+	// The buffer is visible to the mesh stage and, when present, to the task stage.
+	const auto		storageBufferStages	= (VK_SHADER_STAGE_MESH_BIT_NV | (useTask ? VK_SHADER_STAGE_TASK_BIT_NV : 0));
+	// Create storage buffer with the required space.
+	const auto			storageBufferInfo		= makeBufferCreateInfo(storageBufferSize, storageBufferUsage);
+	BufferWithMemory	storageBuffer			(vkd, device, alloc, storageBufferInfo, MemoryRequirement::HostVisible);
+	auto&				storageBufferAlloc		= storageBuffer.getAllocation();
+	void*				storageBufferDataPtr	= storageBufferAlloc.getHostPtr();
+	const auto			storageBufferDescInfo	= makeDescriptorBufferInfo(storageBuffer.get(), 0ull, storageBufferSize);
+	// Fill the buffer with 0xFF bytes so elements the shaders never write are detected below.
+	deMemset(storageBufferDataPtr, 0xFF, static_cast<size_t>(storageBufferSize));
+	flushAlloc(vkd, device, storageBufferAlloc);
+	// Descriptor pool.
+	DescriptorPoolBuilder poolBuilder;
+	poolBuilder.addType(storageBufferType);
+	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+	// Descriptor set layout and pipeline layout.
+	DescriptorSetLayoutBuilder layoutBuilder;
+	layoutBuilder.addSingleBinding(storageBufferType, storageBufferStages);
+	const auto setLayout		= layoutBuilder.build(vkd, device);
+	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get());
+	// Allocate and prepare descriptor set.
+	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
+	DescriptorSetUpdateBuilder setUpdateBuilder;
+	setUpdateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), storageBufferType, &storageBufferDescInfo);
+	setUpdateBuilder.update(vkd, device);
+	// Create empty render pass and framebuffer.
+	const auto renderPass	= makeRenderPass(vkd, device);
+	const auto framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, extent.width, extent.height);
+	// Shader modules and pipeline.
+	Move<VkShaderModule>		taskModule;
+	Move<VkShaderModule>		meshModule;
+	const Move<VkShaderModule>	fragModule;	// No fragment shader.
+	if (useTask)
+		taskModule = createShaderModule(vkd, device, binaries.get("task"));
+	meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
+	const std::vector<VkViewport>	viewports	(1u, makeViewport(extent));
+	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(extent));
+	const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
+		taskModule.get(), meshModule.get(), fragModule.get(),
+		renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	// Run the pipeline. The render area is the single scissor rectangle.
+	beginCommandBuffer(vkd, cmdBuffer);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0));
+	vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
+	vkd.cmdBindPipeline(cmdBuffer, bindPoint, pipeline.get());
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.taskCount, 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// Make the shader writes available to host reads.
+	const auto shaderToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &shaderToHostBarrier, 0u, nullptr, 0u, nullptr);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Verify the storage buffer has the expected results: each element must hold its own index.
+	invalidateAlloc(vkd, device, storageBufferAlloc);
+	std::vector<uint32_t> bufferData (m_params.bufferElements);
+	deMemcpy(bufferData.data(), storageBufferDataPtr, de::dataSize(bufferData));
+	for (size_t idx = 0u; idx < bufferData.size(); ++idx)
+	{
+		const auto	expected	= static_cast<uint32_t>(idx);
+		const auto&	bufferValue	= bufferData[idx];
+		if (bufferValue != expected)
+			TCU_FAIL("Unexpected value found in buffer position " + de::toString(idx) + ": " + de::toString(bufferValue));
+	}
+	return tcu::TestStatus::pass("Pass");
+}
+class MaxDrawMeshTasksCountCase : public vkt::TestCase // Draws minLimit tasks and checks that every work group ran, storing from either the task or the mesh shader.
+	enum class TestType { TASK=0, MESH }; // Selects the stage that writes to the storage buffer.
+					MaxDrawMeshTasksCountCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, TestType testType)
+						: vkt::TestCase	(testCtx, name, description)
+						, m_testType	(testType)
+						{}
+	virtual			~MaxDrawMeshTasksCountCase	(void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override;
+	TestInstance*	createInstance	(Context& context) const override;
+	void			checkSupport	(Context& context) const override;
+	static constexpr uint32_t minLimit = 65535u; // Value maxDrawMeshTasksCount is required to reach (see checkSupport); also the task count used by the test.
+	TestType		m_testType;
+void MaxDrawMeshTasksCountCase::checkSupport (Context& context) const // Requires the generic features (plus task shaders for TASK tests) and fails the test if the reported limit is below minLimit.
+	genericCheckSupport(context, (m_testType == TestType::TASK));
+	const auto& properties = context.getMeshShaderProperties();
+	if (properties.maxDrawMeshTasksCount < minLimit)
+		TCU_FAIL("maxDrawMeshTasksCount property below the minimum limit"); // Reported as a failure, not as "not supported".
+TestInstance* MaxDrawMeshTasksCountCase::createInstance (Context& context) const // Creates an instance that draws minLimit tasks and verifies minLimit buffer elements.
+	const InstanceParams params =
+	{
+		minLimit,						//	uint32_t	bufferElements;
+		minLimit,						//	uint32_t	taskCount;
+	};
+	return new MeshShaderPropertyInstance(context, params);
+void MaxDrawMeshTasksCountCase::initPrograms (vk::SourceCollections& programCollection) const // Adds the "mesh" source and, for TASK tests, the "task" source; the buffer store goes in the stage under test.
+	ReplacementsMap meshReplacements;
+	ReplacementsMap taskReplacements;
+	const auto meshTemplate = getMeshShaderTemplate();
+	const std::string desc = getCommonStorageBufferDecl();
+	const std::string body = "    ov.values[gl_WorkGroupID.x] = gl_WorkGroupID.x;\n"; // Each work group stores its own index at that index.
+	if (m_testType == TestType::TASK)
+	{
+		const auto taskTemplate = getTaskShaderTemplate();
+		taskReplacements["TASK_GLOBAL_DECL"]	= desc;
+		taskReplacements["TASK_BODY"]			= body;
+		programCollection.glslSources.add("task") << glu::TaskSource(taskTemplate.specialize(taskReplacements));
+	}
+	else
+	{
+		meshReplacements["MESH_GLOBAL_DECL"]	= desc;
+		meshReplacements["MESH_BODY"]			= body;
+	}
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshTemplate.specialize(meshReplacements)); // For TASK tests the mesh shader is specialized with no replacements.
+class MaxTaskWorkGroupInvocationsCase : public vkt::TestCase // Property test case for maxTaskWorkGroupInvocations.
+					MaxTaskWorkGroupInvocationsCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: vkt::TestCase	(testCtx, name, description) {}
+	virtual			~MaxTaskWorkGroupInvocationsCase (void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override; // Adds a bare "mesh" program and a "task" program with local size minLimit.
+	TestInstance*	createInstance	(Context& context) const override; // Creates a MeshShaderPropertyInstance with minLimit buffer elements and one task.
+	void			checkSupport	(Context& context) const override; // Fails the test when the reported property is below minLimit.
+	static constexpr uint32_t minLimit = 32u; // Value the property is compared against; also the task shader local size.
+void MaxTaskWorkGroupInvocationsCase::checkSupport (Context& context) const
+	// This case always uses a task shader.
+	const bool taskShaderNeeded = true;
+	genericCheckSupport(context, taskShaderNeeded);
+	// A reported value under the limit is a test failure, not a "not supported" result.
+	const auto reportedInvocations = context.getMeshShaderProperties().maxTaskWorkGroupInvocations;
+	if (reportedInvocations < minLimit)
+		TCU_FAIL("maxTaskWorkGroupInvocations property below the minimum limit");
+TestInstance* MaxTaskWorkGroupInvocationsCase::createInstance (Context& context) const
+	// Field order is { bufferElements, taskCount }: one element per invocation, single task.
+	const InstanceParams instanceParams = { minLimit, 1u };
+	return new MeshShaderPropertyInstance(context, instanceParams);
+void MaxTaskWorkGroupInvocationsCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Mesh shader: template specialized without any replacement.
+	const ReplacementsMap	noReplacements;
+	const auto				meshShaderTemplate	= getMeshShaderTemplate();
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderTemplate.specialize(noReplacements));
+	// Task shader: minLimit invocations, each one storing its local index in the buffer.
+	const auto		taskShaderTemplate	= getTaskShaderTemplate();
+	ReplacementsMap	taskMap;
+	taskMap["TASK_GLOBAL_DECL"]		= getCommonStorageBufferDecl();
+	taskMap["TASK_BODY"]			= "    ov.values[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n";
+	// uint32_t{...} passes a temporary copy of the static constexpr member
+	// (presumably to avoid binding a reference to it; de::toString is not visible here).
+	taskMap["TASK_LOCAL_SIZE_X"]	= de::toString(uint32_t{minLimit});
+	programCollection.glslSources.add("task") << glu::TaskSource(taskShaderTemplate.specialize(taskMap));
+// In the case of the NV extension, this is very similar to the test above. Added for completion.
+class MaxTaskWorkGroupSizeCase : public MaxTaskWorkGroupInvocationsCase // Inherits shaders and instance from the parent; only the support check is overridden.
+	MaxTaskWorkGroupSizeCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+		: MaxTaskWorkGroupInvocationsCase (testCtx, name, description) {}
+	void checkSupport (Context& context) const override; // Fails the test when any maxTaskWorkGroupSize component is below the values declared here.
+	static constexpr uint32_t minSizeX = 32u; // Compared against maxTaskWorkGroupSize[0].
+	static constexpr uint32_t minSizeY = 1u; // Compared against maxTaskWorkGroupSize[1].
+	static constexpr uint32_t minSizeZ = 1u; // Compared against maxTaskWorkGroupSize[2].
+void MaxTaskWorkGroupSizeCase::checkSupport (Context& context) const
+	// This case always uses a task shader.
+	const bool taskShaderNeeded = true;
+	genericCheckSupport(context, taskShaderNeeded);
+	// Every dimension of the reported size must reach its per-axis limit.
+	const auto&	reportedSize	= context.getMeshShaderProperties().maxTaskWorkGroupSize;
+	const bool	xLargeEnough	= !(reportedSize[0] < minSizeX);
+	const bool	yLargeEnough	= !(reportedSize[1] < minSizeY);
+	const bool	zLargeEnough	= !(reportedSize[2] < minSizeZ);
+	if (!(xLargeEnough && yLargeEnough && zLargeEnough))
+		TCU_FAIL("maxTaskWorkGroupSize property below the minimum limit");
+class MaxTaskOutputCountCase : public vkt::TestCase // Property test case for maxTaskOutputCount.
+					MaxTaskOutputCountCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: vkt::TestCase	(testCtx, name, description) {}
+	virtual			~MaxTaskOutputCountCase (void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override; // Adds a "task" program with TASK_TASK_COUNT set to minLimit and a "mesh" program that writes the buffer.
+	TestInstance*	createInstance	(Context& context) const override; // Creates a MeshShaderPropertyInstance with minLimit buffer elements and one task.
+	void			checkSupport	(Context& context) const override; // Fails the test when the reported property is below minLimit.
+	static constexpr uint32_t minLimit = 65535u; // Value the property is compared against; also the task count replacement.
+void MaxTaskOutputCountCase::checkSupport (Context& context) const
+	// This case always uses a task shader.
+	const bool taskShaderNeeded = true;
+	genericCheckSupport(context, taskShaderNeeded);
+	// A reported value under the limit is a test failure, not a "not supported" result.
+	const auto reportedCount = context.getMeshShaderProperties().maxTaskOutputCount;
+	if (reportedCount < minLimit)
+		TCU_FAIL("maxTaskOutputCount property below the minimum limit");
+TestInstance* MaxTaskOutputCountCase::createInstance (Context& context) const
+	// Field order is { bufferElements, taskCount }: a single task is launched by the draw.
+	const InstanceParams instanceParams = { minLimit, 1u };
+	return new MeshShaderPropertyInstance(context, instanceParams);
+void MaxTaskOutputCountCase::initPrograms (vk::SourceCollections& programCollection) const
+	const auto meshShaderTemplate = getMeshShaderTemplate();
+	const auto taskShaderTemplate = getTaskShaderTemplate();
+	// Task shader: only the task count replacement is set, to minLimit.
+	// uint32_t{...} passes a temporary copy of the static constexpr member.
+	ReplacementsMap taskMap;
+	taskMap["TASK_TASK_COUNT"] = de::toString(uint32_t{minLimit});
+	// Mesh shader: each work group stores its own index in the storage buffer.
+	ReplacementsMap meshMap;
+	meshMap["MESH_GLOBAL_DECL"]	= getCommonStorageBufferDecl();
+	meshMap["MESH_BODY"]		= "    ov.values[gl_WorkGroupID.x] = gl_WorkGroupID.x;\n";
+	programCollection.glslSources.add("task") << glu::TaskSource(taskShaderTemplate.specialize(taskMap));
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderTemplate.specialize(meshMap));
+class MaxMeshWorkGroupInvocationsCase : public vkt::TestCase // Property test case for maxMeshWorkGroupInvocations.
+					MaxMeshWorkGroupInvocationsCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: vkt::TestCase	(testCtx, name, description) {}
+	virtual			~MaxMeshWorkGroupInvocationsCase (void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override; // Adds only a "mesh" program, with local size minLimit.
+	TestInstance*	createInstance	(Context& context) const override; // Creates a MeshShaderPropertyInstance with minLimit buffer elements and one task.
+	void			checkSupport	(Context& context) const override; // Fails the test when the reported property is below minLimit.
+	static constexpr uint32_t minLimit = 32u; // Value the property is compared against; also the mesh shader local size.
+void MaxMeshWorkGroupInvocationsCase::checkSupport (Context& context) const
+	// Mesh-only case: no task shader is needed.
+	const bool taskShaderNeeded = false;
+	genericCheckSupport(context, taskShaderNeeded);
+	// A reported value under the limit is a test failure, not a "not supported" result.
+	const auto reportedInvocations = context.getMeshShaderProperties().maxMeshWorkGroupInvocations;
+	if (reportedInvocations < minLimit)
+		TCU_FAIL("maxMeshWorkGroupInvocations property below the minimum limit");
+TestInstance* MaxMeshWorkGroupInvocationsCase::createInstance (Context& context) const
+	// Field order is { bufferElements, taskCount }: one element per invocation, single task.
+	const InstanceParams instanceParams = { minLimit, 1u };
+	return new MeshShaderPropertyInstance(context, instanceParams);
+void MaxMeshWorkGroupInvocationsCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Mesh shader with minLimit invocations, each one storing its local index in the buffer.
+	const auto		meshShaderTemplate	= getMeshShaderTemplate();
+	ReplacementsMap	meshMap;
+	// uint32_t{...} passes a temporary copy of the static constexpr member.
+	meshMap["MESH_LOCAL_SIZE_X"]	= de::toString(uint32_t{minLimit});
+	meshMap["MESH_GLOBAL_DECL"]		= getCommonStorageBufferDecl();
+	meshMap["MESH_BODY"]			= "    ov.values[gl_LocalInvocationID.x] = gl_LocalInvocationID.x;\n";
+	const std::string meshSource = meshShaderTemplate.specialize(meshMap);
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshSource);
+// In the case of the NV extension, this is very similar to the test above. Added for completion.
+class MaxMeshWorkGroupSizeCase : public MaxMeshWorkGroupInvocationsCase // Inherits shaders and instance from the parent; only the support check is overridden.
+	MaxMeshWorkGroupSizeCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+		: MaxMeshWorkGroupInvocationsCase (testCtx, name, description) {}
+	void checkSupport (Context& context) const override; // Fails the test when any maxMeshWorkGroupSize component is below the values declared here.
+	static constexpr uint32_t minSizeX = 32u; // Compared against maxMeshWorkGroupSize[0].
+	static constexpr uint32_t minSizeY = 1u; // Compared against maxMeshWorkGroupSize[1].
+	static constexpr uint32_t minSizeZ = 1u; // Compared against maxMeshWorkGroupSize[2].
+void MaxMeshWorkGroupSizeCase::checkSupport (Context& context) const
+	// Mesh-only case: no task shader is needed.
+	const bool taskShaderNeeded = false;
+	genericCheckSupport(context, taskShaderNeeded);
+	// Every dimension of the reported size must reach its per-axis limit.
+	const auto&	reportedSize	= context.getMeshShaderProperties().maxMeshWorkGroupSize;
+	const bool	xLargeEnough	= !(reportedSize[0] < minSizeX);
+	const bool	yLargeEnough	= !(reportedSize[1] < minSizeY);
+	const bool	zLargeEnough	= !(reportedSize[2] < minSizeZ);
+	if (!(xLargeEnough && yLargeEnough && zLargeEnough))
+		TCU_FAIL("maxMeshWorkGroupSize property below the minimum limit");
+std::string getSharedArrayDecl (uint32_t numElements)
+	// Returns GLSL declaring the constant arrayElements (set to numElements) and a
+	// shared array of that many uints named sharedArray.
+	const std::string elementCount = de::toString(numElements);
+	std::string glsl = "const uint arrayElements = " + elementCount + ";\n";
+	glsl += "shared uint sharedArray[arrayElements];\n";
+	return glsl;
+std::string getSharedMemoryBody (uint32_t localSize)
+	// Returns a GLSL shader body that exercises the shared array declared by
+	// getSharedArrayDecl. localSize is the value each array element is expected
+	// to hold after every invocation has incremented it once.
+	std::ostringstream glsl;
+	// Step 1: the first invocation zeroes the whole array.
+	glsl
+		<< "\n"
+		   "    if (gl_LocalInvocationID.x == 0u)\n"
+		   "    {\n"
+		   "        for (uint i = 0; i < arrayElements; ++i)\n"
+		   "            sharedArray[i] = 0u;\n"
+		   "    }\n"
+		   "\n"
+		   "    barrier();\n"
+		   "\n";
+	// Step 2: every invocation atomically increments every element.
+	glsl
+		<< "    for (uint i = 0; i < arrayElements; ++i)\n"
+		   "        atomicAdd(sharedArray[i], 1u);\n"
+		   "\n"
+		   "    barrier();\n"
+		   "\n";
+	// Step 3: every invocation checks all elements against localSize.
+	glsl
+		<< "    uint allGood = 1u;\n"
+		   "    for (uint i = 0; i < arrayElements; ++i)\n"
+		   "    {\n"
+		   "        if (sharedArray[i] != ";
+	glsl << localSize;
+	glsl
+		<< ")\n"
+		   "        {\n"
+		   "            allGood = 0u;\n"
+		   "            break;\n"
+		   "        }\n"
+		   "    }\n"
+		   "\n";
+	// Step 4: store the invocation index on success, or the work group size on failure.
+	glsl
+		<< "    ov.values[gl_LocalInvocationID.x] = ((allGood == 1u) ? gl_LocalInvocationID.x : gl_WorkGroupSize.x);\n";
+	return glsl.str();
+class MaxTaskTotalMemorySizeCase : public vkt::TestCase // Property test case for maxTaskTotalMemorySize, using a shared array in the task shader.
+					MaxTaskTotalMemorySizeCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: vkt::TestCase	(testCtx, name, description) {}
+	virtual			~MaxTaskTotalMemorySizeCase (void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override; // Adds a bare "mesh" program and a "task" program with a minLimit-byte shared array.
+	TestInstance*	createInstance	(Context& context) const override; // Creates a MeshShaderPropertyInstance with localSize buffer elements and one task.
+	void			checkSupport	(Context& context) const override; // Fails the test when the reported property is below minLimit.
+	static constexpr uint32_t localSize	= 32u; // Task shader local size and number of buffer elements.
+	static constexpr uint32_t minLimit	= 16384u; // Value the property is compared against; the shared array size is derived from it via sizeof(uint32_t).
+TestInstance* MaxTaskTotalMemorySizeCase::createInstance (Context& context) const
+	// Field order is { bufferElements, taskCount }: one element per invocation, single task.
+	const InstanceParams instanceParams = { localSize, 1u };
+	return new MeshShaderPropertyInstance(context, instanceParams);
+void MaxTaskTotalMemorySizeCase::checkSupport (Context& context) const
+	// This case always uses a task shader.
+	const bool taskShaderNeeded = true;
+	genericCheckSupport(context, taskShaderNeeded);
+	// A reported value under the limit is a test failure, not a "not supported" result.
+	const auto reportedSize = context.getMeshShaderProperties().maxTaskTotalMemorySize;
+	if (reportedSize < minLimit)
+		TCU_FAIL("maxTaskTotalMemorySize property below the minimum limit");
+void MaxTaskTotalMemorySizeCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Mesh shader: template specialized without any replacement.
+	const ReplacementsMap	noReplacements;
+	const auto				meshShaderTemplate	= getMeshShaderTemplate();
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderTemplate.specialize(noReplacements));
+	// Task shader: the shared uint array is sized so it occupies minLimit bytes.
+	const auto		taskShaderTemplate	= getTaskShaderTemplate();
+	const uint32_t	bytesPerElement		= static_cast<uint32_t>(sizeof(uint32_t));
+	const uint32_t	elementCount		= minLimit / bytesPerElement;
+	const auto		declarations		= getCommonStorageBufferDecl() + getSharedArrayDecl(elementCount);
+	const auto		shaderBody			= getSharedMemoryBody(localSize);
+	ReplacementsMap	taskMap;
+	// uint32_t{...} passes a temporary copy of the static constexpr member.
+	taskMap["TASK_LOCAL_SIZE_X"]	= de::toString(uint32_t{localSize});
+	taskMap["TASK_GLOBAL_DECL"]		= declarations;
+	taskMap["TASK_BODY"]			= shaderBody;
+	programCollection.glslSources.add("task") << glu::TaskSource(taskShaderTemplate.specialize(taskMap));
+// Very similar to the previous one in NV.
+class MaxMeshTotalMemorySizeCase : public vkt::TestCase // Property test case for maxMeshTotalMemorySize, using a shared array in the mesh shader.
+					MaxMeshTotalMemorySizeCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description)
+						: vkt::TestCase	(testCtx, name, description) {}
+	virtual			~MaxMeshTotalMemorySizeCase (void) {}
+	void			initPrograms	(vk::SourceCollections& programCollection) const override; // Adds only a "mesh" program, with a minLimit-byte shared array.
+	TestInstance*	createInstance	(Context& context) const override; // Creates a MeshShaderPropertyInstance with localSize buffer elements and one task.
+	void			checkSupport	(Context& context) const override; // Fails the test when the reported property is below minLimit.
+	static constexpr uint32_t localSize	= 32u; // Mesh shader local size and number of buffer elements.
+	static constexpr uint32_t minLimit	= 16384u; // Value the property is compared against; the shared array size is derived from it via sizeof(uint32_t).
+TestInstance* MaxMeshTotalMemorySizeCase::createInstance (Context& context) const
+	// Field order is { bufferElements, taskCount }: one element per invocation, single task.
+	const InstanceParams instanceParams = { localSize, 1u };
+	return new MeshShaderPropertyInstance(context, instanceParams);
+void MaxMeshTotalMemorySizeCase::checkSupport (Context& context) const
+	// Mesh-only case: no task shader is needed.
+	const bool taskShaderNeeded = false;
+	genericCheckSupport(context, taskShaderNeeded);
+	// A reported value under the limit is a test failure, not a "not supported" result.
+	const auto reportedSize = context.getMeshShaderProperties().maxMeshTotalMemorySize;
+	if (reportedSize < minLimit)
+		TCU_FAIL("maxMeshTotalMemorySize property below the minimum limit");
+void MaxMeshTotalMemorySizeCase::initPrograms (vk::SourceCollections& programCollection) const
+	// Mesh shader only: the shared uint array is sized so it occupies minLimit bytes.
+	const auto		meshShaderTemplate	= getMeshShaderTemplate();
+	const uint32_t	bytesPerElement		= static_cast<uint32_t>(sizeof(uint32_t));
+	const uint32_t	elementCount		= minLimit / bytesPerElement;
+	const auto		declarations		= getCommonStorageBufferDecl() + getSharedArrayDecl(elementCount);
+	const auto		shaderBody			= getSharedMemoryBody(localSize);
+	ReplacementsMap	meshMap;
+	// uint32_t{...} passes a temporary copy of the static constexpr member.
+	meshMap["MESH_LOCAL_SIZE_X"]	= de::toString(uint32_t{localSize});
+	meshMap["MESH_GLOBAL_DECL"]		= declarations;
+	meshMap["MESH_BODY"]			= shaderBody;
+	programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderTemplate.specialize(meshMap));
+tcu::TestCaseGroup* createMeshShaderPropertyTests (tcu::TestContext& testCtx)
+	// Builds the "property" group with one child per property test case.
+	// Ownership of the returned group is released to the caller.
+	using DrawCountType = MaxDrawMeshTasksCountCase::TestType;
+	GroupPtr propertyGroup (new tcu::TestCaseGroup(testCtx, "property", "Mesh Shader Property Tests"));
+	// Draw count limit, written from the task stage and from the mesh stage.
+	propertyGroup->addChild(new MaxDrawMeshTasksCountCase(testCtx, "max_draw_mesh_tasks_count_with_task", "", DrawCountType::TASK));
+	propertyGroup->addChild(new MaxDrawMeshTasksCountCase(testCtx, "max_draw_mesh_tasks_count_with_mesh", "", DrawCountType::MESH));
+	// Task shader limits.
+	propertyGroup->addChild(new MaxTaskWorkGroupInvocationsCase(testCtx, "max_task_work_group_invocations", ""));
+	propertyGroup->addChild(new MaxTaskWorkGroupSizeCase(testCtx, "max_task_work_group_size", ""));
+	propertyGroup->addChild(new MaxTaskOutputCountCase(testCtx, "max_task_output_count", ""));
+	// Mesh shader limits.
+	propertyGroup->addChild(new MaxMeshWorkGroupInvocationsCase(testCtx, "max_mesh_work_group_invocations", ""));
+	propertyGroup->addChild(new MaxMeshWorkGroupSizeCase(testCtx, "max_mesh_work_group_size", ""));
+	// Total memory limits.
+	propertyGroup->addChild(new MaxTaskTotalMemorySizeCase(testCtx, "max_task_total_memory_size", ""));
+	propertyGroup->addChild(new MaxMeshTotalMemorySizeCase(testCtx, "max_mesh_total_memory_size", ""));
+	return propertyGroup.release();
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.hpp
new file mode 100644
index 0000000..9e8b7ea
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderPropertyTests.hpp
@@ -0,0 +1,38 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Property Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+namespace MeshShader
+tcu::TestCaseGroup* createMeshShaderPropertyTests (tcu::TestContext& testCtx); // Creates the "property" test group; the definition releases ownership of the group to the caller.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.cpp
new file mode 100644
index 0000000..c39959a
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.cpp
@@ -0,0 +1,558 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Smoke Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderSmokeTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkImageWithMemory.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkObjUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "tcuImageCompare.hpp"
+#include <utility>
+#include <vector>
+#include <string>
+#include <sstream>
+namespace vkt
+namespace MeshShader
+using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
+using namespace vk;
+void checkTaskMeshShaderSupport (Context& context, bool requireTask, bool requireMesh)
+	// Requires VK_NV_mesh_shader and throws NotSupportedError when a requested
+	// shader stage feature (task and/or mesh) is not enabled.
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	// Callers must ask for at least one of the two stages.
+	DE_ASSERT(requireTask || requireMesh);
+	const auto&	features			= context.getMeshShaderFeatures();
+	const bool	taskUnavailable		= requireTask && !features.taskShader;
+	const bool	meshUnavailable		= requireMesh && !features.meshShader;
+	// The task check comes first, so it wins when both features are missing.
+	if (taskUnavailable)
+		TCU_THROW(NotSupportedError, "Task shader not supported");
+	if (meshUnavailable)
+		TCU_THROW(NotSupportedError, "Mesh shader not supported");
+std::string commonMeshFragShader ()
+	// Fragment shader shared by the smoke tests: forwards the per-primitive
+	// triangleColor input (location 0) to the color output.
+	static const char* const fragSource =
+		"#version 450\n"
+		"#extension GL_NV_mesh_shader : enable\n"
+		"\n"
+		"layout (location=0) in perprimitiveNV vec4 triangleColor;\n"
+		"layout (location=0) out vec4 outColor;\n"
+		"\n"
+		"void main ()\n"
+		"{\n"
+		"	outColor = triangleColor;\n"
+		"}\n"
+		;
+	return std::string(fragSource);
+struct MeshTriangleRendererParams // Input data and expected result handed to MeshTriangleRenderer.
+	std::vector<tcu::Vec4>	vertexCoords; // Its data size determines the uniform buffer bound at binding 0 in MeshTriangleRenderer::iterate.
+	std::vector<uint32_t>	vertexIndices; // Its data size determines the storage buffer bound at binding 1 in MeshTriangleRenderer::iterate.
+	uint32_t				taskCount; // NOTE(review): presumably the task count used for the draw call; its use is outside this chunk.
+	tcu::Vec4				expectedColor; // Color the test expects; the verification code is outside this chunk.
+	MeshTriangleRendererParams (std::vector<tcu::Vec4> vertexCoords_, std::vector<uint32_t>	vertexIndices_, uint32_t taskCount_, const tcu::Vec4& expectedColor_) // Takes the vectors by value and moves them into the members.
+		: vertexCoords	(std::move(vertexCoords_))
+		, vertexIndices	(std::move(vertexIndices_))
+		, taskCount		(taskCount_)
+		, expectedColor	(expectedColor_)
+	{}
+	MeshTriangleRendererParams (MeshTriangleRendererParams&& other) // Move constructor: delegates to the constructor above, moving both vectors out of other.
+		: MeshTriangleRendererParams (std::move(other.vertexCoords), std::move(other.vertexIndices), other.taskCount, other.expectedColor)
+	{}
+class MeshOnlyTriangleCase : public vkt::TestCase // Smoke test case drawing one triangle with a mesh shader and no task shader.
+					MeshOnlyTriangleCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description) : vkt::TestCase (testCtx, name, description) {}
+	virtual			~MeshOnlyTriangleCase	(void) {}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override; // Adds the "mesh" and "frag" programs.
+	TestInstance*	createInstance			(Context& context) const override; // Creates a MeshTriangleRenderer with three vertices, one task and blue as expected color.
+	void			checkSupport			(Context& context) const override; // Requires the mesh shader feature only.
+class MeshTaskTriangleCase : public vkt::TestCase // Smoke test case where task shader work groups each launch a mesh work group that draws one triangle.
+					MeshTaskTriangleCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description) : vkt::TestCase (testCtx, name, description) {}
+	virtual			~MeshTaskTriangleCase	(void) {}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override; // Adds the "task", "mesh" and "frag" programs.
+	TestInstance*	createInstance			(Context& context) const override; // Creates a MeshTriangleRenderer with four vertices, six indices, two tasks and blue as expected color.
+	void			checkSupport			(Context& context) const override; // Requires both the task and mesh shader features.
+// Note: not actually task-only. The task shader will not emit mesh shader work groups.
+class TaskOnlyTriangleCase : public vkt::TestCase // Smoke test case whose task shader sets gl_TaskCountNV to zero, so the clear color is expected.
+					TaskOnlyTriangleCase	(tcu::TestContext& testCtx, const std::string& name, const std::string& description) : vkt::TestCase (testCtx, name, description) {}
+	virtual			~TaskOnlyTriangleCase	(void) {}
+	void			initPrograms			(vk::SourceCollections& programCollection) const override; // Adds the "task", "mesh" and "frag" programs.
+	TestInstance*	createInstance			(Context& context) const override; // Creates a MeshTriangleRenderer with one task and opaque black as expected color.
+	void			checkSupport			(Context& context) const override; // Requires both the task and mesh shader features.
+class MeshTriangleRenderer : public vkt::TestInstance // Test instance shared by the three triangle smoke test cases.
+						MeshTriangleRenderer	(Context& context, MeshTriangleRendererParams params) : vkt::TestInstance(context), m_params(std::move(params)) {} // Takes the parameters by value and moves them into m_params.
+	virtual				~MeshTriangleRenderer	(void) {}
+	tcu::TestStatus		iterate					(void) override; // Sets up buffers, an 8x8 color image, descriptors and the pipeline; the rest of the body is outside this chunk.
+	MeshTriangleRendererParams	m_params; // Geometry, task count and expected color for this run.
+void MeshOnlyTriangleCase::checkSupport (Context& context) const
+	// Only the mesh stage is used by this case.
+	const bool requireTask = false;
+	const bool requireMesh = true;
+	checkTaskMeshShaderSupport(context, requireTask, requireMesh);
+void MeshTaskTriangleCase::checkSupport (Context& context) const
+	// Both the task and the mesh stage are used by this case.
+	const bool requireTask = true;
+	const bool requireMesh = true;
+	checkTaskMeshShaderSupport(context, requireTask, requireMesh);
+void TaskOnlyTriangleCase::checkSupport (Context& context) const
+	// A mesh shader is still part of the pipeline, so both features are required.
+	const bool requireTask = true;
+	const bool requireMesh = true;
+	checkTaskMeshShaderSupport(context, requireTask, requireMesh);
+void MeshOnlyTriangleCase::initPrograms (SourceCollections& dst) const
+	// Builds the mesh shader in three parts (preamble, interface, entry point)
+	// and registers it together with the common fragment shader.
+	std::ostringstream meshSrc;
+	// Preamble. 32 invocations are declared, but only a single triangle is
+	// emitted and most invocations do no work.
+	meshSrc
+		<< "#version 450\n"
+		   "#extension GL_NV_mesh_shader : enable\n"
+		   "\n"
+		   "layout(local_size_x=32) in;\n"
+		   "layout(triangles) out;\n"
+		   "layout(max_vertices=256, max_primitives=256) out;\n"
+		   "\n";
+	// Interface: unique vertex coordinates (binding 0), unique vertex indices
+	// (binding 1) and the per-primitive triangle color output.
+	meshSrc
+		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
+		   "    vec4 coords[3];\n"
+		   "} cb;\n"
+		   "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
+		   "    uint indices[3];\n"
+		   "} ib;\n"
+		   "\n"
+		   "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+		   "\n";
+	// Entry point: one blue primitive; the first three invocations each copy
+	// one index and the matching vertex position.
+	meshSrc
+		<< "void main ()\n"
+		   "{\n"
+		   "    gl_PrimitiveCountNV = 1u;\n"
+		   "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		   "\n"
+		   "    const uint vertex = gl_LocalInvocationIndex;\n"
+		   "    if (vertex < 3u)\n"
+		   "    {\n"
+		   "        const uint vertexIndex = ib.indices[vertex];\n"
+		   "        gl_PrimitiveIndicesNV[vertex] = vertexIndex;\n"
+		   "        gl_MeshVerticesNV[vertexIndex].gl_Position = cb.coords[vertexIndex];\n"
+		   "    }\n"
+		   "}\n";
+	const std::string meshCode = meshSrc.str();
+	dst.glslSources.add("mesh") << glu::MeshSource(meshCode);
+	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader());
+void MeshTaskTriangleCase::initPrograms (SourceCollections& dst) const
+	// Interface block shared by both stages: declared "out" in the task shader
+	// and "in" in the mesh shader.
+	const std::string taskInterface =
+		"taskNV TaskData {\n"
+		"	uint triangleIndex;\n"
+		"} td;\n"
+		;
+	// Task shader. Each work group spawns 1 task (2 in total) and each task
+	// will draw 1 triangle, selected by the work group index.
+	std::ostringstream taskSrc;
+	taskSrc
+		<< "#version 450\n"
+		   "#extension GL_NV_mesh_shader : enable\n"
+		   "\n"
+		   "layout(local_size_x=32) in;\n"
+		   "\n"
+		   "out ";
+	taskSrc << taskInterface;
+	taskSrc
+		<< "\n"
+		   "void main ()\n"
+		   "{\n"
+		   "    if (gl_LocalInvocationIndex == 0u)\n"
+		   "    {\n"
+		   "        gl_TaskCountNV = 1u;\n"
+		   "        td.triangleIndex = gl_WorkGroupID.x;\n"
+		   "    }\n"
+		   "}\n";
+	dst.glslSources.add("task") << glu::TaskSource(taskSrc.str());
+	// Mesh shader. A single triangle is output and most invocations do no work.
+	std::ostringstream meshSrc;
+	meshSrc
+		<< "#version 450\n"
+		   "#extension GL_NV_mesh_shader : enable\n"
+		   "\n"
+		   "layout(local_size_x=32) in;\n"
+		   "layout(triangles) out;\n"
+		   "layout(max_vertices=256, max_primitives=256) out;\n"
+		   "\n";
+	// Unique vertex coordinates, unique vertex indices, triangle color output
+	// and the task data input.
+	meshSrc
+		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
+		   "    vec4 coords[4];\n"
+		   "} cb;\n"
+		   "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
+		   "    uint indices[6];\n"
+		   "} ib;\n"
+		   "\n"
+		   "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+		   "\n"
+		   "in ";
+	meshSrc << taskInterface;
+	// Entry point: the first invocation sets the primitive count and color.
+	meshSrc
+		<< "\n"
+		   "void main ()\n"
+		   "{\n"
+		   "    if (gl_LocalInvocationIndex == 0u)\n"
+		   "    {\n"
+		   "        gl_PrimitiveCountNV = 1u;\n"
+		   "        triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		   "    }\n"
+		   "\n";
+	// Each "active" invocation copies one vertex. Indices are renumbered so
+	// the final primitive indices are always 0, 1, 2.
+	meshSrc
+		<< "    if (gl_LocalInvocationIndex < 3u)\n"
+		   "    {\n"
+		   "\n"
+		   "        const uint triangleVertex = gl_LocalInvocationIndex;\n"
+		   "        const uint coordsIndex    = ib.indices[td.triangleIndex * 3u + triangleVertex];\n"
+		   "\n"
+		   "        gl_MeshVerticesNV[triangleVertex].gl_Position = cb.coords[coordsIndex];\n"
+		   "        gl_PrimitiveIndicesNV[triangleVertex] = triangleVertex;\n"
+		   "    }\n"
+		   "}\n";
+	dst.glslSources.add("mesh") << glu::MeshSource(meshSrc.str());
+	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader());
+void TaskOnlyTriangleCase::initPrograms (SourceCollections& dst) const
+	// Task shader that sets the task count to zero, so it does not spawn any
+	// mesh shader invocations.
+	std::ostringstream taskSrc;
+	taskSrc
+		<< "#version 450\n"
+		   "#extension GL_NV_mesh_shader : enable\n"
+		   "\n"
+		   "layout(local_size_x=1) in;\n"
+		   "\n"
+		   "void main ()\n"
+		   "{\n"
+		   "    gl_TaskCountNV = 0u;\n"
+		   "}\n";
+	dst.glslSources.add("task") << glu::TaskSource(taskSrc.str());
+	// Same shader as the mesh only case, but it should not be launched.
+	std::ostringstream meshSrc;
+	meshSrc
+		<< "#version 450\n"
+		   "#extension GL_NV_mesh_shader : enable\n"
+		   "\n"
+		   "layout(local_size_x=32) in;\n"
+		   "layout(triangles) out;\n"
+		   "layout(max_vertices=256, max_primitives=256) out;\n"
+		   "\n";
+	// Vertex coordinates, vertex indices and the triangle color output.
+	meshSrc
+		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
+		   "    vec4 coords[3];\n"
+		   "} cb;\n"
+		   "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
+		   "    uint indices[3];\n"
+		   "} ib;\n"
+		   "\n"
+		   "layout (location=0) out perprimitiveNV vec4 triangleColor[];\n"
+		   "\n";
+	// Entry point: would emit one blue triangle if it ever ran.
+	meshSrc
+		<< "void main ()\n"
+		   "{\n"
+		   "    gl_PrimitiveCountNV = 1u;\n"
+		   "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
+		   "\n"
+		   "    const uint vertex = gl_LocalInvocationIndex;\n"
+		   "    if (vertex < 3u)\n"
+		   "    {\n"
+		   "        const uint vertexIndex = ib.indices[vertex];\n"
+		   "        gl_PrimitiveIndicesNV[vertex] = vertexIndex;\n"
+		   "        gl_MeshVerticesNV[vertexIndex].gl_Position = cb.coords[vertexIndex];\n"
+		   "    }\n"
+		   "}\n";
+	dst.glslSources.add("mesh") << glu::MeshSource(meshSrc.str());
+	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader());
+TestInstance* MeshOnlyTriangleCase::createInstance (Context& context) const
+	// Creates the renderer for the mesh-only case: a single oversized triangle
+	// that covers the whole [-1, 1] clip-space square, drawn with one task.
+	// The vectors are intentionally not const: they are moved into the params
+	// below, and std::move on a const object would silently fall back to a copy.
+	std::vector<tcu::Vec4>	vertexCoords	=
+	{
+		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
+		tcu::Vec4(-1.0f,  3.0f, 0.0f, 1.0f),
+		tcu::Vec4( 3.0f, -1.0f, 0.0f, 1.0f),
+	};
+	std::vector<uint32_t>	vertexIndices	= { 0u, 1u, 2u };
+	// The expected color matches the blue written by the mesh shader.
+	MeshTriangleRendererParams		params			(std::move(vertexCoords), std::move(vertexIndices), 1u, tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
+	return new MeshTriangleRenderer(context, std::move(params));
+TestInstance* MeshTaskTriangleCase::createInstance (Context& context) const
+	// Creates the renderer for the task+mesh case: four corner vertices and six
+	// indices forming two triangles, drawn with two tasks (one triangle each).
+	// The vectors are intentionally not const: they are moved into the params
+	// below, and std::move on a const object would silently fall back to a copy.
+	std::vector<tcu::Vec4>	vertexCoords	=
+	{
+		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
+		tcu::Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
+		tcu::Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
+		tcu::Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
+	};
+	std::vector<uint32_t>	vertexIndices	= { 2u, 0u, 1u, 1u, 3u, 2u };
+	// The expected color matches the blue written by the mesh shader.
+	MeshTriangleRendererParams		params			(std::move(vertexCoords), std::move(vertexIndices), 2u, tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
+	return new MeshTriangleRenderer(context, std::move(params));
+TestInstance* TaskOnlyTriangleCase::createInstance (Context& context) const
+	// Creates the renderer for the "task only" case: same geometry as the
+	// mesh-only case, but the task shader launches no mesh work groups.
+	// The vectors are intentionally not const: they are moved into the params
+	// below, and std::move on a const object would silently fall back to a copy.
+	std::vector<tcu::Vec4>	vertexCoords	=
+	{
+		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
+		tcu::Vec4(-1.0f,  3.0f, 0.0f, 1.0f),
+		tcu::Vec4( 3.0f, -1.0f, 0.0f, 1.0f),
+	};
+	std::vector<uint32_t>	vertexIndices	= { 0u, 1u, 2u };
+	// Note we expect the clear color.
+	MeshTriangleRendererParams		params			(std::move(vertexCoords), std::move(vertexIndices), 1u, tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
+	return new MeshTriangleRenderer(context, std::move(params));
+tcu::TestStatus MeshTriangleRenderer::iterate ()
+	const auto&		vkd					= m_context.getDeviceInterface();
+	const auto		device				= m_context.getDevice();
+	auto&			alloc				= m_context.getDefaultAllocator();
+	const auto		qIndex				= m_context.getUniversalQueueFamilyIndex();
+	const auto		queue				= m_context.getUniversalQueue();
+	const auto		vertexBufferStages	= VK_SHADER_STAGE_MESH_BIT_NV;
+	const auto		vertexBufferSize	= static_cast<VkDeviceSize>(de::dataSize(m_params.vertexCoords));
+	const auto		vertexBufferUsage	= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+	const auto		vertexBufferLoc		= DescriptorSetUpdateBuilder::Location::binding(0u);
+	const auto		vertexBufferType	= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+	const auto		indexBufferStages	= VK_SHADER_STAGE_MESH_BIT_NV;
+	const auto		indexBufferSize		= static_cast<VkDeviceSize>(de::dataSize(m_params.vertexIndices));
+	const auto		indexBufferUsage	= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	const auto		indexBufferLoc		= DescriptorSetUpdateBuilder::Location::binding(1u);
+	const auto		indexBufferType		= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+	// Vertex buffer.
+	const auto			vertexBufferInfo	= makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
+	BufferWithMemory	vertexBuffer		(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
+	auto&				vertexBufferAlloc	= vertexBuffer.getAllocation();
+	void*				vertexBufferDataPtr	= vertexBufferAlloc.getHostPtr();
+	deMemcpy(vertexBufferDataPtr,, static_cast<size_t>(vertexBufferSize));
+	flushAlloc(vkd, device, vertexBufferAlloc);
+	// Index buffer.
+	const auto			indexBufferInfo		= makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
+	BufferWithMemory	indexBuffer			(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible);
+	auto&				indexBufferAlloc	= indexBuffer.getAllocation();
+	void*				indexBufferDataPtr	= indexBufferAlloc.getHostPtr();
+	deMemcpy(indexBufferDataPtr,, static_cast<size_t>(indexBufferSize));
+	flushAlloc(vkd, device, indexBufferAlloc);
+	// Color buffer.
+	const auto	colorBufferFormat	= VK_FORMAT_R8G8B8A8_UNORM;
+	const auto	colorBufferExtent	= makeExtent3D(8u, 8u, 1u);
+	const VkImageCreateInfo colorBufferInfo =
+	{
+		nullptr,								//	const void*				pNext;
+		0u,										//	VkImageCreateFlags		flags;
+		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+		colorBufferFormat,						//	VkFormat				format;
+		colorBufferExtent,						//	VkExtent3D				extent;
+		1u,										//	uint32_t				mipLevels;
+		1u,										//	uint32_t				arrayLayers;
+		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+		colorBufferUsage,						//	VkImageUsageFlags		usage;
+		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+		0u,										//	uint32_t				queueFamilyIndexCount;
+		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+	};
+	ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
+	const auto colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const auto colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorBufferFormat, colorSRR);
+	// Render pass.
+	const auto renderPass = makeRenderPass(vkd, device, colorBufferFormat);
+	// Framebuffer.
+	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), colorBufferExtent.width, colorBufferExtent.height);
+	// Set layout.
+	DescriptorSetLayoutBuilder layoutBuilder;
+	layoutBuilder.addSingleBinding(vertexBufferType, vertexBufferStages);
+	layoutBuilder.addSingleBinding(indexBufferType, indexBufferStages);
+	const auto setLayout =, device);
+	// Descriptor pool.
+	DescriptorPoolBuilder poolBuilder;
+	poolBuilder.addType(vertexBufferType);
+	poolBuilder.addType(indexBufferType);
+	const auto descriptorPool =, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+	// Descriptor set.
+	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
+	// Update descriptor set.
+	DescriptorSetUpdateBuilder updateBuilder;
+	const auto vertexBufferDescInfo	= makeDescriptorBufferInfo(vertexBuffer.get(), 0ull, vertexBufferSize);
+	const auto indexBufferDescInfo	= makeDescriptorBufferInfo(indexBuffer.get(), 0ull, indexBufferSize);
+	updateBuilder.writeSingle(descriptorSet.get(), vertexBufferLoc, vertexBufferType, &vertexBufferDescInfo);
+	updateBuilder.writeSingle(descriptorSet.get(), indexBufferLoc, indexBufferType, &indexBufferDescInfo);
+	updateBuilder.update(vkd, device);
+	// Pipeline layout.
+	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
+	// Shader modules.
+	Move<VkShaderModule>	taskModule;
+	const auto&				binaries = m_context.getBinaryCollection();
+	if (binaries.contains("task"))
+		taskModule = createShaderModule(vkd, device, binaries.get("task"), 0u);
+	const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"), 0u);
+	const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"), 0u);
+	// Graphics pipeline.
+	std::vector<VkViewport>	viewports	(1u, makeViewport(colorBufferExtent));
+	std::vector<VkRect2D>	scissors	(1u, makeRect2D(colorBufferExtent));
+	const auto				pipeline	= makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskModule.get(), meshModule.get(), fragModule.get(), renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool			= makeCommandPool(vkd, device, qIndex);
+	const auto cmdBufferPtr		= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer		= cmdBufferPtr.get();
+	// Output buffer.
+	const auto	tcuFormat		= mapVkFormat(colorBufferFormat);
+	const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * colorBufferExtent.width * colorBufferExtent.height);
+	const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+	const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
+	BufferWithMemory outBuffer (vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible);
+	auto&		outBufferAlloc	= outBuffer.getAllocation();
+	void*		outBufferData	= outBufferAlloc.getHostPtr();
+	// Draw triangle.
+	beginCommandBuffer(vkd, cmdBuffer);
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(),, tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f)/*clear color*/);
+	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.taskCount, 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// Copy color buffer to output buffer.
+	const tcu::IVec3 imageDim	(static_cast<int>(colorBufferExtent.width), static_cast<int>(colorBufferExtent.height), static_cast<int>(colorBufferExtent.depth));
+	const tcu::IVec2 imageSize	(imageDim.x(), imageDim.y());
+	copyImageToBuffer(vkd, cmdBuffer, colorBuffer.get(), outBuffer.get(), imageSize);
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Invalidate alloc.
+	invalidateAlloc(vkd, device, outBufferAlloc);
+	tcu::ConstPixelBufferAccess outPixels(tcuFormat, imageDim, outBufferData);
+	auto& log = m_context.getTestContext().getLog();
+	const tcu::Vec4 threshold (0.0f); // The color can be represented exactly.
+	if (!tcu::floatThresholdCompare(log, "Result", "", m_params.expectedColor, outPixels, threshold, tcu::COMPARE_LOG_EVERYTHING))
+		return tcu::TestStatus::fail("Failed; check log for details");
+	return tcu::TestStatus::pass("Pass");
+tcu::TestCaseGroup* createMeshShaderSmokeTests (tcu::TestContext& testCtx) // Builds the "smoke" group with its three triangle cases and returns it as a raw pointer.
+	GroupPtr smokeTests (new tcu::TestCaseGroup(testCtx, "smoke", "Mesh Shader Smoke Tests")); // Kept in a GroupPtr while the children are being added.
+	smokeTests->addChild(new MeshOnlyTriangleCase(testCtx, "mesh_shader_triangle", ""));
+	smokeTests->addChild(new MeshTaskTriangleCase(testCtx, "mesh_task_shader_triangle", ""));
+	smokeTests->addChild(new TaskOnlyTriangleCase(testCtx, "task_only_shader_triangle", ""));
+	return smokeTests.release(); // The group is released from the GroupPtr and handed to the caller.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.hpp
new file mode 100644
index 0000000..6a5b1fa
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSmokeTests.hpp
@@ -0,0 +1,38 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Smoke Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+namespace MeshShader
+tcu::TestCaseGroup* createMeshShaderSmokeTests (tcu::TestContext& testCtx); // Creates the mesh shader smoke test group for the given test context.
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.cpp
new file mode 100644
index 0000000..7d1a423
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.cpp
@@ -0,0 +1,1268 @@
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Synchronization Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderSyncTests.hpp"
+#include "vktTestCase.hpp"
+#include "vkDefs.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkImageWithMemory.hpp"
+#include "vkBufferWithMemory.hpp"
+#include "vkObjUtil.hpp"
+#include "vkBuilderUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkBarrierUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "deUniquePtr.hpp"
+#include <iostream>
+#include <sstream>
+#include <vector>
+namespace vkt
+namespace MeshShader
+using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
+using namespace vk;
+// Stages that will be used in these tests. NOTE(review): only HOST is listed in this excerpt, yet TRANSFER, TASK, MESH and FRAG are referenced elsewhere in the file -- confirm the full enumerator list.
+enum class Stage
+	HOST = 0,
+std::ostream& operator<< (std::ostream& stream, Stage stage) // Writes the lowercase name of the stage to the stream and returns the stream.
+	switch (stage)
+	{
+	case Stage::HOST:		stream << "host";		break;
+	case Stage::TRANSFER:	stream << "transfer";	break;
+	case Stage::TASK:		stream << "task";		break;
+	case Stage::MESH:		stream << "mesh";		break;
+	case Stage::FRAG:		stream << "frag";		break;
+	default: DE_ASSERT(false); break; // Unknown stage: asserts and writes nothing.
+	}
+	return stream;
+bool isShaderStage (Stage stage) // True for the stages that run shader code: task, mesh and fragment.
+	return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
+// Maps a test stage to the Vulkan pipeline stage flag used as the source or destination
+// stage mask of the dependency under test. Every Stage enumerator referenced by the tests
+// has its own case; unknown values assert and yield an empty mask.
+VkPipelineStageFlags stageToFlags (Stage stage)
+{
+	switch (stage)
+	{
+	case Stage::HOST:		return VK_PIPELINE_STAGE_HOST_BIT;
+	case Stage::TRANSFER:	return VK_PIPELINE_STAGE_TRANSFER_BIT;
+	case Stage::TASK:		return VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV;
+	case Stage::MESH:		return VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV;
+	case Stage::FRAG:		return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+	default:				DE_ASSERT(false); break;
+	}
+	// Unreachable.
+	DE_ASSERT(false);
+	return 0u;
+}
+VkFormat getImageFormat () // Single-channel 32-bit unsigned integer format shared by the images in these tests.
+	return VK_FORMAT_R32_UINT;
+VkExtent3D getImageExtent () // Images are a single texel: 1x1x1.
+	return makeExtent3D(1u, 1u, 1u);
+// Types of resources we will use. NOTE(review): no enumerators are visible in this excerpt; other blocks reference UNIFORM_BUFFER, STORAGE_BUFFER, STORAGE_IMAGE and SAMPLED_IMAGE -- confirm the list.
+enum class ResourceType
+// Maps a test resource type to the descriptor type used to bind it.
+// SAMPLED_IMAGE maps to a combined image sampler because the shaders declare that
+// resource as a `usampler2D`. Unknown values assert and return VK_DESCRIPTOR_TYPE_MAX_ENUM
+// so that every path through this non-void function returns a value.
+VkDescriptorType resourceTypeToDescriptor (ResourceType resType)
+{
+	switch (resType)
+	{
+	case ResourceType::UNIFORM_BUFFER:	return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+	case ResourceType::STORAGE_BUFFER:	return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+	case ResourceType::STORAGE_IMAGE:	return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+	case ResourceType::SAMPLED_IMAGE:	return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+	default:							DE_ASSERT(false); break;
+	}
+	// Unreachable.
+	DE_ASSERT(false);
+	return VK_DESCRIPTOR_TYPE_MAX_ENUM;
+}
+// Will the test use a specific barrier or a general memory barrier? NOTE(review): only GENERAL is visible in this excerpt; the enumerator for the specific-barrier case is not shown -- confirm.
+enum class BarrierType
+	GENERAL = 0,
+// Types of writes we will use. NOTE(review): no enumerators are visible in this excerpt; other blocks reference HOST_WRITE, TRANSFER_WRITE and SHADER_WRITE -- confirm the list.
+enum class WriteAccess
+// Maps a test write access to the Vulkan access flag used as the source access mask
+// of the dependency under test. Unknown values assert and yield an empty mask.
+VkAccessFlags writeAccessToFlags (WriteAccess access)
+{
+	switch (access)
+	{
+	case WriteAccess::HOST_WRITE:		return VK_ACCESS_HOST_WRITE_BIT;
+	case WriteAccess::TRANSFER_WRITE:	return VK_ACCESS_TRANSFER_WRITE_BIT;
+	case WriteAccess::SHADER_WRITE:		return VK_ACCESS_SHADER_WRITE_BIT;
+	default:							DE_ASSERT(false); break;
+	}
+	// Unreachable.
+	DE_ASSERT(false);
+	return 0u;
+}
+// Types of reads we will use. NOTE(review): only HOST_READ is listed in this excerpt; other blocks reference TRANSFER_READ, SHADER_READ and UNIFORM_READ -- confirm the list.
+enum class ReadAccess
+	HOST_READ = 0,
+// Maps a test read access to the Vulkan access flag used as the destination access mask
+// of the dependency under test. Unknown values assert and yield an empty mask.
+VkAccessFlags readAccessToFlags (ReadAccess access)
+{
+	switch (access)
+	{
+	case ReadAccess::HOST_READ:			return VK_ACCESS_HOST_READ_BIT;
+	case ReadAccess::TRANSFER_READ:		return VK_ACCESS_TRANSFER_READ_BIT;
+	case ReadAccess::SHADER_READ:		return VK_ACCESS_SHADER_READ_BIT;
+	case ReadAccess::UNIFORM_READ:		return VK_ACCESS_UNIFORM_READ_BIT;
+	default:							DE_ASSERT(false); break;
+	}
+	// Unreachable.
+	DE_ASSERT(false);
+	return 0u;
+}
+// Auxiliary functions to verify certain combinations are possible.
+// Check if the writing stage can use the specified write access: host and transfer stages pair only with their own write access, shader stages only with SHADER_WRITE.
+bool canWriteFromStageAsAccess (Stage writeStage, WriteAccess access)
+	switch (writeStage)
+	{
+	case Stage::HOST:		return (access == WriteAccess::HOST_WRITE);
+	case Stage::TRANSFER:	return (access == WriteAccess::TRANSFER_WRITE);
+	case Stage::TASK:		// fallthrough
+	case Stage::MESH:		// fallthrough
+	case Stage::FRAG:		return (access == WriteAccess::SHADER_WRITE);
+	default:				DE_ASSERT(false); break;
+	}
+	return false; // Only reached for an unknown stage.
+// Check if the reading stage can use the specified read access: host and transfer stages pair only with their own read access, shader stages with SHADER_READ or UNIFORM_READ.
+bool canReadFromStageAsAccess (Stage readStage, ReadAccess access)
+	switch (readStage)
+	{
+	case Stage::HOST:		return (access == ReadAccess::HOST_READ);
+	case Stage::TRANSFER:	return (access == ReadAccess::TRANSFER_READ);
+	case Stage::TASK:		// fallthrough
+	case Stage::MESH:		// fallthrough
+	case Stage::FRAG:		return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
+	default:				DE_ASSERT(false); break;
+	}
+	return false; // Only reached for an unknown stage.
+// Check if reading the given resource type is possible with the given type of read access: UNIFORM_READ only applies to uniform buffers, any other read access is accepted for every resource type.
+bool canReadResourceAsAccess (ResourceType resType, ReadAccess access)
+	if (access == ReadAccess::UNIFORM_READ)
+		return (resType == ResourceType::UNIFORM_BUFFER);
+	return true;
+// Check if writing to the given resource type is possible with the given type of write access: uniform buffers accept anything but SHADER_WRITE, other resource types accept every write access.
+bool canWriteResourceAsAccess (ResourceType resType, WriteAccess access)
+	if (resType == ResourceType::UNIFORM_BUFFER)
+		return (access != WriteAccess::SHADER_WRITE);
+	return true;
+// Check if the given stage can write to the given resource type: the host writes buffers only, transfers write anything, task and mesh shaders write storage buffers and storage images.
+bool canWriteTo (Stage stage, ResourceType resType)
+	switch (stage)
+	{
+	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
+	case Stage::TRANSFER:	return true;
+	case Stage::TASK:		// fallthrough
+	case Stage::MESH:		return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
+	default:				DE_ASSERT(false); break; // Also reached for Stage::FRAG, which has no case here.
+	}
+	return false;
+// Check if the given stage can read from the given resource type: the host reads buffers only, every other stage can read any resource type.
+bool canReadFrom (Stage stage, ResourceType resType)
+	switch (stage)
+	{
+	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
+	case Stage::TRANSFER:	// fallthrough
+	case Stage::TASK:		// fallthrough
+	case Stage::MESH:		// fallthrough
+	case Stage::FRAG:		return true;
+	default:				DE_ASSERT(false); break;
+	}
+	return false; // Only reached for an unknown stage.
+// Will we need to store the test value in an auxiliary buffer to be read? Only when the writing stage is the transfer stage.
+bool needsAuxiliarSourceBuffer (Stage fromStage, Stage toStage)
+	DE_UNREF(toStage);
+	return (fromStage == Stage::TRANSFER);
+// Will we need to store the read operation result into an auxiliary buffer to be checked? Only when the reading stage is the transfer stage.
+bool needsAuxiliarDestBuffer (Stage fromStage, Stage toStage)
+	DE_UNREF(fromStage);
+	return (toStage == Stage::TRANSFER);
+// Is an auxiliary buffer needed in any role, either as the transfer source or as the transfer destination?
+bool needsAuxiliarBuffer (Stage fromStage, Stage toStage)
+	return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
+// Will the final value be stored in the auxiliary destination buffer? True when the reading stage is the transfer stage.
+bool valueInAuxiliarDestBuffer (Stage toStage)
+	return (toStage == Stage::TRANSFER);
+// Will the final value be stored in the resource buffer itself? True when the reading stage is the host.
+bool valueInResourceBuffer (Stage toStage)
+	return (toStage == Stage::HOST);
+// Will the final value be stored in the color buffer? True whenever it is in neither the auxiliary destination buffer nor the resource buffer.
+bool valueInColorBuffer (Stage toStage)
+	return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
+// Image usage flags for the image resource.
+// Transfer source and destination usage is always included because, when one end of the
+// dependency is the transfer stage, the image is filled from or copied to an auxiliary buffer.
+// The resource type then adds the usage needed to bind it as a storage or sampled image.
+// Buffer resource types are not valid here and trigger the assertion.
+VkImageUsageFlags resourceImageUsageFlags (ResourceType resourceType)
+{
+	VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
+	switch (resourceType)
+	{
+	case ResourceType::STORAGE_IMAGE:	flags |= VK_IMAGE_USAGE_STORAGE_BIT;	break;
+	case ResourceType::SAMPLED_IMAGE:	flags |= VK_IMAGE_USAGE_SAMPLED_BIT;	break;
+	default: DE_ASSERT(false); break;
+	}
+	return flags;
+}
+// Buffer usage flags for the buffer resource.
+// Transfer source and destination usage is always included because, when one end of the
+// dependency is the transfer stage, the buffer is filled from or copied to an auxiliary buffer.
+// The resource type then adds the usage needed to bind it as a uniform or storage buffer.
+// Image resource types are not valid here and trigger the assertion.
+VkBufferUsageFlags resourceBufferUsageFlags (ResourceType resourceType)
+{
+	VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+	switch (resourceType)
+	{
+	case ResourceType::UNIFORM_BUFFER:	flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;	break;
+	case ResourceType::STORAGE_BUFFER:	flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;	break;
+	default: DE_ASSERT(false); break;
+	}
+	return flags;
+}
+// A subpass dependency is needed if both the source and destination stages are shader stages (see isShaderStage).
+bool needsSubpassDependency (Stage fromStage, Stage toStage)
+	return (isShaderStage(fromStage) && isShaderStage(toStage));
+struct TestParams // Parameters describing one write->read dependency test, plus helpers that generate the matching GLSL snippets.
+	Stage			fromStage; // Stage that writes the test value.
+	Stage			toStage; // Stage that reads the value back.
+	ResourceType	resourceType; // Kind of resource (buffer or image) that carries the value.
+	BarrierType		barrierType; // General memory barrier or a more specific one.
+	WriteAccess		writeAccess; // Access type on the writing side of the dependency.
+	ReadAccess		readAccess; // Access type on the reading side of the dependency.
+	uint32_t		testValue; // Value to be written; embedded as a literal in shader write statements.
+	bool readsOrWritesIn (Stage stage) const // True if the given stage is either end of the dependency.
+	{
+		DE_ASSERT(fromStage != toStage);
+		return (fromStage == stage || toStage == stage);
+	}
+	bool needsTask () const // A task shader is needed only when one end of the dependency is the task stage.
+	{
+		return readsOrWritesIn(Stage::TASK);
+	}
+	bool readsOrWritesInMesh () const // True if the mesh stage is either end of the dependency.
+	{
+		return readsOrWritesIn(Stage::MESH);
+	}
+	std::string getResourceDecl () const // GLSL declaration of the resource at set 0, binding 0, terminated by a newline.
+	{
+		const auto			imgFormat		= ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : ""); // Only storage images get a format layout qualifier.
+		const auto			storagePrefix	= ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly "); // Storage resources are declared readonly unless a shader writes to them.
+		std::ostringstream	decl;
+		decl << "layout (set=0, binding=0" << imgFormat << ") ";
+		switch (resourceType)
+		{
+		case ResourceType::UNIFORM_BUFFER:	decl << "uniform UniformBuffer { uint value; } ub;";					break;
+		case ResourceType::STORAGE_BUFFER:	decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";	break;
+		case ResourceType::STORAGE_IMAGE:	decl << storagePrefix << "uniform uimage2D si;";						break;
+		case ResourceType::SAMPLED_IMAGE:	decl << "uniform usampler2D sampled;";									break;
+		default:							DE_ASSERT(false);														break;
+		}
+		decl << "\n";
+		return decl.str();
+	}
+	std::string getReadStatement (const std::string& outName) const // GLSL statement that assigns the resource contents to outName.
+	{
+		std::ostringstream statement;
+		statement << "    " << outName << " = ";
+		switch (resourceType)
+		{
+		case ResourceType::UNIFORM_BUFFER:	statement << "ub.value";							break;
+		case ResourceType::STORAGE_BUFFER:	statement << "sb.value";							break;
+		case ResourceType::STORAGE_IMAGE:	statement << "imageLoad(si, ivec2(0, 0)).x";		break;
+		case ResourceType::SAMPLED_IMAGE:	statement << "texture(sampled, vec2(0.5, 0.5)).x";	break;
+		default:							DE_ASSERT(false); break;
+		}
+		statement << ";\n";
+		return statement.str();
+	}
+	std::string getWriteStatement (const std::string& valueName) const // GLSL statement that stores valueName in the resource; only storage buffers and storage images are handled.
+	{
+		std::ostringstream statement;
+		statement << "    ";
+		switch (resourceType)
+		{
+		case ResourceType::STORAGE_BUFFER:	statement << "sb.value = " << valueName;											break;
+		case ResourceType::STORAGE_IMAGE:	statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";	break;
+		case ResourceType::UNIFORM_BUFFER:	// fallthrough
+		case ResourceType::SAMPLED_IMAGE:	// fallthrough
+		default:							DE_ASSERT(false); break; // Uniform buffers and sampled images cannot be written from shaders.
+		}
+		statement << ";\n";
+		return statement.str();
+	}
+	VkShaderStageFlags getResourceShaderStages () const // Shader stage flags for every shader stage that takes part in the dependency.
+	{
+		VkShaderStageFlags flags = 0u;
+		if (fromStage == Stage::TASK || toStage == Stage::TASK)	flags |= VK_SHADER_STAGE_TASK_BIT_NV;
+		if (fromStage == Stage::MESH || toStage == Stage::MESH)	flags |= VK_SHADER_STAGE_MESH_BIT_NV;
+		if (fromStage == Stage::FRAG || toStage == Stage::FRAG)	flags |= VK_SHADER_STAGE_FRAGMENT_BIT;
+		// We assume at least something must be done either on the task or mesh shaders for the tests to be interesting. NOTE(review): nothing in the visible code enforces this assumption -- confirm.
+		return flags;
+	}
+	// We'll prefer to keep the image in the general layout if it will be written to from a shader stage, if the barrier is going to be a generic memory barrier, or if the resource is a storage image.
+	bool preferGeneralLayout () const
+	{
+		return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) || (resourceType == ResourceType::STORAGE_IMAGE));
+	}
+class MeshShaderSyncCase : public vkt::TestCase // Test case for one synchronization scenario described by TestParams. NOTE(review): access specifiers are not visible in this excerpt.
+					MeshShaderSyncCase		(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params)
+						: vkt::TestCase (testCtx, name, description), m_params (params)
+						{}
+	virtual			~MeshShaderSyncCase		(void) {}
+	void			checkSupport			(Context& context) const override; // Throws NotSupportedError when required features are missing.
+	void			initPrograms			(vk::SourceCollections& programCollection) const override; // Generates the GLSL sources for the case.
+	TestInstance*	createInstance			(Context& context) const override; // Creates the MeshShaderSyncInstance that runs the case.
+	TestParams		m_params; // Copy of the parameters given at construction.
+class MeshShaderSyncInstance : public vkt::TestInstance // Runtime part of the synchronization test. NOTE(review): access specifiers are not visible in this excerpt.
+						MeshShaderSyncInstance	(Context& context, const TestParams& params) : vkt::TestInstance(context), m_params(params) {}
+	virtual				~MeshShaderSyncInstance	(void) {}
+	tcu::TestStatus		iterate					(void) override; // Runs the test and returns its status.
+	TestParams			m_params; // Copy of the parameters given at construction.
+void MeshShaderSyncCase::checkSupport (Context& context) const // Requires VK_NV_mesh_shader with mesh (and, when needed, task) shader support; shader writes also require vertexPipelineStoresAndAtomics.
+	context.requireDeviceFunctionality("VK_NV_mesh_shader");
+	const auto& meshFeatures = context.getMeshShaderFeatures();
+	if (!meshFeatures.meshShader)
+		TCU_THROW(NotSupportedError, "Mesh shaders not supported");
+	if (m_params.needsTask() && !meshFeatures.taskShader) // Task shader support only matters when the task stage takes part in the test.
+		TCU_THROW(NotSupportedError, "Task shaders not supported");
+	if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
+	{
+		const auto& features = context.getDeviceFeatures();
+		if (!features.vertexPipelineStoresAndAtomics)
+			TCU_THROW(NotSupportedError, "Vertex pipeline stores not supported");
+	}
+void MeshShaderSyncCase::initPrograms (vk::SourceCollections& programCollection) const // Generates the GLSL sources: an optional "task" shader, the "mesh" shader and the "frag" shader.
+	const bool	needsTaskShader	= m_params.needsTask();
+	const auto	valueStr		= de::toString(m_params.testValue); // The test value is embedded as a literal in the write statement.
+	const auto	resourceDecl	= m_params.getResourceDecl();
+	if (needsTaskShader)
+	{
+		std::ostringstream task;
+		task
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout(local_size_x=1) in;\n"
+			<< "\n"
+			<< "out taskNV TaskData { uint value; } td;\n"
+			<< "\n"
+			<< resourceDecl
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_TaskCountNV = 1u;\n"
+			<< "    td.value = 0u;\n" // Default passed on to the mesh shader; overwritten below when the task stage reads the resource.
+			<< ((m_params.fromStage == Stage::TASK)	? m_params.getWriteStatement(valueStr)	: "")
+			<< ((m_params.toStage == Stage::TASK)	? m_params.getReadStatement("td.value")	: "")
+			<< "}\n"
+			;
+		programCollection.glslSources.add("task") << glu::TaskSource(task.str());
+	}
+	{
+		std::ostringstream mesh;
+		mesh
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout(local_size_x=1) in;\n"
+			<< "layout(triangles) out;\n"
+			<< "layout(max_vertices=3, max_primitives=1) out;\n"
+			<< "\n"
+			<< (needsTaskShader ? "in taskNV TaskData { uint value; } td;\n" : "")
+			<< "layout (location=0) out perprimitiveNV uint primitiveValue[];\n"
+			<< "\n"
+			<< (m_params.readsOrWritesInMesh() ? resourceDecl : "") // The resource is declared only in the stages that access it.
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "    gl_PrimitiveCountNV = 1u;\n"
+			<< (needsTaskShader ? "    primitiveValue[0] = td.value;\n" : "") // Forwards the value received from the task shader.
+			<< ((m_params.fromStage == Stage::MESH)	? m_params.getWriteStatement(valueStr)				: "")
+			<< ((m_params.toStage == Stage::MESH)	? m_params.getReadStatement("primitiveValue[0]")	: "")
+			<< "\n"
+			<< "    gl_MeshVerticesNV[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n" // The three vertices form one triangle that covers the whole [-1, 1] clip-space square.
+			<< "    gl_MeshVerticesNV[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
+			<< "    gl_MeshVerticesNV[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
+			<< "    gl_PrimitiveIndicesNV[0] = 0;\n"
+			<< "    gl_PrimitiveIndicesNV[1] = 1;\n"
+			<< "    gl_PrimitiveIndicesNV[2] = 2;\n"
+			<< "}\n"
+			;
+		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str());
+	}
+	{
+		const bool			readFromFrag = (m_params.toStage == Stage::FRAG);
+		std::ostringstream	frag;
+		frag
+			<< "#version 450\n"
+			<< "#extension GL_NV_mesh_shader : enable\n"
+			<< "\n"
+			<< "layout (location=0) in perprimitiveNV flat uint primitiveValue;\n"
+			<< "layout (location=0) out uvec4 outColor;\n"
+			<< "\n"
+			<< (readFromFrag ? resourceDecl : "")
+			<< "\n"
+			<< "void main ()\n"
+			<< "{\n"
+			<< "	outColor = uvec4(primitiveValue, 0, 0, 0);\n" // By default the color is the per-primitive value coming from the mesh shader.
+			<< (readFromFrag ? m_params.getReadStatement("const uint readVal")	: "") // When the fragment stage is the reader, the value read from the resource replaces it.
+			<< (readFromFrag ? "    outColor = uvec4(readVal, 0, 0, 0);\n"		: "")
+			<< "}\n"
+			;
+		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
+	}
+TestInstance* MeshShaderSyncCase::createInstance (Context& context) const // Returns a newly allocated MeshShaderSyncInstance carrying this case's parameters.
+	return new MeshShaderSyncInstance(context, m_params);
+// General description behind these tests.
+//	From				To
+//	==============================
+//	HOST				TASK			Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
+//	HOST				MESH			Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
+//	TRANSFER			TASK			Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
+//	TRANSFER			MESH			Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
+//	TASK				MESH			Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
+//	TASK				FRAG			Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
+//	TASK				TRANSFER		Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
+//	TASK				HOST			Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
+//	MESH				FRAG			Same as task to frag but the write instructions need to be in the mesh shader.
+//	MESH				TRANSFER		Same as task to transfer but the write instructions need to be in the mesh shader.
+//	MESH				HOST			Same as task to host but the write instructions need to be in the mesh shader.
+// Creates a single-subpass render pass with one color attachment of the given format.
+// The attachment is cleared on load, stored at the end, and left in the color attachment
+// optimal layout. When both ends of the tested dependency are shader stages (see
+// needsSubpassDependency), the write->read dependency is expressed as a self-dependency of
+// subpass 0 built from the stage and access flags in params; otherwise the render pass has
+// no subpass dependencies.
+Move<VkRenderPass> createCustomRenderPass (const DeviceInterface& vkd, VkDevice device, VkFormat colorFormat, const TestParams& params)
+{
+	const std::vector<VkAttachmentDescription> attachmentDescs =
+	{
+		{
+			0u,											//	VkAttachmentDescriptionFlags	flags;
+			colorFormat,								//	VkFormat						format;
+			VK_SAMPLE_COUNT_1_BIT,						//	VkSampleCountFlagBits			samples;
+			VK_ATTACHMENT_LOAD_OP_CLEAR,				//	VkAttachmentLoadOp				loadOp;
+			VK_ATTACHMENT_STORE_OP_STORE,				//	VkAttachmentStoreOp				storeOp;
+			VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//	VkAttachmentLoadOp				stencilLoadOp;
+			VK_ATTACHMENT_STORE_OP_DONT_CARE,			//	VkAttachmentStoreOp				stencilStoreOp;
+			VK_IMAGE_LAYOUT_UNDEFINED,					//	VkImageLayout					initialLayout;
+			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout					finalLayout;
+		}
+	};
+	const std::vector<VkAttachmentReference> attachmentRefs = { { 0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } };
+	const std::vector<VkSubpassDescription> subpassDescs =
+	{
+		{
+			0u,												//	VkSubpassDescriptionFlags		flags;
+			VK_PIPELINE_BIND_POINT_GRAPHICS,				//	VkPipelineBindPoint				pipelineBindPoint;
+			0u,												//	uint32_t						inputAttachmentCount;
+			nullptr,										//	const VkAttachmentReference*	pInputAttachments;
+			static_cast<uint32_t>(attachmentRefs.size()),	//	uint32_t						colorAttachmentCount;
+			de::dataOrNull(attachmentRefs),					//	const VkAttachmentReference*	pColorAttachments;
+			nullptr,										//	const VkAttachmentReference*	pResolveAttachments;
+			nullptr,										//	const VkAttachmentReference*	pDepthStencilAttachment;
+			0u,												//	uint32_t						preserveAttachmentCount;
+			nullptr,										//	const uint32_t*					pPreserveAttachments;
+		}
+	};
+	// When both stages are shader stages, the dependency will be expressed as a subpass dependency.
+	std::vector<VkSubpassDependency> dependencies;
+	if (needsSubpassDependency(params.fromStage, params.toStage))
+	{
+		const VkSubpassDependency dependency =
+		{
+			0u,											//	uint32_t				srcSubpass;
+			0u,											//	uint32_t				dstSubpass;
+			stageToFlags(params.fromStage),				//	VkPipelineStageFlags	srcStageMask;
+			stageToFlags(params.toStage),				//	VkPipelineStageFlags	dstStageMask;
+			writeAccessToFlags(params.writeAccess),		//	VkAccessFlags			srcAccessMask;
+			readAccessToFlags(params.readAccess),		//	VkAccessFlags			dstAccessMask;
+			0u,											//	VkDependencyFlags		dependencyFlags;
+		};
+		dependencies.push_back(dependency);
+	}
+	// sType must come first: without it every following initializer lands on the wrong member.
+	const VkRenderPassCreateInfo createInfo =
+	{
+		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		//	VkStructureType					sType;
+		nullptr,										//	const void*						pNext;
+		0u,												//	VkRenderPassCreateFlags			flags;
+		static_cast<uint32_t>(attachmentDescs.size()),	//	uint32_t						attachmentCount;
+		de::dataOrNull(attachmentDescs),				//	const VkAttachmentDescription*	pAttachments;
+		static_cast<uint32_t>(subpassDescs.size()),		//	uint32_t						subpassCount;
+		de::dataOrNull(subpassDescs),					//	const VkSubpassDescription*		pSubpasses;
+		static_cast<uint32_t>(dependencies.size()),		//	uint32_t						dependencyCount;
+		de::dataOrNull(dependencies),					//	const VkSubpassDependency*		pDependencies;
+	};
+	return createRenderPass(vkd, device, &createInfo);
+}
+void hostToTransferMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer) // Records a global memory barrier from host writes to transfer reads in the given command buffer.
+	const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &barrier, 0u, nullptr, 0u, nullptr);
+void transferToHostMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer) // Records a global memory barrier from transfer writes to host reads in the given command buffer.
+	const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
+	vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &barrier, 0u, nullptr, 0u, nullptr);
+tcu::TestStatus MeshShaderSyncInstance::iterate (void)
+	const auto&	vkd						= m_context.getDeviceInterface();
+	const auto	device					= m_context.getDevice();
+	auto&		alloc					= m_context.getDefaultAllocator();
+	const auto	queueIndex				= m_context.getUniversalQueueFamilyIndex();
+	const auto	queue					= m_context.getUniversalQueue();
+	const auto	imageFormat				= getImageFormat();
+	const auto	imageExtent				= getImageExtent();
+	const auto	colorSRR				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
+	const auto	colorSRL				= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
+	const auto	bufferSize				= static_cast<VkDeviceSize>(sizeof(m_params.testValue));
+	const auto	descriptorType			= resourceTypeToDescriptor(m_params.resourceType);
+	const auto	resourceStages			= m_params.getResourceShaderStages();
+	const auto	useGeneralLayout		= m_params.preferGeneralLayout();
+	const auto	writeAccessFlags		= writeAccessToFlags(m_params.writeAccess);
+	const auto	readAccessFlags			= readAccessToFlags(m_params.readAccess);
+	const auto	fromStageFlags			= stageToFlags(m_params.fromStage);
+	const auto	toStageFlags			= stageToFlags(m_params.toStage);
+	// Prepare color buffer.
+	const VkImageCreateInfo colorBufferCreateInfo =
+	{
+		nullptr,								//	const void*				pNext;
+		0u,										//	VkImageCreateFlags		flags;
+		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+		imageFormat,							//	VkFormat				format;
+		imageExtent,							//	VkExtent3D				extent;
+		1u,										//	uint32_t				mipLevels;
+		1u,										//	uint32_t				arrayLayers;
+		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+		colorBufferUsage,						//	VkImageUsageFlags		usage;
+		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+		0u,										//	uint32_t				queueFamilyIndexCount;
+		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+	};
+	ImageWithMemory	colorBuffer		(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
+	const auto		colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
+	// Main resource.
+	using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
+	using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;
+	ImageWithMemoryPtr	imageResource;
+	Move<VkImageView>	imageResourceView;
+	VkImageLayout		imageDescriptorLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+	VkImageLayout		currentLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
+	BufferWithMemoryPtr	bufferResource;
+	bool useImageResource	= false;
+	bool useBufferResource	= false;
+	switch (m_params.resourceType)
+	{
+	case ResourceType::UNIFORM_BUFFER:
+	case ResourceType::STORAGE_BUFFER:
+		useBufferResource = true;
+		break;
+	case ResourceType::STORAGE_IMAGE:
+	case ResourceType::SAMPLED_IMAGE:
+		useImageResource = true;
+		break;
+	default:
+		DE_ASSERT(false);
+		break;
+	}
+	// One resource needed.
+	DE_ASSERT(useImageResource != useBufferResource);
+	if (useImageResource)
+	{
+		const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);
+		const VkImageCreateInfo resourceCreateInfo =
+		{
+			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
+			nullptr,								//	const void*				pNext;
+			0u,										//	VkImageCreateFlags		flags;
+			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
+			imageFormat,							//	VkFormat				format;
+			imageExtent,							//	VkExtent3D				extent;
+			1u,										//	uint32_t				mipLevels;
+			1u,										//	uint32_t				arrayLayers;
+			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
+			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
+			resourceImageUsage,						//	VkImageUsageFlags		usage;
+			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
+			0u,										//	uint32_t				queueFamilyIndexCount;
+			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
+			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
+		};
+		imageResource		= ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
+		imageResourceView	= makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
+	}
+	else
+	{
+		const auto resourceBufferUsage		= resourceBufferUsageFlags(m_params.resourceType);
+		const auto resourceBufferCreateInfo	= makeBufferCreateInfo(bufferSize, resourceBufferUsage);
+		bufferResource = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
+	}
+	Move<VkSampler> sampler;
+	{
+		const VkSamplerCreateInfo samplerCreateInfo =
+		{
+			nullptr,								//	const void*				pNext;
+			0u,										//	VkSamplerCreateFlags	flags;
+			VK_FILTER_NEAREST,						//	VkFilter				magFilter;
+			VK_FILTER_NEAREST,						//	VkFilter				minFilter;
+			VK_SAMPLER_MIPMAP_MODE_NEAREST,			//	VkSamplerMipmapMode		mipmapMode;
+			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeU;
+			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeV;
+			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeW;
+			0.0f,									//	float					mipLodBias;
+			VK_FALSE,								//	VkBool32				anisotropyEnable;
+			1.0f,									//	float					maxAnisotropy;
+			VK_FALSE,								//	VkBool32				compareEnable;
+			VK_COMPARE_OP_NEVER,					//	VkCompareOp				compareOp;
+			0.0f,									//	float					minLod;
+			0.0f,									//	float					maxLod;
+			VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,	//	VkBorderColor			borderColor;
+			VK_FALSE,								//	VkBool32				unnormalizedCoordinates;
+		};
+		sampler = createSampler(vkd, device, &samplerCreateInfo);
+	}
+	// Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
+	BufferWithMemoryPtr hostCoherentBuffer;
+	void*				hostCoherentDataPtr = nullptr;
+	if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
+	{
+		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
+		hostCoherentBuffer	= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
+		hostCoherentDataPtr	= hostCoherentBuffer->getAllocation().getHostPtr();
+	}
+	// Descriptor pool.
+	Move<VkDescriptorPool> descriptorPool;
+	{
+		DescriptorPoolBuilder poolBuilder;
+		poolBuilder.addType(descriptorType);
+	}
+	// Descriptor set layout.
+	Move<VkDescriptorSetLayout> setLayout;
+	{
+		DescriptorSetLayoutBuilder layoutBuilder;
+		layoutBuilder.addSingleBinding(descriptorType, resourceStages);
+		setLayout =, device);
+	}
+	// Descriptor set.
+	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
+	// Update descriptor set.
+	{
+		DescriptorSetUpdateBuilder	updateBuilder;
+		const auto					location = DescriptorSetUpdateBuilder::Location::binding(0u);
+		switch (descriptorType)
+		{
+			{
+				const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
+				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
+			}
+			break;
+			{
+				auto descriptorImageInfo = makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
+				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
+			}
+			break;
+		default:
+			DE_ASSERT(false); break;
+		}
+		updateBuilder.update(vkd, device);
+	}
+	// Shader modules.
+	Move<VkShaderModule> taskShader;
+	Move<VkShaderModule> meshShader;
+	Move<VkShaderModule> fragShader;
+	const auto& binaries = m_context.getBinaryCollection();
+	if (m_params.needsTask())
+		taskShader = createShaderModule(vkd, device, binaries.get("task"), 0u);
+	meshShader = createShaderModule(vkd, device, binaries.get("mesh"), 0u);
+	fragShader = createShaderModule(vkd, device, binaries.get("frag"), 0u);
+	// Pipeline layout, render pass, framebuffer.
+	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get());
+	const auto renderPass		= createCustomRenderPass(vkd, device, imageFormat, m_params);
+	const auto framebuffer		= makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width, imageExtent.height);
+	// Pipeline.
+	std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
+	std::vector<VkRect2D>	scissors	(1u, makeRect2D(imageExtent));
+	const auto				pipeline	= makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(), fragShader.get(), renderPass.get(), viewports, scissors);
+	// Command pool and buffer.
+	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
+	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+	const auto cmdBuffer	= cmdBufferPtr.get();
+	beginCommandBuffer(vkd, cmdBuffer);
+	if (m_params.fromStage == Stage::HOST)
+	{
+		// Prepare buffer from host when the source stage is the host.
+		DE_ASSERT(useBufferResource);
+		auto& resourceBufferAlloc	= bufferResource->getAllocation();
+		void* resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();
+		deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
+		flushAlloc(vkd, device, resourceBufferAlloc);
+	}
+	else if (m_params.fromStage == Stage::TRANSFER)
+	{
+		// Put value in host-coherent buffer and transfer it to the resource buffer or image.
+		deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
+		hostToTransferMemoryBarrier(vkd, cmdBuffer);
+		if (useBufferResource)
+		{
+			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
+			vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
+		}
+		else
+		{
+			// Move image to the right layout for transfer.
+			const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+			if (newLayout != currentLayout)
+			{
+				const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout, newLayout, imageResource->get(), colorSRR);
+				vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
+				currentLayout = newLayout;
+			}
+			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
+			vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u, &copyRegion);
+		}
+	}
+	else if (m_params.fromStage == Stage::TASK || m_params.fromStage == Stage::MESH)
+	{
+		// The image or buffer will be written to from shaders. Images need to be in the right layout.
+		if (useImageResource)
+		{
+			const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
+			if (newLayout != currentLayout)
+			{
+				const auto preWriteBarrier = makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout, newLayout, imageResource->get(), colorSRR);
+				vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &preWriteBarrier);
+				currentLayout = newLayout;
+			}
+		}
+	}
+	else
+	{
+		DE_ASSERT(false);
+	}
+	// If the resource is going to be read from shaders, we'll insert the main barrier before running the pipeline.
+	if (isShaderStage(m_params.toStage))
+	{
+		if (m_params.barrierType == BarrierType::GENERAL)
+		{
+			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
+			vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
+		}
+		else if (m_params.barrierType == BarrierType::SPECIFIC)
+		{
+			if (useBufferResource)
+			{
+				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
+				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
+			}
+			else
+			{
+				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
+				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
+				currentLayout = newLayout;
+			}
+		}
+		// For subpass dependencies, they have already been included in the render pass.
+	}
+	// Run the pipeline.
+	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(),, tcu::UVec4(0u));
+	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
+	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
+	vkd.cmdDrawMeshTasksNV(cmdBuffer, 1u, 0u);
+	endRenderPass(vkd, cmdBuffer);
+	// If the resource was written to from the shaders, insert the main barrier after running the pipeline.
+	if (isShaderStage(m_params.fromStage))
+	{
+		if (m_params.barrierType == BarrierType::GENERAL)
+		{
+			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
+			vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
+		}
+		else if (m_params.barrierType == BarrierType::SPECIFIC)
+		{
+			if (useBufferResource)
+			{
+				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
+				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 1u, &bufferBarrier, 0u, nullptr);
+			}
+			else
+			{
+				// Note: the image will only be read from shader stages (which is covered in BarrierType::DEPENDENCY) or from the transfer stage.
+				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);
+				vkd.cmdPipelineBarrier(cmdBuffer, fromStageFlags, toStageFlags, 0u, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
+				currentLayout = newLayout;
+			}
+		}
+		// For subpass dependencies, they have already been included in the render pass.
+	}
+	// Read resource from the destination stage if needed.
+	if (m_params.toStage == Stage::HOST)
+	{
+		// Nothing to do. The test value should be in the resource buffer already, which is host-visible.
+	}
+	else if (m_params.toStage == Stage::TRANSFER)
+	{
+		// Copy value from resource to host-coherent buffer to be verified later.
+		if (useBufferResource)
+		{
+			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
+			vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
+		}
+		else
+		{
+			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
+			vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u, &copyRegion);
+		}
+		transferToHostMemoryBarrier(vkd, cmdBuffer);
+	}
+	// If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
+	BufferWithMemoryPtr colorVerificationBuffer;
+	void*				colorVerificationDataPtr = nullptr;
+	if (valueInColorBuffer(m_params.toStage))
+	{
+		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
+		colorVerificationBuffer		= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
+		colorVerificationDataPtr	= colorVerificationBuffer->getAllocation().getHostPtr();
+		const auto dstAccess	= VK_ACCESS_TRANSFER_READ_BIT;
+		const auto colorBarrier	= makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
+		vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &colorBarrier);
+		const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
+		vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorVerificationBuffer->get(), 1u, &copyRegion);
+		transferToHostMemoryBarrier(vkd, cmdBuffer);
+	}
+	endCommandBuffer(vkd, cmdBuffer);
+	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
+	// Verify output resources as needed.
+	if (valueInAuxiliarDestBuffer(m_params.toStage))
+	{
+		uint32_t bufferValue;
+		deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));
+		if (bufferValue != m_params.testValue)
+		{
+			std::ostringstream msg;
+			msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected " << m_params.testValue;
+			TCU_FAIL(msg.str());
+		}
+	}
+	if (valueInResourceBuffer(m_params.toStage))
+	{
+		auto&		resourceBufferAlloc		= bufferResource->getAllocation();
+		void*		resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();
+		uint32_t	bufferValue;
+		invalidateAlloc(vkd, device, resourceBufferAlloc);
+		deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));
+		if (bufferValue != m_params.testValue)
+		{
+			std::ostringstream msg;
+			msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected " << m_params.testValue;
+			TCU_FAIL(msg.str());
+		}
+	}
+	if (valueInColorBuffer(m_params.toStage))
+	{
+		uint32_t bufferValue;
+		deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));
+		if (bufferValue != m_params.testValue)
+		{
+			std::ostringstream msg;
+			msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected " << m_params.testValue;
+			TCU_FAIL(msg.str());
+		}
+	}
+	return tcu::TestStatus::pass("Pass");
+} // anonymous
+// Builds the "synchronization" test group. The resulting hierarchy is:
+//   <fromStage>_to_<toStage> / <resource type> / <barrier type> / <write access>_<read access>
+// Combinations rejected by the canWriteTo/canReadFrom/can*AsAccess helpers are skipped, and the
+// subpass dependency barrier type is used if, and only if, needsSubpassDependency() says so.
+// The returned pointer has been released from its MovePtr; the caller is expected to take it over.
+tcu::TestCaseGroup* createMeshShaderSyncTests (tcu::TestContext& testCtx)
+{
+	const struct
+	{
+		Stage		fromStage;
+		Stage		toStage;
+	} stageCombinations[] =
+	{
+		// Combinations where the source and destination stages involve mesh shaders.
+		// Note: this could be tested procedurally.
+		{	Stage::HOST,		Stage::TASK			},
+		{	Stage::HOST,		Stage::MESH			},
+		{	Stage::TRANSFER,	Stage::TASK			},
+		{	Stage::TRANSFER,	Stage::MESH			},
+		{	Stage::TASK,		Stage::MESH			},
+		{	Stage::TASK,		Stage::FRAG			},
+		{	Stage::TASK,		Stage::TRANSFER		},
+		{	Stage::TASK,		Stage::HOST			},
+		{	Stage::MESH,		Stage::FRAG			},
+		{	Stage::MESH,		Stage::TRANSFER		},
+		{	Stage::MESH,		Stage::HOST			},
+	};
+	const struct
+	{
+		ResourceType	resourceType;
+		const char*		name;
+	} resourceTypes[] =
+	{
+		{ ResourceType::UNIFORM_BUFFER,	"uniform_buffer"	},
+		{ ResourceType::STORAGE_BUFFER,	"storage_buffer"	},
+		{ ResourceType::STORAGE_IMAGE,	"storage_image"		},
+		{ ResourceType::SAMPLED_IMAGE,	"sampled_image"		},
+	};
+	const struct
+	{
+		BarrierType		barrierType;
+		const char*		name;
+	} barrierTypes[] =
+	{
+		{	BarrierType::GENERAL,		"memory_barrier"		},
+		{	BarrierType::SPECIFIC,		"specific_barrier"		},
+		{	BarrierType::DEPENDENCY,	"subpass_dependency"	},
+	};
+	const struct
+	{
+		WriteAccess		writeAccess;
+		const char*		name;
+	} writeAccesses[] =
+	{
+		{	WriteAccess::HOST_WRITE,		"host_write"		},
+		{	WriteAccess::TRANSFER_WRITE,	"transfer_write"	},
+		{	WriteAccess::SHADER_WRITE,		"shader_write"		},
+	};
+	const struct
+	{
+		ReadAccess		readAccess;
+		const char*		name;
+	} readAccesses[] =
+	{
+		{	ReadAccess::HOST_READ,		"host_read"		},
+		{	ReadAccess::TRANSFER_READ,	"transfer_read"	},
+		{	ReadAccess::SHADER_READ,	"shader_read"	},
+		{	ReadAccess::UNIFORM_READ,	"uniform_read"	},
+	};
+	// Starting value; it is incremented for every generated case so each case uses a distinct test value.
+	uint32_t testValue = 1628510124u;
+	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "synchronization", "Mesh Shader synchronization tests"));
+	for (const auto& stageCombination : stageCombinations)
+	{
+		const std::string	combinationName		= de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
+		GroupPtr			combinationGroup	(new tcu::TestCaseGroup(testCtx, combinationName.c_str(), ""));
+		for (const auto& resourceCase : resourceTypes)
+		{
+			if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
+				continue;
+			if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
+				continue;
+			// Group named after the resource type.
+			GroupPtr resourceGroup (new tcu::TestCaseGroup(testCtx, resourceCase.name, ""));
+			for (const auto& barrierCase : barrierTypes)
+			{
+				const auto subpassDependencyNeeded	= needsSubpassDependency(stageCombination.fromStage, stageCombination.toStage);
+				const auto barrierIsDependency		= (barrierCase.barrierType == BarrierType::DEPENDENCY);
+				// Subpass dependencies must be used if, and only if, they are needed.
+				if (subpassDependencyNeeded != barrierIsDependency)
+					continue;
+				// Group named after the barrier type.
+				GroupPtr barrierGroup (new tcu::TestCaseGroup(testCtx, barrierCase.name, ""));
+				for (const auto& writeCase	: writeAccesses)
+				for (const auto& readCase	: readAccesses)
+				{
+					if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
+						continue;
+					if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
+						continue;
+					if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
+						continue;
+					if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
+						continue;
+					// Leaf case name: "<write access>_<read access>".
+					const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
+					const TestParams testParams =
+					{
+						stageCombination.fromStage,	//	Stage			fromStage;
+						stageCombination.toStage,	//	Stage			toStage;
+						resourceCase.resourceType,	//	ResourceType	resourceType;
+						barrierCase.barrierType,	//	BarrierType		barrierType;
+						writeCase.writeAccess,		//	WriteAccess		writeAccess;
+						readCase.readAccess,		//	ReadAccess		readAccess;
+						testValue++,				//	uint32_t		testValue;
+					};
+					barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, "", testParams));
+				}
+				resourceGroup->addChild(barrierGroup.release());
+			}
+			combinationGroup->addChild(resourceGroup.release());
+		}
+		mainGroup->addChild(combinationGroup.release());
+	}
+	return mainGroup.release();
+}
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.hpp
new file mode 100644
index 0000000..c13b707
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderSyncTests.hpp
@@ -0,0 +1,38 @@
+/*-------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Synchronization Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+{
+namespace MeshShader
+{
+// Creates the test group holding the mesh shader synchronization tests.
+tcu::TestCaseGroup* createMeshShaderSyncTests (tcu::TestContext& testCtx);
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.cpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.cpp
new file mode 100644
index 0000000..217d29a
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.cpp
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Tests
+ *//*--------------------------------------------------------------------*/
+#include "vktMeshShaderTests.hpp"
+#include "vktMeshShaderSmokeTests.hpp"
+#include "vktMeshShaderSyncTests.hpp"
+#include "vktMeshShaderApiTests.hpp"
+#include "vktMeshShaderPropertyTests.hpp"
+#include "vktMeshShaderBuiltinTests.hpp"
+#include "vktMeshShaderMiscTests.hpp"
+#include "deUniquePtr.hpp"
+namespace vkt
+{
+namespace MeshShader
+{
+using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
+// Creates the top-level "mesh_shader" test group. Its only child is the "nv" group
+// (tests for VK_NV_mesh_shader), which collects the groups returned by the smoke, API,
+// synchronization, property, built-in and miscellaneous test factories.
+// The returned pointer has been released from its MovePtr; the caller is expected to take it over.
+tcu::TestCaseGroup*	createTests	(tcu::TestContext& testCtx)
+{
+	GroupPtr mainGroup	(new tcu::TestCaseGroup(testCtx, "mesh_shader", "Mesh Shader Tests"));
+	GroupPtr nvGroup	(new tcu::TestCaseGroup(testCtx, "nv", "Tests for VK_NV_mesh_shader"));
+	nvGroup->addChild(createMeshShaderSmokeTests(testCtx));
+	nvGroup->addChild(createMeshShaderApiTests(testCtx));
+	nvGroup->addChild(createMeshShaderSyncTests(testCtx));
+	nvGroup->addChild(createMeshShaderPropertyTests(testCtx));
+	nvGroup->addChild(createMeshShaderBuiltinTests(testCtx));
+	nvGroup->addChild(createMeshShaderMiscTests(testCtx));
+	mainGroup->addChild(nvGroup.release());
+	return mainGroup.release();
+}
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.hpp b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.hpp
new file mode 100644
index 0000000..512b5b5
--- /dev/null
+++ b/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderTests.hpp
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2021 The Khronos Group Inc.
+ * Copyright (c) 2021 Valve Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *//*!
+ * \file
+ * \brief Mesh Shader Tests
+ *//*--------------------------------------------------------------------*/
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+namespace vkt
+{
+namespace MeshShader
+{
+// Creates the top-level test group holding all mesh shader tests.
+tcu::TestCaseGroup*	createTests	(tcu::TestContext& testCtx);
+} // MeshShader
+} // vkt
diff --git a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm64bitCompareTests.cpp b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm64bitCompareTests.cpp
index e92243e..ca1c2fa 100644
--- a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm64bitCompareTests.cpp
+++ b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm64bitCompareTests.cpp
@@ -1072,24 +1072,6 @@
 	return bufmem;
-vk::Move<vk::VkShaderModule> createShaderModule (const vk::DeviceInterface&	deviceInterface,
-												 vk::VkDevice				device,
-												 const vk::ProgramBinary&	binary)
-	DE_ASSERT(binary.getFormat() == vk::PROGRAM_FORMAT_SPIRV);
-	const struct vk::VkShaderModuleCreateInfo shaderModuleInfo =
-	{
-		0,
-		static_cast<deUintptr>(binary.getSize()),
-		reinterpret_cast<const deUint32*>(binary.getBinary()),
-	};
-	return createShaderModule(deviceInterface, device, &shaderModuleInfo);
 // Make sure the length of the following vectors is a multiple of 4. This will make sure operands can be reused for vectorized tests.
 const OperandsVector<double>	DOUBLE_OPERANDS		=
diff --git a/external/vulkancts/modules/vulkan/vktTestCase.cpp b/external/vulkancts/modules/vulkan/vktTestCase.cpp
index 1d0b3b5..e0c66f9 100644
--- a/external/vulkancts/modules/vulkan/vktTestCase.cpp
+++ b/external/vulkancts/modules/vulkan/vktTestCase.cpp
@@ -69,6 +69,7 @@
+		"VK_NV_mesh_shader",
diff --git a/external/vulkancts/modules/vulkan/vktTestPackage.cpp b/external/vulkancts/modules/vulkan/vktTestPackage.cpp
index a0e9b1a..1827114 100644
--- a/external/vulkancts/modules/vulkan/vktTestPackage.cpp
+++ b/external/vulkancts/modules/vulkan/vktTestPackage.cpp
@@ -104,6 +104,7 @@
 #include "vktPostmortemTests.hpp"
 #include "vktFragmentShadingRateTests.hpp"
 #include "vktReconvergenceTests.hpp"
+#include "vktMeshShaderTests.hpp"
 #include <vector>
 #include <sstream>
@@ -580,6 +581,7 @@
 	addChild(RayQuery::createTests				(m_testCtx));
 	addChild(FragmentShadingRate::createTests	(m_testCtx));
 	addChild(Reconvergence::createTests			(m_testCtx, false));
+	addChild(MeshShader::createTests			(m_testCtx));
 void ExperimentalTestPackage::init (void)
diff --git a/external/vulkancts/mustpass/master/vk-default.txt b/external/vulkancts/mustpass/master/vk-default.txt
index 96a9d63..6e37da3 100644
--- a/external/vulkancts/mustpass/master/vk-default.txt
+++ b/external/vulkancts/mustpass/master/vk-default.txt
@@ -19,6 +19,7 @@
diff --git a/external/vulkancts/mustpass/master/vk-default/mesh-shader.txt b/external/vulkancts/mustpass/master/vk-default/mesh-shader.txt
new file mode 100644
index 0000000..0d171e7
--- /dev/null
+++ b/external/vulkancts/mustpass/master/vk-default/mesh-shader.txt
@@ -0,0 +1,544 @@
diff --git a/external/vulkancts/scripts/src/extensions_data.txt b/external/vulkancts/scripts/src/extensions_data.txt
index ed35f27..24c87af 100644
--- a/external/vulkancts/scripts/src/extensions_data.txt
+++ b/external/vulkancts/scripts/src/extensions_data.txt
@@ -102,3 +102,4 @@
 VK_KHR_format_feature_flags2				DEVICE
 VK_KHR_maintenance4							DEVICE
 VK_EXT_border_color_swizzle					DEVICE
+VK_NV_mesh_shader							DEVICE
diff --git a/framework/common/tcuStringTemplate.cpp b/framework/common/tcuStringTemplate.cpp
index 702f949..2813737 100644
--- a/framework/common/tcuStringTemplate.cpp
+++ b/framework/common/tcuStringTemplate.cpp
@@ -44,6 +44,10 @@
+// Move constructor: move-constructs m_template from other.m_template.
+StringTemplate::StringTemplate (StringTemplate&& other)
+	: m_template(std::move(other.m_template))
+{
+}
 StringTemplate::~StringTemplate (void)
diff --git a/framework/common/tcuStringTemplate.hpp b/framework/common/tcuStringTemplate.hpp
index 8896f47..13c3656 100644
--- a/framework/common/tcuStringTemplate.hpp
+++ b/framework/common/tcuStringTemplate.hpp
@@ -38,6 +38,7 @@
 						StringTemplate		(void);
 						StringTemplate		(const std::string& str);
+						StringTemplate		(StringTemplate&& other);
 						~StringTemplate		(void);
 	void				setString			(const std::string& str);
diff --git a/framework/opengl/gluShaderProgram.cpp b/framework/opengl/gluShaderProgram.cpp
index 87f02be..1b58b89 100644
--- a/framework/opengl/gluShaderProgram.cpp
+++ b/framework/opengl/gluShaderProgram.cpp
@@ -549,7 +549,9 @@
-		0
+		0,
+		0,
+		0,
 	DE_ASSERT(de::inBounds<int>(shaderType, 0, DE_LENGTH_OF_ARRAY(s_typeMap)));
@@ -571,7 +573,9 @@
-		0
+		0,
+		0,
+		0,
 	DE_ASSERT(de::inBounds<int>(shaderType, 0, DE_LENGTH_OF_ARRAY(s_typebitMap)));
@@ -594,6 +598,8 @@
 	DE_ASSERT(de::inBounds<int>(shaderType, 0, DE_LENGTH_OF_ARRAY(s_typeMap)));
@@ -645,6 +651,8 @@
 			{ "MissCompileTime",			"Miss shader compile time"						},
 			{ "IntersectionCompileTime",	"Intersection shader compile time"				},
 			{ "CallableCompileTime",		"Callable shader compile time"					},
+			{ "TaskCompileTime",			"Task shader compile time"						},
+			{ "MeshCompileTime",			"Mesh shader compile time"						},
diff --git a/framework/opengl/gluShaderProgram.hpp b/framework/opengl/gluShaderProgram.hpp
index 3c347a3..d194c3a 100644
--- a/framework/opengl/gluShaderProgram.hpp
+++ b/framework/opengl/gluShaderProgram.hpp
@@ -337,6 +337,16 @@
 	CallableSource(const std::string& source_) : ShaderSource(glu::SHADERTYPE_CALLABLE, source_) {}
+// Shader source wrapper that tags source_ with glu::SHADERTYPE_TASK.
+struct TaskSource : public ShaderSource
+{
+	TaskSource(const std::string& source_) : ShaderSource(glu::SHADERTYPE_TASK, source_) {}
+};
+// Shader source wrapper that tags source_ with glu::SHADERTYPE_MESH.
+struct MeshSource : public ShaderSource
+{
+	MeshSource(const std::string& source_) : ShaderSource(glu::SHADERTYPE_MESH, source_) {}
+};
 struct ProgramSources
 	std::vector<std::string>			sources[SHADERTYPE_LAST];
diff --git a/framework/opengl/gluShaderUtil.cpp b/framework/opengl/gluShaderUtil.cpp
index f2b1b58..c38e326 100644
--- a/framework/opengl/gluShaderUtil.cpp
+++ b/framework/opengl/gluShaderUtil.cpp
@@ -158,6 +158,8 @@
+		"task",
+		"mesh",
diff --git a/framework/opengl/gluShaderUtil.hpp b/framework/opengl/gluShaderUtil.hpp
index 860a281..83ad6b2 100644
--- a/framework/opengl/gluShaderUtil.hpp
+++ b/framework/opengl/gluShaderUtil.hpp
@@ -82,6 +82,9 @@
diff --git a/framework/qphelper/qpTestLog.c b/framework/qphelper/qpTestLog.c
index b1b3cb1..ae0d5fe 100644
--- a/framework/qphelper/qpTestLog.c
+++ b/framework/qphelper/qpTestLog.c
@@ -249,6 +249,8 @@
 	{ QP_SHADER_TYPE_MISS,				"MissShader"			},
 	{ QP_SHADER_TYPE_INTERSECTION,		"IntersectionShader"	},
 	{ QP_SHADER_TYPE_CALLABLE,			"CallableShader"		},
+	{ QP_SHADER_TYPE_TASK,				"TaskShader"			},
+	{ QP_SHADER_TYPE_MESH,				"MeshShader"			},
diff --git a/framework/qphelper/qpTestLog.h b/framework/qphelper/qpTestLog.h
index 62cd1de..daaf7f6 100644
--- a/framework/qphelper/qpTestLog.h
+++ b/framework/qphelper/qpTestLog.h
@@ -154,6 +154,8 @@
 } qpShaderType;
diff --git a/modules/gles31/functional/es31fProgramInterfaceDefinition.cpp b/modules/gles31/functional/es31fProgramInterfaceDefinition.cpp
index 80b120f..5f495a6 100644
--- a/modules/gles31/functional/es31fProgramInterfaceDefinition.cpp
+++ b/modules/gles31/functional/es31fProgramInterfaceDefinition.cpp
@@ -51,12 +51,16 @@
 // s_shaderStageOrder does not contain ShaderType_LAST