Do not reset perf queries in multi-submitted cmdbuffer

Some implementations can require multiple passes to gather values for
their performance queries. It is important not to reset the query pool
in a command buffer that can be submitted multiple times; otherwise the
query results risk never becoming available.

Components: Vulkan
Affects: dEQP-VK.query_pool.performance_query.*

Change-Id: Ief00457fc0808a7c5ee7c0bf6aaf7de7f3042ca5
diff --git a/external/vulkancts/modules/vulkan/query_pool/vktQueryPoolPerformanceTests.cpp b/external/vulkancts/modules/vulkan/query_pool/vktQueryPoolPerformanceTests.cpp
index 836d960..ce1c454 100644
--- a/external/vulkancts/modules/vulkan/query_pool/vktQueryPoolPerformanceTests.cpp
+++ b/external/vulkancts/modules/vulkan/query_pool/vktQueryPoolPerformanceTests.cpp
@@ -587,6 +587,31 @@
 		return tcu::TestStatus::pass("Pass");
 	}
 
+	// reset query pool
+	{
+		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
+		const Unique<VkFence>		fence			(createFence(vkd, device));
+		const VkSubmitInfo			submitInfo		=
+		{
+			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
+			DE_NULL,											// pNext
+			0u,													// waitSemaphoreCount
+			DE_NULL,											// pWaitSemaphores
+			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
+			1u,													// commandBufferCount
+			&resetCmdBuffer.get(),								// pCommandBuffers
+			0u,													// signalSemaphoreCount
+			DE_NULL,											// pSignalSemaphores
+		};
+
+		beginCommandBuffer(vkd, *resetCmdBuffer);
+		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
+		endCommandBuffer(vkd, *resetCmdBuffer);
+
+		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
+		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
+	}
+
 	// begin command buffer
 	const VkCommandBufferBeginInfo commandBufBeginParams =
 	{
@@ -604,9 +629,6 @@
 	VkClearValue renderPassClearValue;
 	deMemset(&renderPassClearValue, 0, sizeof(VkClearValue));
 
-	// reset query pool
-	vkd.cmdResetQueryPool(*cmdBuffer, *queryPool, 0, 1);
-
 	// perform query during triangle draw
 	vkd.cmdBeginQuery(*cmdBuffer, *queryPool, 0, VK_QUERY_CONTROL_PRECISE_BIT);
 
@@ -708,6 +730,32 @@
 		return tcu::TestStatus::pass("Pass");
 	}
 
+	// reset query pools
+	{
+		Unique<VkCommandBuffer>		resetCmdBuffer	(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
+		const Unique<VkFence>		fence			(createFence(vkd, device));
+		const VkSubmitInfo			submitInfo		=
+		{
+			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
+			DE_NULL,											// pNext
+			0u,													// waitSemaphoreCount
+			DE_NULL,											// pWaitSemaphores
+			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
+			1u,													// commandBufferCount
+			&resetCmdBuffer.get(),								// pCommandBuffers
+			0u,													// signalSemaphoreCount
+			DE_NULL,											// pSignalSemaphores
+		};
+
+		beginCommandBuffer(vkd, *resetCmdBuffer);
+		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool1, 0u, 1u);
+		vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool2, 0u, 1u);
+		endCommandBuffer(vkd, *resetCmdBuffer);
+
+		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, *fence));
+		VK_CHECK(vkd.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
+	}
+
 	// begin command buffer
 	const VkCommandBufferBeginInfo commandBufBeginParams =
 	{
@@ -733,10 +781,6 @@
 		*queryPool2
 	};
 
-	// reset query pools
-	vkd.cmdResetQueryPool(*cmdBuffer, queryPools[0], 0u, 1u);
-	vkd.cmdResetQueryPool(*cmdBuffer, queryPools[1], 0u, 1u);
-
 	// perform two queries during triangle draw
 	for (deUint32 loop = 0; loop < DE_LENGTH_OF_ARRAY(queryPools); ++loop)
 	{
@@ -943,6 +987,7 @@
 	const VkQueue					queue				= m_context.getUniversalQueue();
 	const CmdPoolCreateInfo			cmdPoolCreateInfo	(m_context.getUniversalQueueFamilyIndex());
 	const Unique<VkCommandPool>		cmdPool				(createCommandPool(vkd, device, &cmdPoolCreateInfo));
+	const Unique<VkCommandBuffer>	resetCmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
 	const Unique<VkCommandBuffer>	cmdBuffer			(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
 
 	initStateObjects();
@@ -956,9 +1001,11 @@
 		return tcu::TestStatus::pass("Pass");
 	}
 
-	beginCommandBuffer(vkd, *cmdBuffer);
-	vkd.cmdResetQueryPool(*cmdBuffer, *queryPool, 0u, 1u);
+	beginCommandBuffer(vkd, *resetCmdBuffer);
+	vkd.cmdResetQueryPool(*resetCmdBuffer, *queryPool, 0u, 1u);
+	endCommandBuffer(vkd, *resetCmdBuffer);
 
+	beginCommandBuffer(vkd, *cmdBuffer);
 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
 
@@ -970,6 +1017,24 @@
 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
 	endCommandBuffer(vkd, *cmdBuffer);
 
+	// submit reset of queries only once
+	{
+		const VkSubmitInfo submitInfo =
+		{
+			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
+			DE_NULL,											// pNext
+			0u,													// waitSemaphoreCount
+			DE_NULL,											// pWaitSemaphores
+			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
+			1u,													// commandBufferCount
+			&resetCmdBuffer.get(),								// pCommandBuffers
+			0u,													// signalSemaphoreCount
+			DE_NULL,											// pSignalSemaphores
+		};
+
+		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
+	}
+
 	// submit command buffer for each pass and wait for its completion
 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
 	{
@@ -1030,6 +1095,7 @@
 	const VkQueue					queue = m_context.getUniversalQueue();
 	const CmdPoolCreateInfo			cmdPoolCreateInfo(m_context.getUniversalQueueFamilyIndex());
 	const Unique<VkCommandPool>		cmdPool(createCommandPool(vkd, device, &cmdPoolCreateInfo));
+	const Unique<VkCommandBuffer>	resetCmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
 	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
 
 	initStateObjects();
@@ -1050,10 +1116,12 @@
 		*queryPool2
 	};
 
-	beginCommandBuffer(vkd, *cmdBuffer);
-	vkd.cmdResetQueryPool(*cmdBuffer, queryPools[0], 0u, 1u);
-	vkd.cmdResetQueryPool(*cmdBuffer, queryPools[1], 0u, 1u);
+	beginCommandBuffer(vkd, *resetCmdBuffer);
+	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[0], 0u, 1u);
+	vkd.cmdResetQueryPool(*resetCmdBuffer, queryPools[1], 0u, 1u);
+	endCommandBuffer(vkd, *resetCmdBuffer);
 
+	beginCommandBuffer(vkd, *cmdBuffer);
 	vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipeline);
 	vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &(m_descriptorSet.get()), 0u, DE_NULL);
 
@@ -1070,6 +1138,24 @@
 		(VkDependencyFlags)0u, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &m_computeFinishBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
 	endCommandBuffer(vkd, *cmdBuffer);
 
+	// submit reset of queries only once
+	{
+		const VkSubmitInfo submitInfo =
+		{
+			VK_STRUCTURE_TYPE_SUBMIT_INFO,						// sType
+			DE_NULL,											// pNext
+			0u,													// waitSemaphoreCount
+			DE_NULL,											// pWaitSemaphores
+			(const VkPipelineStageFlags*)DE_NULL,				// pWaitDstStageMask
+			1u,													// commandBufferCount
+			&resetCmdBuffer.get(),								// pCommandBuffers
+			0u,													// signalSemaphoreCount
+			DE_NULL,											// pSignalSemaphores
+		};
+
+		VK_CHECK(vkd.queueSubmit(queue, 1u, &submitInfo, DE_NULL));
+	}
+
 	// submit command buffer for each pass and wait for its completion
 	for (deUint32 passIndex = 0; passIndex < getRequiredNumerOfPasses(); passIndex++)
 	{