blob: 03679c35274dd62519d02c915e46f082a818d7d9 [file] [log] [blame]
/*------------------------------------------------------------------------
* Vulkan Conformance Tests
* ------------------------
*
* Copyright (c) 2019 The Khronos Group Inc.
* Copyright (c) 2019 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Concurrent draw tests
* Tests that create queue for rendering as well as queue for
* compute, and trigger work on both pipelines at the same time,
* and finally verify that the results are as expected.
*//*--------------------------------------------------------------------*/
#include "vktDrawConcurrentTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktDrawTestCaseUtil.hpp"
#include "../compute/vktComputeTestsUtil.hpp"
#include "vktDrawBaseClass.hpp"
#include "tcuTestLog.hpp"
#include "tcuResource.hpp"
#include "tcuImageCompare.hpp"
#include "tcuTextureUtil.hpp"
#include "tcuRGBA.hpp"
#include "vkDefs.hpp"
#include "vkCmdUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "deRandom.hpp"
using namespace vk;
namespace vkt
{
namespace Draw
{
namespace
{
// Test instance that records a draw on the universal queue while a separately
// created device executes a compute dispatch, submits both, and then verifies
// the compute output buffer and the rendered image independently.
class ConcurrentDraw : public DrawTestsBaseClass
{
public:
// Reuses the shared draw-test spec (shader map + primitive topology).
typedef TestSpecBase TestSpec;
ConcurrentDraw (Context &context, TestSpec testSpec);
// Runs one iteration: set up compute device/pipeline, submit compute and
// draw work concurrently, wait for both fences, validate both results.
virtual tcu::TestStatus iterate (void);
};
// Builds the vertex stream for the draw: two guard vertices (reference index
// -1), 1000 copies of a centered quad expressed as two triangles with a
// running per-vertex reference index starting at 2, and one trailing guard
// vertex. All vertices are blue.
ConcurrentDraw::ConcurrentDraw (Context &context, TestSpec testSpec)
	: DrawTestsBaseClass(context, testSpec.shaders[glu::SHADERTYPE_VERTEX], testSpec.shaders[glu::SHADERTYPE_FRAGMENT], testSpec.topology)
{
	const tcu::Vec4 blue = tcu::RGBA::blue().toVec();

	// Leading guard vertices.
	m_data.push_back(VertexElementData(tcu::Vec4( 1.0f, -1.0f, 1.0f, 1.0f), blue, -1));
	m_data.push_back(VertexElementData(tcu::Vec4(-1.0f,  1.0f, 1.0f, 1.0f), blue, -1));

	// One quad = two triangles = six vertices, in the same order as before.
	const tcu::Vec4 quadVertices[6] =
	{
		tcu::Vec4(-0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4(-0.3f,  0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f, -0.3f, 1.0f, 1.0f),
		tcu::Vec4( 0.3f,  0.3f, 1.0f, 1.0f),
		tcu::Vec4(-0.3f,  0.3f, 1.0f, 1.0f),
	};

	int nextRefIndex = 2;
	for (int quadNdx = 0; quadNdx < 1000; ++quadNdx)
		for (int vertexNdx = 0; vertexNdx < 6; ++vertexNdx)
			m_data.push_back(VertexElementData(quadVertices[vertexNdx], blue, nextRefIndex++));

	// Trailing guard vertex.
	m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), blue, -1));

	initialize();
}
// Runs one iteration of the concurrent-draw test:
//  1. Creates a second logical device with a compute-capable queue.
//  2. Records a compute dispatch that transforms a 1024-element buffer.
//  3. Records a draw of the vertex data prepared in the constructor.
//  4. Submits both command buffers, waits on both fences, then validates
//     the compute output (expected to be the bitwise inverse of the input)
//     and the rendered image against a software reference.
// Returns pass only if both the compute and draw results match.
tcu::TestStatus ConcurrentDraw::iterate (void)
{
	// Error codes are accumulated instead of returning early so that we
	// always wait on *both* fences before reporting a failure; bailing out
	// early could leave a fence (and its submitted work) hanging.
	enum
	{
		NO_MATCH_FOUND = ~((deUint32)0),
		ERROR_NONE = 0,
		ERROR_WAIT_COMPUTE = 1,
		ERROR_WAIT_DRAW = 2
	};
	struct Queue
	{
		VkQueue queue;
		deUint32 queueFamilyIndex;
	};
	const DeviceInterface& vk = m_context.getDeviceInterface();
	const deUint32 numValues = 1024;
	const InstanceInterface& instance = m_context.getInstanceInterface();
	const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
	tcu::TestLog& log = m_context.getTestContext().getLog();
	Move<VkDevice> computeDevice;
	std::vector<VkQueueFamilyProperties> queueFamilyProperties;
	VkDeviceCreateInfo deviceInfo;
	VkPhysicalDeviceFeatures deviceFeatures;
	const float queuePriority = 1.0f;
	VkDeviceQueueCreateInfo queueInfos;
	Queue computeQueue = { DE_NULL, (deUint32)NO_MATCH_FOUND };
	// Set up compute: pick the first queue family with compute support.
	queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instance, physicalDevice);
	for (deUint32 queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
	{
		if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
		{
			if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
				computeQueue.queueFamilyIndex = queueNdx;
		}
	}
	if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
		TCU_THROW(NotSupportedError, "Compute queue couldn't be created");
	// Request a single queue from the selected family.
	VkDeviceQueueCreateInfo queueInfo;
	deMemset(&queueInfo, 0, sizeof(queueInfo));
	queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	queueInfo.pNext = DE_NULL;
	queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
	queueInfo.queueFamilyIndex = computeQueue.queueFamilyIndex;
	queueInfo.queueCount = 1;
	queueInfo.pQueuePriorities = &queuePriority;
	queueInfos = queueInfo;
	// Create a separate logical device for the compute work, enabling the
	// same feature set the physical device reports.
	deMemset(&deviceInfo, 0, sizeof(deviceInfo));
	instance.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
	deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	deviceInfo.pNext = DE_NULL;
	deviceInfo.enabledExtensionCount = 0u;
	deviceInfo.ppEnabledExtensionNames = DE_NULL;
	deviceInfo.enabledLayerCount = 0u;
	deviceInfo.ppEnabledLayerNames = DE_NULL;
	deviceInfo.pEnabledFeatures = &deviceFeatures;
	deviceInfo.queueCreateInfoCount = 1;
	deviceInfo.pQueueCreateInfos = &queueInfos;
	computeDevice = createDevice(m_context.getPlatformInterface(), m_context.getInstance(), instance, physicalDevice, &deviceInfo);
	// NOTE(review): the context's DeviceInterface is reused for the newly
	// created compute device here and below — confirm the loaded entry
	// points are valid for a second device from the same physical device.
	vk.getDeviceQueue(*computeDevice, computeQueue.queueFamilyIndex, 0, &computeQueue.queue);
	// Create an input/output buffer.
	const VkPhysicalDeviceMemoryProperties memoryProperties = getPhysicalDeviceMemoryProperties(instance, physicalDevice);
	// Fix: previously this was a raw 'new SimpleAllocator(...)' that was
	// never deleted, leaking on every iteration. A stack instance is
	// destroyed when iterate() returns; it is declared before 'buffer' so
	// it outlives the allocation it backs.
	SimpleAllocator allocator(vk, *computeDevice, memoryProperties);
	const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * numValues;
	const vkt::compute::Buffer buffer(vk, *computeDevice, allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
	// Fill the buffer with random data, keeping a host-side copy for the
	// later comparison.
	typedef std::vector<deUint32> data_vector_t;
	data_vector_t inputData(numValues);
	{
		de::Random rnd(0x82ce7f);
		const Allocation& bufferAllocation = buffer.getAllocation();
		deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
		for (deUint32 i = 0; i < numValues; ++i)
		{
			deUint32 val = rnd.getUint32();
			inputData[i] = val;
			*bufferPtr++ = val;
		}
		flushAlloc(vk, *computeDevice, bufferAllocation);
	}
	// Create descriptor set: a single storage buffer visible to the compute stage.
	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(vk, *computeDevice));
	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
		.build(vk, *computeDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, *computeDevice, *descriptorPool, *descriptorSetLayout));
	const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
	DescriptorSetUpdateBuilder()
		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
		.update(vk, *computeDevice);
	// Perform the computation: build the compute pipeline and the barriers
	// guarding host write -> shader read and shader write -> host read.
	const Unique<VkShaderModule> shaderModule(createShaderModule(vk, *computeDevice, m_context.getBinaryCollection().get("vulkan/draw/ConcurrentPayload.comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, *computeDevice, *descriptorSetLayout));
	const Unique<VkPipeline> pipeline(vkt::compute::makeComputePipeline(vk, *computeDevice, *pipelineLayout, *shaderModule));
	const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
	const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
	const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, *computeDevice, computeQueue.queueFamilyIndex));
	const Unique<VkCommandBuffer> computeCommandBuffer(allocateCommandBuffer(vk, *computeDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
	// Compute command buffer: barrier, single 1x1x1 dispatch, barrier.
	beginCommandBuffer(vk, *computeCommandBuffer);
	vk.cmdBindPipeline(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	vk.cmdBindDescriptorSets(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
	vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
	vk.cmdDispatch(*computeCommandBuffer, 1, 1, 1);
	vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
	endCommandBuffer(vk, *computeCommandBuffer);
	const VkSubmitInfo submitInfo =
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
		DE_NULL, // pNext
		0u, // waitSemaphoreCount
		DE_NULL, // pWaitSemaphores
		(const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
		1u, // commandBufferCount
		&computeCommandBuffer.get(), // pCommandBuffers
		0u, // signalSemaphoreCount
		DE_NULL // pSignalSemaphores
	};
	// Set up draw: bind the vertex buffer and pipeline prepared by the base
	// class and draw 6 vertices starting at index 2 (one quad; the first
	// two entries in m_data are guard vertices).
	const VkQueue drawQueue = m_context.getUniversalQueue();
	const VkDevice drawDevice = m_context.getDevice();
	beginRenderPass();
	const VkDeviceSize vertexBufferOffset = 0;
	const VkBuffer vertexBuffer = m_vertexBuffer->object();
	m_vk.cmdBindVertexBuffers(*m_cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
	m_vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
	m_vk.cmdDraw(*m_cmdBuffer, 6, 1, 2, 0);
	endRenderPass(m_vk, *m_cmdBuffer);
	endCommandBuffer(m_vk, *m_cmdBuffer);
	const VkCommandBuffer drawCommandBuffer = m_cmdBuffer.get();
	const bool useDeviceGroups = false;
	const deUint32 deviceMask = 1u;
	const Unique<VkFence> drawFence(createFence(vk, drawDevice));
	VkDeviceGroupSubmitInfo deviceGroupSubmitInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHR, // VkStructureType sType;
		DE_NULL, // const void* pNext;
		0u, // deUint32 waitSemaphoreCount;
		DE_NULL, // const deUint32* pWaitSemaphoreDeviceIndices;
		1u, // deUint32 commandBufferCount;
		&deviceMask, // const deUint32* pCommandBufferDeviceMasks;
		0u, // deUint32 signalSemaphoreCount;
		DE_NULL, // const deUint32* pSignalSemaphoreDeviceIndices;
	};
	const VkSubmitInfo drawSubmitInfo =
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
		useDeviceGroups ? &deviceGroupSubmitInfo : DE_NULL, // const void* pNext;
		0u, // deUint32 waitSemaphoreCount;
		DE_NULL, // const VkSemaphore* pWaitSemaphores;
		(const VkPipelineStageFlags*)DE_NULL, // const VkPipelineStageFlags* pWaitDstStageMask;
		1u, // deUint32 commandBufferCount;
		&drawCommandBuffer, // const VkCommandBuffer* pCommandBuffers;
		0u, // deUint32 signalSemaphoreCount;
		DE_NULL, // const VkSemaphore* pSignalSemaphores;
	};
	const Unique<VkFence> computeFence(createFence(vk, *computeDevice));
	// Submit both compute and draw queues back to back so the two workloads
	// are in flight at the same time.
	VK_CHECK(vk.queueSubmit(computeQueue.queue, 1u, &submitInfo, *computeFence));
	VK_CHECK(vk.queueSubmit(drawQueue, 1u, &drawSubmitInfo, *drawFence));
	int err = ERROR_NONE;
	if (VK_SUCCESS != vk.waitForFences(*computeDevice, 1u, &computeFence.get(), DE_TRUE, ~0ull))
		err = ERROR_WAIT_COMPUTE;
	if (VK_SUCCESS != vk.waitForFences(drawDevice, 1u, &drawFence.get(), DE_TRUE, ~0ull))
		err = ERROR_WAIT_DRAW;
	// Have to wait for all fences before calling fail, or some fence may be left hanging.
	if (err == ERROR_WAIT_COMPUTE)
		return tcu::TestStatus::fail("Failed waiting for compute queue fence.");
	if (err == ERROR_WAIT_DRAW)
		return tcu::TestStatus::fail("Failed waiting for draw queue fence.");
	// Validation - compute: every output word must be the bitwise inverse
	// of the corresponding input word.
	const Allocation& bufferAllocation = buffer.getAllocation();
	invalidateAlloc(vk, *computeDevice, bufferAllocation);
	const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
	for (deUint32 ndx = 0; ndx < numValues; ++ndx)
	{
		const deUint32 res = bufferPtr[ndx];
		const deUint32 inp = inputData[ndx];
		const deUint32 ref = ~inp;
		if (res != ref)
		{
			std::ostringstream msg;
			msg << "Comparison failed (compute) for InOut.values[" << ndx << "] ref:" << ref << " res:" << res << " inp:" << inp;
			return tcu::TestStatus::fail(msg.str());
		}
	}
	// Validation - draw: build a reference image (black background, blue
	// quad inside the reference rectangle) and fuzzy-compare it against the
	// rendered color target.
	tcu::Texture2D referenceFrame(mapVkFormat(m_colorAttachmentFormat), (int)(0.5f + static_cast<float>(WIDTH)), (int)(0.5f + static_cast<float>(HEIGHT)));
	referenceFrame.allocLevel(0);
	const deInt32 frameWidth = referenceFrame.getWidth();
	const deInt32 frameHeight = referenceFrame.getHeight();
	tcu::clear(referenceFrame.getLevel(0), tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
	ReferenceImageCoordinates refCoords;
	for (int y = 0; y < frameHeight; y++)
	{
		// Map pixel centers to NDC [-1, 1].
		const float yCoord = (float)(y / (0.5 * frameHeight)) - 1.0f;
		for (int x = 0; x < frameWidth; x++)
		{
			const float xCoord = (float)(x / (0.5 * frameWidth)) - 1.0f;
			if ((yCoord >= refCoords.bottom &&
				 yCoord <= refCoords.top &&
				 xCoord >= refCoords.left &&
				 xCoord <= refCoords.right))
				referenceFrame.getLevel(0).setPixel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), x, y);
		}
	}
	const VkOffset3D zeroOffset = { 0, 0, 0 };
	const tcu::ConstPixelBufferAccess renderedFrame = m_colorTargetImage->readSurface(
		drawQueue, m_context.getDefaultAllocator(), VK_IMAGE_LAYOUT_GENERAL, zeroOffset, WIDTH, HEIGHT, VK_IMAGE_ASPECT_COLOR_BIT);
	qpTestResult res = QP_TEST_RESULT_PASS;
	if (!tcu::fuzzyCompare(log, "Result", "Image comparison result",
						   referenceFrame.getLevel(0), renderedFrame, 0.05f,
						   tcu::COMPARE_LOG_RESULT))
	{
		res = QP_TEST_RESULT_FAIL;
	}
	return tcu::TestStatus(res, qpGetTestResultName(res));
}
} // anonymous
// Group constructor; the actual test cases are added in init().
ConcurrentDrawTests::ConcurrentDrawTests (tcu::TestContext &testCtx)
: TestCaseGroup (testCtx, "concurrent", "concurrent drawing")
{
/* Left blank on purpose */
}
ConcurrentDrawTests::~ConcurrentDrawTests (void) {}
void ConcurrentDrawTests::init (void)
{
{
ConcurrentDraw::TestSpec testSpec;
testSpec.shaders[glu::SHADERTYPE_VERTEX] = "vulkan/draw/VertexFetch.vert";
testSpec.shaders[glu::SHADERTYPE_FRAGMENT] = "vulkan/draw/VertexFetch.frag";
testSpec.shaders[glu::SHADERTYPE_COMPUTE] = "vulkan/draw/ConcurrentPayload.comp";
testSpec.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
addChild(new InstanceFactory<ConcurrentDraw>(m_testCtx, "compute_and_triangle_list", "Draws triangle list while running a compute shader", testSpec));
}
}
} // DrawTests
} // vkt