/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* based in part on radv driver which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* This file implements VkQueue, VkFence, and VkSemaphore
*/
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>
#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"
static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
struct vk_queue_submit *submit);
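
/* Initialize a single pvr_queue: set up the common vk_queue, enable the
 * submit thread when the winsys supports threaded submission, and create the
 * transfer, compute, query and render contexts that jobs are submitted on.
 */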
static VkResult pvr_queue_init(struct pvr_device *device,
struct pvr_queue *queue,
const VkDeviceQueueCreateInfo *pCreateInfo,
uint32_t index_in_family)
{
struct pvr_transfer_ctx *transfer_ctx;
struct pvr_compute_ctx *compute_ctx;
struct pvr_compute_ctx *query_ctx;
struct pvr_render_ctx *gfx_ctx;
VkResult result;
*queue = (struct pvr_queue){ 0 };
result =
vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
if (result != VK_SUCCESS)
return result;
if (device->ws->features.supports_threaded_submit) {
result = vk_queue_enable_submit_thread(&queue->vk);
if (result != VK_SUCCESS)
goto err_vk_queue_finish;
}
result = pvr_transfer_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&transfer_ctx);
if (result != VK_SUCCESS)
goto err_vk_queue_finish;
result = pvr_compute_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&compute_ctx);
if (result != VK_SUCCESS)
goto err_transfer_ctx_destroy;
result = pvr_compute_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&query_ctx);
if (result != VK_SUCCESS)
goto err_compute_ctx_destroy;
result =
pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
if (result != VK_SUCCESS)
goto err_query_ctx_destroy;
queue->device = device;
queue->gfx_ctx = gfx_ctx;
queue->compute_ctx = compute_ctx;
queue->query_ctx = query_ctx;
queue->transfer_ctx = transfer_ctx;
queue->vk.driver_submit = pvr_driver_queue_submit;
return VK_SUCCESS;
err_query_ctx_destroy:
pvr_compute_ctx_destroy(query_ctx);
err_compute_ctx_destroy:
pvr_compute_ctx_destroy(compute_ctx);
err_transfer_ctx_destroy:
pvr_transfer_ctx_destroy(transfer_ctx);
err_vk_queue_finish:
vk_queue_finish(&queue->vk);
return result;
}
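
/* Create the device's queues for the single supported queue family, one
 * pvr_queue per requested queue.
 */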
VkResult pvr_queues_create(struct pvr_device *device,
const VkDeviceCreateInfo *pCreateInfo)
{
VkResult result;
/* Check requested queue families and queues */
assert(pCreateInfo->queueCreateInfoCount == 1);
assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
const VkDeviceQueueCreateInfo *queue_create =
&pCreateInfo->pQueueCreateInfos[0];
device->queues = vk_alloc(&device->vk.alloc,
queue_create->queueCount * sizeof(*device->queues),
8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
device->queue_count = 0;
for (uint32_t i = 0; i < queue_create->queueCount; i++) {
result = pvr_queue_init(device, &device->queues[i], queue_create, i);
if (result != VK_SUCCESS)
goto err_queues_finish;
device->queue_count++;
}
return VK_SUCCESS;
err_queues_finish:
pvr_queues_destroy(device);
return result;
}
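
/* Tear down a pvr_queue: destroy any outstanding per-job-type wait/signal
 * syncobjs, the render/compute/query/transfer contexts and the common
 * vk_queue state.
 */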
static void pvr_queue_finish(struct pvr_queue *queue)
{
for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) {
if (queue->next_job_wait_sync[i])
vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
}
for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) {
if (queue->last_job_signal_sync[i])
vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
}
pvr_render_ctx_destroy(queue->gfx_ctx);
pvr_compute_ctx_destroy(queue->query_ctx);
pvr_compute_ctx_destroy(queue->compute_ctx);
pvr_transfer_ctx_destroy(queue->transfer_ctx);
vk_queue_finish(&queue->vk);
}
void pvr_queues_destroy(struct pvr_device *device)
{
for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
pvr_queue_finish(&device->queues[q_idx]);
vk_free(&device->vk.alloc, device->queues);
}
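
/* Book-keeping after a successful job submission: the barrier sync for this
 * job type has been consumed so destroy it, and replace the previous
 * last-signal sync with the sync signaled by the new job.
 */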
static void pvr_update_job_syncs(struct pvr_device *device,
struct pvr_queue *queue,
struct vk_sync *new_signal_sync,
enum pvr_job_type submitted_job_type)
{
if (queue->next_job_wait_sync[submitted_job_type]) {
vk_sync_destroy(&device->vk,
queue->next_job_wait_sync[submitted_job_type]);
queue->next_job_wait_sync[submitted_job_type] = NULL;
}
if (queue->last_job_signal_sync[submitted_job_type]) {
vk_sync_destroy(&device->vk,
queue->last_job_signal_sync[submitted_job_type]);
}
queue->last_job_signal_sync[submitted_job_type] = new_signal_sync;
}
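
/* Submit the geometry (and, when requested, fragment) work of a graphics
 * sub-command and record the resulting signal syncs for the GEOM and FRAG
 * job types.
 */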
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd)
{
pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
struct vk_sync *geom_signal_sync;
struct vk_sync *frag_signal_sync = NULL;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&geom_signal_sync);
if (result != VK_SUCCESS)
return result;
if (sub_cmd->job.run_frag) {
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&frag_signal_sync);
if (result != VK_SUCCESS)
goto err_destroy_geom_sync;
}
/* FIXME: DoShadowLoadOrStore() */
   /* Perform two render submits when using multiple framebuffer layers. The
    * first submit contains just the geometry work, while the second only
    * terminates (and triggers the fragment render if originally specified).
    * This is needed because the render target cache gets cleared on
    * terminating submits, which could otherwise result in missing primitives.
    */
if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
/* If fragment work shouldn't be run there's no need for a split,
* and if geometry_terminate is false this kick can't have a fragment
* stage without another terminating geometry kick.
*/
assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);
/* First submit must not touch fragment work. */
sub_cmd->job.geometry_terminate = false;
sub_cmd->job.run_frag = false;
result =
pvr_render_job_submit(queue->gfx_ctx,
&sub_cmd->job,
queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
NULL,
NULL,
NULL);
sub_cmd->job.geometry_terminate = true;
sub_cmd->job.run_frag = true;
if (result != VK_SUCCESS)
goto err_destroy_frag_sync;
original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;
/* Second submit contains only a trivial control stream to terminate the
* geometry work.
*/
assert(sub_cmd->terminate_ctrl_stream);
sub_cmd->job.ctrl_stream_addr =
sub_cmd->terminate_ctrl_stream->vma->dev_addr;
}
result = pvr_render_job_submit(queue->gfx_ctx,
&sub_cmd->job,
queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
geom_signal_sync,
frag_signal_sync);
if (original_ctrl_stream_addr.addr > 0)
sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;
if (result != VK_SUCCESS)
goto err_destroy_frag_sync;
pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM);
if (sub_cmd->job.run_frag)
pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG);
/* FIXME: DoShadowLoadOrStore() */
return VK_SUCCESS;
err_destroy_frag_sync:
if (frag_signal_sync)
vk_sync_destroy(&device->vk, frag_signal_sync);
err_destroy_geom_sync:
vk_sync_destroy(&device->vk, geom_signal_sync);
return result;
}
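
/* Submit a compute sub-command on the compute context and record its signal
 * sync for the COMPUTE job type.
 */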
static VkResult pvr_process_compute_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_compute *sub_cmd)
{
struct vk_sync *sync;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result =
pvr_compute_job_submit(queue->compute_ctx,
sub_cmd,
queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_COMPUTE);
return result;
}
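
/* Submit a transfer sub-command on the transfer context and record its
 * signal sync for the TRANSFER job type.
 */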
static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_transfer *sub_cmd)
{
struct vk_sync *sync;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result =
pvr_transfer_job_submit(queue->transfer_ctx,
sub_cmd,
queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_TRANSFER);
return result;
}
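
/* Submit an occlusion query sub-command on the query context and record its
 * signal sync for the OCCLUSION_QUERY job type.
 */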
static VkResult
pvr_process_occlusion_query_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_compute *sub_cmd)
{
struct vk_sync *sync;
VkResult result;
   /* TODO: Currently we add barrier event sub-commands to handle the sync
    * necessary for the different occlusion query types. Would queue
    * processing be faster if we did that sync here instead of via event
    * sub-commands?
    */
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = pvr_compute_job_submit(
queue->query_ctx,
sub_cmd,
queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_OCCLUSION_QUERY);
return result;
}
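
/* Process a barrier event sub-command: for each dst stage, submit a null job
 * that waits on the last signal syncs of the src stages (plus any existing
 * barrier sync for that dst stage) and signals a new barrier sync that
 * subsequent jobs of that stage will wait on.
 */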
static VkResult
pvr_process_event_cmd_barrier(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event_barrier *sub_cmd)
{
const uint32_t src_mask = sub_cmd->wait_for_stage_mask;
const uint32_t dst_mask = sub_cmd->wait_at_stage_mask;
struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1];
uint32_t src_wait_count = 0;
VkResult result;
assert(!(src_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
assert(!(dst_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
u_foreach_bit (stage, src_mask) {
if (queue->last_job_signal_sync[stage]) {
wait_syncs[src_wait_count++] = (struct vk_sync_wait){
.sync = queue->last_job_signal_sync[stage],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
}
/* No previous src jobs that need finishing so no need for a barrier. */
if (src_wait_count == 0)
return VK_SUCCESS;
u_foreach_bit (stage, dst_mask) {
uint32_t wait_count = src_wait_count;
struct vk_sync_signal signal;
struct vk_sync *signal_sync;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&signal_sync);
if (result != VK_SUCCESS)
return result;
signal = (struct vk_sync_signal){
.sync = signal_sync,
.stage_mask = ~(VkPipelineStageFlags2)0,
.signal_value = 0,
};
if (queue->next_job_wait_sync[stage]) {
wait_syncs[wait_count++] = (struct vk_sync_wait){
.sync = queue->next_job_wait_sync[stage],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
result = device->ws->ops->null_job_submit(device->ws,
wait_syncs,
wait_count,
&signal);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, signal_sync);
return result;
}
if (queue->next_job_wait_sync[stage])
vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);
queue->next_job_wait_sync[stage] = signal_sync;
}
return VK_SUCCESS;
}
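
/* Process a set or reset event sub-command: submit a null job that waits on
 * the last signal syncs of the requested src stages and signals a fresh
 * syncobj, which replaces the event's sync along with its new state.
 */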
static VkResult
pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event_set_reset *sub_cmd,
const enum pvr_event_state new_event_state)
{
   /* Sized to PVR_NUM_SYNC_PIPELINE_STAGES rather than PVR_JOB_TYPE_MAX, since
    * the latter also includes PVR_JOB_TYPE_OCCLUSION_QUERY, which can never
    * appear in the src stage mask.
    */
struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
struct vk_sync_signal signal;
struct vk_sync *signal_sync;
uint32_t wait_count = 0;
VkResult result;
assert(!(sub_cmd->wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
u_foreach_bit (stage, sub_cmd->wait_for_stage_mask) {
if (!queue->last_job_signal_sync[stage])
continue;
waits[wait_count++] = (struct vk_sync_wait){
.sync = queue->last_job_signal_sync[stage],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&signal_sync);
if (result != VK_SUCCESS)
return result;
signal = (struct vk_sync_signal){
.sync = signal_sync,
.stage_mask = ~(VkPipelineStageFlags2)0,
.signal_value = 0,
};
result =
device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, signal_sync);
return result;
}
if (sub_cmd->event->sync)
vk_sync_destroy(&device->vk, sub_cmd->event->sync);
sub_cmd->event->sync = signal_sync;
sub_cmd->event->state = new_event_state;
return VK_SUCCESS;
}
static inline VkResult
pvr_process_event_cmd_set(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
return pvr_process_event_cmd_set_or_reset(device,
queue,
sub_cmd,
PVR_EVENT_STATE_SET_BY_DEVICE);
}
static inline VkResult
pvr_process_event_cmd_reset(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
return pvr_process_event_cmd_set_or_reset(device,
queue,
sub_cmd,
PVR_EVENT_STATE_RESET_BY_DEVICE);
}
/**
 * \brief Process an event sub-command of wait type.
 *
 * This sets up barrier syncobjs to create a dependency from the event syncobjs
 * onto the next job submissions.
 *
 * The barriers are set up by taking into consideration each event's dst stage
 * mask so this is in line with vkCmdWaitEvents2().
 *
 * \param[in]     device  Device to create the syncobjs on.
 * \param[in,out] queue   Queue whose per-stage barrier syncobjs get updated.
 * \param[in]     sub_cmd Sub-command to process.
 */
static VkResult
pvr_process_event_cmd_wait(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event_wait *sub_cmd)
{
uint32_t dst_mask = 0;
VkResult result;
STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->count + 1);
if (!waits)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
for (uint32_t i = 0; i < sub_cmd->count; i++)
dst_mask |= sub_cmd->wait_at_stage_masks[i];
u_foreach_bit (stage, dst_mask) {
struct vk_sync_signal signal;
struct vk_sync *signal_sync;
uint32_t wait_count = 0;
for (uint32_t i = 0; i < sub_cmd->count; i++) {
         if (sub_cmd->wait_at_stage_masks[i] & BITFIELD_BIT(stage)) {
waits[wait_count++] = (struct vk_sync_wait){
.sync = sub_cmd->events[i]->sync,
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
}
if (!wait_count)
continue;
if (queue->next_job_wait_sync[stage]) {
waits[wait_count++] = (struct vk_sync_wait){
.sync = queue->next_job_wait_sync[stage],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
assert(wait_count <= (sub_cmd->count + 1));
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&signal_sync);
if (result != VK_SUCCESS)
goto err_free_waits;
signal = (struct vk_sync_signal){
.sync = signal_sync,
.stage_mask = ~(VkPipelineStageFlags2)0,
.signal_value = 0,
};
result = device->ws->ops->null_job_submit(device->ws,
waits,
wait_count,
&signal);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, signal.sync);
goto err_free_waits;
}
if (queue->next_job_wait_sync[stage])
vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);
queue->next_job_wait_sync[stage] = signal.sync;
}
STACK_ARRAY_FINISH(waits);
return VK_SUCCESS;
err_free_waits:
STACK_ARRAY_FINISH(waits);
return result;
}
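
/* Dispatch an event sub-command to its set/reset/wait/barrier handler. */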
static VkResult pvr_process_event_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_event *sub_cmd)
{
switch (sub_cmd->type) {
case PVR_EVENT_TYPE_SET:
return pvr_process_event_cmd_set(device, queue, &sub_cmd->set_reset);
case PVR_EVENT_TYPE_RESET:
return pvr_process_event_cmd_reset(device, queue, &sub_cmd->set_reset);
case PVR_EVENT_TYPE_WAIT:
return pvr_process_event_cmd_wait(device, queue, &sub_cmd->wait);
case PVR_EVENT_TYPE_BARRIER:
return pvr_process_event_cmd_barrier(device, queue, &sub_cmd->barrier);
default:
unreachable("Invalid event sub-command type.");
};
}
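
/* Walk a command buffer's sub-command list and submit each sub-command,
 * inserting the implicit barriers needed between occlusion query, transfer
 * and fragment work.
 */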
static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer)
{
VkResult result;
list_for_each_entry_safe (struct pvr_sub_cmd,
sub_cmd,
&cmd_buffer->sub_cmds,
link) {
switch (sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS: {
         /* If the fragment job uses occlusion queries then, to preserve data
          * integrity, it must wait for the occlusion query job to finish
          * first.
          */
if (sub_cmd->gfx.has_occlusion_query) {
struct pvr_sub_cmd_event_barrier barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
};
result = pvr_process_event_cmd_barrier(device, queue, &barrier);
if (result != VK_SUCCESS)
break;
}
if (sub_cmd->gfx.wait_on_previous_transfer) {
struct pvr_sub_cmd_event_barrier barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
};
result = pvr_process_event_cmd_barrier(device, queue, &barrier);
if (result != VK_SUCCESS)
break;
}
result =
pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
break;
}
case PVR_SUB_CMD_TYPE_COMPUTE:
result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
break;
case PVR_SUB_CMD_TYPE_TRANSFER: {
const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;
if (serialize_with_frag) {
struct pvr_sub_cmd_event_barrier barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
};
result = pvr_process_event_cmd_barrier(device, queue, &barrier);
if (result != VK_SUCCESS)
break;
}
result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);
if (serialize_with_frag) {
struct pvr_sub_cmd_event_barrier barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
};
if (result != VK_SUCCESS)
break;
result = pvr_process_event_cmd_barrier(device, queue, &barrier);
}
break;
}
case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
result =
pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
break;
case PVR_SUB_CMD_TYPE_EVENT:
result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
break;
default:
mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (result != VK_SUCCESS)
return result;
p_atomic_inc(&device->global_cmd_buffer_submit_count);
}
return VK_SUCCESS;
}
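
/* Wait for all per-job-type barrier and signal syncs left over from the
 * previous submission to complete, then destroy them so the new submission
 * starts with a clean slate.
 */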
static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
{
struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2];
uint32_t wait_count = 0;
VkResult result;
for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
if (queue->next_job_wait_sync[i]) {
waits[wait_count++] = (struct vk_sync_wait){
.sync = queue->next_job_wait_sync[i],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
if (queue->last_job_signal_sync[i]) {
waits[wait_count++] = (struct vk_sync_wait){
.sync = queue->last_job_signal_sync[i],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
}
result = vk_sync_wait_many(&queue->device->vk,
wait_count,
waits,
VK_SYNC_WAIT_COMPLETE,
UINT64_MAX);
if (result != VK_SUCCESS)
return vk_error(queue, result);
for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
if (queue->next_job_wait_sync[i]) {
vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
queue->next_job_wait_sync[i] = NULL;
}
if (queue->last_job_signal_sync[i]) {
vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
queue->last_job_signal_sync[i] = NULL;
}
}
return VK_SUCCESS;
}
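
/* Handle the submit's signal operations: for each signal, submit a null job
 * that waits on the last signal syncs of the relevant source job types and
 * then signals the user-provided sync.
 */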
static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
struct vk_sync_signal *signals,
uint32_t signal_count)
{
struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
struct pvr_device *device = queue->device;
VkResult result;
for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
struct vk_sync_signal *signal = &signals[signal_idx];
const enum pvr_pipeline_stage_bits signal_stage_src =
pvr_stage_mask_src(signal->stage_mask);
uint32_t wait_count = 0;
for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
         /* Occlusion query jobs are an exception since they are internal to
          * the driver, so user-provided stage masks will never include them
          * as a source stage; always wait on them.
          */
if (!(signal_stage_src & BITFIELD_BIT(i)) &&
i != PVR_JOB_TYPE_OCCLUSION_QUERY)
continue;
if (!queue->last_job_signal_sync[i])
continue;
signal_waits[wait_count++] = (struct vk_sync_wait){
.sync = queue->last_job_signal_sync[i],
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = 0,
};
}
result = device->ws->ops->null_job_submit(device->ws,
signal_waits,
wait_count,
signal);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
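
/* Fold the submit's wait operations into per-job-type barrier syncs: for each
 * job type, submit a null job that waits on the relevant user waits and
 * signals the next_job_wait_sync that subsequent jobs of that type will wait
 * on.
 */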
static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
struct vk_sync_wait *waits,
uint32_t wait_count)
{
struct pvr_device *device = queue->device;
VkResult result;
STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
if (!stage_waits)
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
struct vk_sync_signal next_job_wait_signal_sync;
uint32_t stage_wait_count = 0;
for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
if (!(pvr_stage_mask_dst(waits[wait_idx].stage_mask) &
BITFIELD_BIT(i))) {
continue;
}
stage_waits[stage_wait_count++] = (struct vk_sync_wait){
.sync = waits[wait_idx].sync,
.stage_mask = ~(VkPipelineStageFlags2)0,
.wait_value = waits[wait_idx].wait_value,
};
}
if (!stage_wait_count)
continue;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&queue->next_job_wait_sync[i]);
if (result != VK_SUCCESS)
goto err_free_waits;
next_job_wait_signal_sync = (struct vk_sync_signal){
.sync = queue->next_job_wait_sync[i],
.stage_mask = ~(VkPipelineStageFlags2)0,
.signal_value = 0,
};
result = device->ws->ops->null_job_submit(device->ws,
stage_waits,
stage_wait_count,
&next_job_wait_signal_sync);
if (result != VK_SUCCESS)
goto err_free_waits;
}
STACK_ARRAY_FINISH(stage_waits);
return VK_SUCCESS;
err_free_waits:
STACK_ARRAY_FINISH(stage_waits);
return result;
}
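
/* vk_queue::driver_submit hook: clear the previous submission's syncs, turn
 * the submit's waits into per-job-type barriers, process each command buffer
 * and finally hook up the submit's signal operations.
 */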
static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
struct vk_queue_submit *submit)
{
struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk);
struct pvr_device *device = driver_queue->device;
VkResult result;
result = pvr_clear_last_submits_syncs(driver_queue);
if (result != VK_SUCCESS)
return result;
if (submit->wait_count) {
result = pvr_process_queue_waits(driver_queue,
submit->waits,
submit->wait_count);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = 0U; i < submit->command_buffer_count; i++) {
result = pvr_process_cmd_buffer(
device,
driver_queue,
container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk));
if (result != VK_SUCCESS)
return result;
}
result = pvr_process_queue_signals(driver_queue,
submit->signals,
submit->signal_count);
if (result != VK_SUCCESS)
return result;
return VK_SUCCESS;
}