| /* |
| * Copyright © 2019 Raspberry Pi Ltd |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "v3dv_private.h" |
| #include "drm-uapi/v3d_drm.h" |
| |
| #include "broadcom/clif/clif_dump.h" |
| #include "util/libsync.h" |
| #include "util/os_time.h" |
| #include "util/perf/cpu_trace.h" |
| #include "vk_drm_syncobj.h" |
| |
| #include <errno.h> |
| #include <time.h> |
| |
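| /* Dumps the contents of a job's command lists in CLIF format to stderr. |
| * Only active when one of the CL/CLIF debug options is enabled. |
| */ |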
| static void |
| v3dv_clif_dump(struct v3dv_device *device, |
| struct v3dv_job *job, |
| struct drm_v3d_submit_cl *submit) |
| { |
| if (!(V3D_DBG(CL) || |
| V3D_DBG(CL_NO_BIN) || |
| V3D_DBG(CLIF))) |
| return; |
| |
| struct clif_dump *clif = clif_dump_init(&device->devinfo, |
| stderr, |
| V3D_DBG(CL) || |
| V3D_DBG(CL_NO_BIN), |
| V3D_DBG(CL_NO_BIN)); |
| |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (void *)entry->key; |
| char *name = ralloc_asprintf(NULL, "%s_0x%x", |
| bo->name, bo->offset); |
| |
| bool ok = v3dv_bo_map(device, bo, bo->size); |
| if (!ok) { |
| mesa_loge("failed to map BO for clif_dump.\n"); |
| ralloc_free(name); |
| goto free_clif; |
| } |
| clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map); |
| |
| ralloc_free(name); |
| } |
| |
| clif_dump(clif, submit); |
| |
| free_clif: |
| clif_dump_destroy(clif); |
| } |
| |
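| /* Waits for the last job submitted to every queue type to complete. If no |
| * job has been submitted yet in this batch, we also wait on the batch's wait |
| * semaphores directly, since nothing has waited on them transitively yet. |
| */ |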
| static VkResult |
| queue_wait_idle(struct v3dv_queue *queue, |
| struct v3dv_submit_sync_info *sync_info) |
| { |
| int ret = drmSyncobjWait(queue->device->pdevice->render_fd, |
| queue->last_job_syncs.syncs, 4, |
| INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, |
| NULL); |
| if (ret) |
| return vk_errorf(queue, VK_ERROR_DEVICE_LOST, "syncobj wait failed: %m"); |
| |
| bool first = true; |
| for (int i = 0; i < 4; i++) { |
| if (!queue->last_job_syncs.first[i]) |
| first = false; |
| } |
| |
| /* If we're not the first job, that means we're waiting on some |
| * per-queue-type syncobj which transitively waited on the semaphores |
| * so we can skip the semaphore wait. |
| */ |
| if (first) { |
| VkResult result = vk_sync_wait_many(&queue->device->vk, |
| sync_info->wait_count, |
| sync_info->waits, |
| VK_SYNC_WAIT_COMPLETE, |
| UINT64_MAX); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| for (int i = 0; i < 4; i++) |
| queue->last_job_syncs.first[i] = false; |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| multisync_free(struct v3dv_device *device, |
| struct drm_v3d_multi_sync *ms) |
| { |
| vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->out_syncs); |
| vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->in_syncs); |
| } |
| |
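| /* Builds the array of input syncobjs for a job submission: the batch's wait |
| * semaphores (only for the first job submitted to the given queue type), any |
| * extra waits provided by the caller, and the last-job syncobjs of the queue |
| * types this job needs to serialize against. |
| */ |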
| static struct drm_v3d_sem * |
| set_in_syncs(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| enum v3dv_queue_type queue_sync, |
| uint32_t *count, |
| struct vk_sync_wait *waits, |
| unsigned wait_count, |
| struct v3dv_submit_sync_info *sync_info) |
| { |
| struct v3dv_device *device = queue->device; |
| uint32_t n_syncs = 0; |
| |
| /* If this is the first job submitted to a given GPU queue in this cmd buf |
| * batch, it has to wait on wait semaphores (if any) before running. |
| */ |
| if (queue->last_job_syncs.first[queue_sync]) |
| n_syncs = sync_info->wait_count; |
| |
| /* If the serialize flag is set the job needs to be serialized in the |
| * corresponding queues. Note that we may implement transfer operations |
| * as either CL or TFU jobs. |
| * |
| * FIXME: maybe we could track more precisely if the source of a transfer |
| * barrier is a CL and/or a TFU job. |
| */ |
| bool sync_csd = job->serialize & V3DV_BARRIER_COMPUTE_BIT; |
| bool sync_tfu = job->serialize & V3DV_BARRIER_TRANSFER_BIT; |
| bool sync_cl = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT | |
| V3DV_BARRIER_TRANSFER_BIT); |
| bool sync_cpu = job->serialize & V3DV_BARRIER_CPU_BIT; |
| |
| *count = n_syncs; |
| if (sync_cl) |
| (*count)++; |
| if (sync_tfu) |
| (*count)++; |
| if (sync_csd) |
| (*count)++; |
| if (sync_cpu) |
| (*count)++; |
| |
| *count += wait_count; |
| |
| if (!*count) |
| return NULL; |
| |
| struct drm_v3d_sem *syncs = |
| vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem), |
| 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| |
| if (!syncs) |
| return NULL; |
| |
| for (int i = 0; i < n_syncs; i++) { |
| syncs[i].handle = |
| vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj; |
| } |
| |
| for (int i = 0; i < wait_count; i++) { |
| syncs[n_syncs++].handle = |
| vk_sync_as_drm_syncobj(waits[i].sync)->syncobj; |
| } |
| |
| if (sync_cl) |
| syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL]; |
| |
| if (sync_csd) |
| syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD]; |
| |
| if (sync_tfu) |
| syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU]; |
| |
| if (sync_cpu) |
| syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CPU]; |
| |
| assert(n_syncs == *count); |
| return syncs; |
| } |
| |
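| /* Builds the array of output syncobjs for a job submission: the batch's |
| * signal semaphores (if requested) plus the last-job syncobj for the given |
| * queue type, which we always signal to track the last job submitted to it. |
| */ |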
| static struct drm_v3d_sem * |
| set_out_syncs(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| enum v3dv_queue_type queue_sync, |
| uint32_t *count, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| struct v3dv_device *device = queue->device; |
| |
| uint32_t n_vk_syncs = signal_syncs ? sync_info->signal_count : 0; |
| |
| /* We always signal the syncobj from `queue->last_job_syncs` related to |
| * this v3dv_queue_type to track the last job submitted to this queue. |
| */ |
| (*count) = n_vk_syncs + 1; |
| |
| struct drm_v3d_sem *syncs = |
| vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem), |
| 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| |
| if (!syncs) |
| return NULL; |
| |
| if (n_vk_syncs) { |
| for (unsigned i = 0; i < n_vk_syncs; i++) { |
| syncs[i].handle = |
| vk_sync_as_drm_syncobj(sync_info->signals[i].sync)->syncobj; |
| } |
| } |
| |
| syncs[n_vk_syncs].handle = queue->last_job_syncs.syncs[queue_sync]; |
| |
| return syncs; |
| } |
| |
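| /* Fills a drm_v3d_extension header and chains it to the next extension, if any. */ |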
| static void |
| set_ext(struct drm_v3d_extension *ext, |
| struct drm_v3d_extension *next, |
| uint32_t id, |
| uintptr_t flags) |
| { |
| ext->next = (uintptr_t)(void *)next; |
| ext->id = id; |
| ext->flags = flags; |
| } |
| |
| /* This function sets the extension for multiple in/out syncobjs. When it is |
| * successful, it sets the extension id to DRM_V3D_EXT_ID_MULTI_SYNC. |
| * Otherwise, the extension id is 0, which means an out-of-memory error. |
| */ |
| static void |
| set_multisync(struct drm_v3d_multi_sync *ms, |
| struct v3dv_submit_sync_info *sync_info, |
| struct vk_sync_wait *waits, |
| unsigned wait_count, |
| struct drm_v3d_extension *next, |
| struct v3dv_device *device, |
| struct v3dv_job *job, |
| enum v3dv_queue_type in_queue_sync, |
| enum v3dv_queue_type out_queue_sync, |
| enum v3d_queue wait_stage, |
| bool signal_syncs) |
| { |
| struct v3dv_queue *queue = &device->queue; |
| uint32_t out_sync_count = 0, in_sync_count = 0; |
| struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL; |
| |
| in_syncs = set_in_syncs(queue, job, in_queue_sync, |
| &in_sync_count, waits, wait_count, sync_info); |
| if (!in_syncs && in_sync_count) |
| goto fail; |
| |
| out_syncs = set_out_syncs(queue, job, out_queue_sync, |
| &out_sync_count, sync_info, signal_syncs); |
| |
| assert(out_sync_count > 0); |
| |
| if (!out_syncs) |
| goto fail; |
| |
| set_ext(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0); |
| ms->wait_stage = wait_stage; |
| ms->out_sync_count = out_sync_count; |
| ms->out_syncs = (uintptr_t)(void *)out_syncs; |
| ms->in_sync_count = in_sync_count; |
| ms->in_syncs = (uintptr_t)(void *)in_syncs; |
| |
| return; |
| |
| fail: |
| if (in_syncs) |
| vk_free(&device->vk.alloc, in_syncs); |
| assert(!out_syncs); |
| |
| return; |
| } |
| |
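| /* Resets the queries in a timestamp or performance query pool, either by |
| * submitting a CPU-queue job when the kernel supports it, or by waiting for |
| * the relevant work to complete and resetting the queries in user-space. |
| */ |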
| static VkResult |
| handle_reset_query_cpu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset; |
| assert(info->pool); |
| |
| assert(info->pool->query_type != VK_QUERY_TYPE_OCCLUSION); |
| |
| if (device->pdevice->caps.cpu_queue) { |
| assert(info->first + info->count <= info->pool->query_count); |
| |
| struct drm_v3d_submit_cpu submit = {0}; |
| struct drm_v3d_multi_sync ms = {0}; |
| |
| uint32_t *syncs = (uint32_t *) malloc(sizeof(uint32_t) * info->count); |
| uintptr_t *kperfmon_ids = NULL; |
| |
| if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { |
| submit.bo_handle_count = 1; |
| submit.bo_handles = (uintptr_t)(void *)&info->pool->timestamp.bo->handle; |
| |
| struct drm_v3d_reset_timestamp_query reset = {0}; |
| |
| set_ext(&reset.base, NULL, DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, 0); |
| |
| reset.count = info->count; |
| reset.offset = info->pool->queries[info->first].timestamp.offset; |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| syncs[i] = vk_sync_as_drm_syncobj(query->timestamp.sync)->syncobj; |
| } |
| |
| reset.syncs = (uintptr_t)(void *)syncs; |
| |
| set_multisync(&ms, sync_info, NULL, 0, (void *)&reset, device, job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs); |
| if (!ms.base.id) { |
| free(syncs); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| } else { |
| assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); |
| struct drm_v3d_reset_performance_query reset = {0}; |
| |
| set_ext(&reset.base, NULL, DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, 0); |
| |
| struct vk_sync_wait waits[info->count]; |
| unsigned wait_count = 0; |
| for (int i = 0; i < info->count; i++) { |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| /* Only wait for a query if we've used it, otherwise we will be |
| * waiting forever for the fence to become signaled. |
| */ |
| if (query->maybe_available) { |
| waits[wait_count] = (struct vk_sync_wait){ |
| .sync = query->perf.last_job_sync |
| }; |
| wait_count++; |
| } |
| } |
| |
| reset.count = info->count; |
| reset.nperfmons = info->pool->perfmon.nperfmons; |
| |
| kperfmon_ids = (uintptr_t *) malloc(sizeof(uintptr_t) * info->count); |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| |
| syncs[i] = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj; |
| kperfmon_ids[i] = (uintptr_t)(void *)query->perf.kperfmon_ids; |
| } |
| |
| reset.syncs = (uintptr_t)(void *)syncs; |
| reset.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids; |
| |
| set_multisync(&ms, sync_info, waits, wait_count, (void *)&reset, device, job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs); |
| if (!ms.base.id) { |
| free(syncs); |
| free(kperfmon_ids); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| } |
| |
| submit.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit.extensions = (uintptr_t)(void *)&ms; |
| |
| /* From the Vulkan spec for vkCmdResetQueryPool: |
| * |
| * "This command defines an execution dependency between other query commands |
| * that reference the same query. |
| * ... |
| * The second synchronization scope includes all commands which reference the |
| * queries in queryPool indicated by firstQuery and queryCount that occur later |
| * in submission order." |
| * |
| * This means we should ensure that any timestamps after a reset don't execute |
| * before the reset. However, for timestamp queries in particular we don't have to do |
| * anything special because timestamp queries have to wait for all previously |
| * submitted work to complete before executing (which we accomplish by using |
| * V3DV_BARRIER_ALL on them) and that includes reset jobs submitted to the CPU queue. |
| */ |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CPU, &submit); |
| |
| free(syncs); |
| free(kperfmon_ids); |
| multisync_free(device, &ms); |
| |
| queue->last_job_syncs.first[V3DV_QUEUE_CPU] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CPU failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
| /* We are about to reset query counters in user-space so we need to make |
| * sure that the GPU is not using them. |
| */ |
| if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { |
| VkResult result = queue_wait_idle(queue, sync_info); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| v3dv_bo_wait(job->device, info->pool->timestamp.bo, OS_TIMEOUT_INFINITE); |
| } |
| |
| if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { |
| struct vk_sync_wait waits[info->count]; |
| unsigned wait_count = 0; |
| for (int i = 0; i < info->count; i++) { |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| /* Only wait for a query if we've used it, otherwise we will be |
| * waiting forever for the fence to become signaled. |
| */ |
| if (query->maybe_available) { |
| waits[wait_count] = (struct vk_sync_wait){ |
| .sync = query->perf.last_job_sync |
| }; |
| wait_count++; |
| } |
| } |
| |
| VkResult result = vk_sync_wait_many(&job->device->vk, wait_count, waits, |
| VK_SYNC_WAIT_COMPLETE, UINT64_MAX); |
| |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| v3dv_reset_query_pool_cpu(job->device, info->pool, info->first, info->count); |
| |
| return VK_SUCCESS; |
| } |
| |
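| /* Exports the last-job syncobjs of the CL and CSD queues as sync files and |
| * accumulates them into a single fd. |
| */ |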
| static VkResult |
| export_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int *fd) |
| { |
| int err; |
| static const enum v3dv_queue_type queues_to_sync[] = { |
| V3DV_QUEUE_CL, |
| V3DV_QUEUE_CSD, |
| }; |
| |
| for (uint32_t i = 0; i < ARRAY_SIZE(queues_to_sync); i++) { |
| enum v3dv_queue_type queue_type = queues_to_sync[i]; |
| int tmp_fd = -1; |
| |
| err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd, |
| queue->last_job_syncs.syncs[queue_type], |
| &tmp_fd); |
| |
| if (err) { |
| close(*fd); |
| return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN, |
| "sync file export failed: %m"); |
| } |
| |
| err = sync_accumulate("v3dv", fd, tmp_fd); |
| |
| if (err) { |
| close(tmp_fd); |
| close(*fd); |
| return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN, |
| "failed to accumulate sync files: %m"); |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
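| /* Marks the affected queries as potentially available. For performance |
| * queries, it also imports the accumulated CL/CSD sync file into each |
| * query's last_job_sync so later waits cover the work that produced the |
| * results. |
| */ |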
| static VkResult |
| handle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx) |
| { |
| MESA_TRACE_FUNC(); |
| VkResult result = VK_SUCCESS; |
| |
| mtx_lock(&job->device->query_mutex); |
| |
| struct v3dv_end_query_info *info = &job->cpu.query_end; |
| struct v3dv_queue *queue = &job->device->queue; |
| |
| int err = 0; |
| int fd = -1; |
| |
| assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); |
| |
| if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { |
| result = export_perfmon_last_job_sync(queue, job, &fd); |
| |
| if (result != VK_SUCCESS) |
| goto fail; |
| |
| assert(fd >= 0); |
| } |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->query + i < info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->query + i]; |
| |
| if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { |
| uint32_t syncobj = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj; |
| err = drmSyncobjImportSyncFile(job->device->pdevice->render_fd, |
| syncobj, fd); |
| |
| if (err) { |
| result = vk_errorf(queue, VK_ERROR_UNKNOWN, |
| "sync file import failed: %m"); |
| goto fail; |
| } |
| } |
| |
| query->maybe_available = true; |
| } |
| |
| fail: |
| if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) |
| close(fd); |
| |
| cnd_broadcast(&job->device->query_ended); |
| mtx_unlock(&job->device->query_mutex); |
| |
| return result; |
| } |
| |
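| /* Copies timestamp or performance query results into the destination buffer, |
| * either through a CPU-queue job when the kernel supports it, or by mapping |
| * the destination BO and copying the results in user-space. |
| */ |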
| static VkResult |
| handle_copy_query_results_cpu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| struct v3dv_copy_query_results_cpu_job_info *info = |
| &job->cpu.query_copy_results; |
| |
| assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR || |
| info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP); |
| |
| assert(info->dst && info->dst->mem && info->dst->mem->bo); |
| struct v3dv_bo *bo = info->dst->mem->bo; |
| |
| if (device->pdevice->caps.cpu_queue) { |
| struct drm_v3d_submit_cpu submit = {0}; |
| struct drm_v3d_multi_sync ms = {0}; |
| |
| uint32_t *offsets = (uint32_t *) malloc(sizeof(uint32_t) * info->count); |
| uint32_t *syncs = (uint32_t *) malloc(sizeof(uint32_t) * info->count); |
| uint32_t *bo_handles = NULL; |
| uintptr_t *kperfmon_ids = NULL; |
| |
| if (info->pool->query_type == VK_QUERY_TYPE_TIMESTAMP) { |
| submit.bo_handle_count = 2; |
| |
| bo_handles = (uint32_t *) |
| malloc(sizeof(uint32_t) * submit.bo_handle_count); |
| |
| bo_handles[0] = bo->handle; |
| bo_handles[1] = info->pool->timestamp.bo->handle; |
| submit.bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| struct drm_v3d_copy_timestamp_query copy = {0}; |
| |
| set_ext(©.base, NULL, DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, 0); |
| |
| copy.do_64bit = info->flags & VK_QUERY_RESULT_64_BIT; |
| copy.do_partial = info->flags & VK_QUERY_RESULT_PARTIAL_BIT; |
| copy.availability_bit = info->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; |
| copy.offset = info->offset + info->dst->mem_offset; |
| copy.stride = info->stride; |
| copy.count = info->count; |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->first < info->pool->query_count); |
| assert(info->first + info->count <= info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| |
| offsets[i] = query->timestamp.offset; |
| syncs[i] = vk_sync_as_drm_syncobj(query->timestamp.sync)->syncobj; |
| } |
| |
| copy.offsets = (uintptr_t)(void *)offsets; |
| copy.syncs = (uintptr_t)(void *)syncs; |
| |
| set_multisync(&ms, sync_info, NULL, 0, (void *)©, device, job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs); |
| if (!ms.base.id) { |
| free(bo_handles); |
| free(offsets); |
| free(syncs); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| } else { |
| assert(info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR); |
| |
| submit.bo_handle_count = 1; |
| submit.bo_handles = (uintptr_t)(void *)&bo->handle; |
| |
| struct drm_v3d_copy_performance_query copy = {0}; |
| |
| set_ext(©.base, NULL, DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, 0); |
| |
| /* If the queryPool was created with VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, |
| * results for each query are written as an array of the type indicated |
| * by VkPerformanceCounterKHR::storage for the counter being queried. |
| * For v3dv, VkPerformanceCounterKHR::storage is |
| * VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR. |
| */ |
| copy.do_64bit = true; |
| copy.do_partial = info->flags & VK_QUERY_RESULT_PARTIAL_BIT; |
| copy.availability_bit = info->flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; |
| copy.offset = info->offset + info->dst->mem_offset; |
| copy.stride = info->stride; |
| copy.count = info->count; |
| copy.nperfmons = info->pool->perfmon.nperfmons; |
| copy.ncounters = info->pool->perfmon.ncounters; |
| |
| kperfmon_ids = (uintptr_t *) malloc(sizeof(uintptr_t) * info->count); |
| |
| struct vk_sync_wait waits[info->count]; |
| unsigned wait_count = 0; |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->first < info->pool->query_count); |
| assert(info->first + info->count <= info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->first + i]; |
| |
| syncs[i] = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj; |
| kperfmon_ids[i] = (uintptr_t)(void *)query->perf.kperfmon_ids; |
| |
| if (info->flags & VK_QUERY_RESULT_WAIT_BIT) { |
| waits[wait_count] = (struct vk_sync_wait){ |
| .sync = query->perf.last_job_sync |
| }; |
| wait_count++; |
| } |
| } |
| |
| copy.syncs = (uintptr_t)(void *)syncs; |
| copy.kperfmon_ids = (uintptr_t)(void *)kperfmon_ids; |
| |
| set_multisync(&ms, sync_info, waits, wait_count, (void *)©, device, job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs); |
| if (!ms.base.id) { |
| free(kperfmon_ids); |
| free(bo_handles); |
| free(offsets); |
| free(syncs); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| } |
| |
| submit.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit.extensions = (uintptr_t)(void *)&ms; |
| |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CPU, &submit); |
| |
| free(kperfmon_ids); |
| free(bo_handles); |
| free(offsets); |
| free(syncs); |
| multisync_free(device, &ms); |
| |
| queue->last_job_syncs.first[V3DV_QUEUE_CPU] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CPU failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
| /* Map the entire dst buffer for the CPU copy if needed */ |
| assert(!bo->map || bo->map_size == bo->size); |
| if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| uint8_t *offset = ((uint8_t *) bo->map) + |
| info->offset + info->dst->mem_offset; |
| v3dv_get_query_pool_results_cpu(job->device, |
| info->pool, |
| info->first, |
| info->count, |
| offset, |
| info->stride, |
| info->flags); |
| |
| return VK_SUCCESS; |
| } |
| |
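| /* Writes timestamp query results, either through a serialized CPU-queue job |
| * when the kernel supports it, or by waiting for the queue to go idle and |
| * writing the timestamp from user-space with clock_gettime(). |
| */ |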
| static VkResult |
| handle_timestamp_query_cpu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| |
| assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY); |
| struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp; |
| |
| if (!device->pdevice->caps.cpu_queue) { |
| /* Wait for completion of all work queued before the timestamp query */ |
| VkResult result = queue_wait_idle(queue, sync_info); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| mtx_lock(&job->device->query_mutex); |
| |
| /* Compute timestamp */ |
| struct timespec t; |
| clock_gettime(CLOCK_MONOTONIC, &t); |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->query + i < info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->query + i]; |
| query->maybe_available = true; |
| |
| /* Value */ |
| uint8_t *value_addr = |
| ((uint8_t *) info->pool->timestamp.bo->map) + query->timestamp.offset; |
| *((uint64_t*)value_addr) = (i == 0) ? t.tv_sec * 1000000000ull + t.tv_nsec : 0ull; |
| |
| /* Availability */ |
| result = vk_sync_signal(&job->device->vk, query->timestamp.sync, 0); |
| } |
| |
| cnd_broadcast(&job->device->query_ended); |
| mtx_unlock(&job->device->query_mutex); |
| |
| return result; |
| } |
| |
| struct drm_v3d_submit_cpu submit = {0}; |
| |
| submit.bo_handle_count = 1; |
| submit.bo_handles = (uintptr_t)(void *)&info->pool->timestamp.bo->handle; |
| |
| struct drm_v3d_timestamp_query timestamp = {0}; |
| |
| set_ext(×tamp.base, NULL, DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY, 0); |
| |
| timestamp.count = info->count; |
| |
| uint32_t *offsets = |
| (uint32_t *) malloc(sizeof(uint32_t) * info->count); |
| uint32_t *syncs = |
| (uint32_t *) malloc(sizeof(uint32_t) * info->count); |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->query + i < info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->query + i]; |
| query->maybe_available = true; |
| |
| offsets[i] = query->timestamp.offset; |
| syncs[i] = vk_sync_as_drm_syncobj(query->timestamp.sync)->syncobj; |
| } |
| |
| timestamp.offsets = (uintptr_t)(void *)offsets; |
| timestamp.syncs = (uintptr_t)(void *)syncs; |
| |
| struct drm_v3d_multi_sync ms = {0}; |
| |
| /* The CPU job should be serialized so it only executes after all previously |
| * submitted work has completed |
| */ |
| job->serialize = V3DV_BARRIER_ALL; |
| |
| set_multisync(&ms, sync_info, NULL, 0, (void *)×tamp, device, job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CPU, V3D_CPU, signal_syncs); |
| if (!ms.base.id) { |
| free(offsets); |
| free(syncs); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| submit.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit.extensions = (uintptr_t)(void *)&ms; |
| |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CPU, &submit); |
| |
| free(offsets); |
| free(syncs); |
| multisync_free(device, &ms); |
| |
| queue->last_job_syncs.first[V3DV_QUEUE_CPU] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CPU failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
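| /* Handles an indirect compute dispatch: the workgroup counts are read from |
| * the indirect buffer and the CSD job is rewritten accordingly, either |
| * through a CPU-queue job when the kernel supports it, or in user-space |
| * after waiting for the GPU to finish writing the buffer. |
| */ |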
| static VkResult |
| handle_csd_indirect_cpu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| |
| assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT); |
| struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect; |
| assert(info->csd_job); |
| |
| assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); |
| struct v3dv_bo *bo = info->buffer->mem->bo; |
| |
| if (!device->pdevice->caps.cpu_queue) { |
| /* Make sure the GPU is no longer using the indirect buffer */ |
| v3dv_bo_wait(queue->device, bo, OS_TIMEOUT_INFINITE); |
| |
| /* Map the indirect buffer and read the dispatch parameters */ |
| if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| assert(bo->map); |
| |
| const uint32_t offset = info->buffer->mem_offset + info->offset; |
| const uint32_t *group_counts = (uint32_t *) (bo->map + offset); |
| if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0) |
| return VK_SUCCESS; |
| |
| if (memcmp(group_counts, info->csd_job->csd.wg_count, |
| sizeof(info->csd_job->csd.wg_count)) != 0) { |
| v3dv_cmd_buffer_rewrite_indirect_csd_job(queue->device, info, group_counts); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| struct v3dv_job *csd_job = info->csd_job; |
| |
| struct drm_v3d_submit_cpu submit = {0}; |
| |
| submit.bo_handle_count = 1; |
| submit.bo_handles = (uintptr_t)(void *)&bo->handle; |
| |
| csd_job->csd.submit.bo_handle_count = csd_job->bo_count; |
| uint32_t *bo_handles = (uint32_t *) malloc(sizeof(uint32_t) * csd_job->bo_count); |
| uint32_t bo_idx = 0; |
| set_foreach (csd_job->bos, entry) { |
| struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; |
| bo_handles[bo_idx++] = bo->handle; |
| } |
| csd_job->csd.submit.bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| struct drm_v3d_indirect_csd indirect = {0}; |
| |
| set_ext(&indirect.base, NULL, DRM_V3D_EXT_ID_CPU_INDIRECT_CSD, 0); |
| |
| indirect.submit = csd_job->csd.submit; |
| indirect.offset = info->buffer->mem_offset + info->offset; |
| indirect.wg_size = info->wg_size; |
| |
| for (int i = 0; i < 3; i++) { |
| if (info->wg_uniform_offsets[i]) { |
| assert(info->wg_uniform_offsets[i] >= (uint32_t *) csd_job->indirect.base); |
| indirect.wg_uniform_offsets[i] = info->wg_uniform_offsets[i] - (uint32_t *) csd_job->indirect.base; |
| } else { |
| indirect.wg_uniform_offsets[i] = 0xffffffff; /* No rewrite */ |
| } |
| } |
| |
| indirect.indirect = csd_job->indirect.bo->handle; |
| |
| struct drm_v3d_multi_sync ms = {0}; |
| |
| /* We need to configure the semaphores of this job with those of the indirect |
| * CSD job, as the CPU job must obey the CSD job's synchronization |
| * demands, such as barriers. |
| */ |
| set_multisync(&ms, sync_info, NULL, 0, (void *)&indirect, device, csd_job, |
| V3DV_QUEUE_CPU, V3DV_QUEUE_CSD, V3D_CPU, signal_syncs); |
| if (!ms.base.id) |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| submit.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit.extensions = (uintptr_t)(void *)&ms; |
| |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CPU, &submit); |
| |
| free(bo_handles); |
| multisync_free(device, &ms); |
| |
| queue->last_job_syncs.first[V3DV_QUEUE_CPU] = false; |
| queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CPU failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
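| /* Submits a CL (binning + render) job: sets up the BCL/RCL ranges and tile |
| * state BOs, builds the BO handle list, decides whether a binning or render |
| * sync is required and submits with the multisync extension. |
| */ |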
| static VkResult |
| handle_cl_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| uint32_t counter_pass_idx, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| |
| struct drm_v3d_submit_cl submit = { 0 }; |
| |
| /* Sanity check: we should only flag a bcl sync on a job that needs to be |
| * serialized. |
| */ |
| assert(job->serialize || !job->needs_bcl_sync); |
| |
| /* We expect to have just one RCL per job, which should fit in just one BO. |
| * Our BCL, however, could chain multiple BOs together. |
| */ |
| assert(list_length(&job->rcl.bo_list) == 1); |
| assert(list_length(&job->bcl.bo_list) >= 1); |
| struct v3dv_bo *bcl_first_bo = |
| list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link); |
| submit.bcl_start = bcl_first_bo->offset; |
| submit.bcl_end = job->suspending ? job->suspended_bcl_end : |
| job->bcl.bo->offset + v3dv_cl_offset(&job->bcl); |
| submit.rcl_start = job->rcl.bo->offset; |
| submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl); |
| |
| submit.qma = job->tile_alloc->offset; |
| submit.qms = job->tile_alloc->size; |
| submit.qts = job->tile_state->offset; |
| |
| submit.flags = 0; |
| if (job->tmu_dirty_rcl) |
| submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE; |
| |
| /* If the job uses VK_KHR_buffer_device_address we need to ensure all |
| * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
| * are included. |
| */ |
| if (job->uses_buffer_device_address) { |
| util_dynarray_foreach(&queue->device->device_address_bo_list, |
| struct v3dv_bo *, bo) { |
| v3dv_job_add_bo(job, *bo); |
| } |
| } |
| |
| submit.bo_handle_count = job->bo_count; |
| uint32_t *bo_handles = |
| (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count); |
| uint32_t bo_idx = 0; |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; |
| bo_handles[bo_idx++] = bo->handle; |
| } |
| assert(bo_idx == submit.bo_handle_count); |
| submit.bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| submit.perfmon_id = job->perf ? |
| job->perf->kperfmon_ids[counter_pass_idx] : 0; |
| const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id; |
| queue->last_perfmon_id = submit.perfmon_id; |
| |
| /* We need a binning sync if we are the first CL job waiting on a semaphore |
| * with a wait stage that involves the geometry pipeline, or if the job |
| * comes after a pipeline barrier that involves geometry stages |
| * (needs_bcl_sync) or when performance queries are in use. |
| * |
| * We need a render sync if the job doesn't need a binning sync but has |
| * still been flagged for serialization. It should be noted that RCL jobs |
| * don't start until the previous RCL job has finished so we don't really |
| * need to add a fence for those. However, we might need to wait on a CSD or |
| * TFU job, which are not automatically serialized with CL jobs. |
| */ |
| bool needs_bcl_sync = job->needs_bcl_sync || needs_perf_sync; |
| if (queue->last_job_syncs.first[V3DV_QUEUE_CL]) { |
| for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) { |
| needs_bcl_sync = sync_info->waits[i].stage_mask & |
| (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | |
| VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | |
| VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | |
| VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT | |
| VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | |
| VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | |
| VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT); |
| } |
| } |
| |
| bool needs_rcl_sync = job->serialize && !needs_bcl_sync; |
| |
| /* Replace single semaphore settings whenever our kernel-driver supports |
| * multiple semaphores extension. |
| */ |
| struct drm_v3d_multi_sync ms = { 0 }; |
| enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN; |
| set_multisync(&ms, sync_info, NULL, 0, NULL, device, job, |
| V3DV_QUEUE_CL, V3DV_QUEUE_CL, wait_stage, signal_syncs); |
| if (!ms.base.id) { |
| free(bo_handles); |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| submit.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit.extensions = (uintptr_t)(void *)&ms; |
| |
| /* We are using multisync so disable legacy single-sync interface */ |
| submit.in_sync_rcl = 0; |
| submit.in_sync_bcl = 0; |
| submit.out_sync = 0; |
| |
| v3dv_clif_dump(device, job, &submit); |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CL, &submit); |
| |
| static bool warned = false; |
| if (ret && !warned) { |
| mesa_loge("Draw call returned %s. Expect corruption.\n", |
| strerror(errno)); |
| warned = true; |
| } |
| |
| free(bo_handles); |
| multisync_free(device, &ms); |
| |
| queue->last_job_syncs.first[V3DV_QUEUE_CL] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CL failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
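| /* Submits a TFU (texture formatting unit) job with the multisync extension. */ |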
| static VkResult |
| handle_tfu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| assert(!V3D_DBG(DISABLE_TFU)); |
| |
| struct v3dv_device *device = queue->device; |
| |
| /* Replace single semaphore settings whenever our kernel-driver supports |
| * multiple semaphore extension. |
| */ |
| struct drm_v3d_multi_sync ms = { 0 }; |
| set_multisync(&ms, sync_info, NULL, 0, NULL, device, job, |
| V3DV_QUEUE_TFU, V3DV_QUEUE_TFU, V3D_TFU, signal_syncs); |
| if (!ms.base.id) |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| job->tfu.flags |= DRM_V3D_SUBMIT_EXTENSION; |
| job->tfu.extensions = (uintptr_t)(void *)&ms; |
| |
| /* We are using multisync so disable legacy single-sync interface */ |
| job->tfu.in_sync = 0; |
| job->tfu.out_sync = 0; |
| |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu); |
| |
| multisync_free(device, &ms); |
| queue->last_job_syncs.first[V3DV_QUEUE_TFU] = false; |
| |
| if (ret != 0) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_TFU failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
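| /* Submits a CSD (compute shader dispatch) job: builds the BO handle list and |
| * submits with the multisync extension and the perfmon for this counter pass. |
| */ |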
| static VkResult |
| handle_csd_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| uint32_t counter_pass_idx, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_device *device = queue->device; |
| |
| struct drm_v3d_submit_csd *submit = &job->csd.submit; |
| |
| /* If the job uses VK_KHR_buffer_device_address we need to ensure all |
| * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
| * are included. |
| */ |
| if (job->uses_buffer_device_address) { |
| util_dynarray_foreach(&queue->device->device_address_bo_list, |
| struct v3dv_bo *, bo) { |
| v3dv_job_add_bo(job, *bo); |
| } |
| } |
| |
| submit->bo_handle_count = job->bo_count; |
| uint32_t *bo_handles = |
| (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2)); |
| uint32_t bo_idx = 0; |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; |
| bo_handles[bo_idx++] = bo->handle; |
| } |
| assert(bo_idx == submit->bo_handle_count); |
| submit->bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| /* Replace single semaphore settings whenever our kernel-driver supports |
| * multiple semaphore extension. |
| */ |
| struct drm_v3d_multi_sync ms = { 0 }; |
| set_multisync(&ms, sync_info, NULL, 0, NULL, device, job, |
| V3DV_QUEUE_CSD, V3DV_QUEUE_CSD, V3D_CSD, signal_syncs); |
| if (!ms.base.id) |
| return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| submit->flags |= DRM_V3D_SUBMIT_EXTENSION; |
| submit->extensions = (uintptr_t)(void *)&ms; |
| |
| /* We are using multisync so disable legacy single-sync interface */ |
| submit->in_sync = 0; |
| submit->out_sync = 0; |
| |
| submit->perfmon_id = job->perf ? |
| job->perf->kperfmon_ids[counter_pass_idx] : 0; |
| queue->last_perfmon_id = submit->perfmon_id; |
| |
| int ret = v3d_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CSD, submit); |
| |
| static bool warned = false; |
| if (ret && !warned) { |
| mesa_loge("Compute dispatch returned %s. Expect corruption.\n", |
| strerror(errno)); |
| warned = true; |
| } |
| |
| free(bo_handles); |
| |
| multisync_free(device, &ms); |
| queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false; |
| |
| if (ret) |
| return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CSD failed: %m"); |
| |
| return VK_SUCCESS; |
| } |
| |
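| /* Applies pending barrier state to a job. For CL jobs that are serialized by |
| * the barrier and involve BCL buffer or image access, this also flags a |
| * binning sync and consumes those access flags. |
| */ |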
| static void |
| queue_apply_barrier_state(struct v3dv_job *job, |
| struct v3dv_barrier_state *barrier) |
| { |
| if (!v3dv_job_apply_barrier_state(job, barrier)) |
| return; |
| |
| if (job->type != V3DV_JOB_TYPE_GPU_CL) |
| return; |
| |
| if (job->serialize && |
| (barrier->bcl_buffer_access || barrier->bcl_image_access)) { |
| job->needs_bcl_sync = true; |
| barrier->bcl_buffer_access = barrier->bcl_image_access = 0; |
| } |
| } |
| |
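| /* Applies any pending barrier state to the job, forces full serialization |
| * when the sync debug option is set, and dispatches the job to the handler |
| * for its type. |
| */ |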
| static VkResult |
| queue_handle_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| uint32_t counter_pass_idx, |
| struct v3dv_barrier_state *barrier, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| if (barrier) |
| queue_apply_barrier_state(job, barrier); |
| |
| if (unlikely(V3D_DBG(SYNC))) { |
| job->serialize = V3DV_BARRIER_ALL; |
| job->needs_bcl_sync = job->type == V3DV_JOB_TYPE_GPU_CL; |
| } |
| |
| switch (job->type) { |
| case V3DV_JOB_TYPE_GPU_CL: |
| return handle_cl_job(queue, job, counter_pass_idx, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_GPU_TFU: |
| return handle_tfu_job(queue, job, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_GPU_CSD: |
| return handle_csd_job(queue, job, counter_pass_idx, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_CPU_RESET_QUERIES: |
| return handle_reset_query_cpu_job(queue, job, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_CPU_END_QUERY: |
| return handle_end_query_cpu_job(job, counter_pass_idx); |
| case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS: |
| return handle_copy_query_results_cpu_job(queue, job, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: |
| return handle_csd_indirect_cpu_job(queue, job, sync_info, signal_syncs); |
| case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY: |
| return handle_timestamp_query_cpu_job(queue, job, sync_info, signal_syncs); |
| default: |
| unreachable("Unhandled job type"); |
| } |
| } |
| |
| static VkResult |
| queue_create_noop_job(struct v3dv_queue *queue) |
| { |
| struct v3dv_device *device = queue->device; |
| queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!queue->noop_job) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1); |
| |
| v3d_X((&device->devinfo), job_emit_noop)(queue->noop_job); |
| |
| /* We use no-op jobs to signal semaphores/fences. These jobs need to be |
| * serialized across all hw queues to comply with Vulkan's signal operation |
| * order requirements, which basically require that signal operations occur |
| * in submission order. |
| */ |
| queue->noop_job->serialize = V3DV_BARRIER_ALL; |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| queue_submit_noop_job(struct v3dv_queue *queue, |
| uint32_t counter_pass_idx, |
| struct v3dv_submit_sync_info *sync_info, |
| bool signal_syncs) |
| { |
| if (!queue->noop_job) { |
| VkResult result = queue_create_noop_job(queue); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| assert(queue->noop_job); |
| return queue_handle_job(queue, queue->noop_job, counter_pass_idx, NULL, |
| sync_info, signal_syncs); |
| } |
| |
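| /* Queue submission entry point: walks all jobs in the submitted command |
| * buffers, folding chains of suspending/resuming jobs into a single |
| * submission and carrying barrier state across command buffers, and ends |
| * with a no-op job serialized across all queues when there are signal |
| * semaphores, to honor Vulkan's signal operation ordering. |
| */ |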
| VkResult |
| v3dv_queue_driver_submit(struct vk_queue *vk_queue, |
| struct vk_queue_submit *submit) |
| { |
| MESA_TRACE_FUNC(); |
| struct v3dv_queue *queue = container_of(vk_queue, struct v3dv_queue, vk); |
| VkResult result; |
| |
| struct v3dv_submit_sync_info sync_info = { |
| .wait_count = submit->wait_count, |
| .waits = submit->waits, |
| .signal_count = submit->signal_count, |
| .signals = submit->signals, |
| }; |
| |
| for (int i = 0; i < V3DV_QUEUE_COUNT; i++) |
| queue->last_job_syncs.first[i] = true; |
| |
| struct v3dv_barrier_state pending_barrier = { 0 }; |
| struct v3dv_job *first_suspend_job = NULL; |
| struct v3dv_job *current_suspend_job = NULL; |
| for (uint32_t i = 0; i < submit->command_buffer_count; i++) { |
| struct v3dv_cmd_buffer *cmd_buffer = |
| container_of(submit->command_buffers[i], struct v3dv_cmd_buffer, vk); |
| list_for_each_entry_safe(struct v3dv_job, job, |
| &cmd_buffer->jobs, list_link) { |
| if (job->suspending) { |
| job = v3d_X((&job->device->devinfo), |
| cmd_buffer_prepare_suspend_job_for_submit)(job); |
| if (!job) |
| return VK_ERROR_OUT_OF_DEVICE_MEMORY; |
| } |
| |
| if (job->suspending && !job->resuming) { |
| assert(!first_suspend_job); |
| assert(!current_suspend_job); |
| first_suspend_job = job; |
| } |
| |
| if (job->resuming) { |
| assert(first_suspend_job); |
| assert(current_suspend_job); |
| v3d_X((&job->device->devinfo), job_patch_resume_address)(first_suspend_job, |
| current_suspend_job, |
| job); |
| current_suspend_job = NULL; |
| } |
| |
| if (job->suspending) { |
| current_suspend_job = job; |
| } else { |
| assert(!current_suspend_job); |
| struct v3dv_job *submit_job = first_suspend_job ? |
| first_suspend_job : job; |
| result = |
| queue_handle_job(queue, submit_job, submit->perf_pass_index, |
| &pending_barrier, &sync_info, false); |
| |
| if (result != VK_SUCCESS) |
| return result; |
| |
| first_suspend_job = NULL; |
| } |
| } |
| |
| /* If the command buffer ends with a barrier, save the pending barrier |
| * state so we can apply it on the next command buffer. |
| */ |
| v3dv_merge_barrier_state(&pending_barrier, &cmd_buffer->state.barrier); |
| } |
| |
| assert(!first_suspend_job); |
| assert(!current_suspend_job); |
| |
| /* Handle signaling now */ |
| if (submit->signal_count > 0) { |
| /* Finish by submitting a no-op job that synchronizes across all queues. |
| * This will ensure that the signal semaphores don't get triggered until |
| * all work on any queue completes. See Vulkan's signal operation order |
| * requirements. |
| */ |
| return queue_submit_noop_job(queue, submit->perf_pass_index, |
| &sync_info, true); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_QueueBindSparse(VkQueue _queue, |
| uint32_t bindInfoCount, |
| const VkBindSparseInfo *pBindInfo, |
| VkFence fence) |
| { |
| V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); |
| return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |