| /* |
| * Copyright © 2019 Raspberry Pi |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "v3dv_private.h" |
| #include "drm-uapi/v3d_drm.h" |
| |
| #include "broadcom/clif/clif_dump.h" |
| |
| #include <errno.h> |
| #include <time.h> |
| |
| static void |
| v3dv_clif_dump(struct v3dv_device *device, |
| struct v3dv_job *job, |
| struct drm_v3d_submit_cl *submit) |
| { |
| if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL | |
| V3D_DEBUG_CL_NO_BIN | |
| V3D_DEBUG_CLIF)))) |
| return; |
| |
| struct clif_dump *clif = clif_dump_init(&device->devinfo, |
| stderr, |
| V3D_DEBUG & (V3D_DEBUG_CL | |
| V3D_DEBUG_CL_NO_BIN), |
| V3D_DEBUG & V3D_DEBUG_CL_NO_BIN); |
| |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (void *)entry->key; |
| char *name = ralloc_asprintf(NULL, "%s_0x%x", |
| bo->name, bo->offset); |
| |
| bool ok = v3dv_bo_map(device, bo, bo->size); |
| if (!ok) { |
| fprintf(stderr, "failed to map BO for clif_dump.\n"); |
| ralloc_free(name); |
| goto free_clif; |
| } |
| clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map); |
| |
| ralloc_free(name); |
| } |
| |
| clif_dump(clif, submit); |
| |
| free_clif: |
| clif_dump_destroy(clif); |
| } |
| |
| static uint64_t |
| gettime_ns() |
| { |
| struct timespec current; |
| clock_gettime(CLOCK_MONOTONIC, ¤t); |
| return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec; |
| } |
| |
| static uint64_t |
| get_absolute_timeout(uint64_t timeout) |
| { |
| uint64_t current_time = gettime_ns(); |
| uint64_t max_timeout = (uint64_t) INT64_MAX - current_time; |
| |
| timeout = MIN2(max_timeout, timeout); |
| |
| return (current_time + timeout); |
| } |
| |
| static VkResult |
| queue_submit_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait, |
| pthread_t *wait_thread); |
| |
| /* Waits for active CPU wait threads spawned before the current thread to |
| * complete and submit all their GPU jobs. |
| */ |
| static void |
| cpu_queue_wait_idle(struct v3dv_queue *queue) |
| { |
| const pthread_t this_thread = pthread_self(); |
| |
| retry: |
| mtx_lock(&queue->mutex); |
| list_for_each_entry(struct v3dv_queue_submit_wait_info, info, |
| &queue->submit_wait_list, list_link) { |
| for (uint32_t i = 0; i < info->wait_thread_count; i++) { |
| if (info->wait_threads[i].finished) |
| continue; |
| |
| /* Because we are testing this against the list of spawned threads |
| * it will never match for the main thread, so when we call this from |
| * the main thread we are effectively waiting for all active threads |
| * to complete, and otherwise we are only waiting for work submitted |
| * before the wait thread that called this (a wait thread should never |
| * be waiting for work submitted after it). |
| */ |
| if (info->wait_threads[i].thread == this_thread) |
| goto done; |
| |
| /* Wait and try again */ |
| mtx_unlock(&queue->mutex); |
| usleep(500); /* 0.5 ms */ |
| goto retry; |
| } |
| } |
| |
| done: |
| mtx_unlock(&queue->mutex); |
| } |
| |
| static VkResult |
| gpu_queue_wait_idle(struct v3dv_queue *queue) |
| { |
| struct v3dv_device *device = queue->device; |
| |
| mtx_lock(&device->mutex); |
| uint32_t last_job_sync = device->last_job_sync; |
| mtx_unlock(&device->mutex); |
| |
| int ret = drmSyncobjWait(device->pdevice->render_fd, |
| &last_job_sync, 1, INT64_MAX, 0, NULL); |
| if (ret) |
| return VK_ERROR_DEVICE_LOST; |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_QueueWaitIdle(VkQueue _queue) |
| { |
| V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); |
| |
| /* Check that we don't have any wait threads running in the CPU first, |
| * as these can spawn new GPU jobs. |
| */ |
| cpu_queue_wait_idle(queue); |
| |
| /* Check we don't have any GPU jobs running */ |
| return gpu_queue_wait_idle(queue); |
| } |
| |
| static VkResult |
| handle_reset_query_cpu_job(struct v3dv_job *job) |
| { |
| struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset; |
| assert(info->pool); |
| |
| /* We are about to reset query counters so we need to make sure that |
| * The GPU is not using them. The exception is timestamp queries, since |
| * we handle those in the CPU. |
| * |
| * FIXME: we could avoid blocking the main thread for this if we use |
| * submission thread. |
| */ |
| if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) |
| v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE); |
| |
| for (uint32_t i = info->first; i < info->first + info->count; i++) { |
| assert(i < info->pool->query_count); |
| struct v3dv_query *q = &info->pool->queries[i]; |
| q->maybe_available = false; |
| switch (info->pool->query_type) { |
| case VK_QUERY_TYPE_OCCLUSION: { |
| const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset; |
| uint32_t *counter = (uint32_t *) q_addr; |
| *counter = 0; |
| break; |
| } |
| case VK_QUERY_TYPE_TIMESTAMP: |
| q->value = 0; |
| break; |
| default: |
| unreachable("Unsupported query type"); |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_end_query_cpu_job(struct v3dv_job *job) |
| { |
| struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end; |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->query + i < info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->query + i]; |
| query->maybe_available = true; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_copy_query_results_cpu_job(struct v3dv_job *job) |
| { |
| struct v3dv_copy_query_results_cpu_job_info *info = |
| &job->cpu.query_copy_results; |
| |
| assert(info->dst && info->dst->mem && info->dst->mem->bo); |
| struct v3dv_bo *bo = info->dst->mem->bo; |
| |
| /* Map the entire dst buffer for the CPU copy if needed */ |
| assert(!bo->map || bo->map_size == bo->size); |
| if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a |
| * sync wait on the CPU for the corresponding GPU jobs to finish. We might |
| * want to use a submission thread to avoid blocking on the main thread. |
| */ |
| uint8_t *offset = ((uint8_t *) bo->map) + |
| info->offset + info->dst->mem_offset; |
| v3dv_get_query_pool_results_cpu(job->device, |
| info->pool, |
| info->first, |
| info->count, |
| offset, |
| info->stride, |
| info->flags); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread) |
| { |
| /* From the Vulkan 1.0 spec: |
| * |
| * "When vkCmdSetEvent is submitted to a queue, it defines an execution |
| * dependency on commands that were submitted before it, and defines an |
| * event signal operation which sets the event to the signaled state. |
| * The first synchronization scope includes every command previously |
| * submitted to the same queue, including those in the same command |
| * buffer and batch". |
| * |
| * So we should wait for all prior work to be completed before signaling |
| * the event, this includes all active CPU wait threads spawned for any |
| * command buffer submitted *before* this. |
| * |
| * FIXME: we could avoid blocking the main thread for this if we use a |
| * submission thread. |
| */ |
| |
| /* If we are calling this from a wait thread it will only wait |
| * wait threads sspawned before it, otherwise it will wait for |
| * all active threads to complete. |
| */ |
| cpu_queue_wait_idle(&job->device->queue); |
| |
| VkResult result = gpu_queue_wait_idle(&job->device->queue); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set; |
| p_atomic_set(&info->event->state, info->state); |
| |
| return VK_SUCCESS; |
| } |
| |
| static bool |
| check_wait_events_complete(struct v3dv_job *job) |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); |
| |
| struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait; |
| for (uint32_t i = 0; i < info->event_count; i++) { |
| if (!p_atomic_read(&info->events[i]->state)) |
| return false; |
| } |
| return true; |
| } |
| |
| static void |
| wait_thread_finish(struct v3dv_queue *queue, pthread_t thread) |
| { |
| mtx_lock(&queue->mutex); |
| list_for_each_entry(struct v3dv_queue_submit_wait_info, info, |
| &queue->submit_wait_list, list_link) { |
| for (uint32_t i = 0; i < info->wait_thread_count; i++) { |
| if (info->wait_threads[i].thread == thread) { |
| info->wait_threads[i].finished = true; |
| goto done; |
| } |
| } |
| } |
| |
| unreachable(!"Failed to finish wait thread: not found"); |
| |
| done: |
| mtx_unlock(&queue->mutex); |
| } |
| |
| static void * |
| event_wait_thread_func(void *_job) |
| { |
| struct v3dv_job *job = (struct v3dv_job *) _job; |
| assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); |
| struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait; |
| |
| /* Wait for events to be signaled */ |
| const useconds_t wait_interval_ms = 1; |
| while (!check_wait_events_complete(job)) |
| usleep(wait_interval_ms * 1000); |
| |
| /* Now continue submitting pending jobs for the same command buffer after |
| * the wait job. |
| */ |
| struct v3dv_queue *queue = &job->device->queue; |
| list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next, |
| &job->cmd_buffer->jobs, list_link) { |
| /* We don't want to spawn more than one wait thread per command buffer. |
| * If this job also requires a wait for events, we will do the wait here. |
| */ |
| VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL); |
| if (result == VK_NOT_READY) { |
| while (!check_wait_events_complete(pjob)) { |
| usleep(wait_interval_ms * 1000); |
| } |
| result = VK_SUCCESS; |
| } |
| |
| if (result != VK_SUCCESS) { |
| fprintf(stderr, "Wait thread job execution failed.\n"); |
| goto done; |
| } |
| } |
| |
| done: |
| wait_thread_finish(queue, pthread_self()); |
| return NULL; |
| } |
| |
| static VkResult |
| spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread) |
| |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); |
| assert(job->cmd_buffer); |
| assert(wait_thread != NULL); |
| |
| if (pthread_create(wait_thread, NULL, event_wait_thread_func, job)) |
| return vk_error(job->device, VK_ERROR_DEVICE_LOST); |
| |
| return VK_NOT_READY; |
| } |
| |
| static VkResult |
| handle_wait_events_cpu_job(struct v3dv_job *job, |
| bool sem_wait, |
| pthread_t *wait_thread) |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); |
| struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait; |
| |
| /* If all events are signaled then we are done and can continue submitting |
| * the rest of the command buffer normally. |
| */ |
| if (check_wait_events_complete(job)) |
| return VK_SUCCESS; |
| |
| /* Otherwise, we put the rest of the command buffer on a wait thread until |
| * all events are signaled. We only spawn a new thread on the first |
| * wait job we see for a command buffer, any additional wait jobs in the |
| * same command buffer will run in that same wait thread and will get here |
| * with a NULL wait_thread pointer. |
| * |
| * Also, whether we spawn a wait thread or not, we always return |
| * VK_NOT_READY (unless an error happened), so we stop trying to submit |
| * any jobs in the same command buffer after the wait job. The wait thread |
| * will attempt to submit them after the wait completes. |
| */ |
| info->sem_wait = sem_wait; |
| if (wait_thread) |
| return spawn_event_wait_thread(job, wait_thread); |
| else |
| return VK_NOT_READY; |
| } |
| |
| static VkResult |
| handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job) |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE); |
| struct v3dv_copy_buffer_to_image_cpu_job_info *info = |
| &job->cpu.copy_buffer_to_image; |
| |
| /* Wait for all GPU work to finish first, since we may be accessing |
| * the BOs involved in the operation. |
| */ |
| v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue)); |
| |
| /* Map BOs */ |
| struct v3dv_bo *dst_bo = info->image->mem->bo; |
| assert(!dst_bo->map || dst_bo->map_size == dst_bo->size); |
| if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| void *dst_ptr = dst_bo->map; |
| |
| struct v3dv_bo *src_bo = info->buffer->mem->bo; |
| assert(!src_bo->map || src_bo->map_size == src_bo->size); |
| if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| void *src_ptr = src_bo->map; |
| |
| const struct v3d_resource_slice *slice = |
| &info->image->slices[info->mip_level]; |
| |
| const struct pipe_box box = { |
| info->image_offset.x, info->image_offset.y, info->base_layer, |
| info->image_extent.width, info->image_extent.height, info->layer_count, |
| }; |
| |
| /* Copy each layer */ |
| for (uint32_t i = 0; i < info->layer_count; i++) { |
| const uint32_t dst_offset = |
| v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i); |
| const uint32_t src_offset = |
| info->buffer->mem_offset + info->buffer_offset + |
| info->buffer_layer_stride * i; |
| v3d_store_tiled_image( |
| dst_ptr + dst_offset, slice->stride, |
| src_ptr + src_offset, info->buffer_stride, |
| slice->tiling, info->image->cpp, slice->padded_height, &box); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_timestamp_query_cpu_job(struct v3dv_job *job) |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY); |
| struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp; |
| |
| /* Wait for completion of all work queued before the timestamp query */ |
| v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue)); |
| |
| /* Compute timestamp */ |
| struct timespec t; |
| clock_gettime(CLOCK_MONOTONIC, &t); |
| |
| for (uint32_t i = 0; i < info->count; i++) { |
| assert(info->query + i < info->pool->query_count); |
| struct v3dv_query *query = &info->pool->queries[info->query + i]; |
| query->maybe_available = true; |
| if (i == 0) |
| query->value = t.tv_sec * 1000000000ull + t.tv_nsec; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_csd_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait); |
| |
| static VkResult |
| handle_csd_indirect_cpu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait) |
| { |
| assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT); |
| struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect; |
| assert(info->csd_job); |
| |
| /* Make sure the GPU is no longer using the indirect buffer*/ |
| assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); |
| v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE); |
| |
| /* Map the indirect buffer and read the dispatch parameters */ |
| assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); |
| struct v3dv_bo *bo = info->buffer->mem->bo; |
| if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) |
| return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| assert(bo->map); |
| |
| const uint32_t offset = info->buffer->mem_offset + info->offset; |
| const uint32_t *group_counts = (uint32_t *) (bo->map + offset); |
| if (group_counts[0] == 0 || group_counts[1] == 0|| group_counts[2] == 0) |
| return VK_SUCCESS; |
| |
| if (memcmp(group_counts, info->csd_job->csd.wg_count, |
| sizeof(info->csd_job->csd.wg_count)) != 0) { |
| v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts); |
| } |
| |
| handle_csd_job(queue, info->csd_job, do_sem_wait); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| process_semaphores_to_signal(struct v3dv_device *device, |
| uint32_t count, const VkSemaphore *sems) |
| { |
| if (count == 0) |
| return VK_SUCCESS; |
| |
| int render_fd = device->pdevice->render_fd; |
| |
| int fd; |
| mtx_lock(&device->mutex); |
| drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd); |
| mtx_unlock(&device->mutex); |
| if (fd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| VkResult result = VK_SUCCESS; |
| for (uint32_t i = 0; i < count; i++) { |
| struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]); |
| |
| int ret; |
| if (!sem->temp_sync) |
| ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd); |
| else |
| ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd); |
| |
| if (ret) { |
| result = VK_ERROR_OUT_OF_HOST_MEMORY; |
| break; |
| } |
| } |
| |
| assert(fd >= 0); |
| close(fd); |
| |
| return result; |
| } |
| |
| static VkResult |
| process_fence_to_signal(struct v3dv_device *device, VkFence _fence) |
| { |
| if (_fence == VK_NULL_HANDLE) |
| return VK_SUCCESS; |
| |
| struct v3dv_fence *fence = v3dv_fence_from_handle(_fence); |
| |
| int render_fd = device->pdevice->render_fd; |
| |
| int fd; |
| mtx_lock(&device->mutex); |
| drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd); |
| mtx_unlock(&device->mutex); |
| if (fd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| int ret; |
| if (!fence->temp_sync) |
| ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd); |
| else |
| ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd); |
| |
| assert(fd >= 0); |
| close(fd); |
| |
| return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_cl_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait) |
| { |
| struct v3dv_device *device = queue->device; |
| |
| struct drm_v3d_submit_cl submit = { 0 }; |
| |
| /* Sanity check: we should only flag a bcl sync on a job that needs to be |
| * serialized. |
| */ |
| assert(job->serialize || !job->needs_bcl_sync); |
| |
| /* We expect to have just one RCL per job which should fit in just one BO. |
| * Our BCL, could chain multiple BOS together though. |
| */ |
| assert(list_length(&job->rcl.bo_list) == 1); |
| assert(list_length(&job->bcl.bo_list) >= 1); |
| struct v3dv_bo *bcl_fist_bo = |
| list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link); |
| submit.bcl_start = bcl_fist_bo->offset; |
| submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl); |
| submit.rcl_start = job->rcl.bo->offset; |
| submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl); |
| |
| submit.qma = job->tile_alloc->offset; |
| submit.qms = job->tile_alloc->size; |
| submit.qts = job->tile_state->offset; |
| |
| submit.flags = 0; |
| if (job->tmu_dirty_rcl) |
| submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE; |
| |
| submit.bo_handle_count = job->bo_count; |
| uint32_t *bo_handles = |
| (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count); |
| uint32_t bo_idx = 0; |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; |
| bo_handles[bo_idx++] = bo->handle; |
| } |
| assert(bo_idx == submit.bo_handle_count); |
| submit.bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| /* We need a binning sync if we are waiting on a sempahore (do_sem_wait) or |
| * if the job comes after a pipeline barrier than involves geometry stages |
| * (needs_bcl_sync). |
| * |
| * We need a render sync if the job doesn't need a binning sync but has |
| * still been flagged for serialization. It should be noted that RCL jobs |
| * don't start until the previous RCL job has finished so we don't really |
| * need to add a fence for those, however, we might need to wait on a CSD or |
| * TFU job, which are not automatically serialized with CL jobs. |
| * |
| * FIXME: for now, if we are asked to wait on any semaphores, we just wait |
| * on the last job we submitted. In the future we might want to pass the |
| * actual syncobj of the wait semaphores so we don't block on the last RCL |
| * if we only need to wait for a previous CSD or TFU, for example, but |
| * we would have to extend our kernel interface to support the case where |
| * we have more than one semaphore to wait on. |
| */ |
| const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync; |
| const bool needs_rcl_sync = job->serialize && !needs_bcl_sync; |
| |
| mtx_lock(&queue->device->mutex); |
| submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0; |
| submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0; |
| submit.out_sync = device->last_job_sync; |
| v3dv_clif_dump(device, job, &submit); |
| int ret = v3dv_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CL, &submit); |
| mtx_unlock(&queue->device->mutex); |
| |
| static bool warned = false; |
| if (ret && !warned) { |
| fprintf(stderr, "Draw call returned %s. Expect corruption.\n", |
| strerror(errno)); |
| warned = true; |
| } |
| |
| free(bo_handles); |
| |
| if (ret) |
| return vk_error(device, VK_ERROR_DEVICE_LOST); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_tfu_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait) |
| { |
| struct v3dv_device *device = queue->device; |
| |
| const bool needs_sync = do_sem_wait || job->serialize; |
| |
| mtx_lock(&device->mutex); |
| job->tfu.in_sync = needs_sync ? device->last_job_sync : 0; |
| job->tfu.out_sync = device->last_job_sync; |
| int ret = v3dv_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu); |
| mtx_unlock(&device->mutex); |
| |
| if (ret != 0) { |
| fprintf(stderr, "Failed to submit TFU job: %d\n", ret); |
| return vk_error(device, VK_ERROR_DEVICE_LOST); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| handle_csd_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait) |
| { |
| struct v3dv_device *device = queue->device; |
| |
| struct drm_v3d_submit_csd *submit = &job->csd.submit; |
| |
| submit->bo_handle_count = job->bo_count; |
| uint32_t *bo_handles = |
| (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2)); |
| uint32_t bo_idx = 0; |
| set_foreach(job->bos, entry) { |
| struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; |
| bo_handles[bo_idx++] = bo->handle; |
| } |
| assert(bo_idx == submit->bo_handle_count); |
| submit->bo_handles = (uintptr_t)(void *)bo_handles; |
| |
| const bool needs_sync = do_sem_wait || job->serialize; |
| |
| mtx_lock(&queue->device->mutex); |
| submit->in_sync = needs_sync ? device->last_job_sync : 0; |
| submit->out_sync = device->last_job_sync; |
| int ret = v3dv_ioctl(device->pdevice->render_fd, |
| DRM_IOCTL_V3D_SUBMIT_CSD, submit); |
| mtx_unlock(&queue->device->mutex); |
| |
| static bool warned = false; |
| if (ret && !warned) { |
| fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n", |
| strerror(errno)); |
| warned = true; |
| } |
| |
| free(bo_handles); |
| |
| if (ret) |
| return vk_error(device, VK_ERROR_DEVICE_LOST); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| queue_submit_job(struct v3dv_queue *queue, |
| struct v3dv_job *job, |
| bool do_sem_wait, |
| pthread_t *wait_thread) |
| { |
| assert(job); |
| |
| switch (job->type) { |
| case V3DV_JOB_TYPE_GPU_CL: |
| return handle_cl_job(queue, job, do_sem_wait); |
| case V3DV_JOB_TYPE_GPU_TFU: |
| return handle_tfu_job(queue, job, do_sem_wait); |
| case V3DV_JOB_TYPE_GPU_CSD: |
| return handle_csd_job(queue, job, do_sem_wait); |
| case V3DV_JOB_TYPE_CPU_RESET_QUERIES: |
| return handle_reset_query_cpu_job(job); |
| case V3DV_JOB_TYPE_CPU_END_QUERY: |
| return handle_end_query_cpu_job(job); |
| case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS: |
| return handle_copy_query_results_cpu_job(job); |
| case V3DV_JOB_TYPE_CPU_SET_EVENT: |
| return handle_set_event_cpu_job(job, wait_thread != NULL); |
| case V3DV_JOB_TYPE_CPU_WAIT_EVENTS: |
| return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread); |
| case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE: |
| return handle_copy_buffer_to_image_cpu_job(job); |
| case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: |
| return handle_csd_indirect_cpu_job(queue, job, do_sem_wait); |
| case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY: |
| return handle_timestamp_query_cpu_job(job); |
| default: |
| unreachable("Unhandled job type"); |
| } |
| } |
| |
| static VkResult |
| queue_create_noop_job(struct v3dv_queue *queue) |
| { |
| struct v3dv_device *device = queue->device; |
| queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!queue->noop_job) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1); |
| |
| v3dv_X(device, job_emit_noop)(queue->noop_job); |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit) |
| { |
| /* VkQueue host access is externally synchronized so we don't need to lock |
| * here for the static variable. |
| */ |
| if (!queue->noop_job) { |
| VkResult result = queue_create_noop_job(queue); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| return queue_submit_job(queue, queue->noop_job, |
| pSubmit->waitSemaphoreCount > 0, NULL); |
| } |
| |
| static VkResult |
| queue_submit_cmd_buffer(struct v3dv_queue *queue, |
| struct v3dv_cmd_buffer *cmd_buffer, |
| const VkSubmitInfo *pSubmit, |
| pthread_t *wait_thread) |
| { |
| assert(cmd_buffer); |
| assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE); |
| |
| if (list_is_empty(&cmd_buffer->jobs)) |
| return queue_submit_noop_job(queue, pSubmit); |
| |
| list_for_each_entry_safe(struct v3dv_job, job, |
| &cmd_buffer->jobs, list_link) { |
| VkResult result = queue_submit_job(queue, job, |
| pSubmit->waitSemaphoreCount > 0, |
| wait_thread); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static void |
| add_wait_thread_to_list(struct v3dv_device *device, |
| pthread_t thread, |
| struct v3dv_queue_submit_wait_info **wait_info) |
| { |
| /* If this is the first time we spawn a wait thread for this queue |
| * submission create a v3dv_queue_submit_wait_info to track this and |
| * any other threads in the same submission and add it to the global list |
| * in the queue. |
| */ |
| if (*wait_info == NULL) { |
| *wait_info = |
| vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_queue_submit_wait_info), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| (*wait_info)->device = device; |
| } |
| |
| /* And add the thread to the list of wait threads for this submission */ |
| const uint32_t thread_idx = (*wait_info)->wait_thread_count; |
| assert(thread_idx < 16); |
| (*wait_info)->wait_threads[thread_idx].thread = thread; |
| (*wait_info)->wait_threads[thread_idx].finished = false; |
| (*wait_info)->wait_thread_count++; |
| } |
| |
| static void |
| add_signal_semaphores_to_wait_list(struct v3dv_device *device, |
| const VkSubmitInfo *pSubmit, |
| struct v3dv_queue_submit_wait_info *wait_info) |
| { |
| assert(wait_info); |
| |
| if (pSubmit->signalSemaphoreCount == 0) |
| return; |
| |
| /* FIXME: We put all the semaphores in a list and we signal all of them |
| * together from the submit master thread when the last wait thread in the |
| * submit completes. We could do better though: group the semaphores per |
| * submit and signal them as soon as all wait threads for a particular |
| * submit completes. Not sure if the extra work would be worth it though, |
| * since we only spawn waith threads for event waits and only when the |
| * event if set from the host after the queue submission. |
| */ |
| |
| /* Check the size of the current semaphore list */ |
| const uint32_t prev_count = wait_info->signal_semaphore_count; |
| const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore); |
| VkSemaphore *prev_list = wait_info->signal_semaphores; |
| |
| /* Resize the list to hold the additional semaphores */ |
| const uint32_t extra_alloc_size = |
| pSubmit->signalSemaphoreCount * sizeof(VkSemaphore); |
| wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount; |
| wait_info->signal_semaphores = |
| vk_alloc(&device->vk.alloc, prev_alloc_size + extra_alloc_size, 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| |
| /* Copy the old list to the new allocation and free the old list */ |
| if (prev_count > 0) { |
| memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size); |
| vk_free(&device->vk.alloc, prev_list); |
| } |
| |
| /* Add the new semaphores to the list */ |
| memcpy(wait_info->signal_semaphores + prev_count, |
| pSubmit->pSignalSemaphores, extra_alloc_size); |
| } |
| |
| static VkResult |
| queue_submit_cmd_buffer_batch(struct v3dv_queue *queue, |
| const VkSubmitInfo *pSubmit, |
| struct v3dv_queue_submit_wait_info **wait_info) |
| { |
| VkResult result = VK_SUCCESS; |
| bool has_wait_threads = false; |
| |
| /* Even if we don't have any actual work to submit we still need to wait |
| * on the wait semaphores and signal the signal semaphores and fence, so |
| * in this scenario we just submit a trivial no-op job so we don't have |
| * to do anything special, it should not be a common case anyway. |
| */ |
| if (pSubmit->commandBufferCount == 0) { |
| result = queue_submit_noop_job(queue, pSubmit); |
| } else { |
| for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) { |
| pthread_t wait_thread; |
| struct v3dv_cmd_buffer *cmd_buffer = |
| v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]); |
| result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit, |
| &wait_thread); |
| |
| /* We get VK_NOT_READY if we had to spawn a wait thread for the |
| * command buffer. In that scenario, we want to continue submitting |
| * any pending command buffers in the batch, but we don't want to |
| * process any signal semaphores for the batch until we know we have |
| * submitted every job for every command buffer in the batch. |
| */ |
| if (result == VK_NOT_READY) { |
| result = VK_SUCCESS; |
| add_wait_thread_to_list(queue->device, wait_thread, wait_info); |
| has_wait_threads = true; |
| } |
| |
| if (result != VK_SUCCESS) |
| break; |
| } |
| } |
| |
| if (result != VK_SUCCESS) |
| return result; |
| |
| /* If had to emit any wait threads in this submit we need to wait for all |
| * of them to complete before we can signal any semaphores. |
| */ |
| if (!has_wait_threads) { |
| return process_semaphores_to_signal(queue->device, |
| pSubmit->signalSemaphoreCount, |
| pSubmit->pSignalSemaphores); |
| } else { |
| assert(*wait_info); |
| add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info); |
| return VK_NOT_READY; |
| } |
| } |
| |
| static void * |
| master_wait_thread_func(void *_wait_info) |
| { |
| struct v3dv_queue_submit_wait_info *wait_info = |
| (struct v3dv_queue_submit_wait_info *) _wait_info; |
| |
| struct v3dv_queue *queue = &wait_info->device->queue; |
| |
| /* Wait for all command buffer wait threads to complete */ |
| for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) { |
| int res = pthread_join(wait_info->wait_threads[i].thread, NULL); |
| if (res != 0) |
| fprintf(stderr, "Wait thread failed to join.\n"); |
| } |
| |
| /* Signal semaphores and fences */ |
| VkResult result; |
| result = process_semaphores_to_signal(wait_info->device, |
| wait_info->signal_semaphore_count, |
| wait_info->signal_semaphores); |
| if (result != VK_SUCCESS) |
| fprintf(stderr, "Wait thread semaphore signaling failed."); |
| |
| result = process_fence_to_signal(wait_info->device, wait_info->fence); |
| if (result != VK_SUCCESS) |
| fprintf(stderr, "Wait thread fence signaling failed."); |
| |
| /* Release wait_info */ |
| mtx_lock(&queue->mutex); |
| list_del(&wait_info->list_link); |
| mtx_unlock(&queue->mutex); |
| |
| vk_free(&wait_info->device->vk.alloc, wait_info->signal_semaphores); |
| vk_free(&wait_info->device->vk.alloc, wait_info); |
| |
| return NULL; |
| } |
| |
| |
| static VkResult |
| spawn_master_wait_thread(struct v3dv_queue *queue, |
| struct v3dv_queue_submit_wait_info *wait_info) |
| |
| { |
| VkResult result = VK_SUCCESS; |
| |
| mtx_lock(&queue->mutex); |
| if (pthread_create(&wait_info->master_wait_thread, NULL, |
| master_wait_thread_func, wait_info)) { |
| result = vk_error(queue, VK_ERROR_DEVICE_LOST); |
| goto done; |
| } |
| |
| list_addtail(&wait_info->list_link, &queue->submit_wait_list); |
| |
| done: |
| mtx_unlock(&queue->mutex); |
| return result; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_QueueSubmit(VkQueue _queue, |
| uint32_t submitCount, |
| const VkSubmitInfo* pSubmits, |
| VkFence fence) |
| { |
| V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); |
| |
| struct v3dv_queue_submit_wait_info *wait_info = NULL; |
| |
| VkResult result = VK_SUCCESS; |
| for (uint32_t i = 0; i < submitCount; i++) { |
| result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info); |
| if (result != VK_SUCCESS && result != VK_NOT_READY) |
| goto done; |
| } |
| |
| if (!wait_info) { |
| assert(result != VK_NOT_READY); |
| result = process_fence_to_signal(queue->device, fence); |
| goto done; |
| } |
| |
| /* We emitted wait threads, so we have to spwan a master thread for this |
| * queue submission that waits for all other threads to complete and then |
| * will signal any semaphores and fences. |
| */ |
| assert(wait_info); |
| wait_info->fence = fence; |
| result = spawn_master_wait_thread(queue, wait_info); |
| |
| done: |
| return result; |
| } |
| |
| static void |
| destroy_syncobj(uint32_t device_fd, uint32_t *sync) |
| { |
| assert(sync); |
| drmSyncobjDestroy(device_fd, *sync); |
| *sync = 0; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_CreateSemaphore(VkDevice _device, |
| const VkSemaphoreCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkSemaphore *pSemaphore) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO); |
| |
| struct v3dv_semaphore *sem = |
| vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore), |
| VK_OBJECT_TYPE_SEMAPHORE); |
| if (sem == NULL) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync); |
| if (ret) { |
| vk_object_free(&device->vk, pAllocator, sem); |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| *pSemaphore = v3dv_semaphore_to_handle(sem); |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| v3dv_GetPhysicalDeviceExternalSemaphoreProperties( |
| VkPhysicalDevice physicalDevice, |
| const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, |
| VkExternalSemaphoreProperties *pExternalSemaphoreProperties) |
| { |
| switch (pExternalSemaphoreInfo->handleType) { |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: |
| pExternalSemaphoreProperties->exportFromImportedHandleTypes = |
| VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | |
| VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; |
| pExternalSemaphoreProperties->compatibleHandleTypes = |
| VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | |
| VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; |
| |
| /* FIXME: we can't import external semaphores until we improve the kernel |
| * submit interface to handle multiple in syncobjs, because once we have |
| * an imported semaphore in our list of semaphores to wait on, we can no |
| * longer use the workaround of waiting on the last syncobj fence produced |
| * from the device, since the imported semaphore may not (and in fact, it |
| * would typically not) have been produced from same device. |
| * |
| * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*. |
| * Particularly, this test: |
| * dEQP-VK.synchronization.cross_instance.dedicated. |
| * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd |
| * fails consistently because of this, so it'll be a good reference to |
| * verify the implementation when the kernel bits are in place. |
| */ |
| pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; |
| |
| /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties |
| * for details on why we can't export to SYNC_FD. |
| */ |
| if (pExternalSemaphoreInfo->handleType != |
| VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) { |
| pExternalSemaphoreProperties->externalSemaphoreFeatures |= |
| VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT; |
| } |
| break; |
| default: |
| pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; |
| pExternalSemaphoreProperties->compatibleHandleTypes = 0; |
| pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; |
| break; |
| } |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_ImportSemaphoreFdKHR( |
| VkDevice _device, |
| const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); |
| |
| assert(pImportSemaphoreFdInfo->sType == |
| VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR); |
| |
| int fd = pImportSemaphoreFdInfo->fd; |
| int render_fd = device->pdevice->render_fd; |
| |
| bool is_temporary = |
| pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT || |
| (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT); |
| |
| uint32_t new_sync; |
| switch (pImportSemaphoreFdInfo->handleType) { |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: { |
| /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the |
| * special value -1 for fd is treated like a valid sync file descriptor |
| * referring to an object that has already signaled. The import |
| * operation will succeed and the VkSemaphore will have a temporarily |
| * imported payload as if a valid file descriptor had been provided." |
| */ |
| unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0; |
| if (drmSyncobjCreate(render_fd, flags, &new_sync)) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| if (fd != -1) { |
| if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) { |
| drmSyncobjDestroy(render_fd, new_sync); |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| } |
| } |
| break; |
| } |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: { |
| if (drmSyncobjFDToHandle(render_fd, fd, &new_sync)) |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| break; |
| } |
| default: |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| } |
| |
| destroy_syncobj(render_fd, &sem->temp_sync); |
| if (is_temporary) { |
| sem->temp_sync = new_sync; |
| } else { |
| destroy_syncobj(render_fd, &sem->sync); |
| sem->sync = new_sync; |
| } |
| |
| /* From the Vulkan 1.0.53 spec: |
| * |
| * "Importing a semaphore payload from a file descriptor transfers |
| * ownership of the file descriptor from the application to the |
| * Vulkan implementation. The application must not perform any |
| * operations on the file descriptor after a successful import." |
| * |
| * If the import fails, we leave the file descriptor open. |
| */ |
| if (fd != -1) |
| close(fd); |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_GetSemaphoreFdKHR(VkDevice _device, |
| const VkSemaphoreGetFdInfoKHR *pGetFdInfo, |
| int *pFd) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore); |
| |
| assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR); |
| |
| *pFd = -1; |
| int render_fd = device->pdevice->render_fd; |
| switch (pGetFdInfo->handleType) { |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: { |
| drmSyncobjExportSyncFile(render_fd, sem->sync, pFd); |
| if (*pFd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| break; |
| case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: |
| drmSyncobjHandleToFD(render_fd, sem->sync, pFd); |
| if (*pFd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| break; |
| } |
| default: |
| unreachable("Unsupported external semaphore handle type"); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| v3dv_DestroySemaphore(VkDevice _device, |
| VkSemaphore semaphore, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore); |
| |
| if (sem == NULL) |
| return; |
| |
| destroy_syncobj(device->pdevice->render_fd, &sem->sync); |
| destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync); |
| |
| vk_object_free(&device->vk, pAllocator, sem); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_CreateFence(VkDevice _device, |
| const VkFenceCreateInfo *pCreateInfo, |
| const VkAllocationCallbacks *pAllocator, |
| VkFence *pFence) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); |
| |
| struct v3dv_fence *fence = |
| vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence), |
| VK_OBJECT_TYPE_FENCE); |
| if (fence == NULL) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| unsigned flags = 0; |
| if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) |
| flags |= DRM_SYNCOBJ_CREATE_SIGNALED; |
| int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync); |
| if (ret) { |
| vk_object_free(&device->vk, pAllocator, fence); |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| *pFence = v3dv_fence_to_handle(fence); |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| v3dv_GetPhysicalDeviceExternalFenceProperties( |
| VkPhysicalDevice physicalDevice, |
| const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, |
| VkExternalFenceProperties *pExternalFenceProperties) |
| |
| { |
| switch (pExternalFenceInfo->handleType) { |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: |
| pExternalFenceProperties->exportFromImportedHandleTypes = |
| VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | |
| VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; |
| pExternalFenceProperties->compatibleHandleTypes = |
| VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | |
| VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; |
| pExternalFenceProperties->externalFenceFeatures = |
| VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT; |
| |
| /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not |
| * the syncobj itself, and that fence is only created after we have |
| * submitted to the kernel and updated the syncobj for the fence to import |
| * the actual DRM fence created with the submission. Unfortunately, if the |
| * queue submission has a 'wait for events' we may hold any jobs after the |
| * wait in a user-space thread until the events are signaled, and in that |
| * case we don't update the out fence of the submit until the events are |
| * signaled and we can submit all the jobs involved with the vkQueueSubmit |
| * call. This means that if the applications submits with an out fence and |
| * a wait for events, trying to export the out fence to a SYNC_FD rigth |
| * after the submission and before the events are signaled will fail, |
| * because the actual DRM fence won't exist yet. This is not a problem |
| * with OPAQUE_FD because in this case we export the entire syncobj, not |
| * the underlying DRM fence. To fix this we need to rework our kernel |
| * interface to be more flexible and accept multiple in/out syncobjs so |
| * we can implement event waits as regular fence waits on the kernel side, |
| * until then, we can only reliably export OPAQUE_FD. |
| */ |
| if (pExternalFenceInfo->handleType != |
| VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) { |
| pExternalFenceProperties->externalFenceFeatures |= |
| VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT; |
| } |
| break; |
| default: |
| pExternalFenceProperties->exportFromImportedHandleTypes = 0; |
| pExternalFenceProperties->compatibleHandleTypes = 0; |
| pExternalFenceProperties->externalFenceFeatures = 0; |
| break; |
| } |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_ImportFenceFdKHR(VkDevice _device, |
| const VkImportFenceFdInfoKHR *pImportFenceFdInfo) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence); |
| |
| assert(pImportFenceFdInfo->sType == |
| VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR); |
| |
| int fd = pImportFenceFdInfo->fd; |
| int render_fd = device->pdevice->render_fd; |
| |
| bool is_temporary = |
| pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT || |
| (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT); |
| |
| uint32_t new_sync; |
| switch (pImportFenceFdInfo->handleType) { |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: { |
| /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the |
| * special value -1 for fd is treated like a valid sync file descriptor |
| * referring to an object that has already signaled. The import |
| * operation will succeed and the VkFence will have a temporarily |
| * imported payload as if a valid file descriptor had been provided." |
| */ |
| unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0; |
| if (drmSyncobjCreate(render_fd, flags, &new_sync)) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| if (fd != -1) { |
| if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) { |
| drmSyncobjDestroy(render_fd, new_sync); |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| } |
| } |
| break; |
| } |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: { |
| if (drmSyncobjFDToHandle(render_fd, fd, &new_sync)) |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| break; |
| } |
| default: |
| return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); |
| } |
| |
| destroy_syncobj(render_fd, &fence->temp_sync); |
| if (is_temporary) { |
| fence->temp_sync = new_sync; |
| } else { |
| destroy_syncobj(render_fd, &fence->sync); |
| fence->sync = new_sync; |
| } |
| |
| /* From the Vulkan 1.0.53 spec: |
| * |
| * "Importing a fence payload from a file descriptor transfers |
| * ownership of the file descriptor from the application to the |
| * Vulkan implementation. The application must not perform any |
| * operations on the file descriptor after a successful import." |
| * |
| * If the import fails, we leave the file descriptor open. |
| */ |
| if (fd != -1) |
| close(fd); |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| v3dv_DestroyFence(VkDevice _device, |
| VkFence _fence, |
| const VkAllocationCallbacks *pAllocator) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_fence, fence, _fence); |
| |
| if (fence == NULL) |
| return; |
| |
| destroy_syncobj(device->pdevice->render_fd, &fence->sync); |
| destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync); |
| |
| vk_object_free(&device->vk, pAllocator, fence); |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_GetFenceStatus(VkDevice _device, VkFence _fence) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_fence, fence, _fence); |
| |
| int ret = drmSyncobjWait(device->pdevice->render_fd, &fence->sync, 1, |
| 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL); |
| if (ret == -ETIME) |
| return VK_NOT_READY; |
| else if (ret) |
| return vk_error(device, VK_ERROR_DEVICE_LOST); |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_GetFenceFdKHR(VkDevice _device, |
| const VkFenceGetFdInfoKHR *pGetFdInfo, |
| int *pFd) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence); |
| |
| assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR); |
| |
| *pFd = -1; |
| int render_fd = device->pdevice->render_fd; |
| switch (pGetFdInfo->handleType) { |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: { |
| drmSyncobjExportSyncFile(render_fd, fence->sync, pFd); |
| if (*pFd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| break; |
| case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: |
| drmSyncobjHandleToFD(render_fd, fence->sync, pFd); |
| if (*pFd == -1) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| break; |
| } |
| default: |
| unreachable("Unsupported external fence handle type"); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| uint32_t *syncobjs = vk_alloc(&device->vk.alloc, |
| sizeof(*syncobjs) * fenceCount, 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| if (!syncobjs) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| int render_fd = device->pdevice->render_fd; |
| uint32_t reset_count = 0; |
| for (uint32_t i = 0; i < fenceCount; i++) { |
| struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]); |
| /* From the Vulkan spec, section 'Importing Fence Payloads': |
| * |
| * "If the import is temporary, the fence will be restored to its |
| * permanent state the next time that fence is passed to |
| * vkResetFences. |
| * |
| * Note: Restoring a fence to its prior permanent payload is a |
| * distinct operation from resetting a fence payload." |
| * |
| * To restore the previous state, we just need to destroy the temporary. |
| */ |
| if (fence->temp_sync) |
| destroy_syncobj(render_fd, &fence->temp_sync); |
| else |
| syncobjs[reset_count++] = fence->sync; |
| } |
| |
| int ret = 0; |
| if (reset_count > 0) |
| ret = drmSyncobjReset(render_fd, syncobjs, reset_count); |
| |
| vk_free(&device->vk.alloc, syncobjs); |
| |
| if (ret) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_WaitForFences(VkDevice _device, |
| uint32_t fenceCount, |
| const VkFence *pFences, |
| VkBool32 waitAll, |
| uint64_t timeout) |
| { |
| V3DV_FROM_HANDLE(v3dv_device, device, _device); |
| |
| const uint64_t abs_timeout = get_absolute_timeout(timeout); |
| |
| uint32_t *syncobjs = vk_alloc(&device->vk.alloc, |
| sizeof(*syncobjs) * fenceCount, 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| if (!syncobjs) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| for (uint32_t i = 0; i < fenceCount; i++) { |
| struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]); |
| syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync; |
| } |
| |
| unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; |
| if (waitAll) |
| flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL; |
| |
| int ret; |
| do { |
| ret = drmSyncobjWait(device->pdevice->render_fd, syncobjs, fenceCount, |
| timeout, flags, NULL); |
| } while (ret == -ETIME && gettime_ns() < abs_timeout); |
| |
| vk_free(&device->vk.alloc, syncobjs); |
| |
| if (ret == -ETIME) |
| return VK_TIMEOUT; |
| else if (ret) |
| return vk_error(device, VK_ERROR_DEVICE_LOST); |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| v3dv_QueueBindSparse(VkQueue _queue, |
| uint32_t bindInfoCount, |
| const VkBindSparseInfo *pBindInfo, |
| VkFence fence) |
| { |
| V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); |
| return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT); |
| } |