| /* |
| * Copyright © Microsoft Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "dzn_private.h" |
| |
| #include "vk_alloc.h" |
| #include "vk_debug_report.h" |
| #include "vk_format.h" |
| #include "vk_util.h" |
| |
| #include "dxil_spirv_nir.h" |
| |
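/* Execute the queued legacy transition barriers for a contiguous run of
 * subresources. D3D12 rejects NOP transitions (StateBefore == StateAfter),
 * so those are skipped, flushing the accumulated run each time one is hit.
 * Once executed, each barrier's StateBefore is set to StateAfter so it's
 * not replayed.
 */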
| static void |
| dzn_cmd_buffer_exec_transition_barriers(struct dzn_cmd_buffer *cmdbuf, |
| D3D12_RESOURCE_BARRIER *barriers, |
| uint32_t barrier_count) |
| { |
| assert(!cmdbuf->enhanced_barriers); |
| uint32_t flush_count = 0; |
| for (uint32_t b = 0; b < barrier_count; b++) { |
| assert(barriers[b].Transition.pResource); |
| |
      /* Some layouts map to the same states, and NOP-barriers are illegal. */
| if (barriers[b].Transition.StateBefore == barriers[b].Transition.StateAfter) { |
| if (flush_count) { |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count, |
| &barriers[b - flush_count]); |
| flush_count = 0; |
| } |
| } else { |
| flush_count++; |
| } |
| } |
| |
| if (flush_count) |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, flush_count, |
| &barriers[barrier_count - flush_count]); |
| |
| /* Set Before = After so we don't execute the same barrier twice. */ |
| for (uint32_t b = 0; b < barrier_count; b++) |
| barriers[b].Transition.StateBefore = barriers[b].Transition.StateAfter; |
| } |
| |
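/* Flush the transition barriers previously queued for a subresource range of
 * the given resource, if any.
 */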
| static void |
| dzn_cmd_buffer_flush_transition_barriers(struct dzn_cmd_buffer *cmdbuf, |
| ID3D12Resource *res, |
| uint32_t first_subres, |
| uint32_t subres_count) |
| { |
| assert(!cmdbuf->enhanced_barriers); |
| struct hash_entry *he = |
| _mesa_hash_table_search(cmdbuf->transition_barriers, res); |
| D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL; |
| |
| if (!barriers) |
| return; |
| |
| dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count); |
| } |
| |
| enum dzn_queue_transition_flags { |
| DZN_QUEUE_TRANSITION_FLUSH = 1 << 0, |
| DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED = 1 << 1, |
| }; |
| |
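/* Queue (and optionally flush, see DZN_QUEUE_TRANSITION_FLUSH) transition
 * barriers for a subresource range. The per-subresource barrier state is
 * cached in the transition_barriers hash table, keyed on the ID3D12Resource
 * and lazily allocated with one slot per plane/mip/layer.
 */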
| static VkResult |
| dzn_cmd_buffer_queue_transition_barriers(struct dzn_cmd_buffer *cmdbuf, |
| ID3D12Resource *res, |
| uint32_t first_subres, |
| uint32_t subres_count, |
| D3D12_RESOURCE_STATES before, |
| D3D12_RESOURCE_STATES after, |
| uint32_t flags) |
| { |
| assert(!cmdbuf->enhanced_barriers); |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct hash_entry *he = |
| _mesa_hash_table_search(cmdbuf->transition_barriers, res); |
| struct D3D12_RESOURCE_BARRIER *barriers = he ? he->data : NULL; |
| |
| if (!barriers) { |
| D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(res); |
| D3D12_FEATURE_DATA_FORMAT_INFO fmt_info = { desc.Format, 0 }; |
| ID3D12Device_CheckFeatureSupport(device->dev, D3D12_FEATURE_FORMAT_INFO, &fmt_info, sizeof(fmt_info)); |
| uint32_t barrier_count = |
| fmt_info.PlaneCount * |
| desc.MipLevels * desc.DepthOrArraySize; |
| |
| barriers = |
| vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*barriers) * barrier_count, |
| 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!barriers) |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| he = _mesa_hash_table_insert(cmdbuf->transition_barriers, res, barriers); |
| if (!he) |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| for (uint32_t subres = first_subres; subres < first_subres + subres_count; subres++) { |
| if (!barriers[subres].Transition.pResource) { |
| barriers[subres] = (D3D12_RESOURCE_BARRIER) { |
| .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, |
| .Flags = 0, |
| .Transition = { |
| .pResource = res, |
| .Subresource = subres, |
| .StateBefore = before, |
| .StateAfter = after, |
| }, |
| }; |
| } else { |
| if (flags & DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED) |
| before = barriers[subres].Transition.StateAfter; |
| |
| assert(barriers[subres].Transition.StateAfter == before || |
| barriers[subres].Transition.StateAfter == after); |
| barriers[subres].Transition.StateAfter = after; |
| } |
| } |
| |
| if (flags & DZN_QUEUE_TRANSITION_FLUSH) |
| dzn_cmd_buffer_exec_transition_barriers(cmdbuf, &barriers[first_subres], subres_count); |
| |
| return VK_SUCCESS; |
| } |
| |
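/* Queue resource-state transitions for an image subresource range,
 * coalescing contiguous subresource indices into as few
 * dzn_cmd_buffer_queue_transition_barriers() calls as possible.
 */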
| static VkResult |
| dzn_cmd_buffer_queue_image_range_state_transition(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| const VkImageSubresourceRange *range, |
| D3D12_RESOURCE_STATES before, |
| D3D12_RESOURCE_STATES after, |
| uint32_t flags) |
| { |
| assert(!cmdbuf->enhanced_barriers); |
| uint32_t first_barrier = 0, barrier_count = 0; |
| VkResult ret = VK_SUCCESS; |
| |
| dzn_foreach_aspect(aspect, range->aspectMask) { |
| uint32_t layer_count = dzn_get_layer_count(image, range); |
| uint32_t level_count = dzn_get_level_count(image, range); |
| for (uint32_t layer = 0; layer < layer_count; layer++) { |
| uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); |
| if (!barrier_count) { |
| first_barrier = subres; |
| barrier_count = level_count; |
| continue; |
| } else if (first_barrier + barrier_count == subres) { |
| barrier_count += level_count; |
| continue; |
| } |
| |
| ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, |
| first_barrier, barrier_count, |
| before, after, flags); |
| if (ret != VK_SUCCESS) |
| return ret; |
| |
| barrier_count = 0; |
| } |
| |
| if (barrier_count) { |
| ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, |
| first_barrier, barrier_count, |
| before, after, flags); |
| if (ret != VK_SUCCESS) |
| return ret; |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
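/* Layout-based variant of the above: translates Vulkan image layouts into
 * D3D12 resource states per aspect. UNDEFINED and PREINITIALIZED map to the
 * COMMON state, and UNDEFINED additionally makes the queued barrier inherit
 * the last recorded StateAfter as its before-state.
 */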
| static VkResult |
| dzn_cmd_buffer_queue_image_range_layout_transition(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| const VkImageSubresourceRange *range, |
| VkImageLayout old_layout, |
| VkImageLayout new_layout, |
| uint32_t flags) |
| { |
| assert(!cmdbuf->enhanced_barriers); |
| uint32_t first_barrier = 0, barrier_count = 0; |
| VkResult ret = VK_SUCCESS; |
| |
| if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) |
| flags |= DZN_QUEUE_TRANSITION_BEFORE_IS_UNDEFINED; |
| |
| dzn_foreach_aspect(aspect, range->aspectMask) { |
| D3D12_RESOURCE_STATES after = |
| dzn_image_layout_to_state(image, new_layout, aspect, cmdbuf->type); |
| D3D12_RESOURCE_STATES before = |
| (old_layout == VK_IMAGE_LAYOUT_UNDEFINED || |
| old_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) ? |
| D3D12_RESOURCE_STATE_COMMON : |
| dzn_image_layout_to_state(image, old_layout, aspect, cmdbuf->type); |
| |
| uint32_t layer_count = dzn_get_layer_count(image, range); |
| uint32_t level_count = dzn_get_level_count(image, range); |
| for (uint32_t layer = 0; layer < layer_count; layer++) { |
| uint32_t subres = dzn_image_range_get_subresource_index(image, range, aspect, 0, layer); |
| if (!barrier_count) { |
| first_barrier = subres; |
| barrier_count = level_count; |
| continue; |
| } else if (first_barrier + barrier_count == subres) { |
| barrier_count += level_count; |
| continue; |
| } |
| |
| ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, |
| first_barrier, barrier_count, |
| before, after, flags); |
| if (ret != VK_SUCCESS) |
| return ret; |
| |
| barrier_count = 0; |
| } |
| |
| if (barrier_count) { |
| ret = dzn_cmd_buffer_queue_transition_barriers(cmdbuf, image->res, |
| first_barrier, barrier_count, |
| before, after, flags); |
| if (ret != VK_SUCCESS) |
| return ret; |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
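/* Enhanced-barriers path: emit a single global barrier covering all
 * resources.
 */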
| static void |
| dzn_cmd_buffer_global_barrier(struct dzn_cmd_buffer *cmdbuf, |
| D3D12_BARRIER_SYNC sync_before, |
| D3D12_BARRIER_SYNC sync_after, |
| D3D12_BARRIER_ACCESS access_before, |
| D3D12_BARRIER_ACCESS access_after) |
| { |
| assert(cmdbuf->enhanced_barriers); |
| D3D12_GLOBAL_BARRIER global = { |
| .SyncBefore = sync_before, |
| .SyncAfter = sync_after, |
| .AccessBefore = access_before, |
| .AccessAfter = access_after, |
| }; |
| D3D12_BARRIER_GROUP group = { |
| .Type = D3D12_BARRIER_TYPE_GLOBAL, |
| .NumBarriers = 1, |
| .pGlobalBarriers = &global, |
| }; |
| ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group); |
| } |
| |
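/* Enhanced-barriers path: emit a barrier covering the whole buffer. */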
| static void |
| dzn_cmd_buffer_buffer_barrier(struct dzn_cmd_buffer *cmdbuf, |
| ID3D12Resource *buf, |
| D3D12_BARRIER_SYNC sync_before, |
| D3D12_BARRIER_SYNC sync_after, |
| D3D12_BARRIER_ACCESS access_before, |
| D3D12_BARRIER_ACCESS access_after) |
| { |
| assert(cmdbuf->enhanced_barriers); |
| D3D12_BUFFER_BARRIER buffer = { |
| .SyncBefore = sync_before, |
| .SyncAfter = sync_after, |
| .AccessBefore = access_before, |
| .AccessAfter = access_after, |
| .pResource = buf, |
| .Offset = 0, |
| .Size = UINT64_MAX, |
| }; |
| D3D12_BARRIER_GROUP group = { |
| .Type = D3D12_BARRIER_TYPE_BUFFER, |
| .NumBarriers = 1, |
| .pBufferBarriers = &buffer, |
| }; |
| ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group); |
| } |
| |
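/* Enhanced-barriers path: emit a texture barrier for the given subresource
 * range, translating the Vulkan aspect mask into a D3D12 plane range.
 */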
| static void |
| dzn_cmd_buffer_image_barrier(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| D3D12_BARRIER_SYNC sync_before, |
| D3D12_BARRIER_SYNC sync_after, |
| D3D12_BARRIER_ACCESS access_before, |
| D3D12_BARRIER_ACCESS access_after, |
| D3D12_BARRIER_LAYOUT layout_before, |
| D3D12_BARRIER_LAYOUT layout_after, |
| const VkImageSubresourceRange *range) |
| { |
| assert(cmdbuf->enhanced_barriers); |
| uint32_t first_plane = (range->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0; |
| uint32_t plane_count = first_plane == 0 && (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 2 : 1; |
| D3D12_TEXTURE_BARRIER texture = { |
| .SyncBefore = sync_before, |
| .SyncAfter = sync_after, |
| .AccessBefore = access_before, |
| .AccessAfter = access_after, |
| .LayoutBefore = layout_before, |
| .LayoutAfter = layout_after, |
| .Subresources.FirstArraySlice = range->baseArrayLayer, |
| .Subresources.NumArraySlices = dzn_get_layer_count(image, range), |
| .Subresources.IndexOrFirstMipLevel = range->baseMipLevel, |
| .Subresources.NumMipLevels = dzn_get_level_count(image, range), |
| .Subresources.FirstPlane = first_plane, |
| .Subresources.NumPlanes = plane_count, |
| .pResource = image->res, |
| }; |
| D3D12_BARRIER_GROUP group = { |
| .Type = D3D12_BARRIER_TYPE_TEXTURE, |
| .NumBarriers = 1, |
| .pTextureBarriers = &texture, |
| }; |
| ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &group); |
| } |
| |
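/* Transition an image into the layout needed by an internal operation and
 * return the layout to restore once the operation is done. Images created
 * with ALLOW_SIMULTANEOUS_ACCESS don't need layout transitions, in which
 * case the needed layout is returned and the restore becomes a no-op.
 */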
| static D3D12_BARRIER_LAYOUT |
| dzn_cmd_buffer_require_layout(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| VkImageLayout current_layout, |
| D3D12_BARRIER_LAYOUT needed_layout, |
| const VkImageSubresourceRange *range) |
| { |
| assert(cmdbuf->enhanced_barriers); |
   /* We shouldn't need these fixups on a subresource range that includes both
    * depth and stencil, where one is read-only and the other is writable. */
| if (range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| assert(current_layout != VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL && |
| current_layout != VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL); |
| } |
| |
   /* Nothing needs to be done for these; the appropriate sync/access was
    * already handled. */
| if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) |
| return needed_layout; |
| |
| D3D12_BARRIER_LAYOUT current_d3d_layout = dzn_vk_layout_to_d3d_layout(current_layout, cmdbuf->type, range->aspectMask); |
| if (current_d3d_layout != needed_layout) { |
| dzn_cmd_buffer_image_barrier(cmdbuf, image, |
| D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL, |
| D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON, |
| current_d3d_layout, needed_layout, range); |
| } |
| return current_d3d_layout; |
| } |
| |
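/* Counterpart of dzn_cmd_buffer_require_layout(): transition the image back
 * to the layout it had before the internal operation, if a transition was
 * actually performed.
 */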
| static void |
| dzn_cmd_buffer_restore_layout(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| D3D12_BARRIER_SYNC sync, |
| D3D12_BARRIER_ACCESS access, |
| D3D12_BARRIER_LAYOUT needed_layout, |
| D3D12_BARRIER_LAYOUT restore_layout, |
| const VkImageSubresourceRange *range) |
| { |
| if (needed_layout != restore_layout) { |
| dzn_cmd_buffer_image_barrier(cmdbuf, image, |
| sync, D3D12_BARRIER_SYNC_COPY, |
| access, D3D12_BARRIER_ACCESS_COMMON, |
| needed_layout, restore_layout, range); |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_destroy(struct vk_command_buffer *cbuf) |
| { |
| if (!cbuf) |
| return; |
| |
| struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk); |
| |
| if (cmdbuf->cmdlist) |
| ID3D12GraphicsCommandList1_Release(cmdbuf->cmdlist); |
| |
| if (cmdbuf->cmdlist8) |
| ID3D12GraphicsCommandList8_Release(cmdbuf->cmdlist8); |
| |
| if (cmdbuf->cmdlist9) |
| ID3D12GraphicsCommandList9_Release(cmdbuf->cmdlist9); |
| |
| if (cmdbuf->cmdalloc) |
| ID3D12CommandAllocator_Release(cmdbuf->cmdalloc); |
| |
| for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) { |
| list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) { |
| list_del(&res->link); |
| ID3D12Resource_Release(res->res); |
| vk_free(&cbuf->pool->alloc, res); |
| } |
| } |
| |
| dzn_descriptor_heap_pool_finish(&cmdbuf->cbv_srv_uav_pool); |
| dzn_descriptor_heap_pool_finish(&cmdbuf->sampler_pool); |
| dzn_descriptor_heap_pool_finish(&cmdbuf->rtvs.pool); |
| dzn_descriptor_heap_pool_finish(&cmdbuf->dsvs.pool); |
| util_dynarray_fini(&cmdbuf->events.signal); |
| util_dynarray_fini(&cmdbuf->queries.reset); |
| util_dynarray_fini(&cmdbuf->queries.signal); |
| |
| if (cmdbuf->rtvs.ht) { |
| hash_table_foreach(cmdbuf->rtvs.ht, he) |
| vk_free(&cbuf->pool->alloc, he->data); |
| _mesa_hash_table_destroy(cmdbuf->rtvs.ht, NULL); |
| } |
| |
| if (cmdbuf->dsvs.ht) { |
| hash_table_foreach(cmdbuf->dsvs.ht, he) |
| vk_free(&cbuf->pool->alloc, he->data); |
| _mesa_hash_table_destroy(cmdbuf->dsvs.ht, NULL); |
| } |
| |
| if (cmdbuf->events.ht) |
| _mesa_hash_table_destroy(cmdbuf->events.ht, NULL); |
| |
| if (cmdbuf->queries.ht) { |
| hash_table_foreach(cmdbuf->queries.ht, he) { |
| struct dzn_cmd_buffer_query_pool_state *qpstate = he->data; |
| util_dynarray_fini(&qpstate->reset); |
| util_dynarray_fini(&qpstate->collect); |
| util_dynarray_fini(&qpstate->signal); |
| util_dynarray_fini(&qpstate->zero); |
| vk_free(&cbuf->pool->alloc, he->data); |
| } |
| _mesa_hash_table_destroy(cmdbuf->queries.ht, NULL); |
| } |
| |
| if (cmdbuf->transition_barriers) { |
| hash_table_foreach(cmdbuf->transition_barriers, he) |
| vk_free(&cbuf->pool->alloc, he->data); |
| _mesa_hash_table_destroy(cmdbuf->transition_barriers, NULL); |
| } |
| |
| vk_command_buffer_finish(&cmdbuf->vk); |
| vk_free(&cbuf->pool->alloc, cmdbuf); |
| } |
| |
| static void |
| dzn_cmd_buffer_reset(struct vk_command_buffer *cbuf, VkCommandBufferResetFlags flags) |
| { |
| struct dzn_cmd_buffer *cmdbuf = container_of(cbuf, struct dzn_cmd_buffer, vk); |
| |
| /* Reset the state */ |
| memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); |
| cmdbuf->state.multiview.num_views = 1; |
| cmdbuf->state.multiview.view_mask = 1; |
| |
| /* TODO: Return resources to the pool */ |
| for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) { |
| list_for_each_entry_safe(struct dzn_internal_resource, res, &cmdbuf->internal_bufs[bucket], link) { |
| list_del(&res->link); |
| ID3D12Resource_Release(res->res); |
| vk_free(&cmdbuf->vk.pool->alloc, res); |
| } |
| } |
| cmdbuf->cur_upload_buf = NULL; |
| |
| util_dynarray_clear(&cmdbuf->events.signal); |
| util_dynarray_clear(&cmdbuf->queries.reset); |
| util_dynarray_clear(&cmdbuf->queries.signal); |
| hash_table_foreach(cmdbuf->rtvs.ht, he) |
| vk_free(&cmdbuf->vk.pool->alloc, he->data); |
| _mesa_hash_table_clear(cmdbuf->rtvs.ht, NULL); |
| cmdbuf->null_rtv.ptr = 0; |
| dzn_descriptor_heap_pool_reset(&cmdbuf->rtvs.pool); |
| hash_table_foreach(cmdbuf->dsvs.ht, he) |
| vk_free(&cmdbuf->vk.pool->alloc, he->data); |
| _mesa_hash_table_clear(cmdbuf->dsvs.ht, NULL); |
| hash_table_foreach(cmdbuf->queries.ht, he) { |
| struct dzn_cmd_buffer_query_pool_state *qpstate = he->data; |
| util_dynarray_fini(&qpstate->reset); |
| util_dynarray_fini(&qpstate->collect); |
| util_dynarray_fini(&qpstate->signal); |
| util_dynarray_fini(&qpstate->zero); |
| vk_free(&cmdbuf->vk.pool->alloc, he->data); |
| } |
| _mesa_hash_table_clear(cmdbuf->queries.ht, NULL); |
| _mesa_hash_table_clear(cmdbuf->events.ht, NULL); |
| hash_table_foreach(cmdbuf->transition_barriers, he) |
| vk_free(&cmdbuf->vk.pool->alloc, he->data); |
| _mesa_hash_table_clear(cmdbuf->transition_barriers, NULL); |
| dzn_descriptor_heap_pool_reset(&cmdbuf->dsvs.pool); |
| dzn_descriptor_heap_pool_reset(&cmdbuf->cbv_srv_uav_pool); |
| dzn_descriptor_heap_pool_reset(&cmdbuf->sampler_pool); |
| |
| if (cmdbuf->vk.state == MESA_VK_COMMAND_BUFFER_STATE_RECORDING && |
| cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) |
| ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist); |
| |
| vk_command_buffer_reset(&cmdbuf->vk); |
| |
| if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) |
| ID3D12CommandAllocator_Reset(cmdbuf->cmdalloc); |
| } |
| |
| static uint32_t |
| dzn_cmd_buffer_rtv_key_hash_function(const void *key) |
| { |
| return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_rtv_key)); |
| } |
| |
| static bool |
| dzn_cmd_buffer_rtv_key_equals_function(const void *a, const void *b) |
| { |
| return memcmp(a, b, sizeof(struct dzn_cmd_buffer_rtv_key)) == 0; |
| } |
| |
| static uint32_t |
| dzn_cmd_buffer_dsv_key_hash_function(const void *key) |
| { |
| return _mesa_hash_data(key, sizeof(struct dzn_cmd_buffer_dsv_key)); |
| } |
| |
| static bool |
| dzn_cmd_buffer_dsv_key_equals_function(const void *a, const void *b) |
| { |
| return memcmp(a, b, sizeof(struct dzn_cmd_buffer_dsv_key)) == 0; |
| } |
| |
| static const struct vk_command_buffer_ops cmd_buffer_ops = { |
| .destroy = dzn_cmd_buffer_destroy, |
| .reset = dzn_cmd_buffer_reset, |
| }; |
| |
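/* Masks of the sync/access bits that are valid on each command-list type.
 * App-provided barrier scopes are ANDed with these before being handed to
 * the enhanced-barriers API.
 */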
| static const D3D12_BARRIER_SYNC cmd_buffer_valid_sync[] = { |
| [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_SYNC_VIDEO_DECODE | |
| D3D12_BARRIER_SYNC_VIDEO_PROCESS | |
| D3D12_BARRIER_SYNC_VIDEO_ENCODE), |
| [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_SYNC_ALL | |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_RAYTRACING | |
| D3D12_BARRIER_SYNC_COPY | |
| D3D12_BARRIER_SYNC_EXECUTE_INDIRECT | |
| D3D12_BARRIER_SYNC_PREDICATION | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING | |
| D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO | |
| D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW | |
| D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE | |
| D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE), |
| [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_SYNC_ALL | D3D12_BARRIER_SYNC_COPY |
| }; |
| static const D3D12_BARRIER_ACCESS cmd_buffer_valid_access[] = { |
| [D3D12_COMMAND_LIST_TYPE_DIRECT] = ~(D3D12_BARRIER_ACCESS_VIDEO_DECODE_READ | |
| D3D12_BARRIER_ACCESS_VIDEO_DECODE_WRITE | |
| D3D12_BARRIER_ACCESS_VIDEO_PROCESS_READ | |
| D3D12_BARRIER_ACCESS_VIDEO_PROCESS_WRITE | |
| D3D12_BARRIER_ACCESS_VIDEO_ENCODE_READ | |
| D3D12_BARRIER_ACCESS_VIDEO_ENCODE_WRITE), |
| [D3D12_COMMAND_LIST_TYPE_COMPUTE] = (D3D12_BARRIER_ACCESS_CONSTANT_BUFFER | |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS | |
| D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT | |
| D3D12_BARRIER_ACCESS_PREDICATION | |
| D3D12_BARRIER_ACCESS_COPY_DEST | |
| D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ | |
| D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE), |
| [D3D12_COMMAND_LIST_TYPE_COPY] = D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_COPY_DEST, |
| }; |
| |
| static VkResult |
| dzn_cmd_buffer_create(const VkCommandBufferAllocateInfo *info, |
| VkCommandBuffer *out) |
| { |
| VK_FROM_HANDLE(vk_command_pool, pool, info->commandPool); |
| struct dzn_device *device = container_of(pool->base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = |
| container_of(device->vk.physical, struct dzn_physical_device, vk); |
| |
| assert(pool->queue_family_index < pdev->queue_family_count); |
| |
| D3D12_COMMAND_LIST_TYPE type = |
| pdev->queue_families[pool->queue_family_index].desc.Type; |
| |
| struct dzn_cmd_buffer *cmdbuf = |
| vk_zalloc(&pool->alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!cmdbuf) |
| return vk_error(pool->base.device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| VkResult result = |
| vk_command_buffer_init(pool, &cmdbuf->vk, &cmd_buffer_ops, info->level); |
| if (result != VK_SUCCESS) { |
| vk_free(&pool->alloc, cmdbuf); |
| return result; |
| } |
| |
| memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); |
| cmdbuf->state.multiview.num_views = 1; |
| cmdbuf->state.multiview.view_mask = 1; |
| for (uint32_t bucket = 0; bucket < DZN_INTERNAL_BUF_BUCKET_COUNT; ++bucket) |
| list_inithead(&cmdbuf->internal_bufs[bucket]); |
| util_dynarray_init(&cmdbuf->events.signal, NULL); |
| util_dynarray_init(&cmdbuf->queries.reset, NULL); |
| util_dynarray_init(&cmdbuf->queries.signal, NULL); |
| dzn_descriptor_heap_pool_init(&cmdbuf->rtvs.pool, device, |
| D3D12_DESCRIPTOR_HEAP_TYPE_RTV, |
| false, &pool->alloc); |
| dzn_descriptor_heap_pool_init(&cmdbuf->dsvs.pool, device, |
| D3D12_DESCRIPTOR_HEAP_TYPE_DSV, |
| false, &pool->alloc); |
| dzn_descriptor_heap_pool_init(&cmdbuf->cbv_srv_uav_pool, device, |
| D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, |
| true, &pool->alloc); |
| dzn_descriptor_heap_pool_init(&cmdbuf->sampler_pool, device, |
| D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, |
| true, &pool->alloc); |
| |
| cmdbuf->events.ht = |
| _mesa_pointer_hash_table_create(NULL); |
| cmdbuf->queries.ht = |
| _mesa_pointer_hash_table_create(NULL); |
| cmdbuf->transition_barriers = |
| _mesa_pointer_hash_table_create(NULL); |
| cmdbuf->rtvs.ht = |
| _mesa_hash_table_create(NULL, |
| dzn_cmd_buffer_rtv_key_hash_function, |
| dzn_cmd_buffer_rtv_key_equals_function); |
| cmdbuf->dsvs.ht = |
| _mesa_hash_table_create(NULL, |
| dzn_cmd_buffer_dsv_key_hash_function, |
| dzn_cmd_buffer_dsv_key_equals_function); |
| if (!cmdbuf->events.ht || !cmdbuf->queries.ht || |
| !cmdbuf->transition_barriers || |
| !cmdbuf->rtvs.ht || !cmdbuf->dsvs.ht) { |
| result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| goto out; |
| } |
| |
| if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { |
| if (FAILED(ID3D12Device1_CreateCommandAllocator(device->dev, type, |
| &IID_ID3D12CommandAllocator, |
| (void **)&cmdbuf->cmdalloc))) { |
| result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| goto out; |
| } |
| |
| if (FAILED(ID3D12Device4_CreateCommandList1(device->dev, 0, type, |
| D3D12_COMMAND_LIST_FLAG_NONE, |
| &IID_ID3D12GraphicsCommandList1, |
| (void **)&cmdbuf->cmdlist))) { |
| result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| goto out; |
| } |
| |
| (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList8, (void **)&cmdbuf->cmdlist8); |
| (void)ID3D12GraphicsCommandList_QueryInterface(cmdbuf->cmdlist, &IID_ID3D12GraphicsCommandList9, (void **)&cmdbuf->cmdlist9); |
| } |
| |
| cmdbuf->type = type; |
| cmdbuf->valid_sync = cmd_buffer_valid_sync[type]; |
| cmdbuf->valid_access = cmd_buffer_valid_access[type]; |
| cmdbuf->enhanced_barriers = pdev->options12.EnhancedBarriersSupported; |
| |
| out: |
| if (result != VK_SUCCESS) |
| dzn_cmd_buffer_destroy(&cmdbuf->vk); |
| else |
| *out = dzn_cmd_buffer_to_handle(cmdbuf); |
| |
| return result; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| dzn_AllocateCommandBuffers(VkDevice device, |
| const VkCommandBufferAllocateInfo *pAllocateInfo, |
| VkCommandBuffer *pCommandBuffers) |
| { |
| VK_FROM_HANDLE(dzn_device, dev, device); |
| VkResult result = VK_SUCCESS; |
| uint32_t i; |
| |
| for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { |
| result = dzn_cmd_buffer_create(pAllocateInfo, |
| &pCommandBuffers[i]); |
| if (result != VK_SUCCESS) |
| break; |
| } |
| |
| if (result != VK_SUCCESS) { |
| dev->vk.dispatch_table.FreeCommandBuffers(device, pAllocateInfo->commandPool, |
| i, pCommandBuffers); |
| for (i = 0; i < pAllocateInfo->commandBufferCount; i++) |
| pCommandBuffers[i] = VK_NULL_HANDLE; |
| } |
| |
| return result; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| dzn_BeginCommandBuffer(VkCommandBuffer commandBuffer, |
| const VkCommandBufferBeginInfo *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| vk_command_buffer_begin(&cmdbuf->vk, info); |
| if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) |
| ID3D12GraphicsCommandList1_Reset(cmdbuf->cmdlist, cmdbuf->cmdalloc, NULL); |
| return vk_command_buffer_get_record_result(&cmdbuf->vk); |
| } |
| |
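/* Move the event states accumulated in the events hash table into the
 * events.signal dynarray (one entry per event), then clear the table.
 */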
| static void |
| dzn_cmd_buffer_gather_events(struct dzn_cmd_buffer *cmdbuf) |
| { |
| if (vk_command_buffer_has_error(&cmdbuf->vk)) |
| goto out; |
| |
| hash_table_foreach(cmdbuf->events.ht, he) { |
| enum dzn_event_state state = (uintptr_t)he->data; |
| |
| struct dzn_cmd_event_signal signal = { (struct dzn_event *)he->key, state == DZN_EVENT_STATE_SET }; |
| struct dzn_cmd_event_signal *entry = |
| util_dynarray_grow(&cmdbuf->events.signal, struct dzn_cmd_event_signal, 1); |
| |
| if (!entry) { |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| break; |
| } |
| |
| *entry = signal; |
| } |
| |
| out: |
| _mesa_hash_table_clear(cmdbuf->events.ht, NULL); |
| } |
| |
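/* Helpers managing dynamically-sized bitsets backed by util_dynarrays. New
 * words are zero-initialized on growth so untouched bits read as unset.
 */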
| static VkResult |
| dzn_cmd_buffer_dynbitset_reserve(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) |
| { |
| if (bit < util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) |
| return VK_SUCCESS; |
| |
| unsigned old_sz = array->size; |
| void *ptr = util_dynarray_grow(array, BITSET_WORD, (bit + BITSET_WORDBITS) / BITSET_WORDBITS); |
| if (!ptr) |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| memset(ptr, 0, array->size - old_sz); |
| return VK_SUCCESS; |
| } |
| |
| static bool |
| dzn_cmd_buffer_dynbitset_test(struct util_dynarray *array, uint32_t bit) |
| { |
| uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; |
| |
| if (bit < nbits) |
| return BITSET_TEST(util_dynarray_element(array, BITSET_WORD, 0), bit); |
| |
| return false; |
| } |
| |
| static VkResult |
| dzn_cmd_buffer_dynbitset_set(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) |
| { |
| VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| BITSET_SET(util_dynarray_element(array, BITSET_WORD, 0), bit); |
| return VK_SUCCESS; |
| } |
| |
| static void |
| dzn_cmd_buffer_dynbitset_clear(struct dzn_cmd_buffer *cmdbuf, struct util_dynarray *array, uint32_t bit) |
| { |
| if (bit >= util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS) |
| return; |
| |
| BITSET_CLEAR(util_dynarray_element(array, BITSET_WORD, 0), bit); |
| } |
| |
| static VkResult |
| dzn_cmd_buffer_dynbitset_set_range(struct dzn_cmd_buffer *cmdbuf, |
| struct util_dynarray *array, |
| uint32_t bit, uint32_t count) |
| { |
| VkResult result = dzn_cmd_buffer_dynbitset_reserve(cmdbuf, array, bit + count - 1); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| BITSET_SET_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + count - 1); |
| return VK_SUCCESS; |
| } |
| |
| static void |
| dzn_cmd_buffer_dynbitset_clear_range(struct dzn_cmd_buffer *cmdbuf, |
| struct util_dynarray *array, |
| uint32_t bit, uint32_t count) |
| { |
| uint32_t nbits = util_dynarray_num_elements(array, BITSET_WORD) * BITSET_WORDBITS; |
| |
| if (!nbits) |
| return; |
| |
| uint32_t end = MIN2(bit + count, nbits) - 1; |
| |
| while (bit <= end) { |
| uint32_t subcount = MIN2(end + 1 - bit, 32 - (bit % 32)); |
| BITSET_CLEAR_RANGE(util_dynarray_element(array, BITSET_WORD, 0), bit, bit + subcount - 1); |
| bit += subcount; |
| } |
| } |
| |
| static struct dzn_cmd_buffer_query_pool_state * |
| dzn_cmd_buffer_create_query_pool_state(struct dzn_cmd_buffer *cmdbuf) |
| { |
| struct dzn_cmd_buffer_query_pool_state *state = |
| vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*state), |
| 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!state) { |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return NULL; |
| } |
| |
| util_dynarray_init(&state->reset, NULL); |
| util_dynarray_init(&state->collect, NULL); |
| util_dynarray_init(&state->signal, NULL); |
| util_dynarray_init(&state->zero, NULL); |
| return state; |
| } |
| |
| static void |
| dzn_cmd_buffer_destroy_query_pool_state(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_cmd_buffer_query_pool_state *state) |
| { |
| util_dynarray_fini(&state->reset); |
| util_dynarray_fini(&state->collect); |
| util_dynarray_fini(&state->signal); |
| util_dynarray_fini(&state->zero); |
| vk_free(&cmdbuf->vk.pool->alloc, state); |
| } |
| |
| static struct dzn_cmd_buffer_query_pool_state * |
| dzn_cmd_buffer_get_query_pool_state(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_query_pool *qpool) |
| { |
| struct dzn_cmd_buffer_query_pool_state *state = NULL; |
| struct hash_entry *he = |
| _mesa_hash_table_search(cmdbuf->queries.ht, qpool); |
| |
| if (!he) { |
| state = dzn_cmd_buffer_create_query_pool_state(cmdbuf); |
| if (!state) |
| return NULL; |
| |
| he = _mesa_hash_table_insert(cmdbuf->queries.ht, qpool, state); |
| if (!he) { |
| dzn_cmd_buffer_destroy_query_pool_state(cmdbuf, state); |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return NULL; |
| } |
| } else { |
| state = he->data; |
| } |
| |
| return state; |
| } |
| |
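/* Resolve the pending query results into the pool's resolve buffer, copy
 * them into the collect buffer, and mark the affected queries as available.
 * Queries flagged in the "zero" bitset (multi-view results for views other
 * than the first) are zeroed instead of resolved.
 */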
| static VkResult |
| dzn_cmd_buffer_collect_queries(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_query_pool *qpool, |
| struct dzn_cmd_buffer_query_pool_state *state, |
| uint32_t first_query, |
| uint32_t query_count) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| uint32_t nbits_collect = util_dynarray_num_elements(&state->collect, BITSET_WORD) * BITSET_WORDBITS; |
| uint32_t nbits_zero = util_dynarray_num_elements(&state->zero, BITSET_WORD) * BITSET_WORDBITS; |
| uint32_t start, end; |
| |
| if (!nbits_collect && !nbits_zero) |
| return VK_SUCCESS; |
| |
| query_count = MIN2(query_count, MAX2(nbits_collect, nbits_zero) - first_query); |
| nbits_collect = MIN2(first_query + query_count, nbits_collect); |
| nbits_zero = MIN2(first_query + query_count, nbits_zero); |
| |
| VkResult result = |
| dzn_cmd_buffer_dynbitset_reserve(cmdbuf, &state->signal, first_query + query_count - 1); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| if (cmdbuf->enhanced_barriers) { |
| /* A global barrier is used because both resolve_buffer and collect_buffer might have been |
| * copied from recently, and it's not worth the effort to track whether that's true. */ |
| dzn_cmd_buffer_global_barrier(cmdbuf, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST); |
| } else { |
| dzn_cmd_buffer_flush_transition_barriers(cmdbuf, qpool->resolve_buffer, 0, 1); |
| } |
| |
| /* Resolve the valid query regions into the resolve buffer */ |
| BITSET_WORD *collect = |
| util_dynarray_element(&state->collect, BITSET_WORD, 0); |
| |
| for (start = first_query, end = first_query, |
| __bitset_next_range(&start, &end, collect, nbits_collect); |
| start < nbits_collect; |
| __bitset_next_range(&start, &end, collect, nbits_collect)) { |
| ID3D12GraphicsCommandList1_ResolveQueryData(cmdbuf->cmdlist, |
| qpool->heap, |
| qpool->queries[start].type, |
| start, end - start, |
| qpool->resolve_buffer, |
| qpool->query_size * start); |
| } |
| |
   /* Zero out the sections of the resolve buffer containing queries emitted
    * for multi-view rendering on views other than the first one. */
| BITSET_WORD *zero = |
| util_dynarray_element(&state->zero, BITSET_WORD, 0); |
| const uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); |
| |
| for (start = first_query, end = first_query, |
| __bitset_next_range(&start, &end, zero, nbits_zero); |
| start < nbits_zero; |
| __bitset_next_range(&start, &end, zero, nbits_zero)) { |
| uint32_t count = end - start; |
| |
| for (unsigned i = 0; i < count; i += step) { |
| uint32_t sub_count = MIN2(step, count - i); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, |
| qpool->resolve_buffer, |
| dzn_query_pool_get_result_offset(qpool, start + i), |
| device->queries.refs, |
| DZN_QUERY_REFS_ALL_ZEROS_OFFSET, |
| qpool->query_size * sub_count); |
| } |
| } |
| |
| uint32_t offset = dzn_query_pool_get_result_offset(qpool, first_query); |
| uint32_t size = dzn_query_pool_get_result_size(qpool, query_count); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, |
| qpool->resolve_buffer, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer, |
| 0, 1, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, |
| qpool->collect_buffer, offset, |
| qpool->resolve_buffer, offset, |
| size); |
| |
| struct query_pass_data { |
| struct util_dynarray *dynarray; |
| BITSET_WORD *bitset; |
| uint32_t count; |
| } passes[] = { |
| { &state->collect, collect, nbits_collect }, |
| { &state->zero, zero, nbits_zero } |
| }; |
| for (uint32_t pass = 0; pass < ARRAY_SIZE(passes); ++pass) { |
| BITSET_WORD *bitset = passes[pass].bitset; |
| uint32_t nbits = passes[pass].count; |
| for (start = first_query, end = first_query, |
| __bitset_next_range(&start, &end, bitset, nbits); |
| start < nbits; |
| __bitset_next_range(&start, &end, bitset, nbits)) { |
| uint32_t step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); |
| uint32_t count = end - start; |
| |
| for (unsigned i = 0; i < count; i += step) { |
| uint32_t sub_count = MIN2(step, count - i); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, |
| qpool->collect_buffer, |
| dzn_query_pool_get_availability_offset(qpool, start + i), |
| device->queries.refs, |
| DZN_QUERY_REFS_ALL_ONES_OFFSET, |
| sizeof(uint64_t) * sub_count); |
| } |
| |
| dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->signal, start, count); |
| dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, passes[pass].dynarray, start, count); |
| } |
| } |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->resolve_buffer, |
| 0, 1, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| 0); |
| } |
| return VK_SUCCESS; |
| } |
| |
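/* Convert a per-pool bitset of query indices into (pool, first, count)
 * ranges appended to one of the command buffer's global query-op arrays.
 */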
| static VkResult |
| dzn_cmd_buffer_collect_query_ops(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_query_pool *qpool, |
| struct util_dynarray *bitset_array, |
| struct util_dynarray *ops_array) |
| { |
| BITSET_WORD *bitset = util_dynarray_element(bitset_array, BITSET_WORD, 0); |
| uint32_t nbits = util_dynarray_num_elements(bitset_array, BITSET_WORD) * BITSET_WORDBITS; |
| uint32_t start, end; |
| |
| BITSET_FOREACH_RANGE(start, end, bitset, nbits) { |
| struct dzn_cmd_buffer_query_range range = { qpool, start, end - start }; |
| struct dzn_cmd_buffer_query_range *entry = |
| util_dynarray_grow(ops_array, struct dzn_cmd_buffer_query_range, 1); |
| |
| if (!entry) |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| *entry = range; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult |
| dzn_cmd_buffer_gather_queries(struct dzn_cmd_buffer *cmdbuf) |
| { |
| hash_table_foreach(cmdbuf->queries.ht, he) { |
| struct dzn_query_pool *qpool = (struct dzn_query_pool *)he->key; |
| struct dzn_cmd_buffer_query_pool_state *state = he->data; |
| VkResult result = |
| dzn_cmd_buffer_collect_queries(cmdbuf, qpool, state, 0, qpool->query_count); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->reset, &cmdbuf->queries.reset); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = dzn_cmd_buffer_collect_query_ops(cmdbuf, qpool, &state->signal, &cmdbuf->queries.signal); |
| if (result != VK_SUCCESS) |
| return result; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL |
| dzn_EndCommandBuffer(VkCommandBuffer commandBuffer) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { |
| dzn_cmd_buffer_gather_events(cmdbuf); |
| dzn_cmd_buffer_gather_queries(cmdbuf); |
| HRESULT hres = ID3D12GraphicsCommandList1_Close(cmdbuf->cmdlist); |
| if (FAILED(hres)) |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| return vk_command_buffer_end(&cmdbuf->vk); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, |
| const VkDependencyInfo *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| bool execution_barrier = |
| !info->memoryBarrierCount && |
| !info->bufferMemoryBarrierCount && |
| !info->imageMemoryBarrierCount; |
| |
| if (execution_barrier) { |
      /* An execution barrier can be emulated with a NULL UAV barrier (AKA a
       * pipeline flush). That's the best we can do with the standard D3D12
       * barrier API.
       */
| D3D12_RESOURCE_BARRIER barrier = { |
| .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, |
| .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, |
| .UAV = { .pResource = NULL }, |
| }; |
| |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); |
| } |
| |
| /* Global memory barriers can be emulated with NULL UAV/Aliasing barriers. |
| * Scopes are not taken into account, but that's inherent to the current |
| * D3D12 barrier API. |
| */ |
| if (info->memoryBarrierCount) { |
| D3D12_RESOURCE_BARRIER barriers[2] = { 0 }; |
| |
| barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; |
| barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; |
| barriers[0].UAV.pResource = NULL; |
| barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; |
| barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; |
| barriers[1].Aliasing.pResourceBefore = NULL; |
| barriers[1].Aliasing.pResourceAfter = NULL; |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 2, barriers); |
| } |
| |
| for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) { |
| VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer); |
| D3D12_RESOURCE_BARRIER barrier = { 0 }; |
| |
      /* UAVs are used only for storage buffers; skip all other buffers. */
| if (!(buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) |
| continue; |
| |
| barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; |
| barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; |
| barrier.UAV.pResource = buf->res; |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); |
| } |
| |
| for (uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) { |
| const VkImageMemoryBarrier2 *ibarrier = &info->pImageMemoryBarriers[i]; |
| const VkImageSubresourceRange *range = &ibarrier->subresourceRange; |
| VK_FROM_HANDLE(dzn_image, image, ibarrier->image); |
| |
| VkImageLayout old_layout = ibarrier->oldLayout; |
| VkImageLayout new_layout = ibarrier->newLayout; |
| if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && |
| old_layout == VK_IMAGE_LAYOUT_GENERAL && |
| (ibarrier->srcAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) |
| old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; |
| if ((image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && |
| new_layout == VK_IMAGE_LAYOUT_GENERAL && |
| (ibarrier->dstAccessMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) |
| new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| old_layout, |
| new_layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| |
| /* A straightforward translation of the Vulkan sync flags to D3D sync flags */ |
| static D3D12_BARRIER_SYNC |
| translate_sync(VkPipelineStageFlags2 flags, bool before) |
| { |
| if (!before && (flags & VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) |
| return D3D12_BARRIER_SYNC_ALL; |
| else if (before && (flags & VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) |
| return D3D12_BARRIER_SYNC_ALL; |
| |
| if (flags & (VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | |
| /* Theoretically transfer should be less, but it encompasses blit |
| * (which can be draws) and clears, so bloat it up to everything. */ |
| VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | |
| VK_PIPELINE_STAGE_2_BLIT_BIT)) |
| return D3D12_BARRIER_SYNC_ALL; |
| |
| D3D12_BARRIER_SYNC ret = D3D12_BARRIER_SYNC_NONE; |
| if (flags & (VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | |
| VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT | |
| VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT)) |
| ret |= D3D12_BARRIER_SYNC_INDEX_INPUT; |
| if (flags & VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT) |
| ret |= D3D12_BARRIER_SYNC_VERTEX_SHADING; |
| if (flags & (VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | |
| VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | |
| VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | |
| VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT)) |
| ret |= D3D12_BARRIER_SYNC_NON_PIXEL_SHADING; |
| if (flags & (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | |
| VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)) |
| ret |= D3D12_BARRIER_SYNC_PIXEL_SHADING; |
| if (flags & (VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | |
| VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT)) |
| ret |= D3D12_BARRIER_SYNC_DEPTH_STENCIL; |
| if (flags & VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT) |
| ret |= D3D12_BARRIER_SYNC_RENDER_TARGET; |
| if (flags & VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT) |
| ret |= D3D12_BARRIER_SYNC_COMPUTE_SHADING; |
| if (flags & VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT) |
| ret |= D3D12_BARRIER_SYNC_DRAW; |
| if (flags & VK_PIPELINE_STAGE_2_COPY_BIT) |
| ret |= D3D12_BARRIER_SYNC_COPY; |
| if (flags & VK_PIPELINE_STAGE_2_RESOLVE_BIT) |
| ret |= D3D12_BARRIER_SYNC_RESOLVE; |
| if (flags & VK_PIPELINE_STAGE_2_CLEAR_BIT) |
| ret |= D3D12_BARRIER_SYNC_RENDER_TARGET | |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL | |
| D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; |
| if (flags & VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT) |
| ret |= D3D12_BARRIER_SYNC_PREDICATION; |
| if (flags & (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | |
| VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV)) |
| ret |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; |
| if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR) |
| ret |= D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE; |
| if (flags & VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR) |
| ret |= D3D12_BARRIER_SYNC_RAYTRACING; |
| if (flags & VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR) |
| ret |= D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE; |
| |
| return ret; |
| } |
| |
| /* A straightforward translation of Vulkan access to D3D access */ |
| static D3D12_BARRIER_ACCESS |
| translate_access(VkAccessFlags2 flags) |
| { |
| D3D12_BARRIER_ACCESS ret = D3D12_BARRIER_ACCESS_COMMON; |
| if (flags & VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT; |
| if (flags & VK_ACCESS_2_INDEX_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_INDEX_BUFFER; |
| if (flags & VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER; |
| if (flags & VK_ACCESS_2_UNIFORM_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER; |
| if (flags & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | |
| VK_ACCESS_2_SHADER_SAMPLED_READ_BIT)) |
| ret |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE; |
| if (flags & VK_ACCESS_2_SHADER_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER | |
| D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; |
| if (flags & (VK_ACCESS_2_SHADER_WRITE_BIT | |
| VK_ACCESS_2_SHADER_STORAGE_READ_BIT | |
| VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT)) |
| ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; |
| if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET | |
| D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; |
| if (flags & VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT) |
| ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET | |
| D3D12_BARRIER_ACCESS_RESOLVE_DEST; |
| if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; |
| if (flags & VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT) |
| ret |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; |
| if (flags & VK_ACCESS_2_TRANSFER_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; |
| if (flags & VK_ACCESS_2_TRANSFER_WRITE_BIT) |
| ret |= D3D12_BARRIER_ACCESS_RENDER_TARGET | |
| D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS | |
| D3D12_BARRIER_ACCESS_COPY_DEST | |
| D3D12_BARRIER_ACCESS_RESOLVE_DEST; |
| if (flags & VK_ACCESS_2_MEMORY_READ_BIT) |
| ret |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT | |
| D3D12_BARRIER_ACCESS_INDEX_BUFFER | |
| D3D12_BARRIER_ACCESS_VERTEX_BUFFER | |
| D3D12_BARRIER_ACCESS_CONSTANT_BUFFER | |
| D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS | |
| D3D12_BARRIER_ACCESS_RENDER_TARGET | |
| D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | |
| D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; |
| if (flags & VK_ACCESS_2_MEMORY_WRITE_BIT) |
| ret |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS | |
| D3D12_BARRIER_ACCESS_RENDER_TARGET | |
| D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | |
| D3D12_BARRIER_ACCESS_COPY_DEST | |
| D3D12_BARRIER_ACCESS_RESOLVE_DEST; |
| if (flags & (VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | |
| VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | |
| VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT)) |
| ret |= D3D12_BARRIER_ACCESS_STREAM_OUTPUT; |
| if (flags & VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT) |
| ret |= D3D12_BARRIER_ACCESS_PREDICATION; |
| if (flags & VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR) |
| ret |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; |
| if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR) |
| ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ; |
| if (flags & VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR) |
| ret |= D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE; |
| return ret; |
| } |
| |
| /* For texture barriers, D3D will validate that the access flags used are actually |
| * things that were valid for the specified layout. Use the mask returned from here |
| * to scope down the set of app-provided access flags to make validation happy. */ |
| static D3D12_BARRIER_ACCESS |
| valid_access_for_layout(D3D12_BARRIER_LAYOUT layout) |
| { |
| switch (layout) { |
| case D3D12_BARRIER_LAYOUT_UNDEFINED: |
| return D3D12_BARRIER_ACCESS_NO_ACCESS; |
| case D3D12_BARRIER_LAYOUT_COMMON: |
| return D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_COPY_DEST; |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON: |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON: |
| return D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_COPY_DEST | |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; |
| case D3D12_BARRIER_LAYOUT_GENERIC_READ: |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ: |
| return D3D12_BARRIER_ACCESS_SHADER_RESOURCE | |
| D3D12_BARRIER_ACCESS_COPY_SOURCE | |
| D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | |
| D3D12_BARRIER_ACCESS_RESOLVE_SOURCE | |
| D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ: |
      return D3D12_BARRIER_ACCESS_SHADER_RESOURCE |
| D3D12_BARRIER_ACCESS_COPY_SOURCE; |
| case D3D12_BARRIER_LAYOUT_RENDER_TARGET: |
| return D3D12_BARRIER_ACCESS_RENDER_TARGET; |
| case D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS: |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS: |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS: |
| return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; |
| case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE: |
| return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; |
| case D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ: |
| return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; |
| case D3D12_BARRIER_LAYOUT_SHADER_RESOURCE: |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE: |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE: |
| return D3D12_BARRIER_ACCESS_SHADER_RESOURCE; |
| case D3D12_BARRIER_LAYOUT_COPY_SOURCE: |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE: |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE: |
| return D3D12_BARRIER_ACCESS_COPY_SOURCE; |
| case D3D12_BARRIER_LAYOUT_COPY_DEST: |
| case D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST: |
| case D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST: |
| return D3D12_BARRIER_ACCESS_COPY_DEST; |
| case D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE: |
| return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE; |
| case D3D12_BARRIER_LAYOUT_RESOLVE_DEST: |
| return D3D12_BARRIER_ACCESS_RESOLVE_DEST; |
| case D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE: |
| return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE; |
| default: |
| return D3D12_BARRIER_ACCESS_COMMON; |
| } |
| } |
| |
| /* Similar to layout -> access, there's access -> sync validation too. D3D |
| * doesn't like over-synchronizing if you weren't accessing a resource through |
| * a relevant access bit. */ |
| static D3D12_BARRIER_SYNC |
| adjust_sync_for_access(D3D12_BARRIER_SYNC in, D3D12_BARRIER_ACCESS access) |
| { |
| /* NO_ACCESS must not add sync */ |
| if (access == D3D12_BARRIER_ACCESS_NO_ACCESS) |
| return D3D12_BARRIER_SYNC_NONE; |
| /* SYNC_ALL can be used with any access bits */ |
| if (in == D3D12_BARRIER_SYNC_ALL) |
| return in; |
| /* ACCESS_COMMON needs at least one sync bit */ |
| if (access == D3D12_BARRIER_ACCESS_COMMON) |
| return in == D3D12_BARRIER_SYNC_NONE ? D3D12_BARRIER_SYNC_ALL : in; |
| |
| D3D12_BARRIER_SYNC out = D3D12_BARRIER_SYNC_NONE; |
| if (access & D3D12_BARRIER_ACCESS_VERTEX_BUFFER) |
| out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING | |
| D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_CONSTANT_BUFFER) |
| out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING | |
| D3D12_BARRIER_SYNC_PIXEL_SHADING | |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_INDEX_BUFFER) |
| out |= in & D3D12_BARRIER_SYNC_INDEX_INPUT; |
| if (access & D3D12_BARRIER_ACCESS_RENDER_TARGET) |
| out |= in & D3D12_BARRIER_SYNC_RENDER_TARGET; |
| if (access & D3D12_BARRIER_ACCESS_UNORDERED_ACCESS) |
| out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING | |
| D3D12_BARRIER_SYNC_PIXEL_SHADING | |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) |
| out |= in & (D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL); |
| if (access & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ) |
| out |= in & (D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL); |
| if (access & D3D12_BARRIER_ACCESS_SHADER_RESOURCE) |
| out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING | |
| D3D12_BARRIER_SYNC_PIXEL_SHADING | |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_STREAM_OUTPUT) |
| out |= in & (D3D12_BARRIER_SYNC_VERTEX_SHADING | |
| D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT) |
| out |= in & (D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_EXECUTE_INDIRECT); |
| if (access & D3D12_BARRIER_ACCESS_PREDICATION) |
| out |= in & (D3D12_BARRIER_SYNC_DRAW | |
| D3D12_BARRIER_SYNC_EXECUTE_INDIRECT); |
| if (access & (D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE)) |
| out |= in & D3D12_BARRIER_SYNC_COPY; |
| if (access & (D3D12_BARRIER_ACCESS_RESOLVE_DEST | D3D12_BARRIER_ACCESS_RESOLVE_SOURCE)) |
| out |= in & D3D12_BARRIER_SYNC_RESOLVE; |
| if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ) |
| out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_RAYTRACING | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE | |
| D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE | |
| D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE) |
| out |= in & (D3D12_BARRIER_SYNC_COMPUTE_SHADING | |
| D3D12_BARRIER_SYNC_RAYTRACING | |
| D3D12_BARRIER_SYNC_ALL_SHADING | |
| D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE | |
| D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE | |
| D3D12_BARRIER_SYNC_NON_PIXEL_SHADING); |
| if (access & D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE) |
| out |= in & (D3D12_BARRIER_SYNC_PIXEL_SHADING | |
| D3D12_BARRIER_SYNC_ALL_SHADING); |
| /* SYNC_NONE means it won't be accessed, so if we can't express the app's original intent |
| * here, then be conservative and over-sync. */ |
| return out ? out : D3D12_BARRIER_SYNC_ALL; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdPipelineBarrier2_enhanced(VkCommandBuffer commandBuffer, |
| const VkDependencyInfo *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| uint32_t num_barrier_groups = 0; |
| D3D12_BARRIER_GROUP groups[3]; |
| |
| /* Some input image barriers will expand into 2 outputs, and some will turn into buffer barriers. |
| * Do a first pass and count how much we need to allocate. */ |
| uint32_t num_image_barriers = 0; |
| uint32_t num_buffer_barriers = info->bufferMemoryBarrierCount; |
| for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) { |
| VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image); |
| bool need_separate_aspect_barriers = |
| info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || |
| info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL || |
| info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || |
| info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; |
| if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) |
| ++num_buffer_barriers; |
| else |
| num_image_barriers += need_separate_aspect_barriers ? 2 : 1; |
| } |
| |
| VK_MULTIALLOC(ma); |
| VK_MULTIALLOC_DECL(&ma, D3D12_GLOBAL_BARRIER, global_barriers, info->memoryBarrierCount); |
| VK_MULTIALLOC_DECL(&ma, D3D12_BUFFER_BARRIER, buffer_barriers, num_buffer_barriers); |
| VK_MULTIALLOC_DECL(&ma, D3D12_TEXTURE_BARRIER, texture_barriers, num_image_barriers); |
| |
| if (ma.size == 0) |
| return; |
| |
| /* Keep the multialloc base pointer around: global_barriers is a zero-sized |
| * sub-allocation when memoryBarrierCount is 0, so it's not guaranteed to |
| * point at the start of the block and can't be passed to vk_free(). */ |
| void *barriers_mem = |
| vk_multialloc_alloc(&ma, &cmdbuf->vk.pool->alloc, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| if (!barriers_mem) { |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return; |
| } |
| |
| if (info->memoryBarrierCount) { |
| groups[num_barrier_groups].NumBarriers = info->memoryBarrierCount; |
| groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_GLOBAL; |
| groups[num_barrier_groups].pGlobalBarriers = global_barriers; |
| ++num_barrier_groups; |
| for (uint32_t i = 0; i < info->memoryBarrierCount; ++i) { |
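| /* Clamp the translated sync/access bits to what this queue type supports. |
| * D3D12 enhanced barriers require ACCESS_NO_ACCESS when the matching sync |
| * is SYNC_NONE, hence the SyncBefore/SyncAfter checks below. */ |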
| global_barriers[i].SyncBefore = translate_sync(info->pMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync; |
| global_barriers[i].SyncAfter = translate_sync(info->pMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync; |
| global_barriers[i].AccessBefore = global_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access; |
| global_barriers[i].AccessAfter = global_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access; |
| |
| if ((global_barriers[i].AccessBefore & D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE) && |
| (global_barriers[i].AccessAfter == D3D12_BARRIER_ACCESS_COMMON || |
| global_barriers[i].AccessAfter & ~(D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ))) { |
| /* D3D validates against a global barrier attempting to transition from depth write to something other than depth write, |
| * but this is a D3D bug; it's absolutely valid to use a global barrier to transition *multiple* types of accesses. |
| * The validation does say that you'd need an image barrier to actually get that kind of transition, which is still correct, |
| * so just remove this bit under the assumption that a dedicated image barrier will be submitted to do any necessary work later. */ |
| global_barriers[i].AccessBefore &= ~D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; |
| } |
| if (global_barriers[i].AccessBefore == D3D12_BARRIER_ACCESS_COMMON) |
| global_barriers[i].AccessAfter = D3D12_BARRIER_ACCESS_COMMON; |
| global_barriers[i].SyncBefore = adjust_sync_for_access(global_barriers[i].SyncBefore, global_barriers[i].AccessBefore); |
| global_barriers[i].SyncAfter = adjust_sync_for_access(global_barriers[i].SyncAfter, global_barriers[i].AccessAfter); |
| } |
| } |
| |
| if (num_buffer_barriers) { |
| groups[num_barrier_groups].NumBarriers = num_buffer_barriers; |
| groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_BUFFER; |
| groups[num_barrier_groups].pBufferBarriers = buffer_barriers; |
| ++num_barrier_groups; |
| for (uint32_t i = 0; i < info->bufferMemoryBarrierCount; ++i) { |
| VK_FROM_HANDLE(dzn_buffer, buf, info->pBufferMemoryBarriers[i].buffer); |
| buffer_barriers[i].SyncBefore = translate_sync(info->pBufferMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync; |
| buffer_barriers[i].SyncAfter = translate_sync(info->pBufferMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync; |
| buffer_barriers[i].AccessBefore = buffer_barriers[i].SyncBefore == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pBufferMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & buf->valid_access; |
| buffer_barriers[i].AccessAfter = buffer_barriers[i].SyncAfter == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pBufferMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & buf->valid_access; |
| buffer_barriers[i].SyncBefore = adjust_sync_for_access(buffer_barriers[i].SyncBefore, buffer_barriers[i].AccessBefore); |
| buffer_barriers[i].SyncAfter = adjust_sync_for_access(buffer_barriers[i].SyncAfter, buffer_barriers[i].AccessAfter); |
| buffer_barriers[i].pResource = buf->res; |
| buffer_barriers[i].Offset = 0; |
| buffer_barriers[i].Size = UINT64_MAX; |
| } |
| } |
| |
| if (num_image_barriers) { |
| groups[num_barrier_groups].Type = D3D12_BARRIER_TYPE_TEXTURE; |
| groups[num_barrier_groups].pTextureBarriers = texture_barriers; |
| groups[num_barrier_groups].NumBarriers = num_image_barriers; |
| ++num_barrier_groups; |
| } |
| |
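| /* Second pass: fill in the barrier arrays. Buffer barriers coming from |
| * linear-image barriers are appended after the app-provided buffer |
| * barriers, hence bbar starting at bufferMemoryBarrierCount. */ |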
| uint32_t tbar = 0; |
| uint32_t bbar = info->bufferMemoryBarrierCount; |
| for (uint32_t i = 0; i < info->imageMemoryBarrierCount; ++i) { |
| VK_FROM_HANDLE(dzn_image, image, info->pImageMemoryBarriers[i].image); |
| |
| if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) { |
| /* Barriers on linear images turn into buffer barriers */ |
| buffer_barriers[bbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync; |
| buffer_barriers[bbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync; |
| buffer_barriers[bbar].AccessBefore = buffer_barriers[bbar].SyncBefore == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & cmdbuf->valid_access & image->valid_access; |
| buffer_barriers[bbar].AccessAfter = buffer_barriers[bbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & cmdbuf->valid_access & image->valid_access; |
| buffer_barriers[bbar].SyncBefore = adjust_sync_for_access(buffer_barriers[bbar].SyncBefore, buffer_barriers[bbar].AccessBefore); |
| buffer_barriers[bbar].SyncAfter = adjust_sync_for_access(buffer_barriers[bbar].SyncAfter, buffer_barriers[bbar].AccessAfter); |
| buffer_barriers[bbar].pResource = image->res; |
| buffer_barriers[bbar].Offset = 0; |
| buffer_barriers[bbar].Size = UINT64_MAX; |
| ++bbar; |
| continue; |
| } |
| |
| const VkImageSubresourceRange *range = &info->pImageMemoryBarriers[i].subresourceRange; |
| const bool simultaneous_access = image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; |
| |
| bool need_separate_aspect_barriers = |
| info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || |
| info->pImageMemoryBarriers[i].oldLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL || |
| info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || |
| info->pImageMemoryBarriers[i].newLayout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; |
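| /* Depth/stencil layouts where only one aspect is read-only map to different |
| * D3D12 layouts per aspect, so emit one barrier per aspect: barrier 0 |
| * covers color or depth, barrier 1 covers stencil. */ |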
| uint32_t num_aspects = need_separate_aspect_barriers ? 2 : 1; |
| VkImageAspectFlags aspect_0_mask = need_separate_aspect_barriers ? |
| (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) : VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM; |
| VkImageAspectFlags aspects[] = { |
| range->aspectMask & aspect_0_mask, |
| range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT, |
| }; |
| |
| for (uint32_t aspect_idx = 0; aspect_idx < num_aspects; ++aspect_idx) { |
| VkImageAspectFlags aspect = aspects[aspect_idx]; |
| texture_barriers[tbar].SyncBefore = translate_sync(info->pImageMemoryBarriers[i].srcStageMask, true) & cmdbuf->valid_sync; |
| texture_barriers[tbar].SyncAfter = translate_sync(info->pImageMemoryBarriers[i].dstStageMask, false) & cmdbuf->valid_sync; |
| const bool queue_ownership_transfer = info->pImageMemoryBarriers[i].srcQueueFamilyIndex != info->pImageMemoryBarriers[i].dstQueueFamilyIndex; |
| D3D12_BARRIER_ACCESS layout_before_valid_access = ~0; |
| D3D12_BARRIER_ACCESS layout_after_valid_access = ~0; |
| if (simultaneous_access) { |
| /* Simultaneous access textures never perform layout transitions, and can do any type of access from COMMON layout */ |
| texture_barriers[tbar].LayoutAfter = texture_barriers[tbar].LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED; |
| } else if (queue_ownership_transfer) { |
| /* For an ownership transfer, force the foreign layout to COMMON and the matching sync/access to NONE */ |
| assert(info->pImageMemoryBarriers[i].srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED); |
| assert(info->pImageMemoryBarriers[i].dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED); |
| const bool is_release = info->pImageMemoryBarriers[i].srcQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index; |
| const bool is_acquire = info->pImageMemoryBarriers[i].dstQueueFamilyIndex == cmdbuf->vk.pool->queue_family_index; |
| assert(is_release ^ is_acquire); |
| texture_barriers[tbar].LayoutBefore = is_acquire ? |
| D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect); |
| texture_barriers[tbar].LayoutAfter = is_release ? |
| D3D12_BARRIER_LAYOUT_COMMON : dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect); |
| if (is_acquire) { |
| texture_barriers[tbar].SyncBefore = D3D12_BARRIER_SYNC_NONE; |
| texture_barriers[tbar].AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS; |
| layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter); |
| } else { |
| texture_barriers[tbar].SyncAfter = D3D12_BARRIER_SYNC_NONE; |
| texture_barriers[tbar].AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS; |
| layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore); |
| } |
| } else { |
| texture_barriers[tbar].LayoutBefore = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].oldLayout, cmdbuf->type, aspect); |
| texture_barriers[tbar].LayoutAfter = dzn_vk_layout_to_d3d_layout(info->pImageMemoryBarriers[i].newLayout, cmdbuf->type, aspect); |
| layout_before_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutBefore); |
| layout_after_valid_access = valid_access_for_layout(texture_barriers[tbar].LayoutAfter); |
| } |
| |
| texture_barriers[tbar].AccessBefore = texture_barriers[tbar].SyncBefore == D3D12_BARRIER_SYNC_NONE || |
| texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pImageMemoryBarriers[i].srcAccessMask) & |
| cmdbuf->valid_access & image->valid_access & layout_before_valid_access; |
| texture_barriers[tbar].AccessAfter = texture_barriers[tbar].SyncAfter == D3D12_BARRIER_SYNC_NONE ? |
| D3D12_BARRIER_ACCESS_NO_ACCESS : |
| translate_access(info->pImageMemoryBarriers[i].dstAccessMask) & |
| cmdbuf->valid_access & image->valid_access & layout_after_valid_access; |
| |
| texture_barriers[tbar].SyncBefore = adjust_sync_for_access(texture_barriers[tbar].SyncBefore, texture_barriers[tbar].AccessBefore); |
| texture_barriers[tbar].SyncAfter = adjust_sync_for_access(texture_barriers[tbar].SyncAfter, texture_barriers[tbar].AccessAfter); |
| texture_barriers[tbar].Subresources.FirstArraySlice = range->baseArrayLayer; |
| texture_barriers[tbar].Subresources.NumArraySlices = dzn_get_layer_count(image, range); |
| texture_barriers[tbar].Subresources.IndexOrFirstMipLevel = range->baseMipLevel; |
| texture_barriers[tbar].Subresources.NumMipLevels = dzn_get_level_count(image, range); |
| texture_barriers[tbar].Subresources.FirstPlane = aspect_idx; |
| texture_barriers[tbar].Subresources.NumPlanes = util_bitcount(aspect); |
| texture_barriers[tbar].pResource = image->res; |
| texture_barriers[tbar].Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE; |
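| /* Transitions out of the UNDEFINED layout don't preserve contents, so let |
| * D3D12 discard the subresources. */ |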
| if (texture_barriers[tbar].LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED) |
| texture_barriers[tbar].Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD; |
| ++tbar; |
| } |
| } |
| assert(bbar == num_buffer_barriers); |
| assert(tbar == num_image_barriers); |
| |
| ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, num_barrier_groups, groups); |
| |
| vk_free(&cmdbuf->vk.pool->alloc, barriers_mem); |
| } |
| |
| static D3D12_CPU_DESCRIPTOR_HANDLE |
| dzn_cmd_buffer_get_dsv(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| const D3D12_DEPTH_STENCIL_VIEW_DESC *desc) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_cmd_buffer_dsv_key key = { image, *desc }; |
| struct hash_entry *he = _mesa_hash_table_search(cmdbuf->dsvs.ht, &key); |
| struct dzn_cmd_buffer_dsv_entry *dsve; |
| |
| if (!he) { |
| struct dzn_descriptor_heap *heap; |
| uint32_t slot; |
| |
| // TODO: error handling |
| dsve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*dsve), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| dsve->key = key; |
| dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->dsvs.pool, device, 1, &heap, &slot); |
| dsve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); |
| ID3D12Device1_CreateDepthStencilView(device->dev, image->res, desc, dsve->handle); |
| _mesa_hash_table_insert(cmdbuf->dsvs.ht, &dsve->key, dsve); |
| } else { |
| dsve = he->data; |
| } |
| |
| return dsve->handle; |
| } |
| |
| static D3D12_CPU_DESCRIPTOR_HANDLE |
| dzn_cmd_buffer_get_rtv(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| const D3D12_RENDER_TARGET_VIEW_DESC *desc) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_cmd_buffer_rtv_key key = { image, *desc }; |
| struct hash_entry *he = _mesa_hash_table_search(cmdbuf->rtvs.ht, &key); |
| struct dzn_cmd_buffer_rtv_entry *rtve; |
| |
| if (!he) { |
| struct dzn_descriptor_heap *heap; |
| uint32_t slot; |
| |
| // TODO: error handling |
| rtve = vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*rtve), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| rtve->key = key; |
| dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot); |
| rtve->handle = dzn_descriptor_heap_get_cpu_handle(heap, slot); |
| ID3D12Device1_CreateRenderTargetView(device->dev, image->res, desc, rtve->handle); |
| he = _mesa_hash_table_insert(cmdbuf->rtvs.ht, &rtve->key, rtve); |
| } else { |
| rtve = he->data; |
| } |
| |
| return rtve->handle; |
| } |
| |
| static D3D12_CPU_DESCRIPTOR_HANDLE |
| dzn_cmd_buffer_get_null_rtv(struct dzn_cmd_buffer *cmdbuf) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| |
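| /* Lazily create a null RTV (CreateRenderTargetView() accepts a NULL |
| * resource); the format/dimension just have to be valid, since nothing is |
| * ever read or written through this view. */ |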
| if (!cmdbuf->null_rtv.ptr) { |
| struct dzn_descriptor_heap *heap; |
| uint32_t slot; |
| dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->rtvs.pool, device, 1, &heap, &slot); |
| cmdbuf->null_rtv = dzn_descriptor_heap_get_cpu_handle(heap, slot); |
| |
| D3D12_RENDER_TARGET_VIEW_DESC desc = { 0 }; |
| desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; |
| desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; |
| desc.Texture2D.MipSlice = 0; |
| desc.Texture2D.PlaneSlice = 0; |
| |
| ID3D12Device1_CreateRenderTargetView(device->dev, NULL, &desc, cmdbuf->null_rtv); |
| } |
| |
| return cmdbuf->null_rtv; |
| } |
| |
| static D3D12_HEAP_TYPE |
| heap_type_for_bucket(enum dzn_internal_buf_bucket bucket) |
| { |
| switch (bucket) { |
| case DZN_INTERNAL_BUF_UPLOAD: return D3D12_HEAP_TYPE_UPLOAD; |
| case DZN_INTERNAL_BUF_DEFAULT: return D3D12_HEAP_TYPE_DEFAULT; |
| default: unreachable("Invalid value"); |
| } |
| } |
| |
| static VkResult |
| dzn_cmd_buffer_alloc_internal_buf(struct dzn_cmd_buffer *cmdbuf, |
| uint32_t size, |
| enum dzn_internal_buf_bucket bucket, |
| D3D12_RESOURCE_STATES init_state, |
| uint64_t align, |
| ID3D12Resource **out, |
| uint64_t *offset) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| ID3D12Resource *res; |
| *out = NULL; |
| D3D12_HEAP_TYPE heap_type = heap_type_for_bucket(bucket); |
| |
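| /* Try to sub-allocate from the current upload buffer before creating a |
| * new committed resource. */ |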
| if (bucket == DZN_INTERNAL_BUF_UPLOAD && cmdbuf->cur_upload_buf) { |
| uint64_t new_offset = ALIGN_POT(cmdbuf->cur_upload_buf_offset, align); |
| if (cmdbuf->cur_upload_buf->size >= size + new_offset) { |
| cmdbuf->cur_upload_buf_offset = new_offset + size; |
| *out = cmdbuf->cur_upload_buf->res; |
| *offset = new_offset; |
| return VK_SUCCESS; |
| } |
| cmdbuf->cur_upload_buf = NULL; |
| cmdbuf->cur_upload_buf_offset = 0; |
| } |
| |
| uint32_t alloc_size = size; |
| if (bucket == DZN_INTERNAL_BUF_UPLOAD) |
| /* Allocate upload buffers in 4MB chunks so subsequent requests can |
| * sub-allocate out of the same buffer */ |
| alloc_size = ALIGN_POT(size, 4 * 1024 * 1024); |
| else |
| /* Align the size on 64KB (the default D3D12 placement alignment) */ |
| alloc_size = ALIGN_POT(size, 64 * 1024); |
| |
| D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_type); |
| D3D12_RESOURCE_DESC rdesc = { |
| .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, |
| .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, |
| .Width = alloc_size, |
| .Height = 1, |
| .DepthOrArraySize = 1, |
| .MipLevels = 1, |
| .Format = DXGI_FORMAT_UNKNOWN, |
| .SampleDesc = { .Count = 1, .Quality = 0 }, |
| .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, |
| .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, |
| }; |
| |
| HRESULT hres = |
| ID3D12Device1_CreateCommittedResource(device->dev, &hprops, |
| D3D12_HEAP_FLAG_NONE, &rdesc, |
| init_state, NULL, |
| &IID_ID3D12Resource, |
| (void **)&res); |
| if (FAILED(hres)) { |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| } |
| |
| struct dzn_internal_resource *entry = |
| vk_alloc(&cmdbuf->vk.pool->alloc, sizeof(*entry), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (!entry) { |
| ID3D12Resource_Release(res); |
| return vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| entry->res = res; |
| entry->size = alloc_size; |
| list_addtail(&entry->link, &cmdbuf->internal_bufs[bucket]); |
| *out = entry->res; |
| if (offset) |
| *offset = 0; |
| if (bucket == DZN_INTERNAL_BUF_UPLOAD) { |
| cmdbuf->cur_upload_buf = entry; |
| cmdbuf->cur_upload_buf_offset = size; |
| } |
| return VK_SUCCESS; |
| } |
| |
| static void |
| dzn_cmd_buffer_clear_rects_with_copy(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| VkImageLayout layout, |
| const VkClearColorValue *color, |
| const VkImageSubresourceRange *range, |
| uint32_t rect_count, D3D12_RECT *rects) |
| { |
| enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); |
| uint32_t blksize = util_format_get_blocksize(pfmt); |
| uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 }; |
| uint32_t raw[4] = { 0 }; |
| |
| assert(blksize <= sizeof(raw)); |
| assert(!(sizeof(buf) % blksize)); |
| |
| util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1); |
| |
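| /* The fill pattern must be a whole number of blocks and a multiple of |
| * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT. With block sizes up to 16 bytes, the |
| * smallest common multiple fits in 3x the pitch alignment (e.g. 12-byte |
| * blocks -> 768 bytes), hence the size of buf above. */ |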
| uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; |
| while (fill_step % blksize) |
| fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; |
| |
| uint32_t max_w = u_minify(image->vk.extent.width, range->baseMipLevel); |
| uint32_t max_h = u_minify(image->vk.extent.height, range->baseMipLevel); |
| uint32_t row_pitch = ALIGN_NPOT(max_w * blksize, fill_step); |
| uint32_t res_size = max_h * row_pitch; |
| |
| assert(fill_step <= sizeof(buf)); |
| |
| for (uint32_t i = 0; i < fill_step; i += blksize) |
| memcpy(&buf[i], raw, blksize); |
| |
| ID3D12Resource *src_res; |
| uint64_t src_offset; |
| |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, |
| &src_res, |
| &src_offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| assert(!(res_size % fill_step)); |
| |
| uint8_t *cpu_ptr; |
| ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); |
| cpu_ptr += src_offset; |
| for (uint32_t i = 0; i < res_size; i += fill_step) |
| memcpy(&cpu_ptr[i], buf, fill_step); |
| |
| ID3D12Resource_Unmap(src_res, 0, NULL); |
| |
| D3D12_TEXTURE_COPY_LOCATION src_loc = { |
| .pResource = src_res, |
| .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, |
| .PlacedFootprint = { |
| .Offset = src_offset, |
| .Footprint = { |
| .Width = max_w, |
| .Height = max_h, |
| .Depth = 1, |
| .RowPitch = (UINT)ALIGN_NPOT(max_w * blksize, fill_step), |
| }, |
| }, |
| }; |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| layout, |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| assert(dzn_get_level_count(image, range) == 1); |
| uint32_t layer_count = dzn_get_layer_count(image, range); |
| |
| dzn_foreach_aspect(aspect, range->aspectMask) { |
| VkImageSubresourceLayers subres = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .mipLevel = range->baseMipLevel, |
| .baseArrayLayer = range->baseArrayLayer, |
| .layerCount = layer_count, |
| }; |
| |
| for (uint32_t layer = 0; layer < layer_count; layer++) { |
| D3D12_TEXTURE_COPY_LOCATION dst_loc = |
| dzn_image_get_copy_loc(image, &subres, aspect, layer); |
| |
| src_loc.PlacedFootprint.Footprint.Format = |
| dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? |
| dst_loc.PlacedFootprint.Footprint.Format : |
| image->desc.Format; |
| |
| for (uint32_t r = 0; r < rect_count; r++) { |
| D3D12_BOX src_box = { |
| .left = 0, |
| .top = 0, |
| .front = 0, |
| .right = (UINT)(rects[r].right - rects[r].left), |
| .bottom = (UINT)(rects[r].bottom - rects[r].top), |
| .back = 1, |
| }; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, |
| &dst_loc, |
| rects[r].left, |
| rects[r].top, 0, |
| &src_loc, |
| &src_box); |
| } |
| } |
| } |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| |
| static VkClearColorValue |
| adjust_clear_color(struct dzn_physical_device *pdev, |
| VkFormat format, const VkClearColorValue *col) |
| { |
| VkClearColorValue out = *col; |
| |
| // D3D12 doesn't support bgra4, so we map it to a4b4g4r4 when supported, or |
| // rgba4 otherwise, and swizzle things manually where it matters, like |
| // here, in the clear path. |
| if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) { |
| if (pdev->support_a4b4g4r4) { |
| DZN_SWAP(float, out.float32[0], out.float32[2]); |
| } else { |
| DZN_SWAP(float, out.float32[0], out.float32[1]); |
| DZN_SWAP(float, out.float32[2], out.float32[3]); |
| } |
| } |
| |
| return out; |
| } |
| |
| static void |
| dzn_cmd_buffer_clear_ranges_with_copy(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| VkImageLayout layout, |
| const VkClearColorValue *color, |
| uint32_t range_count, |
| const VkImageSubresourceRange *ranges) |
| { |
| enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); |
| uint32_t blksize = util_format_get_blocksize(pfmt); |
| uint8_t buf[D3D12_TEXTURE_DATA_PITCH_ALIGNMENT * 3] = { 0 }; |
| uint32_t raw[4] = { 0 }; |
| |
| assert(blksize <= sizeof(raw)); |
| assert(!(sizeof(buf) % blksize)); |
| |
| util_format_write_4(pfmt, color, 0, raw, 0, 0, 0, 1, 1); |
| |
| uint32_t fill_step = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; |
| while (fill_step % blksize) |
| fill_step += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; |
| |
| uint32_t res_size = 0; |
| for (uint32_t r = 0; r < range_count; r++) { |
| uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel); |
| uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel); |
| uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel); |
| uint32_t row_pitch = ALIGN_NPOT(w * blksize, fill_step); |
| |
| res_size = MAX2(res_size, h * d * row_pitch); |
| } |
| |
| assert(fill_step <= sizeof(buf)); |
| |
| for (uint32_t i = 0; i < fill_step; i += blksize) |
| memcpy(&buf[i], raw, blksize); |
| |
| ID3D12Resource *src_res; |
| uint64_t src_offset; |
| |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, res_size, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, |
| &src_res, |
| &src_offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| assert(!(res_size % fill_step)); |
| |
| uint8_t *cpu_ptr; |
| ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); |
| cpu_ptr += src_offset; |
| for (uint32_t i = 0; i < res_size; i += fill_step) |
| memcpy(&cpu_ptr[i], buf, fill_step); |
| |
| ID3D12Resource_Unmap(src_res, 0, NULL); |
| |
| D3D12_TEXTURE_COPY_LOCATION src_loc = { |
| .pResource = src_res, |
| .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, |
| .PlacedFootprint = { |
| .Offset = src_offset, |
| }, |
| }; |
| |
| for (uint32_t r = 0; r < range_count; r++) { |
| uint32_t level_count = dzn_get_level_count(image, &ranges[r]); |
| uint32_t layer_count = dzn_get_layer_count(image, &ranges[r]); |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r], |
| layout, |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| dzn_foreach_aspect(aspect, ranges[r].aspectMask) { |
| for (uint32_t lvl = 0; lvl < level_count; lvl++) { |
| uint32_t w = u_minify(image->vk.extent.width, ranges[r].baseMipLevel + lvl); |
| uint32_t h = u_minify(image->vk.extent.height, ranges[r].baseMipLevel + lvl); |
| uint32_t d = u_minify(image->vk.extent.depth, ranges[r].baseMipLevel + lvl); |
| VkImageSubresourceLayers subres = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .mipLevel = ranges[r].baseMipLevel + lvl, |
| .baseArrayLayer = ranges[r].baseArrayLayer, |
| .layerCount = layer_count, |
| }; |
| |
| for (uint32_t layer = 0; layer < layer_count; layer++) { |
| D3D12_TEXTURE_COPY_LOCATION dst_loc = |
| dzn_image_get_copy_loc(image, &subres, aspect, layer); |
| |
| src_loc.PlacedFootprint.Footprint.Format = |
| dst_loc.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT ? |
| dst_loc.PlacedFootprint.Footprint.Format : |
| image->desc.Format; |
| src_loc.PlacedFootprint.Footprint.Width = w; |
| src_loc.PlacedFootprint.Footprint.Height = h; |
| src_loc.PlacedFootprint.Footprint.Depth = d; |
| src_loc.PlacedFootprint.Footprint.RowPitch = |
| ALIGN_NPOT(w * blksize, fill_step); |
| D3D12_BOX src_box = { |
| .left = 0, |
| .top = 0, |
| .front = 0, |
| .right = w, |
| .bottom = h, |
| .back = d, |
| }; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdbuf->cmdlist, &dst_loc, 0, 0, 0, |
| &src_loc, &src_box); |
| } |
| } |
| } |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &ranges[r], |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_clear_attachment(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_image_view *view, |
| VkImageLayout layout, |
| const VkClearValue *value, |
| VkImageAspectFlags aspects, |
| uint32_t base_layer, |
| uint32_t layer_count, |
| uint32_t rect_count, |
| D3D12_RECT *rects) |
| { |
| struct dzn_image *image = |
| container_of(view->vk.image, struct dzn_image, vk); |
| struct dzn_physical_device *pdev = |
| container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| |
| VkImageSubresourceRange range = { |
| .aspectMask = aspects, |
| .baseMipLevel = view->vk.base_mip_level, |
| .levelCount = 1, |
| .baseArrayLayer = view->vk.base_array_layer + base_layer, |
| .layerCount = layer_count == VK_REMAINING_ARRAY_LAYERS ? |
| view->vk.layer_count - base_layer : layer_count, |
| }; |
| |
| layer_count = vk_image_subresource_layer_count(&image->vk, &range); |
| D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| |
| if (vk_format_is_depth_or_stencil(view->vk.format)) { |
| D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; |
| |
| if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) |
| flags |= D3D12_CLEAR_FLAG_DEPTH; |
| if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) |
| flags |= D3D12_CLEAR_FLAG_STENCIL; |
| |
| if (flags != 0) { |
| if (cmdbuf->enhanced_barriers) { |
| restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image, |
| layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, |
| &range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range, |
| layout, |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, &range, 0); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); |
| ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, flags, |
| value->depthStencil.depth, |
| value->depthStencil.stencil, |
| rect_count, rects); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, image, |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE, |
| D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout, |
| &range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range, |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } else if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { |
| VkClearColorValue color = adjust_clear_color(pdev, view->vk.format, &value->color); |
| bool clear_with_cpy = false; |
| float vals[4]; |
| |
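| /* ClearRenderTargetView() only takes FLOAT values; if the integer clear |
| * color doesn't survive a round-trip through float, fall back to a |
| * copy-based clear. */ |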
| if (vk_format_is_sint(view->vk.format)) { |
| for (uint32_t i = 0; i < 4; i++) { |
| vals[i] = color.int32[i]; |
| if (color.int32[i] != (int32_t)vals[i]) { |
| clear_with_cpy = true; |
| break; |
| } |
| } |
| } else if (vk_format_is_uint(view->vk.format)) { |
| for (uint32_t i = 0; i < 4; i++) { |
| vals[i] = color.uint32[i]; |
| if (color.uint32[i] != (uint32_t)vals[i]) { |
| clear_with_cpy = true; |
| break; |
| } |
| } |
| } else { |
| for (uint32_t i = 0; i < 4; i++) |
| vals[i] = color.float32[i]; |
| } |
| |
| if (clear_with_cpy) { |
| dzn_cmd_buffer_clear_rects_with_copy(cmdbuf, image, |
| VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| &value->color, |
| &range, rect_count, rects); |
| } else { |
| if (cmdbuf->enhanced_barriers) { |
| restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image, |
| layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET, |
| &range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range, |
| layout, |
| VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &range, 0); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); |
| ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, vals, rect_count, rects); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, image, |
| D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET, |
| D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout, |
| &range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range, |
| VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_clear_color(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| VkImageLayout layout, |
| const VkClearColorValue *col, |
| uint32_t range_count, |
| const VkImageSubresourceRange *ranges) |
| { |
| struct dzn_physical_device *pdev = |
| container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| if (!(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || |
| cmdbuf->type != D3D12_COMMAND_LIST_TYPE_DIRECT) { |
| dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); |
| return; |
| } |
| |
| VkClearColorValue color = adjust_clear_color(pdev, image->vk.format, col); |
| float clear_vals[4]; |
| |
| enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format); |
| D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| |
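| /* Same float round-trip check as in dzn_cmd_buffer_clear_attachment(): |
| * integer clear values that lose precision when converted to float are |
| * handled with a copy-based clear. */ |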
| if (util_format_is_pure_sint(pfmt)) { |
| for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { |
| clear_vals[c] = color.int32[c]; |
| if (color.int32[c] != (int32_t)clear_vals[c]) { |
| dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); |
| return; |
| } |
| } |
| } else if (util_format_is_pure_uint(pfmt)) { |
| for (uint32_t c = 0; c < ARRAY_SIZE(clear_vals); c++) { |
| clear_vals[c] = color.uint32[c]; |
| if (color.uint32[c] != (uint32_t)clear_vals[c]) { |
| dzn_cmd_buffer_clear_ranges_with_copy(cmdbuf, image, layout, col, range_count, ranges); |
| return; |
| } |
| } |
| } else { |
| memcpy(clear_vals, color.float32, sizeof(clear_vals)); |
| } |
| |
| for (uint32_t r = 0; r < range_count; r++) { |
| const VkImageSubresourceRange *range = &ranges[r]; |
| uint32_t level_count = dzn_get_level_count(image, range); |
| |
| if (cmdbuf->enhanced_barriers) { |
| restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image, |
| layout, D3D12_BARRIER_LAYOUT_RENDER_TARGET, |
| range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| layout, |
| VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| for (uint32_t lvl = 0; lvl < level_count; lvl++) { |
| VkImageSubresourceRange view_range = *range; |
| |
| if (image->vk.image_type == VK_IMAGE_TYPE_3D) { |
| view_range.baseArrayLayer = 0; |
| view_range.layerCount = u_minify(image->vk.extent.depth, range->baseMipLevel + lvl); |
| } |
| |
| D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(image, &view_range, lvl); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, image, &desc); |
| ID3D12GraphicsCommandList1_ClearRenderTargetView(cmdbuf->cmdlist, handle, clear_vals, 0, NULL); |
| } |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, image, |
| D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET, |
| D3D12_BARRIER_LAYOUT_RENDER_TARGET, restore_layout, |
| range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_clear_zs(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *image, |
| VkImageLayout layout, |
| const VkClearDepthStencilValue *zs, |
| uint32_t range_count, |
| const VkImageSubresourceRange *ranges) |
| { |
| assert(image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); |
| |
| for (uint32_t r = 0; r < range_count; r++) { |
| const VkImageSubresourceRange *range = &ranges[r]; |
| uint32_t level_count = dzn_get_level_count(image, range); |
| |
| D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; |
| D3D12_BARRIER_LAYOUT restore_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| |
| if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) |
| flags |= D3D12_CLEAR_FLAG_DEPTH; |
| if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) |
| flags |= D3D12_CLEAR_FLAG_STENCIL; |
| |
| if (cmdbuf->enhanced_barriers) { |
| restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, image, |
| layout, D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, |
| range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| layout, |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| for (uint32_t lvl = 0; lvl < level_count; lvl++) { |
| D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(image, range, lvl); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, image, &desc); |
| ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, |
| handle, flags, |
| zs->depth, |
| zs->stencil, |
| 0, NULL); |
| } |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, image, |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE, |
| D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, restore_layout, |
| range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, range, |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, |
| layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_copy_buf2img_region(struct dzn_cmd_buffer *cmdbuf, |
| const VkCopyBufferToImageInfo2 *info, |
| uint32_t r, |
| VkImageAspectFlagBits aspect, |
| uint32_t l) |
| { |
| VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); |
| VK_FROM_HANDLE(dzn_image, dst_image, info->dstImage); |
| struct dzn_physical_device *pdev = |
| container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| |
| ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; |
| |
| VkBufferImageCopy2 region = info->pRegions[r]; |
| enum pipe_format pfmt = vk_format_to_pipe_format(dst_image->vk.format); |
| uint32_t blkh = util_format_get_blockheight(pfmt); |
| uint32_t blkd = util_format_get_blockdepth(pfmt); |
| |
| /* D3D12 wants block-aligned offsets/extents, but Vulkan allows the extent |
| * to be unaligned when it reaches the image boundary; offsets still have |
| * to be aligned. Align the image extent to make D3D12 happy. |
| */ |
| dzn_image_align_extent(dst_image, ®ion.imageExtent); |
| |
| D3D12_TEXTURE_COPY_LOCATION dst_img_loc = |
| dzn_image_get_copy_loc(dst_image, ®ion.imageSubresource, aspect, l); |
| D3D12_TEXTURE_COPY_LOCATION src_buf_loc = |
| dzn_buffer_get_copy_loc(src_buffer, dst_image->vk.format, ®ion, aspect, l); |
| |
| if (dzn_buffer_supports_region_copy(pdev, &src_buf_loc)) { |
| /* RowPitch and Offset are properly aligned, so we can copy |
| * the whole thing in one call. |
| */ |
| D3D12_BOX src_box = { |
| .left = 0, |
| .top = 0, |
| .front = 0, |
| .right = region.imageExtent.width, |
| .bottom = region.imageExtent.height, |
| .back = region.imageExtent.depth, |
| }; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_img_loc, |
| region.imageOffset.x, |
| region.imageOffset.y, |
| region.imageOffset.z, |
| &src_buf_loc, &src_box); |
| return; |
| } |
| |
| /* Copy line-by-line if things are not properly aligned. */ |
| D3D12_BOX src_box = { |
| .top = 0, |
| .front = 0, |
| .bottom = blkh, |
| .back = blkd, |
| }; |
| |
| for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) { |
| for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) { |
| uint32_t src_x; |
| |
| D3D12_TEXTURE_COPY_LOCATION src_buf_line_loc = |
| dzn_buffer_get_line_copy_loc(src_buffer, dst_image->vk.format, |
| ®ion, &src_buf_loc, |
| y, z, &src_x); |
| |
| src_box.left = src_x; |
| src_box.right = src_x + region.imageExtent.width; |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, |
| &dst_img_loc, |
| region.imageOffset.x, |
| region.imageOffset.y + y, |
| region.imageOffset.z + z, |
| &src_buf_line_loc, |
| &src_box); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_copy_img2buf_region(struct dzn_cmd_buffer *cmdbuf, |
| const VkCopyImageToBufferInfo2 *info, |
| uint32_t r, |
| VkImageAspectFlagBits aspect, |
| uint32_t l) |
| { |
| VK_FROM_HANDLE(dzn_image, src_image, info->srcImage); |
| VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); |
| struct dzn_physical_device *pdev = |
| container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| |
| ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; |
| |
| VkBufferImageCopy2 region = info->pRegions[r]; |
| enum pipe_format pfmt = vk_format_to_pipe_format(src_image->vk.format); |
| uint32_t blkh = util_format_get_blockheight(pfmt); |
| uint32_t blkd = util_format_get_blockdepth(pfmt); |
| |
| /* D3D12 wants block-aligned offsets/extents, but Vulkan allows the extent |
| * to be unaligned when it reaches the image boundary; offsets still have |
| * to be aligned. Align the image extent to make D3D12 happy. |
| */ |
| dzn_image_align_extent(src_image, ®ion.imageExtent); |
| |
| D3D12_TEXTURE_COPY_LOCATION src_img_loc = |
| dzn_image_get_copy_loc(src_image, ®ion.imageSubresource, aspect, l); |
| D3D12_TEXTURE_COPY_LOCATION dst_buf_loc = |
| dzn_buffer_get_copy_loc(dst_buffer, src_image->vk.format, ®ion, aspect, l); |
| |
| if (dzn_buffer_supports_region_copy(pdev, &dst_buf_loc)) { |
| /* RowPitch and Offset are properly aligned on 256 bytes, so we can copy |
| * the whole thing in one call. |
| */ |
| D3D12_BOX src_box = { |
| .left = (UINT)region.imageOffset.x, |
| .top = (UINT)region.imageOffset.y, |
| .front = (UINT)region.imageOffset.z, |
| .right = (UINT)(region.imageOffset.x + region.imageExtent.width), |
| .bottom = (UINT)(region.imageOffset.y + region.imageExtent.height), |
| .back = (UINT)(region.imageOffset.z + region.imageExtent.depth), |
| }; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_buf_loc, |
| 0, 0, 0, &src_img_loc, |
| &src_box); |
| return; |
| } |
| |
| D3D12_BOX src_box = { |
| .left = (UINT)region.imageOffset.x, |
| .right = (UINT)(region.imageOffset.x + region.imageExtent.width), |
| }; |
| |
| /* Copy line-by-line if things are not properly aligned. */ |
| for (uint32_t z = 0; z < region.imageExtent.depth; z += blkd) { |
| src_box.front = region.imageOffset.z + z; |
| src_box.back = src_box.front + blkd; |
| |
| for (uint32_t y = 0; y < region.imageExtent.height; y += blkh) { |
| uint32_t dst_x; |
| |
| D3D12_TEXTURE_COPY_LOCATION dst_buf_line_loc = |
| dzn_buffer_get_line_copy_loc(dst_buffer, src_image->vk.format, |
| ®ion, &dst_buf_loc, |
| y, z, &dst_x); |
| |
| src_box.top = region.imageOffset.y + y; |
| src_box.bottom = src_box.top + blkh; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, |
| &dst_buf_line_loc, |
| dst_x, 0, 0, |
| &src_img_loc, |
| &src_box); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_copy_img_chunk(struct dzn_cmd_buffer *cmdbuf, |
| const VkCopyImageInfo2 *info, |
| D3D12_RESOURCE_DESC *tmp_desc, |
| D3D12_TEXTURE_COPY_LOCATION *tmp_loc, |
| uint32_t r, |
| VkImageAspectFlagBits aspect, |
| uint32_t l) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| VK_FROM_HANDLE(dzn_image, src, info->srcImage); |
| VK_FROM_HANDLE(dzn_image, dst, info->dstImage); |
| |
| ID3D12Device4 *dev = device->dev; |
| ID3D12GraphicsCommandList1 *cmdlist = cmdbuf->cmdlist; |
| |
| VkImageCopy2 region = info->pRegions[r]; |
| dzn_image_align_extent(src, ®ion.extent); |
| |
| const VkImageSubresourceLayers *src_subres = ®ion.srcSubresource; |
| const VkImageSubresourceLayers *dst_subres = ®ion.dstSubresource; |
| VkFormat src_format = |
| dzn_image_get_plane_format(src->vk.format, aspect); |
| VkFormat dst_format = |
| dzn_image_get_plane_format(dst->vk.format, aspect); |
| |
| enum pipe_format src_pfmt = vk_format_to_pipe_format(src_format); |
| uint32_t src_blkw = util_format_get_blockwidth(src_pfmt); |
| uint32_t src_blkh = util_format_get_blockheight(src_pfmt); |
| uint32_t src_blkd = util_format_get_blockdepth(src_pfmt); |
| enum pipe_format dst_pfmt = vk_format_to_pipe_format(dst_format); |
| uint32_t dst_blkw = util_format_get_blockwidth(dst_pfmt); |
| uint32_t dst_blkh = util_format_get_blockheight(dst_pfmt); |
| uint32_t dst_blkd = util_format_get_blockdepth(dst_pfmt); |
| uint32_t dst_z = region.dstOffset.z, src_z = region.srcOffset.z; |
| uint32_t depth = region.extent.depth; |
| uint32_t dst_l = l, src_l = l; |
| |
| assert(src_subres->aspectMask == dst_subres->aspectMask); |
| |
| if (src->vk.image_type == VK_IMAGE_TYPE_3D && |
| dst->vk.image_type == VK_IMAGE_TYPE_2D) { |
| assert(src_subres->layerCount == 1); |
| src_l = 0; |
| src_z += l; |
| depth = 1; |
| } else if (src->vk.image_type == VK_IMAGE_TYPE_2D && |
| dst->vk.image_type == VK_IMAGE_TYPE_3D) { |
| assert(dst_subres->layerCount == 1); |
| dst_l = 0; |
| dst_z += l; |
| depth = 1; |
| } else { |
| assert(src_subres->layerCount == dst_subres->layerCount); |
| } |
| |
| D3D12_TEXTURE_COPY_LOCATION dst_loc = dzn_image_get_copy_loc(dst, dst_subres, aspect, dst_l); |
| D3D12_TEXTURE_COPY_LOCATION src_loc = dzn_image_get_copy_loc(src, src_subres, aspect, src_l); |
| |
| D3D12_BOX src_box = { |
| .left = (UINT)MAX2(region.srcOffset.x, 0), |
| .top = (UINT)MAX2(region.srcOffset.y, 0), |
| .front = (UINT)MAX2(src_z, 0), |
| .right = (UINT)region.srcOffset.x + region.extent.width, |
| .bottom = (UINT)region.srcOffset.y + region.extent.height, |
| .back = (UINT)src_z + depth, |
| }; |
| |
| if (!tmp_loc->pResource) { |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc, |
| region.dstOffset.x, |
| region.dstOffset.y, |
| dst_z, &src_loc, |
| &src_box); |
| return; |
| } |
| |
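| /* Incompatible src/dst formats (e.g. compressed <-> uncompressed copies) go |
| * through a staging buffer: copy the source region out with the source |
| * footprint, then re-interpret the buffer with the destination footprint |
| * and copy it into the destination image. */ |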
| tmp_desc->Format = |
| dzn_image_get_placed_footprint_format(pdev, src->vk.format, aspect); |
| tmp_desc->Width = region.extent.width; |
| tmp_desc->Height = region.extent.height; |
| |
| ID3D12Device1_GetCopyableFootprints(dev, tmp_desc, |
| 0, 1, 0, |
| &tmp_loc->PlacedFootprint, |
| NULL, NULL, NULL); |
| |
| tmp_loc->PlacedFootprint.Footprint.Depth = depth; |
| |
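| /* The staging buffer is left in the COPY_SOURCE state at the end of each |
| * chunk, so flip it back to COPY_DEST for every chunk but the first. */ |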
| if (r > 0 || l > 0) { |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_ACCESS_COPY_DEST); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, tmp_loc, 0, 0, 0, &src_loc, &src_box); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, tmp_loc->pResource, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, tmp_loc->pResource, 0, 1, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| tmp_desc->Format = |
| dzn_image_get_placed_footprint_format(pdev, dst->vk.format, aspect); |
| if (src_blkw != dst_blkw) |
| tmp_desc->Width = DIV_ROUND_UP(region.extent.width, src_blkw) * dst_blkw; |
| if (src_blkh != dst_blkh) |
| tmp_desc->Height = DIV_ROUND_UP(region.extent.height, src_blkh) * dst_blkh; |
| |
| ID3D12Device1_GetCopyableFootprints(device->dev, tmp_desc, |
| 0, 1, 0, |
| &tmp_loc->PlacedFootprint, |
| NULL, NULL, NULL); |
| |
| if (src_blkd != dst_blkd) { |
| tmp_loc->PlacedFootprint.Footprint.Depth = |
| DIV_ROUND_UP(depth, src_blkd) * dst_blkd; |
| } else { |
| tmp_loc->PlacedFootprint.Footprint.Depth = region.extent.depth; |
| } |
| |
| D3D12_BOX tmp_box = { |
| .left = 0, |
| .top = 0, |
| .front = 0, |
| .right = tmp_loc->PlacedFootprint.Footprint.Width, |
| .bottom = tmp_loc->PlacedFootprint.Footprint.Height, |
| .back = tmp_loc->PlacedFootprint.Footprint.Depth, |
| }; |
| |
| ID3D12GraphicsCommandList1_CopyTextureRegion(cmdlist, &dst_loc, |
| region.dstOffset.x, |
| region.dstOffset.y, |
| dst_z, |
| tmp_loc, &tmp_box); |
| } |
| |
| static void |
| dzn_cmd_buffer_blit_prepare_src_view(struct dzn_cmd_buffer *cmdbuf, |
| VkImage image, |
| VkImageAspectFlagBits aspect, |
| const VkImageSubresourceLayers *subres, |
| struct dzn_descriptor_heap *heap, |
| uint32_t heap_slot) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| VK_FROM_HANDLE(dzn_image, img, image); |
| VkImageViewCreateInfo iview_info = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| .image = image, |
| .format = img->vk.format, |
| .subresourceRange = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .baseMipLevel = subres->mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = subres->baseArrayLayer, |
| .layerCount = subres->layerCount, |
| }, |
| }; |
| |
| switch (img->vk.image_type) { |
| case VK_IMAGE_TYPE_1D: |
| iview_info.viewType = img->vk.array_layers > 1 ? |
| VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D; |
| break; |
| case VK_IMAGE_TYPE_2D: |
| iview_info.viewType = img->vk.array_layers > 1 ? |
| VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; |
| break; |
| case VK_IMAGE_TYPE_3D: |
| iview_info.viewType = VK_IMAGE_VIEW_TYPE_3D; |
| break; |
| default: |
| unreachable("Invalid type"); |
| } |
| |
| struct dzn_image_view iview; |
| dzn_image_view_init(device, &iview, &iview_info); |
| dzn_descriptor_heap_write_image_view_desc(device, heap, heap_slot, false, false, &iview); |
| dzn_image_view_finish(&iview); |
| |
| D3D12_GPU_DESCRIPTOR_HANDLE handle = |
| dzn_descriptor_heap_get_gpu_handle(heap, heap_slot); |
| ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 0, handle); |
| } |
| |
| static void |
| dzn_cmd_buffer_blit_prepare_dst_view(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_image *img, |
| VkImageAspectFlagBits aspect, |
| uint32_t level, uint32_t layer, |
| const VkOffset3D *dst_offsets) |
| { |
| bool ds = aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); |
| VkImageSubresourceRange range = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .baseMipLevel = level, |
| .levelCount = 1, |
| .baseArrayLayer = layer, |
| .layerCount = 1, |
| }; |
| |
| if (ds) { |
| D3D12_DEPTH_STENCIL_VIEW_DESC desc = dzn_image_get_dsv_desc(img, &range, 0); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &desc); |
| ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 0, NULL, true, &handle); |
| |
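| /* Without PS-specified stencil refs, the stencil blit is assumed to set |
| * bits one at a time with stencil ops, so the destination rect has to be |
| * zeroed first. */ |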
| if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { |
| const struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| if (!pdev->options.PSSpecifiedStencilRefSupported) { |
| D3D12_RECT clear_rect = { |
| .left = dst_offsets[0].x, |
| .right = dst_offsets[1].x, |
| .top = dst_offsets[0].y, |
| .bottom = dst_offsets[1].y, |
| }; |
| ID3D12GraphicsCommandList1_ClearDepthStencilView(cmdbuf->cmdlist, handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, 0, 1, &clear_rect); |
| } |
| } |
| } else { |
| D3D12_RENDER_TARGET_VIEW_DESC desc = dzn_image_get_rtv_desc(img, &range, 0); |
| D3D12_CPU_DESCRIPTOR_HANDLE handle = dzn_cmd_buffer_get_rtv(cmdbuf, img, &desc); |
| ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, 1, &handle, false, NULL); |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_blit_set_pipeline(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *src, |
| const struct dzn_image *dst, |
| VkImageAspectFlagBits aspect, |
| VkFilter filter, |
| enum dzn_blit_resolve_mode resolve_mode, |
| uint32_t stencil_bit) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| assert(pdev->options.PSSpecifiedStencilRefSupported || aspect != VK_IMAGE_ASPECT_STENCIL_BIT || stencil_bit != 0xf); |
| enum pipe_format pfmt = vk_format_to_pipe_format(dst->vk.format); |
| VkImageUsageFlags usage = |
| vk_format_is_depth_or_stencil(dst->vk.format) ? |
| VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : |
| VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; |
| struct dzn_meta_blit_key ctx_key = { |
| .out_format = dzn_image_get_dxgi_format(pdev, dst->vk.format, usage, aspect), |
| .samples = (uint32_t)src->vk.samples, |
| .loc = (uint32_t)(aspect == VK_IMAGE_ASPECT_DEPTH_BIT ? |
| FRAG_RESULT_DEPTH : |
| aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? |
| FRAG_RESULT_STENCIL : |
| FRAG_RESULT_DATA0), |
| .out_type = (uint32_t)(util_format_is_pure_uint(pfmt) ? GLSL_TYPE_UINT : |
| util_format_is_pure_sint(pfmt) ? GLSL_TYPE_INT : |
| aspect == VK_IMAGE_ASPECT_STENCIL_BIT ? GLSL_TYPE_UINT : |
| GLSL_TYPE_FLOAT), |
| .sampler_dim = (uint32_t)(src->vk.image_type == VK_IMAGE_TYPE_1D ? GLSL_SAMPLER_DIM_1D : |
| src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples == 1 ? GLSL_SAMPLER_DIM_2D : |
| src->vk.image_type == VK_IMAGE_TYPE_2D && src->vk.samples > 1 ? GLSL_SAMPLER_DIM_MS : |
| GLSL_SAMPLER_DIM_3D), |
| .src_is_array = src->vk.array_layers > 1, |
| .resolve_mode = resolve_mode, |
| /* Filter doesn't need to be part of the key if we're not embedding a static sampler */ |
| .linear_filter = filter == VK_FILTER_LINEAR && device->support_static_samplers, |
| .stencil_bit = stencil_bit, |
| .padding = 0, |
| }; |
| |
| const struct dzn_meta_blit *ctx = |
| dzn_meta_blits_get_context(device, &ctx_key); |
| assert(ctx); |
| |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig != ctx->root_sig) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].root_sig = ctx->root_sig; |
| ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, ctx->root_sig); |
| } |
| ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, ctx->pipeline_state); |
| } |
| |
| static void |
| dzn_cmd_buffer_blit_set_2d_region(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_image *src, |
| const VkImageSubresourceLayers *src_subres, |
| const VkOffset3D *src_offsets, |
| const struct dzn_image *dst, |
| const VkImageSubresourceLayers *dst_subres, |
| const VkOffset3D *dst_offsets, |
| bool normalize_src_coords) |
| { |
| uint32_t dst_w = u_minify(dst->vk.extent.width, dst_subres->mipLevel); |
| uint32_t dst_h = u_minify(dst->vk.extent.height, dst_subres->mipLevel); |
| uint32_t src_w = u_minify(src->vk.extent.width, src_subres->mipLevel); |
| uint32_t src_h = u_minify(src->vk.extent.height, src_subres->mipLevel); |
| |
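| /* Map the destination rect to NDC (Y flipped) and forward the source rect |
| * through root constants; source coordinates are normalized only when |
| * sampling a single-sampled texture. */ |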
| float dst_pos[4] = { |
| (2 * (float)dst_offsets[0].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[0].y / (float)dst_h) - 1.0f), |
| (2 * (float)dst_offsets[1].x / (float)dst_w) - 1.0f, -((2 * (float)dst_offsets[1].y / (float)dst_h) - 1.0f), |
| }; |
| |
| float src_pos[4] = { |
| (float)src_offsets[0].x, (float)src_offsets[0].y, |
| (float)src_offsets[1].x, (float)src_offsets[1].y, |
| }; |
| |
| if (normalize_src_coords) { |
| src_pos[0] /= src_w; |
| src_pos[1] /= src_h; |
| src_pos[2] /= src_w; |
| src_pos[3] /= src_h; |
| } |
| |
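| /* Four interleaved vertices, each carrying a dst position (NDC) and src |
| * coordinates; the blit shaders are assumed to expand these into a quad. */ |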
| float coords[] = { |
| dst_pos[0], dst_pos[1], src_pos[0], src_pos[1], |
| dst_pos[2], dst_pos[1], src_pos[2], src_pos[1], |
| dst_pos[0], dst_pos[3], src_pos[0], src_pos[3], |
| dst_pos[2], dst_pos[3], src_pos[2], src_pos[3], |
| }; |
| |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, ARRAY_SIZE(coords), coords, 0); |
| |
| D3D12_VIEWPORT vp = { |
| .TopLeftX = 0, |
| .TopLeftY = 0, |
| .Width = (float)dst_w, |
| .Height = (float)dst_h, |
| .MinDepth = 0, |
| .MaxDepth = 1, |
| }; |
| ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, 1, &vp); |
| |
| D3D12_RECT scissor = { |
| .left = MIN2(dst_offsets[0].x, dst_offsets[1].x), |
| .top = MIN2(dst_offsets[0].y, dst_offsets[1].y), |
| .right = MAX2(dst_offsets[0].x, dst_offsets[1].x), |
| .bottom = MAX2(dst_offsets[0].y, dst_offsets[1].y), |
| }; |
| ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &scissor); |
| } |
| |
| static void |
| dzn_cmd_buffer_blit_issue_barriers(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_image *src, VkImageLayout src_layout, |
| const VkImageSubresourceLayers *src_subres, |
| struct dzn_image *dst, VkImageLayout dst_layout, |
| const VkImageSubresourceLayers *dst_subres, |
| VkImageAspectFlagBits aspect, |
| D3D12_BARRIER_LAYOUT *restore_src_layout, |
| D3D12_BARRIER_LAYOUT *restore_dst_layout, |
| bool post) |
| { |
| VkImageSubresourceRange src_range = { |
| .aspectMask = aspect, |
| .baseMipLevel = src_subres->mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = src_subres->baseArrayLayer, |
| .layerCount = src_subres->layerCount, |
| }; |
| VkImageSubresourceRange dst_range = { |
| .aspectMask = aspect, |
| .baseMipLevel = dst_subres->mipLevel, |
| .levelCount = 1, |
| .baseArrayLayer = dst_subres->baseArrayLayer, |
| .layerCount = dst_subres->layerCount, |
| }; |
| |
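| /* !post: get src ready for shader reads and dst ready for rendering; |
| * post: restore both images to their original layouts. */ |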
| if (!post) { |
| if (cmdbuf->enhanced_barriers) { |
| D3D12_BARRIER_LAYOUT dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ? |
| D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE : D3D12_BARRIER_LAYOUT_RENDER_TARGET; |
| *restore_src_layout = dzn_cmd_buffer_require_layout(cmdbuf, src, src_layout, |
| D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, |
| &src_range); |
| *restore_dst_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst, |
| dst_layout, |
| dst_new_layout, |
| &dst_range); |
| } else { |
| VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ? |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range, |
| src_layout, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range, |
| dst_layout, |
| dst_new_layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } else { |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, src, |
| D3D12_BARRIER_SYNC_PIXEL_SHADING, D3D12_BARRIER_ACCESS_SHADER_RESOURCE, |
| D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, *restore_src_layout, |
| &src_range); |
| if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, dst, |
| D3D12_BARRIER_SYNC_DEPTH_STENCIL, D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE, |
| D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, *restore_dst_layout, |
| &dst_range); |
| } else { |
| dzn_cmd_buffer_restore_layout(cmdbuf, dst, |
| D3D12_BARRIER_SYNC_RENDER_TARGET, D3D12_BARRIER_ACCESS_RENDER_TARGET, |
| D3D12_BARRIER_LAYOUT_RENDER_TARGET, *restore_dst_layout, |
| &dst_range); |
| } |
| } else { |
| VkImageLayout dst_new_layout = (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) ? |
| VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, src, &src_range, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| src_layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, dst, &dst_range, |
| dst_new_layout, |
| dst_layout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| } |
| |
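/* Blit a single VkImageBlit2 region by drawing a textured quad per
 * destination slice/layer, one pass per aspect. Stencil blits on devices
 * without PSSpecifiedStencilRefSupported are lowered to one draw per
 * stencil bit.
 */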
| static void |
| dzn_cmd_buffer_blit_region(struct dzn_cmd_buffer *cmdbuf, |
| const VkBlitImageInfo2 *info, |
| struct dzn_descriptor_heap *heap, |
| uint32_t *heap_slot, |
| struct dzn_descriptor_heap *sampler_heap, |
| uint32_t sampler_heap_slot, |
| uint32_t r) |
| { |
| VK_FROM_HANDLE(dzn_image, src, info->srcImage); |
| VK_FROM_HANDLE(dzn_image, dst, info->dstImage); |
| |
| const VkImageBlit2 *region = &info->pRegions[r]; |
| bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D; |
| bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D; |
| const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported; |
| uint32_t stencil_bit = support_stencil_blit ? 0xf : 0; |
| uint32_t stencil_bit_root_param_slot = 2; |
| assert(device->support_static_samplers == (sampler_heap == NULL)); |
| |
| dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { |
| D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit); |
| dzn_cmd_buffer_blit_issue_barriers(cmdbuf, |
| src, info->srcImageLayout, ®ion->srcSubresource, |
| dst, info->dstImageLayout, ®ion->dstSubresource, |
| aspect, &restore_src_layout, &restore_dst_layout, false); |
| dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, |
| aspect, ®ion->srcSubresource, |
| heap, (*heap_slot)++); |
| dzn_cmd_buffer_blit_set_2d_region(cmdbuf, |
| src, ®ion->srcSubresource, region->srcOffsets, |
| dst, ®ion->dstSubresource, region->dstOffsets, |
| src->vk.samples == 1); |
| |
| uint32_t dst_depth = |
| region->dstOffsets[1].z > region->dstOffsets[0].z ? |
| region->dstOffsets[1].z - region->dstOffsets[0].z : |
| region->dstOffsets[0].z - region->dstOffsets[1].z; |
| uint32_t src_depth = |
| region->srcOffsets[1].z > region->srcOffsets[0].z ? |
| region->srcOffsets[1].z - region->srcOffsets[0].z : |
| region->srcOffsets[0].z - region->srcOffsets[1].z; |
| |
| uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); |
| uint32_t dst_level = region->dstSubresource.mipLevel; |
| |
| float src_slice_step = src_is_3d ? (float)src_depth / dst_depth : 1; |
| if (region->srcOffsets[0].z > region->srcOffsets[1].z) |
| src_slice_step = -src_slice_step; |
| float src_z_coord = |
| src_is_3d ? (float)region->srcOffsets[0].z + (src_slice_step * 0.5f) : 0; |
| uint32_t slice_count = dst_is_3d ? dst_depth : layer_count; |
| uint32_t dst_z_coord = |
| dst_is_3d ? region->dstOffsets[0].z : region->dstSubresource.baseArrayLayer; |
| if (region->dstOffsets[0].z > region->dstOffsets[1].z) |
| dst_z_coord--; |
| |
| uint32_t dst_slice_step = region->dstOffsets[0].z < region->dstOffsets[1].z ? |
| 1 : -1; |
| |
| /* Normalize the src coordinates/step */ |
| if (src_is_3d) { |
| src_z_coord /= src->vk.extent.depth; |
| src_slice_step /= src->vk.extent.depth; |
| } |
| |
| for (uint32_t slice = 0; slice < slice_count; slice++) { |
| dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, dst, aspect, dst_level, dst_z_coord, region->dstOffsets); |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); |
| if (!device->support_static_samplers) { |
| ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot)); |
| stencil_bit_root_param_slot++; |
| } |
| if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) { |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; |
| ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff); |
| for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) { |
| dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, info->filter, dzn_blit_resolve_none, stencil_bit); |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0); |
| ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); |
| } |
| } else { |
| ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); |
| } |
| src_z_coord += src_slice_step; |
| dst_z_coord += dst_slice_step; |
| } |
| |
| dzn_cmd_buffer_blit_issue_barriers(cmdbuf, |
| src, info->srcImageLayout, ®ion->srcSubresource, |
| dst, info->dstImageLayout, ®ion->dstSubresource, |
| aspect, &restore_src_layout, &restore_dst_layout, true); |
| } |
| } |
| |
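/* Map a Vulkan resolve mode to the blit-shader resolve mode. */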
| static enum dzn_blit_resolve_mode |
| get_blit_resolve_mode(VkResolveModeFlagBits mode) |
| { |
| switch (mode) { |
| case VK_RESOLVE_MODE_AVERAGE_BIT: return dzn_blit_resolve_average; |
| case VK_RESOLVE_MODE_MIN_BIT: return dzn_blit_resolve_min; |
| case VK_RESOLVE_MODE_MAX_BIT: return dzn_blit_resolve_max; |
| case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return dzn_blit_resolve_sample_zero; |
| default: unreachable("Unexpected resolve mode"); |
| } |
| } |
| |
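/* Resolve a single VkImageResolve2 region through the blit path: one draw
 * per layer, one pass per aspect, with the resolve mode baked into the
 * blit pipeline.
 */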
| static void |
| dzn_cmd_buffer_resolve_region(struct dzn_cmd_buffer *cmdbuf, |
| const VkResolveImageInfo2 *info, |
| VkResolveModeFlags mode, |
| struct dzn_descriptor_heap *heap, |
| uint32_t *heap_slot, |
| struct dzn_descriptor_heap *sampler_heap, |
| uint32_t sampler_heap_slot, |
| uint32_t r) |
| { |
| VK_FROM_HANDLE(dzn_image, src, info->srcImage); |
| VK_FROM_HANDLE(dzn_image, dst, info->dstImage); |
| |
| const VkImageResolve2 *region = &info->pRegions[r]; |
| |
| const struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| const struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| bool support_stencil_blit = pdev->options.PSSpecifiedStencilRefSupported; |
| uint32_t stencil_bit = support_stencil_blit ? 0xf : 0; |
| uint32_t stencil_bit_root_param_slot = 2; |
| assert(device->support_static_samplers == (sampler_heap == NULL)); |
| enum dzn_blit_resolve_mode resolve_mode = get_blit_resolve_mode(mode); |
| |
| dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { |
| D3D12_BARRIER_LAYOUT restore_src_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| D3D12_BARRIER_LAYOUT restore_dst_layout = D3D12_BARRIER_LAYOUT_COMMON; |
| dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit); |
| dzn_cmd_buffer_blit_issue_barriers(cmdbuf, |
| src, info->srcImageLayout, ®ion->srcSubresource, |
| dst, info->dstImageLayout, ®ion->dstSubresource, |
| aspect, &restore_src_layout, &restore_dst_layout, false); |
| dzn_cmd_buffer_blit_prepare_src_view(cmdbuf, info->srcImage, aspect, |
| ®ion->srcSubresource, |
| heap, (*heap_slot)++); |
| |
| VkOffset3D src_offset[2] = { |
| { |
| .x = region->srcOffset.x, |
| .y = region->srcOffset.y, |
| }, |
| { |
| .x = (int32_t)(region->srcOffset.x + region->extent.width), |
| .y = (int32_t)(region->srcOffset.y + region->extent.height), |
| }, |
| }; |
| VkOffset3D dst_offset[2] = { |
| { |
| .x = region->dstOffset.x, |
| .y = region->dstOffset.y, |
| }, |
| { |
| .x = (int32_t)(region->dstOffset.x + region->extent.width), |
| .y = (int32_t)(region->dstOffset.y + region->extent.height), |
| }, |
| }; |
| |
| dzn_cmd_buffer_blit_set_2d_region(cmdbuf, |
| src, ®ion->srcSubresource, src_offset, |
| dst, ®ion->dstSubresource, dst_offset, |
| false); |
| |
| uint32_t layer_count = dzn_get_layer_count(src, ®ion->srcSubresource); |
| for (uint32_t layer = 0; layer < layer_count; layer++) { |
| float src_z_coord = layer; |
| |
| dzn_cmd_buffer_blit_prepare_dst_view(cmdbuf, |
| dst, aspect, region->dstSubresource.mipLevel, |
| region->dstSubresource.baseArrayLayer + layer, |
| dst_offset); |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, 1, 1, &src_z_coord, 16); |
| if (!device->support_static_samplers) { |
| ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, 2, dzn_descriptor_heap_get_gpu_handle(sampler_heap, sampler_heap_slot)); |
| stencil_bit_root_param_slot++; |
| } |
| if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT && !support_stencil_blit) { |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; |
            ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, 0xff);
| for (stencil_bit = 0; stencil_bit < 8; ++stencil_bit) { |
| dzn_cmd_buffer_blit_set_pipeline(cmdbuf, src, dst, aspect, VK_FILTER_NEAREST, resolve_mode, stencil_bit); |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstant(cmdbuf->cmdlist, stencil_bit_root_param_slot, (1 << stencil_bit), 0); |
| ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); |
| } |
| } else { |
| ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, 4, 1, 0, 0); |
| } |
| } |
| |
| dzn_cmd_buffer_blit_issue_barriers(cmdbuf, |
| src, info->srcImageLayout, ®ion->srcSubresource, |
| dst, info->dstImageLayout, ®ion->dstSubresource, |
| aspect, &restore_src_layout, &restore_dst_layout, true); |
| } |
| } |
| |
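/* Flush a dirty pipeline binding to the D3D12 command list: root signature
 * (binding the device descriptor heaps first in bindless mode), primitive
 * topology, pipeline state object, and the view instance mask for native
 * multiview.
 */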
| static void |
| dzn_cmd_buffer_update_pipeline(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) |
| { |
| const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; |
| |
| if (!pipeline) |
| return; |
| |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| ID3D12PipelineState *old_pipeline_state = |
| cmdbuf->state.pipeline ? cmdbuf->state.pipeline->state : NULL; |
| |
| uint32_t view_instance_mask = 0; |
| if (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_PIPELINE) { |
| if (cmdbuf->state.bindpoint[bindpoint].root_sig != pipeline->root.sig) { |
| cmdbuf->state.bindpoint[bindpoint].root_sig = pipeline->root.sig; |
| /* Changing root signature always requires re-binding descriptor heaps */ |
| cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_HEAPS; |
| |
| if (device->bindless) { |
| /* Note: The D3D12 spec for descriptor heap indexing requires that the descriptor heaps |
| * are bound *before* the root signature. */ |
| bool bind_heaps = false; |
| dzn_foreach_pool_type(type) { |
| if (cmdbuf->state.heaps[type] != &device->device_heaps[type].heap) { |
| bind_heaps = true; |
| cmdbuf->state.heaps[type] = &device->device_heaps[type].heap; |
| } |
| } |
| if (bind_heaps) { |
| ID3D12DescriptorHeap *heaps[NUM_POOL_TYPES]; |
| dzn_foreach_pool_type(type) |
| heaps[type] = cmdbuf->state.heaps[type]->heap; |
| ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, NUM_POOL_TYPES, heaps); |
| } |
| } |
| |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) |
| ID3D12GraphicsCommandList1_SetGraphicsRootSignature(cmdbuf->cmdlist, pipeline->root.sig); |
| else |
| ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, pipeline->root.sig); |
| } |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { |
| struct dzn_graphics_pipeline *gfx = |
| (struct dzn_graphics_pipeline *)pipeline; |
| ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, gfx->ia.topology); |
| dzn_graphics_pipeline_get_state(gfx, &cmdbuf->state.pipeline_variant); |
| if (gfx->multiview.native_view_instancing) |
| view_instance_mask = gfx->multiview.view_mask; |
| else |
| view_instance_mask = 1; |
| |
| if (gfx->zsa.dynamic_depth_bias && gfx->use_gs_for_polygon_mode_point) |
| cmdbuf->state.bindpoint[bindpoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| } |
| } |
| |
| ID3D12PipelineState *new_pipeline_state = pipeline->state; |
| |
| if (old_pipeline_state != new_pipeline_state) { |
| ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, pipeline->state); |
| cmdbuf->state.pipeline = pipeline; |
| } |
| |
   /* Defer this until after the pipeline has been set, to work around an
    * NVIDIA driver bug triggered when the view instance mask is set with no
    * pipeline bound. */
| if (view_instance_mask) |
| ID3D12GraphicsCommandList1_SetViewInstanceMask(cmdbuf->cmdlist, view_instance_mask); |
| } |
| |
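/* Flush dirty descriptor state. In non-bindless mode, descriptors from all
 * bound sets are copied to a heap allocated from the command buffer pools
 * and exposed through root descriptor tables. In bindless mode, each set is
 * exposed as a root SRV pointing at the pool's descriptor buffer, and
 * dynamic buffer descriptors are gathered in a temporary upload buffer.
 */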
| static void |
| dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_descriptor_state *desc_state = |
| &cmdbuf->state.bindpoint[bindpoint].desc_state; |
| struct dzn_descriptor_heap *new_heaps[NUM_POOL_TYPES] = { |
| desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV], |
| desc_state->heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] |
| }; |
| uint32_t new_heap_offsets[NUM_POOL_TYPES] = { 0 }; |
| bool update_root_desc_table[NUM_POOL_TYPES] = { 0 }; |
| const struct dzn_pipeline *pipeline = |
| cmdbuf->state.bindpoint[bindpoint].pipeline; |
| |
   /* The set of dirty bits that are cleared by running this function. Notably,
    * for bindless, descriptor sets that are bound but unused by the currently
    * bound pipeline are not processed, meaning their dirty bits persist until
    * a pipeline that uses them is bound. For non-bindless, all sets are
    * processed. */
| uint32_t dirty_bits_bindless = |
| (pipeline->dynamic_buffer_count ? DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS : 0) | |
| (((DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << pipeline->set_count) - 1) & DZN_CMD_BINDPOINT_DIRTY_DESC_SETS); |
| uint32_t dirty_bits = (device->bindless ? dirty_bits_bindless : DZN_CMD_BINDPOINT_DIRTY_DESC_SETS | DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS); |
| if (!(cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bits)) |
| return; |
| |
| dzn_foreach_pool_type (type) { |
| if (device->bindless) { |
| new_heaps[type] = &device->device_heaps[type].heap; |
| } else { |
| uint32_t desc_count = pipeline->desc_count[type]; |
| if (!desc_count) |
| continue; |
| |
| struct dzn_descriptor_heap_pool *pool = |
| type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ? |
| &cmdbuf->cbv_srv_uav_pool : &cmdbuf->sampler_pool; |
| struct dzn_descriptor_heap *dst_heap = NULL; |
| uint32_t dst_heap_offset = 0; |
| |
| dzn_descriptor_heap_pool_alloc_slots(pool, device, desc_count, |
| &dst_heap, &dst_heap_offset); |
| new_heap_offsets[type] = dst_heap_offset; |
| update_root_desc_table[type] = true; |
| |
| for (uint32_t s = 0; s < MAX_SETS; s++) { |
| const struct dzn_descriptor_set *set = desc_state->sets[s].set; |
| if (!set) continue; |
| |
| uint32_t set_heap_offset = pipeline->sets[s].heap_offsets[type]; |
| uint32_t set_desc_count = MIN2(pipeline->sets[s].range_desc_count[type], set->heap_sizes[type]); |
| if (set_desc_count) { |
| dzn_descriptor_heap_copy(device, dst_heap, dst_heap_offset + set_heap_offset, |
| &set->pool->heaps[type], set->heap_offsets[type], |
| set_desc_count, type); |
| } |
| |
| if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) { |
| uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; |
| for (uint32_t o = 0; o < dynamic_buffer_count; o++) { |
| struct dzn_buffer_desc bdesc = set->dynamic_buffers[o]; |
| if (!bdesc.buffer) |
| continue; |
| bdesc.offset += desc_state->sets[s].dynamic_offsets[o]; |
| |
| bool primary_is_writable = bdesc.type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; |
| uint32_t desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary; |
| dzn_descriptor_heap_write_buffer_desc(device, dst_heap, |
| dst_heap_offset + set_heap_offset + desc_heap_offset, |
| primary_is_writable, &bdesc); |
| |
| if (pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt != ~0) { |
| assert(primary_is_writable); |
| desc_heap_offset = pipeline->sets[s].dynamic_buffer_heap_offsets[o].alt; |
| dzn_descriptor_heap_write_buffer_desc(device, dst_heap, |
| dst_heap_offset + set_heap_offset + desc_heap_offset, |
| false, &bdesc); |
| } |
| } |
| } |
| } |
| |
| new_heaps[type] = dst_heap; |
| } |
| } |
| |
| if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || |
| new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) { |
| ID3D12DescriptorHeap *desc_heaps[2]; |
| uint32_t num_desc_heaps = 0; |
| if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]) |
| desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]->heap; |
| if (new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]) |
| desc_heaps[num_desc_heaps++] = new_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]->heap; |
| ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, num_desc_heaps, desc_heaps); |
| |
| for (unsigned h = 0; h < ARRAY_SIZE(cmdbuf->state.heaps); h++) |
| cmdbuf->state.heaps[h] = new_heaps[h]; |
| } |
| |
| if (!device->bindless) { |
| for (uint32_t r = 0; r < pipeline->root.sets_param_count; r++) { |
| D3D12_DESCRIPTOR_HEAP_TYPE type = pipeline->root.type[r]; |
| |
| if (!update_root_desc_table[type]) |
| continue; |
| |
| D3D12_GPU_DESCRIPTOR_HANDLE handle = |
| dzn_descriptor_heap_get_gpu_handle(new_heaps[type], new_heap_offsets[type]); |
| |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) |
| ID3D12GraphicsCommandList1_SetGraphicsRootDescriptorTable(cmdbuf->cmdlist, r, handle); |
| else |
| ID3D12GraphicsCommandList1_SetComputeRootDescriptorTable(cmdbuf->cmdlist, r, handle); |
| } |
| } |
| |
| if (device->bindless) { |
| for (uint32_t s = 0; s < pipeline->set_count; ++s) { |
| const struct dzn_descriptor_set *set = desc_state->sets[s].set; |
| if (!set || !set->pool->bindless.buf) |
| continue; |
| |
| uint32_t dirty_bit = DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << s; |
| if (cmdbuf->state.bindpoint[bindpoint].dirty & dirty_bit) { |
| uint64_t gpuva = set->pool->bindless.gpuva + (set->heap_offsets[0] * sizeof(struct dxil_spirv_bindless_entry)); |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) |
| ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, s, gpuva); |
| else |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, s, gpuva); |
| } |
| } |
| if (pipeline->dynamic_buffer_count && |
| (cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS)) { |
| ID3D12Resource *dynamic_buffer_buf = NULL; |
| uint64_t dynamic_buffer_buf_offset; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(struct dxil_spirv_bindless_entry) * pipeline->dynamic_buffer_count, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT, |
| &dynamic_buffer_buf, |
| &dynamic_buffer_buf_offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| uint64_t gpuva = ID3D12Resource_GetGPUVirtualAddress(dynamic_buffer_buf) + dynamic_buffer_buf_offset; |
| struct dxil_spirv_bindless_entry *map; |
| ID3D12Resource_Map(dynamic_buffer_buf, 0, NULL, (void **)&map); |
| map += (dynamic_buffer_buf_offset / sizeof(*map)); |
| |
| for (uint32_t s = 0; s < MAX_SETS; ++s) { |
| const struct dzn_descriptor_set *set = desc_state->sets[s].set; |
| if (!set) |
| continue; |
| |
| uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; |
| for (uint32_t o = 0; o < dynamic_buffer_count; o++) { |
| const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o]; |
| volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary]; |
| struct dzn_buffer_desc bdesc_updated = *bdesc; |
| bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o]; |
| dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry); |
| } |
| } |
| |
| ID3D12Resource_Unmap(dynamic_buffer_buf, 0, NULL); |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) |
| ID3D12GraphicsCommandList1_SetGraphicsRootShaderResourceView(cmdbuf->cmdlist, |
| pipeline->root.dynamic_buffer_bindless_param_idx, |
| gpuva); |
| else |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, |
| pipeline->root.dynamic_buffer_bindless_param_idx, |
| gpuva); |
| } |
| } |
| |
| cmdbuf->state.bindpoint[bindpoint].dirty &= ~dirty_bits; |
| } |
| |
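/* Push the sysval constants (first_vertex, base_instance, view_index, ...)
 * to their root-constant slot when they're marked dirty.
 */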
| static void |
| dzn_cmd_buffer_update_sysvals(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) |
| { |
| if (!(cmdbuf->state.bindpoint[bindpoint].dirty & DZN_CMD_BINDPOINT_DIRTY_SYSVALS)) |
| return; |
| |
| const struct dzn_pipeline *pipeline = cmdbuf->state.bindpoint[bindpoint].pipeline; |
| uint32_t sysval_cbv_param_idx = pipeline->root.sysval_cbv_param_idx; |
| |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, |
| sizeof(cmdbuf->state.sysvals.gfx) / 4, |
| &cmdbuf->state.sysvals.gfx, 0); |
| } else { |
| ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, sysval_cbv_param_idx, |
| sizeof(cmdbuf->state.sysvals.compute) / 4, |
| &cmdbuf->state.sysvals.compute, 0); |
| } |
| |
| cmdbuf->state.bindpoint[bindpoint].dirty &= ~DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| } |
| |
| static void |
| dzn_cmd_buffer_update_viewports(struct dzn_cmd_buffer *cmdbuf) |
| { |
| const struct dzn_graphics_pipeline *pipeline = |
| (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; |
| |
| if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_VIEWPORTS) || |
| !pipeline->vp.count) |
| return; |
| |
| ID3D12GraphicsCommandList1_RSSetViewports(cmdbuf->cmdlist, pipeline->vp.count, cmdbuf->state.viewports); |
| } |
| |
| static void |
| dzn_cmd_buffer_update_scissors(struct dzn_cmd_buffer *cmdbuf) |
| { |
| const struct dzn_graphics_pipeline *pipeline = |
| (const struct dzn_graphics_pipeline *)cmdbuf->state.pipeline; |
| |
| if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_SCISSORS)) |
| return; |
| |
| if (!pipeline->scissor.count) { |
| /* Apply a scissor delimiting the render area. */ |
| ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, 1, &cmdbuf->state.render.area); |
| return; |
| } |
| |
| D3D12_RECT scissors[MAX_SCISSOR]; |
| |
| memcpy(scissors, cmdbuf->state.scissors, sizeof(D3D12_RECT) * pipeline->scissor.count); |
| for (uint32_t i = 0; i < pipeline->scissor.count; i++) { |
| scissors[i].left = MAX2(scissors[i].left, cmdbuf->state.render.area.left); |
| scissors[i].top = MAX2(scissors[i].top, cmdbuf->state.render.area.top); |
| scissors[i].right = MIN2(scissors[i].right, cmdbuf->state.render.area.right); |
| scissors[i].bottom = MIN2(scissors[i].bottom, cmdbuf->state.render.area.bottom); |
| } |
| |
| ID3D12GraphicsCommandList1_RSSetScissorRects(cmdbuf->cmdlist, pipeline->scissor.count, scissors); |
| } |
| |
| static void |
| dzn_cmd_buffer_update_vbviews(struct dzn_cmd_buffer *cmdbuf) |
| { |
| unsigned start, end; |
| |
| BITSET_FOREACH_RANGE(start, end, cmdbuf->state.vb.dirty, MAX_VBS) |
| ID3D12GraphicsCommandList1_IASetVertexBuffers(cmdbuf->cmdlist, start, end - start, &cmdbuf->state.vb.views[start]); |
| |
| BITSET_CLEAR_RANGE(cmdbuf->state.vb.dirty, 0, MAX_VBS); |
| } |
| |
| static void |
| dzn_cmd_buffer_update_ibview(struct dzn_cmd_buffer *cmdbuf) |
| { |
| if (!(cmdbuf->state.dirty & DZN_CMD_DIRTY_IB)) |
| return; |
| |
| ID3D12GraphicsCommandList1_IASetIndexBuffer(cmdbuf->cmdlist, &cmdbuf->state.ib.view); |
| } |
| |
| static void |
| dzn_cmd_buffer_update_push_constants(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) |
| { |
| struct dzn_cmd_buffer_push_constant_state *state = |
| bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? |
| &cmdbuf->state.push_constant.gfx : &cmdbuf->state.push_constant.compute; |
| |
| uint32_t offset = state->offset / 4; |
| uint32_t end = ALIGN(state->end, 4) / 4; |
| uint32_t count = end - offset; |
| |
| if (!count) |
| return; |
| |
| uint32_t slot = cmdbuf->state.pipeline->root.push_constant_cbv_param_idx; |
| uint32_t *vals = state->values + offset; |
| |
| if (bindpoint == VK_PIPELINE_BIND_POINT_GRAPHICS) |
| ID3D12GraphicsCommandList1_SetGraphicsRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); |
| else |
| ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, slot, count, vals, offset); |
| |
| state->offset = 0; |
| state->end = 0; |
| } |
| |
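/* Apply the dynamic stencil reference value(s), using
 * OMSetFrontAndBackStencilRef() when independent front/back references are
 * supported, and falling back to the single-reference API otherwise.
 */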
| static void |
| dzn_cmd_buffer_update_zsa(struct dzn_cmd_buffer *cmdbuf) |
| { |
| struct dzn_physical_device *pdev = |
| container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| if (cmdbuf->state.dirty & DZN_CMD_DIRTY_STENCIL_REF) { |
| const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| if (cmdbuf->cmdlist8 && |
| pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) { |
| ID3D12GraphicsCommandList8_OMSetFrontAndBackStencilRef(cmdbuf->cmdlist8, |
| cmdbuf->state.zsa.stencil_test.front.ref, |
| cmdbuf->state.zsa.stencil_test.back.ref); |
| } else { |
| uint32_t ref = |
| gfx->zsa.stencil_test.front.uses_ref ? |
| cmdbuf->state.zsa.stencil_test.front.ref : |
| cmdbuf->state.zsa.stencil_test.back.ref; |
| ID3D12GraphicsCommandList1_OMSetStencilRef(cmdbuf->cmdlist, ref); |
| } |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_update_blend_constants(struct dzn_cmd_buffer *cmdbuf) |
| { |
| if (cmdbuf->state.dirty & DZN_CMD_DIRTY_BLEND_CONSTANTS) |
| ID3D12GraphicsCommandList1_OMSetBlendFactor(cmdbuf->cmdlist, |
| cmdbuf->state.blend.constants); |
| } |
| |
| static void |
| dzn_cmd_buffer_update_depth_bounds(struct dzn_cmd_buffer *cmdbuf) |
| { |
| if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BOUNDS) { |
| ID3D12GraphicsCommandList1_OMSetDepthBounds(cmdbuf->cmdlist, |
| cmdbuf->state.zsa.depth_bounds.min, |
| cmdbuf->state.zsa.depth_bounds.max); |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_update_depth_bias(struct dzn_cmd_buffer *cmdbuf) |
| { |
| if (cmdbuf->state.dirty & DZN_CMD_DIRTY_DEPTH_BIAS) { |
| assert(cmdbuf->cmdlist9); |
| ID3D12GraphicsCommandList9_RSSetDepthBias(cmdbuf->cmdlist9, |
| cmdbuf->state.pipeline_variant.depth_bias.constant_factor, |
| cmdbuf->state.pipeline_variant.depth_bias.clamp, |
| cmdbuf->state.pipeline_variant.depth_bias.slope_factor); |
| } |
| } |
| |
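/* Lower a non-indexed triangle-fan draw to an indexed triangle-list draw by
 * building an index buffer on the CPU: fan triangle t becomes the triangle
 * (t+1, t+2, 0). vertex_count is updated to the triangle-list vertex count.
 */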
| static VkResult |
| dzn_cmd_buffer_triangle_fan_create_index(struct dzn_cmd_buffer *cmdbuf, uint32_t *vertex_count) |
| { |
| uint8_t index_size = *vertex_count <= 0xffff ? 2 : 4; |
| uint32_t triangle_count = MAX2(*vertex_count, 2) - 2; |
| |
| *vertex_count = triangle_count * 3; |
| if (!*vertex_count) |
| return VK_SUCCESS; |
| |
| ID3D12Resource *index_buf; |
| uint64_t index_offset; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *vertex_count * index_size, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| index_size, |
| &index_buf, |
| &index_offset); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| void *cpu_ptr; |
| ID3D12Resource_Map(index_buf, 0, NULL, &cpu_ptr); |
| cpu_ptr = (uint8_t *)cpu_ptr + index_offset; |
| |
| /* TODO: VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT */ |
| if (index_size == 2) { |
| uint16_t *indices = (uint16_t *)cpu_ptr; |
| for (uint32_t t = 0; t < triangle_count; t++) { |
| indices[t * 3] = t + 1; |
| indices[(t * 3) + 1] = t + 2; |
| indices[(t * 3) + 2] = 0; |
| } |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; |
| } else { |
| uint32_t *indices = (uint32_t *)cpu_ptr; |
| for (uint32_t t = 0; t < triangle_count; t++) { |
| indices[t * 3] = t + 1; |
| indices[(t * 3) + 1] = t + 2; |
| indices[(t * 3) + 2] = 0; |
| } |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; |
| } |
| |
| cmdbuf->state.ib.view.SizeInBytes = *vertex_count * index_size; |
| cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buf) + index_offset; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| return VK_SUCCESS; |
| } |
| |
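/* Lower an indexed triangle-fan draw by rewriting the index buffer on the
 * GPU: a compute shader expands the fan indices into a 32-bit triangle-list
 * index buffer that replaces the application's index buffer for the draw.
 */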
| static VkResult |
| dzn_cmd_buffer_triangle_fan_rewrite_index(struct dzn_cmd_buffer *cmdbuf, |
| uint32_t *index_count, |
| uint32_t *first_index) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| uint32_t triangle_count = MAX2(*index_count, 2) - 2; |
| |
| *index_count = triangle_count * 3; |
| if (!*index_count) |
| return VK_SUCCESS; |
| |
   /* The new index buffer is always 32-bit, to keep the compute shader that
    * rewrites the indices simple. */
| ID3D12Resource *new_index_buf; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, *index_count * 4, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| 4, |
| &new_index_buf, |
| NULL); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| D3D12_GPU_VIRTUAL_ADDRESS old_index_buf_gpu = |
| cmdbuf->state.ib.view.BufferLocation; |
| |
| ASSERTED const struct dzn_graphics_pipeline *gfx_pipeline = (const struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| ASSERTED bool prim_restart = |
| dzn_graphics_pipeline_get_desc_template(gfx_pipeline, ib_strip_cut) != NULL; |
| |
| assert(!prim_restart); |
| |
| enum dzn_index_type index_type = |
| dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, false); |
| const struct dzn_meta_triangle_fan_rewrite_index *rewrite_index = |
| &device->triangle_fan[index_type]; |
| |
| struct dzn_triangle_fan_rewrite_index_params params = { |
| .first_index = *first_index, |
| }; |
| |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL; |
| ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig); |
| ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state); |
| ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, 0, ID3D12Resource_GetGPUVirtualAddress(new_index_buf)); |
| ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, 1, sizeof(params) / 4, |
| ¶ms, 0); |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, 2, old_index_buf_gpu); |
| ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, triangle_count, 1, 1); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, new_index_buf, |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_INDEX_INPUT, |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDEX_BUFFER); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, new_index_buf, 0, 1, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| D3D12_RESOURCE_STATE_INDEX_BUFFER, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
   /* We don't mess with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
| cmdbuf->state.pipeline = NULL; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| cmdbuf->state.ib.view.SizeInBytes = *index_count * 4; |
| cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(new_index_buf); |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| *first_index = 0; |
| return VK_SUCCESS; |
| } |
| |
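/* Flush all dirty graphics state to the command list before a draw. */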
| static void |
| dzn_cmd_buffer_prepare_draw(struct dzn_cmd_buffer *cmdbuf, bool indexed) |
| { |
| if (indexed) |
| dzn_cmd_buffer_update_ibview(cmdbuf); |
| |
| dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); |
| dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); |
| dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); |
| dzn_cmd_buffer_update_viewports(cmdbuf); |
| dzn_cmd_buffer_update_scissors(cmdbuf); |
| dzn_cmd_buffer_update_vbviews(cmdbuf); |
| dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); |
| dzn_cmd_buffer_update_zsa(cmdbuf); |
| dzn_cmd_buffer_update_blend_constants(cmdbuf); |
| dzn_cmd_buffer_update_depth_bounds(cmdbuf); |
| dzn_cmd_buffer_update_depth_bias(cmdbuf); |
| |
| /* Reset the dirty states */ |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS; |
| cmdbuf->state.dirty = 0; |
| } |
| |
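/* Upper bound on the number of triangle-list indices a triangle-fan draw
 * can produce, derived from the bound index or vertex buffers. Returns 0
 * when the pipeline doesn't use triangle fans.
 */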
| static uint32_t |
| dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(struct dzn_cmd_buffer *cmdbuf, bool indexed) |
| { |
| struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| |
| if (!pipeline->ia.triangle_fan) |
| return 0; |
| |
| uint32_t max_triangles; |
| |
| if (indexed) { |
| uint32_t index_size = cmdbuf->state.ib.view.Format == DXGI_FORMAT_R32_UINT ? 4 : 2; |
| uint32_t max_indices = cmdbuf->state.ib.view.SizeInBytes / index_size; |
| |
| max_triangles = MAX2(max_indices, 2) - 2; |
| } else { |
| uint32_t max_vertex = 0; |
| for (uint32_t i = 0; i < pipeline->vb.count; i++) { |
| max_vertex = |
| MAX2(max_vertex, |
| cmdbuf->state.vb.views[i].SizeInBytes / cmdbuf->state.vb.views[i].StrideInBytes); |
| } |
| |
| max_triangles = MAX2(max_vertex, 2) - 2; |
| } |
| |
| return max_triangles * 3; |
| } |
| |
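/* Patch an indirect draw for ExecuteIndirect(): a compute prepass rewrites
 * the application-provided draw parameters into an internal exec buffer,
 * adding the draw params/draw IDs the shaders need, and, for triangle fans,
 * emitting index-rewrite jobs that are executed indirectly before the real
 * draw.
 */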
| static void |
| dzn_cmd_buffer_patch_indirect_draw(struct dzn_cmd_buffer *cmdbuf, |
| struct dzn_indirect_draw_type draw_type, |
| ID3D12Resource **inout_draw_buf, |
| size_t *inout_draw_buf_offset, |
| ID3D12Resource **inout_count_buf, |
| size_t *inout_count_buf_offset, |
| uint32_t max_draw_count, |
| uint32_t *inout_draw_buf_stride, |
| bool *inout_indexed) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| uint32_t triangle_fan_index_buf_stride = |
| dzn_cmd_buffer_triangle_fan_get_max_index_buf_size(cmdbuf, *inout_indexed) * |
| sizeof(uint32_t); |
| |
| uint32_t min_draw_buf_stride = *inout_indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : sizeof(D3D12_DRAW_ARGUMENTS); |
| |
| uint32_t exec_buf_stride = |
| (draw_type.triangle_fan ? sizeof(D3D12_INDEX_BUFFER_VIEW) : 0) + |
| (draw_type.draw_params ? sizeof(uint32_t) * 2 : 0) + |
| (draw_type.draw_id ? sizeof(uint32_t) : 0) + |
| min_draw_buf_stride; |
| uint32_t triangle_fan_exec_buf_stride = |
| sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params); |
| uint32_t exec_buf_size = max_draw_count * exec_buf_stride; |
| uint32_t exec_buf_draw_offset = 0; |
| |
   /* We reserve the first slot for the draw_count value when indirect count
    * is involved.
    */
| if (*inout_count_buf != NULL) { |
| exec_buf_size += exec_buf_stride; |
| exec_buf_draw_offset = exec_buf_stride; |
| } |
| |
| ID3D12Resource *exec_buf; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, exec_buf_size, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| 0, |
| &exec_buf, NULL); |
| if (result != VK_SUCCESS) |
| return; |
| |
| D3D12_GPU_VIRTUAL_ADDRESS draw_buf_gpu = |
| ID3D12Resource_GetGPUVirtualAddress(*inout_draw_buf) + *inout_draw_buf_offset; |
| ID3D12Resource *triangle_fan_index_buf = NULL; |
| ID3D12Resource *triangle_fan_exec_buf = NULL; |
| |
| if (triangle_fan_index_buf_stride) { |
| result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, |
| max_draw_count * triangle_fan_index_buf_stride, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| 0, |
| &triangle_fan_index_buf, NULL); |
| if (result != VK_SUCCESS) |
| return; |
| |
| result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, |
| max_draw_count * triangle_fan_exec_buf_stride, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| 0, |
| &triangle_fan_exec_buf, NULL); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| |
| struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params params = { |
| .draw_buf_stride = *inout_draw_buf_stride, |
| .triangle_fan_index_buf_stride = triangle_fan_index_buf_stride, |
| .triangle_fan_index_buf_start = |
| triangle_fan_index_buf ? |
| ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf) : 0, |
| .exec_buf_start = |
| draw_type.triangle_fan_primitive_restart ? |
| ID3D12Resource_GetGPUVirtualAddress(exec_buf) + exec_buf_draw_offset : 0, |
| }; |
| uint32_t params_size; |
| if (draw_type.triangle_fan_primitive_restart) |
| params_size = sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params); |
| else if (draw_type.triangle_fan) |
| params_size = sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params); |
| else |
| params_size = sizeof(struct dzn_indirect_draw_rewrite_params); |
| |
| struct dzn_meta_indirect_draw *indirect_draw = &device->indirect_draws[draw_type.value]; |
| uint32_t root_param_idx = 0; |
| |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL; |
| ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, indirect_draw->root_sig); |
| ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, indirect_draw->pipeline_state); |
| ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++, |
| params_size / 4, (const void *)¶ms, 0); |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, root_param_idx++, |
| draw_buf_gpu); |
| ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++, |
| ID3D12Resource_GetGPUVirtualAddress(exec_buf)); |
| if (*inout_count_buf) { |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, |
| root_param_idx++, |
| ID3D12Resource_GetGPUVirtualAddress(*inout_count_buf) + |
| *inout_count_buf_offset); |
| } |
| |
| if (triangle_fan_exec_buf) { |
| ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, |
| root_param_idx++, |
| ID3D12Resource_GetGPUVirtualAddress(triangle_fan_exec_buf)); |
| } |
| |
| ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, max_draw_count, 1, 1); |
| |
| D3D12_BUFFER_BARRIER buf_barriers[2]; |
| D3D12_BARRIER_GROUP enhanced_barriers = { |
| .NumBarriers = 0, |
| .Type = D3D12_BARRIER_TYPE_BUFFER, |
| .pBufferBarriers = buf_barriers |
| }; |
| |
| if (triangle_fan_exec_buf) { |
| enum dzn_index_type index_type = |
| *inout_indexed ? |
| dzn_index_type_from_dxgi_format(cmdbuf->state.ib.view.Format, draw_type.triangle_fan_primitive_restart) : |
| DZN_NO_INDEX; |
| struct dzn_meta_triangle_fan_rewrite_index *rewrite_index = |
| &device->triangle_fan[index_type]; |
| |
| struct dzn_triangle_fan_rewrite_index_params rewrite_index_params = { 0 }; |
| |
| assert(rewrite_index->root_sig); |
| assert(rewrite_index->pipeline_state); |
| assert(rewrite_index->cmd_sig); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, triangle_fan_exec_buf, |
| D3D12_BARRIER_SYNC_COMPUTE_SHADING, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, |
| D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT); |
| } |
| else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_exec_buf, 0, 1, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].root_sig = NULL; |
| ID3D12GraphicsCommandList1_SetComputeRootSignature(cmdbuf->cmdlist, rewrite_index->root_sig); |
| ID3D12GraphicsCommandList1_SetPipelineState(cmdbuf->cmdlist, rewrite_index->pipeline_state); |
| root_param_idx = 0; |
| ID3D12GraphicsCommandList1_SetComputeRootUnorderedAccessView(cmdbuf->cmdlist, root_param_idx++, |
| ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf)); |
| ID3D12GraphicsCommandList1_SetComputeRoot32BitConstants(cmdbuf->cmdlist, root_param_idx++, |
| sizeof(rewrite_index_params) / 4, |
| (const void *)&rewrite_index_params, 0); |
| |
| if (*inout_indexed) { |
| ID3D12GraphicsCommandList1_SetComputeRootShaderResourceView(cmdbuf->cmdlist, |
| root_param_idx++, |
| cmdbuf->state.ib.view.BufferLocation); |
| } |
| |
| ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, rewrite_index->cmd_sig, |
| max_draw_count, triangle_fan_exec_buf, 0, |
| *inout_count_buf ? exec_buf : NULL, 0); |
| |
| if (cmdbuf->enhanced_barriers) { |
| buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){ |
| .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING, |
| .SyncAfter = D3D12_BARRIER_SYNC_INDEX_INPUT, |
| .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, |
| .AccessAfter = D3D12_BARRIER_ACCESS_INDEX_BUFFER, |
| .pResource = triangle_fan_index_buf, |
| .Offset = 0, .Size = UINT64_MAX |
| }; |
| } |
| else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, triangle_fan_index_buf, 0, 1, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| D3D12_RESOURCE_STATE_INDEX_BUFFER, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| /* After our triangle-fan lowering the draw is indexed */ |
| *inout_indexed = true; |
| cmdbuf->state.ib.view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(triangle_fan_index_buf); |
| cmdbuf->state.ib.view.SizeInBytes = triangle_fan_index_buf_stride; |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| } |
| |
| if (cmdbuf->enhanced_barriers) { |
| buf_barriers[enhanced_barriers.NumBarriers++] = (D3D12_BUFFER_BARRIER){ |
| .SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING, |
| .SyncAfter = D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, |
| .AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, |
| .AccessAfter = D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, |
| .pResource = exec_buf, |
| .Offset = 0, .Size = UINT64_MAX |
| }; |
| ID3D12GraphicsCommandList8_Barrier(cmdbuf->cmdlist8, 1, &enhanced_barriers); |
| } |
| else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, |
| D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
   /* We don't mess with the driver state when executing our internal
    * compute shader, but we still change the D3D12 state, so let's mark
    * things dirty if needed.
    */
| cmdbuf->state.pipeline = NULL; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| *inout_draw_buf = exec_buf; |
| *inout_draw_buf_offset = exec_buf_draw_offset; |
| if (*inout_count_buf) { |
| *inout_count_buf = exec_buf; |
| *inout_count_buf_offset = 0; |
| } |
| *inout_draw_buf_stride = exec_buf_stride; |
| } |
| |
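/* Emit an indirect draw, patching the draw buffer first when draw
 * parameters, draw IDs or triangle-fan lowering are needed, and issuing one
 * ExecuteIndirect() per view when multiview is emulated.
 */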
| static void |
| dzn_cmd_buffer_indirect_draw(struct dzn_cmd_buffer *cmdbuf, |
| ID3D12Resource *draw_buf, |
| size_t draw_buf_offset, |
| ID3D12Resource *count_buf, |
| size_t count_buf_offset, |
| uint32_t max_draw_count, |
| uint32_t draw_buf_stride, |
| bool indexed) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| struct dzn_graphics_pipeline *pipeline = (struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| uint32_t min_draw_buf_stride = |
| indexed ? |
| sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) : |
| sizeof(D3D12_DRAW_ARGUMENTS); |
| bool prim_restart = |
| dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut) != NULL; |
| |
| draw_buf_stride = draw_buf_stride ? draw_buf_stride : min_draw_buf_stride; |
| assert(draw_buf_stride >= min_draw_buf_stride); |
| assert((draw_buf_stride & 3) == 0); |
| |
| D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; |
| |
| struct dzn_indirect_draw_type draw_type; |
| draw_type.value = 0; |
| draw_type.indexed = indexed; |
| draw_type.indirect_count = count_buf != NULL; |
| draw_type.draw_params = pipeline->needs_draw_sysvals && !pdev->options21.ExtendedCommandInfoSupported; |
| draw_type.draw_id = max_draw_count > 1 && pdev->options21.ExecuteIndirectTier < D3D12_EXECUTE_INDIRECT_TIER_1_1; |
| draw_type.triangle_fan = pipeline->ia.triangle_fan; |
| draw_type.triangle_fan_primitive_restart = draw_type.triangle_fan && prim_restart; |
| |
| if (draw_type.draw_params || draw_type.draw_id || draw_type.triangle_fan) { |
| dzn_cmd_buffer_patch_indirect_draw(cmdbuf, draw_type, |
| &draw_buf, &draw_buf_offset, |
| &count_buf, &count_buf_offset, |
| max_draw_count, &draw_buf_stride, &indexed); |
| } |
| struct dzn_indirect_draw_cmd_sig_key cmd_sig_key; |
| memset(&cmd_sig_key, 0, sizeof(cmd_sig_key)); |
| cmd_sig_key.indexed = indexed; |
| cmd_sig_key.triangle_fan = draw_type.triangle_fan; |
| cmd_sig_key.draw_params = draw_type.draw_params; |
| cmd_sig_key.draw_id = max_draw_count > 1; |
| cmd_sig_key.custom_stride = draw_buf_stride; |
| ID3D12CommandSignature *cmdsig = |
| dzn_graphics_pipeline_get_indirect_cmd_sig(pipeline, cmd_sig_key); |
| |
| if (!cmdsig) { |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| return; |
| } |
| |
| cmdbuf->state.sysvals.gfx.first_vertex = 0; |
| cmdbuf->state.sysvals.gfx.base_instance = 0; |
| cmdbuf->state.sysvals.gfx.is_indexed_draw = indexed; |
| |
| uint32_t view_mask = pipeline->multiview.native_view_instancing ? |
| 1 : pipeline->multiview.view_mask; |
| u_foreach_bit(view, view_mask) { |
| cmdbuf->state.sysvals.gfx.view_index = view; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| |
| dzn_cmd_buffer_prepare_draw(cmdbuf, indexed); |
| |
| ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, |
| max_draw_count, |
| draw_buf, draw_buf_offset, |
| count_buf, count_buf_offset); |
| } |
| |
| /* Restore the old IB view if we modified it during the triangle fan lowering */ |
| if (draw_type.triangle_fan) { |
| cmdbuf->state.ib.view = ib_view; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| } |
| } |
| |
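/* Flush all dirty compute state to the command list before a dispatch. */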
| static void |
| dzn_cmd_buffer_prepare_dispatch(struct dzn_cmd_buffer *cmdbuf) |
| { |
| dzn_cmd_buffer_update_pipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); |
| dzn_cmd_buffer_update_heaps(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); |
| dzn_cmd_buffer_update_sysvals(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); |
| dzn_cmd_buffer_update_push_constants(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE); |
| |
| /* Reset the dirty states */ |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty &= DZN_CMD_BINDPOINT_DIRTY_HEAPS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdCopyBuffer2(VkCommandBuffer commandBuffer, |
| const VkCopyBufferInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, src_buffer, info->srcBuffer); |
| VK_FROM_HANDLE(dzn_buffer, dst_buffer, info->dstBuffer); |
| |
   for (uint32_t i = 0; i < info->regionCount; i++) {
| const VkBufferCopy2 *region = info->pRegions + i; |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, dst_buffer->res, region->dstOffset, |
| src_buffer->res, region->srcOffset, |
| region->size); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, |
| const VkCopyBufferToImageInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
   for (uint32_t i = 0; i < info->regionCount; i++) {
| const VkBufferImageCopy2 *region = info->pRegions + i; |
| |
| dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { |
| for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) |
| dzn_cmd_buffer_copy_buf2img_region(cmdbuf, info, i, aspect, l); |
| } |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, |
| const VkCopyImageToBufferInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
   for (uint32_t i = 0; i < info->regionCount; i++) {
| const VkBufferImageCopy2 *region = info->pRegions + i; |
| |
| dzn_foreach_aspect(aspect, region->imageSubresource.aspectMask) { |
| for (uint32_t l = 0; l < region->imageSubresource.layerCount; l++) |
| dzn_cmd_buffer_copy_img2buf_region(cmdbuf, info, i, aspect, l); |
| } |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdCopyImage2(VkCommandBuffer commandBuffer, |
| const VkCopyImageInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); |
| VK_FROM_HANDLE(dzn_image, src, info->srcImage); |
| VK_FROM_HANDLE(dzn_image, dst, info->dstImage); |
| |
| assert(src->vk.samples == dst->vk.samples); |
| |
| bool requires_temp_res = false; |
| |
| for (uint32_t i = 0; i < info->regionCount && !requires_temp_res; i++) { |
| const VkImageCopy2 *region = &info->pRegions[i]; |
| |
| dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { |
| assert(aspect & region->dstSubresource.aspectMask); |
| |
| if (!dzn_image_formats_are_compatible(device, src->vk.format, dst->vk.format, |
| VK_IMAGE_USAGE_TRANSFER_SRC_BIT, aspect) && |
| src->vk.tiling != VK_IMAGE_TILING_LINEAR && |
| dst->vk.tiling != VK_IMAGE_TILING_LINEAR) { |
| requires_temp_res = true; |
| break; |
| } |
| } |
| } |
| |
| bool use_blit = false; |
| if (src->vk.samples > 1) { |
| use_blit = requires_temp_res; |
| |
      for (uint32_t i = 0; i < info->regionCount; i++) {
| const VkImageCopy2 *region = info->pRegions + i; |
| if (region->srcOffset.x != 0 || region->srcOffset.y != 0 || |
| region->extent.width != u_minify(src->vk.extent.width, region->srcSubresource.mipLevel) || |
| region->extent.height != u_minify(src->vk.extent.height, region->srcSubresource.mipLevel) || |
| region->dstOffset.x != 0 || region->dstOffset.y != 0 || |
| region->extent.width != u_minify(dst->vk.extent.width, region->dstSubresource.mipLevel) || |
| region->extent.height != u_minify(dst->vk.extent.height, region->dstSubresource.mipLevel)) |
| use_blit = true; |
| } |
| } |
| |
| if (use_blit) { |
      /* This copy -> blit lowering doesn't work if vkCmdCopyImage[2]() is
       * issued on a transfer queue, but we don't have any better option
       * right now...
       */
| STACK_ARRAY(VkImageBlit2, blit_regions, info->regionCount); |
| |
| VkBlitImageInfo2 blit_info = { |
| .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2, |
| .srcImage = info->srcImage, |
| .srcImageLayout = info->srcImageLayout, |
| .dstImage = info->dstImage, |
| .dstImageLayout = info->dstImageLayout, |
| .regionCount = info->regionCount, |
| .pRegions = blit_regions, |
| .filter = VK_FILTER_NEAREST, |
| }; |
| |
| for (uint32_t r = 0; r < info->regionCount; r++) { |
| blit_regions[r] = (VkImageBlit2) { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2, |
| .srcSubresource = info->pRegions[r].srcSubresource, |
| .srcOffsets = { |
| info->pRegions[r].srcOffset, |
| info->pRegions[r].srcOffset, |
| }, |
| .dstSubresource = info->pRegions[r].dstSubresource, |
| .dstOffsets = { |
| info->pRegions[r].dstOffset, |
| info->pRegions[r].dstOffset, |
| }, |
| }; |
| |
| blit_regions[r].srcOffsets[1].x += info->pRegions[r].extent.width; |
| blit_regions[r].srcOffsets[1].y += info->pRegions[r].extent.height; |
| blit_regions[r].srcOffsets[1].z += info->pRegions[r].extent.depth; |
| blit_regions[r].dstOffsets[1].x += info->pRegions[r].extent.width; |
| blit_regions[r].dstOffsets[1].y += info->pRegions[r].extent.height; |
| blit_regions[r].dstOffsets[1].z += info->pRegions[r].extent.depth; |
| } |
| |
| dzn_CmdBlitImage2(commandBuffer, &blit_info); |
| |
| STACK_ARRAY_FINISH(blit_regions); |
| return; |
| } |
| |
| D3D12_TEXTURE_COPY_LOCATION tmp_loc = { 0 }; |
| D3D12_RESOURCE_DESC tmp_desc = { |
| .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, |
| .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, |
| .DepthOrArraySize = 1, |
| .MipLevels = 1, |
| .Format = src->desc.Format, |
| .SampleDesc = { .Count = 1, .Quality = 0 }, |
| .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, |
| .Flags = D3D12_RESOURCE_FLAG_NONE, |
| }; |
| |
| if (requires_temp_res) { |
| ID3D12Device4 *dev = device->dev; |
| VkImageAspectFlags aspect = 0; |
| uint64_t max_size = 0; |
| |
| if (vk_format_has_depth(src->vk.format)) |
| aspect = VK_IMAGE_ASPECT_DEPTH_BIT; |
| else if (vk_format_has_stencil(src->vk.format)) |
         aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
| else |
| aspect = VK_IMAGE_ASPECT_COLOR_BIT; |
| |
| for (uint32_t i = 0; i < info->regionCount; i++) { |
| const VkImageCopy2 *region = &info->pRegions[i]; |
| uint64_t region_size = 0; |
| |
| tmp_desc.Format = |
| dzn_image_get_dxgi_format(pdev, src->vk.format, |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT, |
| aspect); |
| tmp_desc.Width = region->extent.width; |
| tmp_desc.Height = region->extent.height; |
| |
         ID3D12Device1_GetCopyableFootprints(dev, &tmp_desc,
                                             0, 1, 0,
                                             NULL, NULL, NULL,
                                             &region_size);
| max_size = MAX2(max_size, region_size * region->extent.depth); |
| } |
| |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, max_size, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| 0, |
| &tmp_loc.pResource, NULL); |
| if (result != VK_SUCCESS) |
| return; |
| |
| tmp_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; |
| } |
| |
   for (uint32_t i = 0; i < info->regionCount; i++) {
| const VkImageCopy2 *region = &info->pRegions[i]; |
| |
| dzn_foreach_aspect(aspect, region->srcSubresource.aspectMask) { |
| for (uint32_t l = 0; l < MAX2(region->srcSubresource.layerCount, region->dstSubresource.layerCount); l++) |
| dzn_cmd_buffer_copy_img_chunk(cmdbuf, info, &tmp_desc, &tmp_loc, i, aspect, l); |
| } |
| } |
| } |
| |
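/* Allocate the SRV slots (and a sampler slot when static samplers aren't
 * supported) needed by the blit/resolve paths, and bind the resulting heaps
 * on the command list if they changed.
 */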
| static VkResult |
| dzn_alloc_and_bind_blit_heap_slots(struct dzn_cmd_buffer *cmdbuf, |
| uint32_t num_view_slots, D3D12_FILTER sampler_filter, |
| struct dzn_descriptor_heap **view_heap, uint32_t *view_heap_slot, |
| struct dzn_descriptor_heap **sampler_heap, uint32_t *sampler_heap_slot) |
| { |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| |
| VkResult result = |
| dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->cbv_srv_uav_pool, device, |
| num_view_slots, view_heap, view_heap_slot); |
| |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmdbuf->vk, result); |
| return result; |
| } |
| |
| if (!device->support_static_samplers) { |
| result = |
| dzn_descriptor_heap_pool_alloc_slots(&cmdbuf->sampler_pool, device, |
| 1, sampler_heap, sampler_heap_slot); |
| |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmdbuf->vk, result); |
| return result; |
| } |
| |
| D3D12_SAMPLER_DESC sampler_desc = { |
| .Filter = sampler_filter, |
| .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, |
| .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, |
| .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP, |
| .MipLODBias = 0, |
| .MaxAnisotropy = 0, |
| .MinLOD = 0, |
| .MaxLOD = D3D12_FLOAT32_MAX, |
| }; |
| ID3D12Device4_CreateSampler(device->dev, &sampler_desc, |
| dzn_descriptor_heap_get_cpu_handle(*sampler_heap, *sampler_heap_slot)); |
| } |
| |
| if (*view_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] || |
| (*sampler_heap && *sampler_heap != cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER])) { |
| ID3D12DescriptorHeap * const heaps[] = { (*view_heap)->heap, *sampler_heap ? (*sampler_heap)->heap : NULL }; |
| cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = *view_heap; |
| cmdbuf->state.heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = *sampler_heap; |
| ID3D12GraphicsCommandList1_SetDescriptorHeaps(cmdbuf->cmdlist, *sampler_heap ? 2 : 1, heaps); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBlitImage2(VkCommandBuffer commandBuffer, |
| const VkBlitImageInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| if (info->regionCount == 0) |
| return; |
| |
| uint32_t desc_count = 0; |
| for (uint32_t r = 0; r < info->regionCount; r++) |
| desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask); |
| |
| struct dzn_descriptor_heap *heap; |
| uint32_t heap_slot; |
| struct dzn_descriptor_heap *sampler_heap = NULL; |
| uint32_t sampler_heap_slot = 0; |
| VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count, |
| info->filter == VK_FILTER_LINEAR ? |
| D3D12_FILTER_MIN_MAG_MIP_LINEAR : |
| D3D12_FILTER_MIN_MAG_MIP_POINT, |
| &heap, &heap_slot, &sampler_heap, &sampler_heap_slot); |
| |
| if (result != VK_SUCCESS) |
| return; |
| |
| ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); |
| |
| for (uint32_t r = 0; r < info->regionCount; r++) |
| dzn_cmd_buffer_blit_region(cmdbuf, info, heap, &heap_slot, sampler_heap, sampler_heap_slot, r); |
| |
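| /* The blit path binds its own pipeline, viewports and scissors behind the |
| * state-tracker's back, so invalidate the cached state to force a re-bind |
| * on the next draw. |
| */ |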
| cmdbuf->state.pipeline = NULL; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdResolveImage2(VkCommandBuffer commandBuffer, |
| const VkResolveImageInfo2 *info) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| if (info->regionCount == 0) |
| return; |
| |
| uint32_t desc_count = 0; |
| for (uint32_t r = 0; r < info->regionCount; r++) |
| desc_count += util_bitcount(info->pRegions[r].srcSubresource.aspectMask); |
| |
| struct dzn_descriptor_heap *heap; |
| uint32_t heap_slot; |
| struct dzn_descriptor_heap *sampler_heap = NULL; |
| uint32_t sampler_heap_slot = 0; |
| VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count, |
| D3D12_FILTER_MIN_MAG_MIP_POINT, |
| &heap, &heap_slot, &sampler_heap, &sampler_heap_slot); |
| if (result != VK_SUCCESS) |
| return; |
| |
| ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); |
| |
| for (uint32_t r = 0; r < info->regionCount; r++) |
| dzn_cmd_buffer_resolve_region(cmdbuf, info, VK_RESOLVE_MODE_AVERAGE_BIT, heap, &heap_slot, sampler_heap, sampler_heap_slot, r); |
| |
| cmdbuf->state.pipeline = NULL; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdClearColorImage(VkCommandBuffer commandBuffer, |
| VkImage image, |
| VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_image, img, image); |
| |
| dzn_cmd_buffer_clear_color(cmdbuf, img, imageLayout, pColor, rangeCount, pRanges); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, |
| VkImage image, |
| VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_image, img, image); |
| |
| dzn_cmd_buffer_clear_zs(cmdbuf, img, imageLayout, pDepthStencil, rangeCount, pRanges); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDispatchBase(VkCommandBuffer commandBuffer, |
| uint32_t baseGroupX, |
| uint32_t baseGroupY, |
| uint32_t baseGroupZ, |
| uint32_t groupCountX, |
| uint32_t groupCountY, |
| uint32_t groupCountZ) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| cmdbuf->state.sysvals.compute.group_count_x = groupCountX; |
| cmdbuf->state.sysvals.compute.group_count_y = groupCountY; |
| cmdbuf->state.sysvals.compute.group_count_z = groupCountZ; |
| cmdbuf->state.sysvals.compute.base_group_x = baseGroupX; |
| cmdbuf->state.sysvals.compute.base_group_y = baseGroupY; |
| cmdbuf->state.sysvals.compute.base_group_z = baseGroupZ; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| |
| dzn_cmd_buffer_prepare_dispatch(cmdbuf); |
| ID3D12GraphicsCommandList1_Dispatch(cmdbuf->cmdlist, groupCountX, groupCountY, groupCountZ); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdFillBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize size, |
| uint32_t data) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); |
| |
| if (size == VK_WHOLE_SIZE) |
| size = buf->size - dstOffset; |
| |
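| /* Per the spec, size must be a multiple of 4; if VK_WHOLE_SIZE leaves a |
| * remainder, round down to the nearest smaller multiple. |
| */ |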
| size &= ~3ULL; |
| |
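| /* Implement the fill by CPU-writing the pattern to an upload buffer and |
| * copying it to the destination on the GPU timeline, which avoids needing |
| * a UAV descriptor for the destination buffer. |
| */ |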
| ID3D12Resource *src_res; |
| uint64_t src_offset; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| 4, |
| &src_res, |
| &src_offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| uint32_t *cpu_ptr; |
| ID3D12Resource_Map(src_res, 0, NULL, (void **)&cpu_ptr); |
| cpu_ptr += src_offset / sizeof(uint32_t); |
| for (uint32_t i = 0; i < size / 4; i++) |
| cpu_ptr[i] = data; |
| |
| ID3D12Resource_Unmap(src_res, 0, NULL); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdUpdateBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize size, |
| const void *data) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); |
| |
| if (size == VK_WHOLE_SIZE) |
| size = buf->size - dstOffset; |
| |
| /* |
| * Match the vkCmdFillBuffer size handling; the spec says: |
| * "size is the number of bytes to fill, and must be either a multiple of |
| * 4, or VK_WHOLE_SIZE to fill the range from offset to the end of the |
| * buffer. If VK_WHOLE_SIZE is used and the remaining size of the buffer |
| * is not a multiple of 4, then the nearest smaller multiple is used." |
| */ |
| size &= ~3ULL; |
| |
| ID3D12Resource *src_res; |
| uint64_t src_offset; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, size, |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| 4, |
| &src_res, &src_offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| void *cpu_ptr; |
| ID3D12Resource_Map(src_res, 0, NULL, &cpu_ptr); |
| memcpy((uint8_t *)cpu_ptr + src_offset, data, size); |
| ID3D12Resource_Unmap(src_res, 0, NULL); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, src_res, src_offset, size); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdClearAttachments(VkCommandBuffer commandBuffer, |
| uint32_t attachmentCount, |
| const VkClearAttachment *pAttachments, |
| uint32_t rectCount, |
| const VkClearRect *pRects) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| for (unsigned i = 0; i < attachmentCount; i++) { |
| VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; |
| struct dzn_image_view *view = NULL; |
| |
| if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { |
| assert(pAttachments[i].colorAttachment < cmdbuf->state.render.attachments.color_count); |
| view = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].iview; |
| layout = cmdbuf->state.render.attachments.colors[pAttachments[i].colorAttachment].layout; |
| } else { |
| if (cmdbuf->state.render.attachments.depth.iview && |
| (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) { |
| view = cmdbuf->state.render.attachments.depth.iview; |
| layout = cmdbuf->state.render.attachments.depth.layout; |
| } |
| |
| if (cmdbuf->state.render.attachments.stencil.iview && |
| (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| assert(!view || view == cmdbuf->state.render.attachments.depth.iview); |
| view = cmdbuf->state.render.attachments.stencil.iview; |
| layout = cmdbuf->state.render.attachments.stencil.layout; |
| } |
| } |
| |
| if (!view) |
| continue; |
| |
| for (uint32_t j = 0; j < rectCount; j++) { |
| D3D12_RECT rect; |
| dzn_translate_rect(&rect, &pRects[j].rect); |
| |
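| /* With multiview enabled, emit one clear per active view, offsetting the |
| * base layer by the view index. |
| */ |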
| uint32_t view_mask = cmdbuf->state.multiview.view_mask; |
| if (view_mask != 0) { |
| u_foreach_bit(layer, view_mask) { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout, |
| &pAttachments[i].clearValue, |
| pAttachments[i].aspectMask, |
| pRects[j].baseArrayLayer + layer, |
| pRects[j].layerCount, |
| 1, &rect); |
| } |
| } else { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, view, layout, |
| &pAttachments[i].clearValue, |
| pAttachments[i].aspectMask, |
| pRects[j].baseArrayLayer, |
| pRects[j].layerCount, |
| 1, &rect); |
| } |
| } |
| } |
| } |
| |
| static D3D12_RESOLVE_MODE |
| dzn_get_resolve_mode(VkResolveModeFlags mode) |
| { |
| switch (mode) { |
| case VK_RESOLVE_MODE_AVERAGE_BIT: return D3D12_RESOLVE_MODE_AVERAGE; |
| case VK_RESOLVE_MODE_MAX_BIT: return D3D12_RESOLVE_MODE_MAX; |
| case VK_RESOLVE_MODE_MIN_BIT: return D3D12_RESOLVE_MODE_MIN; |
| /* TODO: D3D12 has no SAMPLE_ZERO resolve mode; approximate with MIN |
| * for now. */ |
| case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: return D3D12_RESOLVE_MODE_MIN; |
| default: return D3D12_RESOLVE_MODE_AVERAGE; |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_resolve_rendering_attachment_via_blit(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_rendering_attachment *att, |
| VkImageAspectFlagBits aspect, |
| const VkImageSubresourceRange *src_range, |
| const VkImageSubresourceRange *dst_range) |
| { |
| uint32_t desc_count = util_bitcount(aspect) * src_range->levelCount * src_range->layerCount; |
| |
| struct dzn_descriptor_heap *heap; |
| uint32_t heap_slot; |
| struct dzn_descriptor_heap *sampler_heap = NULL; |
| uint32_t sampler_heap_slot = 0; |
| VkResult result = dzn_alloc_and_bind_blit_heap_slots(cmdbuf, desc_count, |
| D3D12_FILTER_MIN_MAG_MIP_POINT, |
| &heap, &heap_slot, &sampler_heap, &sampler_heap_slot); |
| if (result != VK_SUCCESS) |
| return; |
| |
| ID3D12GraphicsCommandList1_IASetPrimitiveTopology(cmdbuf->cmdlist, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); |
| |
| VkImageResolve2 region = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, |
| .srcSubresource = { |
| .aspectMask = aspect, |
| .baseArrayLayer = src_range->baseArrayLayer, |
| .layerCount = src_range->layerCount, |
| }, |
| .dstSubresource = { |
| .aspectMask = aspect, |
| .baseArrayLayer = dst_range->baseArrayLayer, |
| .layerCount = dst_range->layerCount, |
| }, |
| }; |
| VkResolveImageInfo2 resolve_info = { |
| .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2, |
| .srcImage = vk_image_to_handle(att->iview->vk.image), |
| .dstImage = vk_image_to_handle(att->resolve.iview->vk.image), |
| .srcImageLayout = att->layout, |
| .dstImageLayout = att->resolve.layout, |
| .regionCount = 1, |
| .pRegions = ®ion |
| }; |
| for (uint32_t level = 0; level < src_range->levelCount; ++level) { |
| region.srcSubresource.mipLevel = level + src_range->baseMipLevel; |
| region.dstSubresource.mipLevel = level + dst_range->baseMipLevel; |
| region.extent = (VkExtent3D){ |
| u_minify(att->iview->vk.image->extent.width, region.srcSubresource.mipLevel), |
| u_minify(att->iview->vk.image->extent.height, region.srcSubresource.mipLevel), |
| u_minify(att->iview->vk.image->extent.depth, region.srcSubresource.mipLevel), |
| }; |
| dzn_cmd_buffer_resolve_region(cmdbuf, &resolve_info, att->resolve.mode, heap, &heap_slot, sampler_heap, sampler_heap_slot, 0); |
| } |
| |
| cmdbuf->state.pipeline = NULL; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS | DZN_CMD_DIRTY_SCISSORS; |
| if (cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline) { |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| } |
| |
| static void |
| dzn_cmd_buffer_resolve_rendering_attachment(struct dzn_cmd_buffer *cmdbuf, |
| const struct dzn_rendering_attachment *att, |
| VkImageAspectFlagBits aspect, |
| bool force_blit_resolve) |
| { |
| struct dzn_image_view *src = att->iview; |
| struct dzn_image_view *dst = att->resolve.iview; |
| |
| if (!src || !dst || att->resolve.mode == VK_RESOLVE_MODE_NONE) |
| return; |
| |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = |
| container_of(device->vk.physical, struct dzn_physical_device, vk); |
| |
| struct dzn_image *src_img = container_of(src->vk.image, struct dzn_image, vk); |
| struct dzn_image *dst_img = container_of(dst->vk.image, struct dzn_image, vk); |
| |
| VkImageSubresourceRange src_range = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .baseMipLevel = src->vk.base_mip_level, |
| .levelCount = MIN2(src->vk.level_count, dst->vk.level_count), |
| .baseArrayLayer = src->vk.base_array_layer, |
| .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count), |
| }; |
| if (src_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { |
| src_range.baseArrayLayer = 0; |
| src_range.layerCount = 1; |
| } |
| |
| VkImageSubresourceRange dst_range = { |
| .aspectMask = (VkImageAspectFlags)aspect, |
| .baseMipLevel = dst->vk.base_mip_level, |
| .levelCount = MIN2(src->vk.level_count, dst->vk.level_count), |
| .baseArrayLayer = dst->vk.base_array_layer, |
| .layerCount = MIN2(src->vk.layer_count, dst->vk.layer_count), |
| }; |
| if (dst_img->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { |
| dst_range.baseArrayLayer = 0; |
| dst_range.layerCount = 1; |
| } |
| |
| if (force_blit_resolve || |
| /* Resolve modes other than average are poorly tested / buggy */ |
| att->resolve.mode != VK_RESOLVE_MODE_AVERAGE_BIT || |
| /* D3D resolve API can't go from (e.g.) D32S8X24 to D32 */ |
| src->vk.view_format != dst->vk.view_format) { |
| dzn_cmd_buffer_resolve_rendering_attachment_via_blit(cmdbuf, att, aspect, &src_range, &dst_range); |
| return; |
| } |
| |
| VkImageLayout src_layout = att->layout; |
| VkImageLayout dst_layout = att->resolve.layout; |
| |
| D3D12_RESOURCE_STATES src_state = dzn_image_layout_to_state(src_img, src_layout, aspect, cmdbuf->type); |
| D3D12_RESOURCE_STATES dst_state = dzn_image_layout_to_state(dst_img, dst_layout, aspect, cmdbuf->type); |
| D3D12_BARRIER_LAYOUT src_restore_layout = D3D12_BARRIER_LAYOUT_COMMON, |
| src_needed_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ; |
| D3D12_BARRIER_LAYOUT dst_restore_layout = D3D12_BARRIER_LAYOUT_COMMON, |
| dst_needed_layout = D3D12_BARRIER_LAYOUT_RESOLVE_DEST; |
| if (cmdbuf->enhanced_barriers) { |
| src_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, src_img, |
| src_layout, src_needed_layout, |
| &src_range); |
| dst_restore_layout = dzn_cmd_buffer_require_layout(cmdbuf, dst_img, |
| dst_layout, dst_needed_layout, |
| &dst_range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range, |
| src_state, |
| D3D12_RESOURCE_STATE_RESOLVE_SOURCE, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range, |
| dst_state, |
| D3D12_RESOURCE_STATE_RESOLVE_DEST, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
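| /* Resolve each subresource individually. ResolveSubresourceRegion() |
| * honors min/max resolve modes but requires programmable-sample-positions |
| * support; otherwise fall back to the plain averaging resolve. |
| */ |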
| for (uint32_t level = 0; level < src_range.levelCount; level++) { |
| for (uint32_t layer = 0; layer < src_range.layerCount; layer++) { |
| uint32_t src_subres = |
| dzn_image_range_get_subresource_index(src_img, &src_range, aspect, level, layer); |
| uint32_t dst_subres = |
| dzn_image_range_get_subresource_index(dst_img, &dst_range, aspect, level, layer); |
| |
| DXGI_FORMAT format = |
| dzn_image_get_dxgi_format(pdev, dst->vk.format, |
| dst->vk.usage & ~VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, |
| aspect); |
| |
| if (cmdbuf->cmdlist8 && |
| pdev->options2.ProgrammableSamplePositionsTier > D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED) { |
| ID3D12GraphicsCommandList8_ResolveSubresourceRegion(cmdbuf->cmdlist8, |
| dst_img->res, dst_subres, |
| 0, 0, |
| src_img->res, src_subres, |
| NULL, |
| format, |
| dzn_get_resolve_mode(att->resolve.mode)); |
| } else { |
| ID3D12GraphicsCommandList1_ResolveSubresource(cmdbuf->cmdlist, |
| dst_img->res, dst_subres, |
| src_img->res, src_subres, |
| format); |
| } |
| } |
| } |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_restore_layout(cmdbuf, src_img, |
| D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE, |
| src_needed_layout, src_restore_layout, |
| &src_range); |
| dzn_cmd_buffer_restore_layout(cmdbuf, dst_img, |
| D3D12_BARRIER_SYNC_RESOLVE, D3D12_BARRIER_ACCESS_RESOLVE_DEST, |
| dst_needed_layout, dst_restore_layout, |
| &dst_range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, src_img, &src_range, |
| D3D12_RESOURCE_STATE_RESOLVE_SOURCE, |
| src_state, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| dzn_cmd_buffer_queue_image_range_state_transition(cmdbuf, dst_img, &dst_range, |
| D3D12_RESOURCE_STATE_RESOLVE_DEST, |
| dst_state, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| |
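| /* The runtime's render-pass emulation passes the pre-pass layout via |
| * VkRenderingAttachmentInitialLayoutInfoMESA; transition the attachment |
| * from that layout to the one used while rendering. |
| */ |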
| static void |
| dzn_rendering_attachment_initial_transition(struct dzn_cmd_buffer *cmdbuf, |
| const VkRenderingAttachmentInfo *att, |
| VkImageAspectFlagBits aspect) |
| { |
| const VkRenderingAttachmentInitialLayoutInfoMESA *initial_layout = |
| vk_find_struct_const(att->pNext, RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA); |
| VK_FROM_HANDLE(dzn_image_view, iview, att->imageView); |
| |
| if (!initial_layout || !iview) |
| return; |
| |
| struct dzn_image *image = container_of(iview->vk.image, struct dzn_image, vk); |
| VkImageSubresourceRange range = { |
| .aspectMask = aspect, |
| .baseMipLevel = iview->vk.base_mip_level, |
| .levelCount = iview->vk.level_count, |
| .baseArrayLayer = iview->vk.base_array_layer, |
| .layerCount = iview->vk.layer_count, |
| }; |
| if (image->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { |
| range.baseArrayLayer = 0; |
| range.layerCount = 1; |
| } |
| |
| if (cmdbuf->enhanced_barriers) { |
| D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL; |
| D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON; |
| if (initial_layout->initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) { |
| sync_before = D3D12_BARRIER_SYNC_NONE; |
| access_before = D3D12_BARRIER_ACCESS_NO_ACCESS; |
| } |
| |
| D3D12_BARRIER_LAYOUT layout_before = dzn_vk_layout_to_d3d_layout(initial_layout->initialLayout, cmdbuf->type, aspect); |
| D3D12_BARRIER_LAYOUT layout_after = dzn_vk_layout_to_d3d_layout(att->imageLayout, cmdbuf->type, aspect); |
| if (image->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) { |
| layout_before = D3D12_BARRIER_LAYOUT_UNDEFINED; |
| layout_after = D3D12_BARRIER_LAYOUT_UNDEFINED; |
| } |
| |
| dzn_cmd_buffer_image_barrier(cmdbuf, image, |
| sync_before, D3D12_BARRIER_SYNC_DRAW, |
| access_before, D3D12_BARRIER_ACCESS_COMMON, |
| layout_before, |
| layout_after, |
| &range); |
| } else { |
| dzn_cmd_buffer_queue_image_range_layout_transition(cmdbuf, image, &range, |
| initial_layout->initialLayout, |
| att->imageLayout, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBeginRendering(VkCommandBuffer commandBuffer, |
| const VkRenderingInfo *pRenderingInfo) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| D3D12_RECT new_render_area = { |
| .left = pRenderingInfo->renderArea.offset.x, |
| .top = pRenderingInfo->renderArea.offset.y, |
| .right = (LONG)(pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width), |
| .bottom = (LONG)(pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height), |
| }; |
| |
| /* The render area has an impact on the scissor state. */ |
| if (memcmp(&cmdbuf->state.render.area, &new_render_area, sizeof(new_render_area))) { |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; |
| cmdbuf->state.render.area = new_render_area; |
| } |
| |
| cmdbuf->state.render.flags = pRenderingInfo->flags; |
| cmdbuf->state.render.layer_count = pRenderingInfo->layerCount; |
| cmdbuf->state.render.view_mask = pRenderingInfo->viewMask; |
| |
| D3D12_CPU_DESCRIPTOR_HANDLE rt_handles[MAX_RTS] = { 0 }; |
| D3D12_CPU_DESCRIPTOR_HANDLE zs_handle = { 0 }; |
| |
| cmdbuf->state.render.attachments.color_count = pRenderingInfo->colorAttachmentCount; |
| for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { |
| const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[i]; |
| VK_FROM_HANDLE(dzn_image_view, iview, att->imageView); |
| |
| cmdbuf->state.render.attachments.colors[i].iview = iview; |
| cmdbuf->state.render.attachments.colors[i].layout = att->imageLayout; |
| cmdbuf->state.render.attachments.colors[i].resolve.mode = att->resolveMode; |
| cmdbuf->state.render.attachments.colors[i].resolve.iview = |
| dzn_image_view_from_handle(att->resolveImageView); |
| cmdbuf->state.render.attachments.colors[i].resolve.layout = |
| att->resolveImageLayout; |
| cmdbuf->state.render.attachments.colors[i].store_op = att->storeOp; |
| |
| if (!iview) { |
| rt_handles[i] = dzn_cmd_buffer_get_null_rtv(cmdbuf); |
| continue; |
| } |
| |
| struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk); |
| rt_handles[i] = dzn_cmd_buffer_get_rtv(cmdbuf, img, &iview->rtv_desc); |
| dzn_rendering_attachment_initial_transition(cmdbuf, att, |
| VK_IMAGE_ASPECT_COLOR_BIT); |
| } |
| |
| if (pRenderingInfo->pDepthAttachment) { |
| const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment; |
| |
| cmdbuf->state.render.attachments.depth.iview = |
| dzn_image_view_from_handle(att->imageView); |
| cmdbuf->state.render.attachments.depth.layout = att->imageLayout; |
| cmdbuf->state.render.attachments.depth.resolve.mode = att->resolveMode; |
| cmdbuf->state.render.attachments.depth.resolve.iview = |
| dzn_image_view_from_handle(att->resolveImageView); |
| cmdbuf->state.render.attachments.depth.resolve.layout = |
| att->resolveImageLayout; |
| cmdbuf->state.render.attachments.depth.store_op = att->storeOp; |
| dzn_rendering_attachment_initial_transition(cmdbuf, att, |
| VK_IMAGE_ASPECT_DEPTH_BIT); |
| } |
| |
| if (pRenderingInfo->pStencilAttachment) { |
| const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment; |
| |
| cmdbuf->state.render.attachments.stencil.iview = |
| dzn_image_view_from_handle(att->imageView); |
| cmdbuf->state.render.attachments.stencil.layout = att->imageLayout; |
| cmdbuf->state.render.attachments.stencil.resolve.mode = att->resolveMode; |
| cmdbuf->state.render.attachments.stencil.resolve.iview = |
| dzn_image_view_from_handle(att->resolveImageView); |
| cmdbuf->state.render.attachments.stencil.resolve.layout = |
| att->resolveImageLayout; |
| cmdbuf->state.render.attachments.stencil.store_op = att->storeOp; |
| dzn_rendering_attachment_initial_transition(cmdbuf, att, |
| VK_IMAGE_ASPECT_STENCIL_BIT); |
| } |
| |
| if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) { |
| struct dzn_image_view *z_iview = |
| pRenderingInfo->pDepthAttachment ? |
| dzn_image_view_from_handle(pRenderingInfo->pDepthAttachment->imageView) : |
| NULL; |
| struct dzn_image_view *s_iview = |
| pRenderingInfo->pStencilAttachment ? |
| dzn_image_view_from_handle(pRenderingInfo->pStencilAttachment->imageView) : |
| NULL; |
| struct dzn_image_view *iview = z_iview ? z_iview : s_iview; |
| assert(!z_iview || !s_iview || z_iview == s_iview); |
| |
| if (iview) { |
| struct dzn_image *img = container_of(iview->vk.image, struct dzn_image, vk); |
| |
| zs_handle = dzn_cmd_buffer_get_dsv(cmdbuf, img, &iview->dsv_desc); |
| } |
| } |
| |
| ID3D12GraphicsCommandList1_OMSetRenderTargets(cmdbuf->cmdlist, |
| pRenderingInfo->colorAttachmentCount, |
| pRenderingInfo->colorAttachmentCount ? rt_handles : NULL, |
| false, zs_handle.ptr ? &zs_handle : NULL); |
| |
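| /* Now that the render targets are bound, process LOAD_OP_CLEAR; resumed |
| * passes must skip the clears. |
| */ |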
| for (uint32_t a = 0; a < pRenderingInfo->colorAttachmentCount; a++) { |
| const VkRenderingAttachmentInfo *att = &pRenderingInfo->pColorAttachments[a]; |
| VK_FROM_HANDLE(dzn_image_view, iview, att->imageView); |
| |
| if (iview != NULL && att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR && |
| !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) { |
| if (pRenderingInfo->viewMask != 0) { |
| u_foreach_bit(layer, pRenderingInfo->viewMask) { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout, |
| &att->clearValue, |
| VK_IMAGE_ASPECT_COLOR_BIT, layer, |
| 1, 1, &cmdbuf->state.render.area); |
| } |
| } else { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, iview, att->imageLayout, |
| &att->clearValue, |
| VK_IMAGE_ASPECT_COLOR_BIT, 0, |
| pRenderingInfo->layerCount, 1, |
| &cmdbuf->state.render.area); |
| } |
| } |
| } |
| |
| if ((pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) && |
| !(pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)) { |
| const VkRenderingAttachmentInfo *z_att = pRenderingInfo->pDepthAttachment; |
| const VkRenderingAttachmentInfo *s_att = pRenderingInfo->pStencilAttachment; |
| struct dzn_image_view *z_iview = z_att ? dzn_image_view_from_handle(z_att->imageView) : NULL; |
| struct dzn_image_view *s_iview = s_att ? dzn_image_view_from_handle(s_att->imageView) : NULL; |
| struct dzn_image_view *iview = z_iview ? z_iview : s_iview; |
| VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; |
| |
| assert(!z_iview || !s_iview || z_iview == s_iview); |
| |
| VkImageAspectFlags aspects = 0; |
| VkClearValue clear_val; |
| |
| if (z_iview && z_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { |
| aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; |
| clear_val.depthStencil.depth = z_att->clearValue.depthStencil.depth; |
| layout = z_att->imageLayout; |
| } |
| |
| if (s_iview && s_att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) { |
| aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| clear_val.depthStencil.stencil = s_att->clearValue.depthStencil.stencil; |
| layout = s_att->imageLayout; |
| } |
| |
| if (aspects != 0) { |
| if (pRenderingInfo->viewMask != 0) { |
| u_foreach_bit(layer, pRenderingInfo->viewMask) { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout, |
| &clear_val, aspects, layer, |
| 1, 1, &cmdbuf->state.render.area); |
| } |
| } else { |
| dzn_cmd_buffer_clear_attachment(cmdbuf, iview, layout, |
| &clear_val, aspects, 0, |
| VK_REMAINING_ARRAY_LAYERS, 1, |
| &cmdbuf->state.render.area); |
| } |
| } |
| } |
| |
| cmdbuf->state.multiview.num_views = MAX2(util_bitcount(pRenderingInfo->viewMask), 1); |
| cmdbuf->state.multiview.view_mask = MAX2(pRenderingInfo->viewMask, 1); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdEndRendering(VkCommandBuffer commandBuffer) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| if (!(cmdbuf->state.render.flags & VK_RENDERING_SUSPENDING_BIT)) { |
| for (uint32_t i = 0; i < cmdbuf->state.render.attachments.color_count; i++) { |
| dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf, |
| &cmdbuf->state.render.attachments.colors[i], |
| VK_IMAGE_ASPECT_COLOR_BIT, false); |
| } |
| |
| bool separate_stencil_resolve = |
| cmdbuf->state.render.attachments.depth.resolve.mode != |
| cmdbuf->state.render.attachments.stencil.resolve.mode; |
| dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf, |
| &cmdbuf->state.render.attachments.depth, |
| VK_IMAGE_ASPECT_DEPTH_BIT, |
| separate_stencil_resolve); |
| dzn_cmd_buffer_resolve_rendering_attachment(cmdbuf, |
| &cmdbuf->state.render.attachments.stencil, |
| VK_IMAGE_ASPECT_STENCIL_BIT, |
| separate_stencil_resolve); |
| } |
| |
| memset(&cmdbuf->state.render, 0, sizeof(cmdbuf->state.render)); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBindPipeline(VkCommandBuffer commandBuffer, |
| VkPipelineBindPoint pipelineBindPoint, |
| VkPipeline pipe) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_pipeline, pipeline, pipe); |
| |
| cmdbuf->state.bindpoint[pipelineBindPoint].pipeline = pipeline; |
| cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { |
| const struct dzn_graphics_pipeline *gfx = (const struct dzn_graphics_pipeline *)pipeline; |
| |
| if (!gfx->vp.dynamic) { |
| memcpy(cmdbuf->state.viewports, gfx->vp.desc, |
| gfx->vp.count * sizeof(cmdbuf->state.viewports[0])); |
| cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width; |
| cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; |
| cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| } |
| |
| if (!gfx->scissor.dynamic) { |
| memcpy(cmdbuf->state.scissors, gfx->scissor.desc, |
| gfx->scissor.count * sizeof(cmdbuf->state.scissors[0])); |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; |
| } |
| |
| if (gfx->zsa.stencil_test.enable && !gfx->zsa.stencil_test.dynamic_ref) { |
| cmdbuf->state.zsa.stencil_test.front.ref = gfx->zsa.stencil_test.front.ref; |
| cmdbuf->state.zsa.stencil_test.back.ref = gfx->zsa.stencil_test.back.ref; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; |
| } |
| |
| if (gfx->zsa.depth_bounds.enable && !gfx->zsa.depth_bounds.dynamic) { |
| cmdbuf->state.zsa.depth_bounds.min = gfx->zsa.depth_bounds.min; |
| cmdbuf->state.zsa.depth_bounds.max = gfx->zsa.depth_bounds.max; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS; |
| } |
| |
| if (!gfx->blend.dynamic_constants) { |
| memcpy(cmdbuf->state.blend.constants, gfx->blend.constants, |
| sizeof(cmdbuf->state.blend.constants)); |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; |
| } |
| |
| for (uint32_t vb = 0; vb < gfx->vb.count; vb++) |
| cmdbuf->state.vb.views[vb].StrideInBytes = gfx->vb.strides[vb]; |
| |
| if (gfx->vb.count > 0) |
| BITSET_SET_RANGE(cmdbuf->state.vb.dirty, 0, gfx->vb.count - 1); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, |
| VkPipelineBindPoint pipelineBindPoint, |
| VkPipelineLayout layout, |
| uint32_t firstSet, |
| uint32_t descriptorSetCount, |
| const VkDescriptorSet *pDescriptorSets, |
| uint32_t dynamicOffsetCount, |
| const uint32_t *pDynamicOffsets) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_pipeline_layout, playout, layout); |
| |
| struct dzn_descriptor_state *desc_state = |
| &cmdbuf->state.bindpoint[pipelineBindPoint].desc_state; |
| uint32_t dirty = 0; |
| |
| for (uint32_t i = 0; i < descriptorSetCount; i++) { |
| uint32_t idx = firstSet + i; |
| VK_FROM_HANDLE(dzn_descriptor_set, set, pDescriptorSets[i]); |
| |
| if (desc_state->sets[idx].set != set) { |
| desc_state->sets[idx].set = set; |
| dirty |= DZN_CMD_BINDPOINT_DIRTY_DESC_SET0 << idx; |
| } |
| |
| uint32_t dynamic_buffer_count = playout->sets[idx].dynamic_buffer_count; |
| if (dynamic_buffer_count) { |
| assert(dynamicOffsetCount >= dynamic_buffer_count); |
| |
| for (uint32_t j = 0; j < dynamic_buffer_count; j++) |
| desc_state->sets[idx].dynamic_offsets[j] = pDynamicOffsets[j]; |
| |
| dynamicOffsetCount -= dynamic_buffer_count; |
| pDynamicOffsets += dynamic_buffer_count; |
| dirty |= DZN_CMD_BINDPOINT_DIRTY_DYNAMIC_BUFFERS; |
| } |
| } |
| |
| cmdbuf->state.bindpoint[pipelineBindPoint].dirty |= dirty; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetViewport(VkCommandBuffer commandBuffer, |
| uint32_t firstViewport, |
| uint32_t viewportCount, |
| const VkViewport *pViewports) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| STATIC_ASSERT(MAX_VP <= DXIL_SPIRV_MAX_VIEWPORT); |
| |
| for (uint32_t i = 0; i < viewportCount; i++) { |
| uint32_t vp = i + firstViewport; |
| |
| dzn_translate_viewport(&cmdbuf->state.viewports[vp], &pViewports[i]); |
| |
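| /* D3D12 viewports can't encode negative heights or inverted depth ranges, |
| * so record per-viewport Y/Z flips in a sysval mask that the vertex shader |
| * uses to patch the position output. |
| */ |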
| if (pViewports[i].minDepth > pViewports[i].maxDepth) |
| cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); |
| else |
| cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp + DXIL_SPIRV_Z_FLIP_SHIFT); |
| |
| if (pViewports[i].height > 0) |
| cmdbuf->state.sysvals.gfx.yz_flip_mask |= BITFIELD_BIT(vp); |
| else |
| cmdbuf->state.sysvals.gfx.yz_flip_mask &= ~BITFIELD_BIT(vp); |
| } |
| |
| cmdbuf->state.sysvals.gfx.viewport_width = cmdbuf->state.viewports[0].Width; |
| cmdbuf->state.sysvals.gfx.viewport_height = cmdbuf->state.viewports[0].Height; |
| |
| if (viewportCount) { |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_VIEWPORTS; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetScissor(VkCommandBuffer commandBuffer, |
| uint32_t firstScissor, |
| uint32_t scissorCount, |
| const VkRect2D *pScissors) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| for (uint32_t i = 0; i < scissorCount; i++) |
| dzn_translate_rect(&cmdbuf->state.scissors[i + firstScissor], &pScissors[i]); |
| |
| if (scissorCount) |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_SCISSORS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, |
| VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, |
| const void *pValues) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| struct dzn_cmd_buffer_push_constant_state *states[2]; |
| uint32_t num_states = 0; |
| |
| if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) |
| states[num_states++] = &cmdbuf->state.push_constant.gfx; |
| |
| if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) |
| states[num_states++] = &cmdbuf->state.push_constant.compute; |
| |
| for (uint32_t i = 0; i < num_states; i++) { |
| memcpy(((char *)states[i]->values) + offset, pValues, size); |
| states[i]->offset = |
| states[i]->end > 0 ? MIN2(states[i]->offset, offset) : offset; |
| states[i]->end = MAX2(states[i]->end, offset + size); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDraw(VkCommandBuffer commandBuffer, |
| uint32_t vertexCount, |
| uint32_t instanceCount, |
| uint32_t firstVertex, |
| uint32_t firstInstance) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| |
| cmdbuf->state.sysvals.gfx.first_vertex = firstVertex; |
| cmdbuf->state.sysvals.gfx.base_instance = firstInstance; |
| |
| uint32_t view_mask = pipeline->multiview.native_view_instancing ? |
| 1 : pipeline->multiview.view_mask; |
| |
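| /* D3D12 has no triangle-fan topology: lower the fan to an indexed |
| * triangle-list draw using a generated index buffer. |
| */ |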
| if (pipeline->ia.triangle_fan) { |
| D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; |
| |
| VkResult result = |
| dzn_cmd_buffer_triangle_fan_create_index(cmdbuf, &vertexCount); |
| if (result != VK_SUCCESS || !vertexCount) |
| return; |
| |
| cmdbuf->state.sysvals.gfx.is_indexed_draw = true; |
| u_foreach_bit(view, view_mask) { |
| cmdbuf->state.sysvals.gfx.view_index = view; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| dzn_cmd_buffer_prepare_draw(cmdbuf, true); |
| ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, 0, |
| firstVertex, firstInstance); |
| } |
| |
| /* Restore the IB view if we modified it when lowering triangle fans. */ |
| if (ib_view.SizeInBytes > 0) { |
| cmdbuf->state.ib.view = ib_view; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| } |
| } else { |
| cmdbuf->state.sysvals.gfx.is_indexed_draw = false; |
| u_foreach_bit(view, view_mask) { |
| cmdbuf->state.sysvals.gfx.view_index = view; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| dzn_cmd_buffer_prepare_draw(cmdbuf, false); |
| ID3D12GraphicsCommandList1_DrawInstanced(cmdbuf->cmdlist, vertexCount, instanceCount, |
| firstVertex, firstInstance); |
| } |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDrawIndexed(VkCommandBuffer commandBuffer, |
| uint32_t indexCount, |
| uint32_t instanceCount, |
| uint32_t firstIndex, |
| int32_t vertexOffset, |
| uint32_t firstInstance) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| const struct dzn_graphics_pipeline *pipeline = (const struct dzn_graphics_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| |
| if (pipeline->ia.triangle_fan && |
| dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) { |
| /* The indexed+primitive-restart+triangle-fan combination is a mess, |
| * since we have to walk the index buffer, skip entries with the |
| * special 0xffff/0xffffffff values, and push triangle list indices |
| * for the remaining values. All of this has an impact on the index |
| * count passed to the draw call, which forces us to use the indirect |
| * path. |
| */ |
| D3D12_DRAW_INDEXED_ARGUMENTS params = { |
| .IndexCountPerInstance = indexCount, |
| .InstanceCount = instanceCount, |
| .StartIndexLocation = firstIndex, |
| .BaseVertexLocation = vertexOffset, |
| .StartInstanceLocation = firstInstance, |
| }; |
| |
| ID3D12Resource *draw_buf; |
| uint64_t offset; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(params), |
| DZN_INTERNAL_BUF_UPLOAD, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| 4, |
| &draw_buf, &offset); |
| if (result != VK_SUCCESS) |
| return; |
| |
| void *cpu_ptr; |
| ID3D12Resource_Map(draw_buf, 0, NULL, &cpu_ptr); |
| memcpy((uint8_t *)cpu_ptr + offset, ¶ms, sizeof(params)); |
| |
| ID3D12Resource_Unmap(draw_buf, 0, NULL); |
| |
| dzn_cmd_buffer_indirect_draw(cmdbuf, draw_buf, offset, NULL, 0, 1, sizeof(params), true); |
| return; |
| } |
| |
| cmdbuf->state.sysvals.gfx.first_vertex = vertexOffset; |
| cmdbuf->state.sysvals.gfx.base_instance = firstInstance; |
| cmdbuf->state.sysvals.gfx.is_indexed_draw = true; |
| |
| D3D12_INDEX_BUFFER_VIEW ib_view = cmdbuf->state.ib.view; |
| |
| if (pipeline->ia.triangle_fan) { |
| VkResult result = |
| dzn_cmd_buffer_triangle_fan_rewrite_index(cmdbuf, &indexCount, &firstIndex); |
| if (result != VK_SUCCESS || !indexCount) |
| return; |
| } |
| |
| uint32_t view_mask = pipeline->multiview.native_view_instancing ? |
| 1 : pipeline->multiview.view_mask; |
| u_foreach_bit(view, view_mask) { |
| cmdbuf->state.sysvals.gfx.view_index = view; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| |
| dzn_cmd_buffer_prepare_draw(cmdbuf, true); |
| ID3D12GraphicsCommandList1_DrawIndexedInstanced(cmdbuf->cmdlist, indexCount, instanceCount, firstIndex, |
| vertexOffset, firstInstance); |
| } |
| |
| /* Restore the IB view if we modified it when lowering triangle fans. */ |
| if (pipeline->ia.triangle_fan && ib_view.SizeInBytes) { |
| cmdbuf->state.ib.view = ib_view; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDrawIndirect(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset, |
| uint32_t drawCount, |
| uint32_t stride) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| |
| dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, false); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset, |
| uint32_t drawCount, |
| uint32_t stride) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| |
| dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, NULL, 0, drawCount, stride, true); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset, |
| VkBuffer countBuffer, |
| VkDeviceSize countBufferOffset, |
| uint32_t maxDrawCount, |
| uint32_t stride) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer); |
| |
| dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, |
| count_buf->res, countBufferOffset, |
| maxDrawCount, stride, false); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset, |
| VkBuffer countBuffer, |
| VkDeviceSize countBufferOffset, |
| uint32_t maxDrawCount, |
| uint32_t stride) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| VK_FROM_HANDLE(dzn_buffer, count_buf, countBuffer); |
| |
| dzn_cmd_buffer_indirect_draw(cmdbuf, buf->res, offset, |
| count_buf->res, countBufferOffset, |
| maxDrawCount, stride, true); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, |
| uint32_t firstBinding, |
| uint32_t bindingCount, |
| const VkBuffer *pBuffers, |
| const VkDeviceSize *pOffsets) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| if (!bindingCount) |
| return; |
| |
| D3D12_VERTEX_BUFFER_VIEW *vbviews = cmdbuf->state.vb.views; |
| |
| for (uint32_t i = 0; i < bindingCount; i++) { |
| VK_FROM_HANDLE(dzn_buffer, buf, pBuffers[i]); |
| |
| vbviews[firstBinding + i].BufferLocation = buf->gpuva + pOffsets[i]; |
| vbviews[firstBinding + i].SizeInBytes = buf->size - pOffsets[i]; |
| } |
| |
| BITSET_SET_RANGE(cmdbuf->state.vb.dirty, firstBinding, |
| firstBinding + bindingCount - 1); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset, |
| VkIndexType indexType) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| |
| cmdbuf->state.ib.view.BufferLocation = buf->gpuva + offset; |
| cmdbuf->state.ib.view.SizeInBytes = buf->size - offset; |
| switch (indexType) { |
| case VK_INDEX_TYPE_UINT16: |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R16_UINT; |
| cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; |
| break; |
| case VK_INDEX_TYPE_UINT32: |
| cmdbuf->state.ib.view.Format = DXGI_FORMAT_R32_UINT; |
| cmdbuf->state.pipeline_variant.ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; |
| break; |
| default: unreachable("Invalid index type"); |
| } |
| |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_IB; |
| |
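| /* Primitive restart is baked into the PSO as a strip-cut value, so |
| * switching index widths may require a pipeline variant. |
| */ |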
| const struct dzn_graphics_pipeline *pipeline = |
| (const struct dzn_graphics_pipeline *)cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].pipeline; |
| |
| if (pipeline && |
| dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut)) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdResetEvent2(VkCommandBuffer commandBuffer, |
| VkEvent event, |
| VkPipelineStageFlags2 stageMask) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_event, evt, event); |
| |
| if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_RESET)) |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetEvent2(VkCommandBuffer commandBuffer, |
| VkEvent event, |
| const VkDependencyInfo *pDependencyInfo) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_event, evt, event); |
| |
| if (!_mesa_hash_table_insert(cmdbuf->events.ht, evt, (void *)(uintptr_t)DZN_EVENT_STATE_SET)) |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdWaitEvents2(VkCommandBuffer commandBuffer, |
| uint32_t eventCount, |
| const VkEvent *pEvents, |
| const VkDependencyInfo *pDependencyInfo) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
| /* Intra-command-list waits are handled by this pipeline flush, which is |
| * overkill, but that's the best we can do with the standard D3D12 barrier |
| * API. |
| * |
| * Inter-command-list waits are taken care of by the serialization done at |
| * the ExecuteCommandLists() level: |
| * "Calling ExecuteCommandLists twice in succession (from the same thread, |
| * or different threads) guarantees that the first workload (A) finishes |
| * before the second workload (B)" |
| * |
| * HOST -> DEVICE signaling is ignored, and we assume events are always |
| * signaled when we reach the vkCmdWaitEvents() point: |
| * "Command buffers in the submission can include vkCmdWaitEvents commands |
| * that wait on events that will not be signaled by earlier commands in the |
| * queue. Such events must be signaled by the application using vkSetEvent, |
| * and the vkCmdWaitEvents commands that wait upon them must not be inside |
| * a render pass instance. |
| * The event must be set before the vkCmdWaitEvents command is executed." |
| */ |
| bool flush_pipeline = false; |
| |
| for (uint32_t i = 0; i < eventCount; i++) { |
| VK_FROM_HANDLE(dzn_event, event, pEvents[i]); |
| |
| struct hash_entry *he = |
| _mesa_hash_table_search(cmdbuf->events.ht, event); |
| if (he) { |
| enum dzn_event_state state = (uintptr_t)he->data; |
| assert(state != DZN_EVENT_STATE_RESET); |
| flush_pipeline = state == DZN_EVENT_STATE_SET; |
| } |
| } |
| |
| if (flush_pipeline) { |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_global_barrier(cmdbuf, |
| D3D12_BARRIER_SYNC_ALL, D3D12_BARRIER_SYNC_ALL, |
| D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON); |
| } else { |
| D3D12_RESOURCE_BARRIER barrier = { |
| .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, |
| .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, |
| .UAV = {.pResource = NULL }, |
| }; |
| |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); |
| } |
| } |
| cmdbuf->vk.base.device->dispatch_table.CmdPipelineBarrier2( |
| vk_command_buffer_to_handle(&cmdbuf->vk), |
| pDependencyInfo); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdBeginQuery(VkCommandBuffer commandBuffer, |
| VkQueryPool queryPool, |
| uint32_t query, |
| VkQueryControlFlags flags) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); |
| |
| struct dzn_cmd_buffer_query_pool_state *state = |
| dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); |
| if (!state) |
| return; |
| |
| for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i) |
| qpool->queries[query + i].type = dzn_query_pool_get_query_type(qpool, flags); |
| |
| ID3D12GraphicsCommandList1_BeginQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); |
| |
| dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, query, cmdbuf->state.multiview.num_views); |
| dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, query, cmdbuf->state.multiview.num_views); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdEndQuery(VkCommandBuffer commandBuffer, |
| VkQueryPool queryPool, |
| uint32_t query) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); |
| |
| struct dzn_cmd_buffer_query_pool_state *state = |
| dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); |
| if (!state) |
| return; |
| |
| ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); |
| |
| dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); |
| if (cmdbuf->state.multiview.num_views > 1) |
| dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, |
| VkPipelineStageFlags2 stage, |
| VkQueryPool queryPool, |
| uint32_t query) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); |
| |
| struct dzn_cmd_buffer_query_pool_state *state = |
| dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); |
| if (!state) |
| return; |
| |
| /* Execution barrier so the timestamp gets written after the pipeline flush. */ |
| D3D12_RESOURCE_BARRIER barrier = { |
| .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV, |
| .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, |
| .UAV = { .pResource = NULL }, |
| }; |
| |
| ID3D12GraphicsCommandList1_ResourceBarrier(cmdbuf->cmdlist, 1, &barrier); |
| |
| for (uint32_t i = 0; i < cmdbuf->state.multiview.num_views; ++i) |
| qpool->queries[query + i].type = D3D12_QUERY_TYPE_TIMESTAMP; |
| ID3D12GraphicsCommandList1_EndQuery(cmdbuf->cmdlist, qpool->heap, qpool->queries[query].type, query); |
| |
| dzn_cmd_buffer_dynbitset_set(cmdbuf, &state->collect, query); |
| if (cmdbuf->state.multiview.num_views > 1) |
| dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->zero, query + 1, cmdbuf->state.multiview.num_views - 1); |
| } |
| |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdResetQueryPool(VkCommandBuffer commandBuffer, |
| VkQueryPool queryPool, |
| uint32_t firstQuery, |
| uint32_t queryCount) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); |
| |
| struct dzn_cmd_buffer_query_pool_state *state = |
| dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); |
| |
| if (!state) |
| return; |
| |
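| /* Reset by copying zeros from the device's preallocated all-zeros buffer |
| * into the availability and result sections, one reference-buffer-sized |
| * chunk at a time. |
| */ |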
| uint32_t q_step = DZN_QUERY_REFS_SECTION_SIZE / sizeof(uint64_t); |
| |
| for (uint32_t q = 0; q < queryCount; q += q_step) { |
| uint32_t q_count = MIN2(queryCount - q, q_step); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, |
| dzn_query_pool_get_availability_offset(qpool, firstQuery + q), |
| device->queries.refs, |
| DZN_QUERY_REFS_ALL_ZEROS_OFFSET, |
| q_count * sizeof(uint64_t)); |
| } |
| |
| q_step = DZN_QUERY_REFS_SECTION_SIZE / qpool->query_size; |
| |
| for (uint32_t q = 0; q < queryCount; q += q_step) { |
| uint32_t q_count = MIN2(queryCount - q, q_step); |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, qpool->collect_buffer, |
| dzn_query_pool_get_result_offset(qpool, firstQuery + q), |
| device->queries.refs, |
| DZN_QUERY_REFS_ALL_ZEROS_OFFSET, |
| q_count * qpool->query_size); |
| } |
| |
| dzn_cmd_buffer_dynbitset_set_range(cmdbuf, &state->reset, firstQuery, queryCount); |
| dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->collect, firstQuery, queryCount); |
| dzn_cmd_buffer_dynbitset_clear_range(cmdbuf, &state->zero, firstQuery, queryCount); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, |
| VkQueryPool queryPool, |
| uint32_t firstQuery, |
| uint32_t queryCount, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize stride, |
| VkQueryResultFlags flags) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_query_pool, qpool, queryPool); |
| VK_FROM_HANDLE(dzn_buffer, buf, dstBuffer); |
| |
| struct dzn_cmd_buffer_query_pool_state *qpstate = |
| dzn_cmd_buffer_get_query_pool_state(cmdbuf, qpool); |
| if (!qpstate) |
| return; |
| |
| VkResult result = |
| dzn_cmd_buffer_collect_queries(cmdbuf, qpool, qpstate, firstQuery, queryCount); |
| if (result != VK_SUCCESS) |
| return; |
| |
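| /* A single raw copy is only possible when the destination layout exactly |
| * matches the collect buffer: 64-bit results, tightly-packed stride, no |
| * availability word, and (for pipeline statistics) all counters requested. |
| */ |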
| bool raw_copy = (flags & VK_QUERY_RESULT_64_BIT) && |
| stride == qpool->query_size && |
| !(flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT); |
| #define ALL_STATS \ |
| (VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | \ |
| VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) |
| if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS && |
| qpool->pipeline_statistics != ALL_STATS) |
| raw_copy = false; |
| #undef ALL_STATS |
| |
| if (cmdbuf->enhanced_barriers) { |
| if (flags & VK_QUERY_RESULT_WAIT_BIT) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, qpool->collect_buffer, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_COPY_SOURCE); |
| } |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
| if (raw_copy) { |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, |
| qpool->collect_buffer, |
| dzn_query_pool_get_result_offset(qpool, firstQuery), |
| dzn_query_pool_get_result_size(qpool, queryCount)); |
| } else { |
| uint32_t step = flags & VK_QUERY_RESULT_64_BIT ? sizeof(uint64_t) : sizeof(uint32_t); |
| |
| for (uint32_t q = 0; q < queryCount; q++) { |
| uint32_t res_offset = dzn_query_pool_get_result_offset(qpool, firstQuery + q); |
| uint32_t dst_counter_offset = 0; |
| |
| if (qpool->heap_type == D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS) { |
| for (uint32_t c = 0; c < sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); c++) { |
| if (!(BITFIELD_BIT(c) & qpool->pipeline_statistics)) |
| continue; |
| |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, |
| qpool->collect_buffer, |
| res_offset + (c * sizeof(uint64_t)), |
| step); |
| dst_counter_offset += step; |
| } |
| } else { |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset, |
| qpool->collect_buffer, |
| res_offset, step); |
| dst_counter_offset += step; |
| } |
| |
| if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, buf->res, dstOffset + dst_counter_offset, |
| qpool->collect_buffer, |
| dzn_query_pool_get_availability_offset(qpool, firstQuery + q), |
| step); |
| } |
| |
| dstOffset += stride; |
| } |
| } |
| |
| if (!cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, qpool->collect_buffer, 0, 1, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| 0); |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdDispatchIndirect(VkCommandBuffer commandBuffer, |
| VkBuffer buffer, |
| VkDeviceSize offset) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| VK_FROM_HANDLE(dzn_buffer, buf, buffer); |
| |
| cmdbuf->state.sysvals.compute.group_count_x = 0; |
| cmdbuf->state.sysvals.compute.group_count_y = 0; |
| cmdbuf->state.sysvals.compute.group_count_z = 0; |
| cmdbuf->state.sysvals.compute.base_group_x = 0; |
| cmdbuf->state.sysvals.compute.base_group_y = 0; |
| cmdbuf->state.sysvals.compute.base_group_z = 0; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].dirty |= |
| DZN_CMD_BINDPOINT_DIRTY_SYSVALS; |
| |
| dzn_cmd_buffer_prepare_dispatch(cmdbuf); |
| |
| struct dzn_compute_pipeline *pipeline = (struct dzn_compute_pipeline *) |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_COMPUTE].pipeline; |
| ID3D12CommandSignature *cmdsig = |
| dzn_compute_pipeline_get_indirect_cmd_sig(pipeline); |
| |
| if (!cmdsig) { |
| vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return; |
| } |
| |
| ID3D12Resource *exec_buf; |
| VkResult result = |
| dzn_cmd_buffer_alloc_internal_buf(cmdbuf, sizeof(D3D12_DISPATCH_ARGUMENTS) * 2, |
| DZN_INTERNAL_BUF_DEFAULT, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| 0, |
| &exec_buf, NULL); |
| if (result != VK_SUCCESS) |
| return; |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, buf->res, |
| D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, D3D12_BARRIER_SYNC_COPY, |
| D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_ACCESS_COPY_SOURCE); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, buf->res, 0, 1, |
| D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, |
| D3D12_RESOURCE_STATE_COPY_SOURCE, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
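| /* The indirect command signature consumes the parameters twice: once as |
| * constants patching the group-count sysvals, and once as the actual |
| * dispatch arguments, hence the duplicated copy into exec_buf. |
| */ |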
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, 0, |
| buf->res, |
| offset, |
| sizeof(D3D12_DISPATCH_ARGUMENTS)); |
| ID3D12GraphicsCommandList1_CopyBufferRegion(cmdbuf->cmdlist, exec_buf, sizeof(D3D12_DISPATCH_ARGUMENTS), |
| buf->res, |
| offset, |
| sizeof(D3D12_DISPATCH_ARGUMENTS)); |
| |
| if (cmdbuf->enhanced_barriers) { |
| dzn_cmd_buffer_buffer_barrier(cmdbuf, exec_buf, |
| D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT, |
| D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT); |
| } else { |
| dzn_cmd_buffer_queue_transition_barriers(cmdbuf, exec_buf, 0, 1, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, |
| DZN_QUEUE_TRANSITION_FLUSH); |
| } |
| |
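/* MaxCommandCount is 1 and there's no count buffer, so exactly one
 * command-signature instance is executed.
 */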
| ID3D12GraphicsCommandList1_ExecuteIndirect(cmdbuf->cmdlist, cmdsig, 1, exec_buf, 0, NULL, 0); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetLineWidth(VkCommandBuffer commandBuffer, |
| float lineWidth) |
| { |
| assert(lineWidth == 1.0f); |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetDepthBias(VkCommandBuffer commandBuffer, |
| float depthBiasConstantFactor, |
| float depthBiasClamp, |
| float depthBiasSlopeFactor) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| struct dzn_physical_device *pdev = container_of(cmdbuf->vk.base.device->physical, struct dzn_physical_device, vk); |
| |
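/* Depth bias is part of the rasterizer state baked into the PSO unless
 * dynamic depth bias (D3D12_FEATURE_DATA_D3D12_OPTIONS16) lets us set it
 * directly on the command list; without it, a pipeline variant has to be
 * used, hence the pipeline dirty flag. The constant factor is also
 * mirrored into the gfx sysvals, presumably for shader paths that apply
 * the bias manually.
 */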
| cmdbuf->state.pipeline_variant.depth_bias.constant_factor = depthBiasConstantFactor; |
| cmdbuf->state.pipeline_variant.depth_bias.clamp = depthBiasClamp; |
| cmdbuf->state.pipeline_variant.depth_bias.slope_factor = depthBiasSlopeFactor; |
| cmdbuf->state.sysvals.gfx.depth_bias = depthBiasConstantFactor; |
| if (pdev->options16.DynamicDepthBiasSupported) |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BIAS; |
| else |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetBlendConstants(VkCommandBuffer commandBuffer, |
| const float blendConstants[4]) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
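/* Blend constants map directly to OMSetBlendFactor(); just stash them
 * and flag the state dirty so they get re-emitted at draw time.
 */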
| memcpy(cmdbuf->state.blend.constants, blendConstants, |
| sizeof(cmdbuf->state.blend.constants)); |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_BLEND_CONSTANTS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetDepthBounds(VkCommandBuffer commandBuffer, |
| float minDepthBounds, |
| float maxDepthBounds) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| struct dzn_device *device = container_of(cmdbuf->vk.base.device, struct dzn_device, vk); |
| struct dzn_physical_device *pdev = |
| container_of(device->vk.physical, struct dzn_physical_device, vk); |
| |
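/* Depth-bounds test is optional in D3D12
 * (D3D12_FEATURE_DATA_D3D12_OPTIONS2::DepthBoundsTestSupported). When
 * it's unsupported, the Vulkan depthBounds feature isn't exposed either,
 * so this state can safely be ignored.
 */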
| if (pdev->options2.DepthBoundsTestSupported) { |
| cmdbuf->state.zsa.depth_bounds.min = minDepthBounds; |
| cmdbuf->state.zsa.depth_bounds.max = maxDepthBounds; |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_DEPTH_BOUNDS; |
| } |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, |
| VkStencilFaceFlags faceMask, |
| uint32_t compareMask) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
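/* The classic D3D12 depth-stencil desc has a single StencilReadMask
 * shared by both faces and baked into the PSO, so besides recording the
 * dynamic state, a pipeline variant may be needed to honor per-face
 * masks; hence the pipeline dirty flag.
 */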
| if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { |
| cmdbuf->state.zsa.stencil_test.front.compare_mask = compareMask; |
| cmdbuf->state.pipeline_variant.stencil_test.front.compare_mask = compareMask; |
| } |
| |
| if (faceMask & VK_STENCIL_FACE_BACK_BIT) { |
| cmdbuf->state.zsa.stencil_test.back.compare_mask = compareMask; |
| cmdbuf->state.pipeline_variant.stencil_test.back.compare_mask = compareMask; |
| } |
| |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_COMPARE_MASK; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, |
| VkStencilFaceFlags faceMask, |
| uint32_t writeMask) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
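/* Same story as the compare mask: StencilWriteMask is a single per-PSO
 * value in D3D12, so a pipeline variant may be required.
 */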
| if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { |
| cmdbuf->state.zsa.stencil_test.front.write_mask = writeMask; |
| cmdbuf->state.pipeline_variant.stencil_test.front.write_mask = writeMask; |
| } |
| |
| if (faceMask & VK_STENCIL_FACE_BACK_BIT) { |
| cmdbuf->state.zsa.stencil_test.back.write_mask = writeMask; |
| cmdbuf->state.pipeline_variant.stencil_test.back.write_mask = writeMask; |
| } |
| |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_WRITE_MASK; |
| cmdbuf->state.bindpoint[VK_PIPELINE_BIND_POINT_GRAPHICS].dirty |= DZN_CMD_BINDPOINT_DIRTY_PIPELINE; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL |
| dzn_CmdSetStencilReference(VkCommandBuffer commandBuffer, |
| VkStencilFaceFlags faceMask, |
| uint32_t reference) |
| { |
| VK_FROM_HANDLE(dzn_cmd_buffer, cmdbuf, commandBuffer); |
| |
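/* Unlike the compare/write masks, the stencil reference is dynamic in
 * D3D12 (OMSetStencilRef() takes a single value shared by both faces),
 * so no pipeline variant is needed; both per-face values are recorded
 * and applied when the dynamic state is flushed.
 */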
| if (faceMask & VK_STENCIL_FACE_FRONT_BIT) |
| cmdbuf->state.zsa.stencil_test.front.ref = reference; |
| |
| if (faceMask & VK_STENCIL_FACE_BACK_BIT) |
| cmdbuf->state.zsa.stencil_test.back.ref = reference; |
| |
| cmdbuf->state.dirty |= DZN_CMD_DIRTY_STENCIL_REF; |
| } |