| /* |
| * Copyright © 2016 Red Hat. |
| * Copyright © 2016 Bas Nieuwenhuizen |
| * |
| * based in part on anv driver which is: |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "radv_cs.h" |
| #include "radv_debug.h" |
| #include "radv_meta.h" |
| #include "radv_private.h" |
| #include "radv_radeon_winsys.h" |
| #include "radv_shader.h" |
| #include "sid.h" |
| #include "vk_format.h" |
| #include "vk_util.h" |
| #include "vk_enum_defines.h" |
| #include "vk_common_entrypoints.h" |
| #include "vk_render_pass.h" |
| #include "vk_framebuffer.h" |
| |
| #include "ac_debug.h" |
| #include "ac_shader_args.h" |
| |
| #include "util/fast_idiv_by_const.h" |
| |
| enum { |
| RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), |
| RADV_PREFETCH_VS = (1 << 1), |
| RADV_PREFETCH_TCS = (1 << 2), |
| RADV_PREFETCH_TES = (1 << 3), |
| RADV_PREFETCH_GS = (1 << 4), |
| RADV_PREFETCH_PS = (1 << 5), |
| RADV_PREFETCH_MS = (1 << 6), |
| RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | |
| RADV_PREFETCH_GS | RADV_PREFETCH_PS | RADV_PREFETCH_MS) |
| }; |
| |
| static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_image *image, |
| VkImageLayout src_layout, VkImageLayout dst_layout, |
| uint32_t src_family_index, uint32_t dst_family_index, |
| const VkImageSubresourceRange *range, |
| struct radv_sample_locations_state *sample_locs); |
| |
| static void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size); |
| |
| const struct radv_dynamic_state default_dynamic_state = { |
| .viewport = |
| { |
| .count = 0, |
| }, |
| .scissor = |
| { |
| .count = 0, |
| }, |
| .line_width = 1.0f, |
| .depth_bias = |
| { |
| .bias = 0.0f, |
| .clamp = 0.0f, |
| .slope = 0.0f, |
| }, |
| .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f}, |
| .depth_bounds = |
| { |
| .min = 0.0f, |
| .max = 1.0f, |
| }, |
| .stencil_compare_mask = |
| { |
| .front = ~0u, |
| .back = ~0u, |
| }, |
| .stencil_write_mask = |
| { |
| .front = ~0u, |
| .back = ~0u, |
| }, |
| .stencil_reference = |
| { |
| .front = 0u, |
| .back = 0u, |
| }, |
| .line_stipple = |
| { |
| .factor = 0u, |
| .pattern = 0u, |
| }, |
| .cull_mode = 0u, |
| .front_face = 0u, |
| .primitive_topology = 0u, |
| .fragment_shading_rate = |
| { |
| .size = {1u, 1u}, |
| .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, |
| VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, |
| }, |
| .depth_bias_enable = 0u, |
| .primitive_restart_enable = 0u, |
| .rasterizer_discard_enable = 0u, |
| .logic_op = 0u, |
| .color_write_enable = 0u, |
| .patch_control_points = 0, |
| .polygon_mode = 0, |
| .tess_domain_origin = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT, |
| .logic_op_enable = 0u, |
| .stippled_line_enable = 0u, |
| .alpha_to_coverage_enable = 0u, |
| .sample_mask = 0u, |
| .depth_clip_enable = 0u, |
| .conservative_rast_mode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, |
| .depth_clip_negative_one_to_one = 0u, |
| .provoking_vertex_mode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, |
| .depth_clamp_enable = 0u, |
| }; |
| |
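| /** |
|  * Copy the dynamic state from "src" into the command buffer, only overwriting the fields |
|  * that actually changed and accumulating the matching dirty bits so only changed state is |
|  * re-emitted. |
|  */ |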
| static void |
| radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src) |
| { |
| struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; |
| uint64_t copy_mask = src->mask; |
| uint64_t dest_mask = 0; |
| |
| dest->discard_rectangle.count = src->discard_rectangle.count; |
| dest->sample_location.count = src->sample_location.count; |
| |
| if (copy_mask & RADV_DYNAMIC_VIEWPORT) { |
| if (dest->viewport.count != src->viewport.count) { |
| dest->viewport.count = src->viewport.count; |
| dest_mask |= RADV_DYNAMIC_VIEWPORT; |
| } |
| |
| if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, |
| src->viewport.count * sizeof(VkViewport))) { |
| typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); |
| typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count); |
| dest_mask |= RADV_DYNAMIC_VIEWPORT; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_SCISSOR) { |
| if (dest->scissor.count != src->scissor.count) { |
| dest->scissor.count = src->scissor.count; |
| dest_mask |= RADV_DYNAMIC_SCISSOR; |
| } |
| |
| if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, |
| src->scissor.count * sizeof(VkRect2D))) { |
| typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); |
| dest_mask |= RADV_DYNAMIC_SCISSOR; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { |
| if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) { |
| typed_memcpy(dest->blend_constants, src->blend_constants, 4); |
| dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { |
| if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, |
| src->discard_rectangle.count * sizeof(VkRect2D))) { |
| typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles, |
| src->discard_rectangle.count); |
| dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; |
| } |
| } |
| |
| if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { |
| if (dest->sample_location.per_pixel != src->sample_location.per_pixel || |
| dest->sample_location.grid_size.width != src->sample_location.grid_size.width || |
| dest->sample_location.grid_size.height != src->sample_location.grid_size.height || |
| memcmp(&dest->sample_location.locations, &src->sample_location.locations, |
| src->sample_location.count * sizeof(VkSampleLocationEXT))) { |
| dest->sample_location.per_pixel = src->sample_location.per_pixel; |
| dest->sample_location.grid_size = src->sample_location.grid_size; |
| typed_memcpy(dest->sample_location.locations, src->sample_location.locations, |
| src->sample_location.count); |
| dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; |
| } |
| } |
| |
| #define RADV_CMP_COPY(field, flag) \ |
| if (copy_mask & flag) { \ |
| if (dest->field != src->field) { \ |
| dest->field = src->field; \ |
| dest_mask |= flag; \ |
| } \ |
| } |
| |
| RADV_CMP_COPY(line_width, RADV_DYNAMIC_LINE_WIDTH); |
| |
| RADV_CMP_COPY(depth_bias.bias, RADV_DYNAMIC_DEPTH_BIAS); |
| RADV_CMP_COPY(depth_bias.clamp, RADV_DYNAMIC_DEPTH_BIAS); |
| RADV_CMP_COPY(depth_bias.slope, RADV_DYNAMIC_DEPTH_BIAS); |
| |
| RADV_CMP_COPY(depth_bounds.min, RADV_DYNAMIC_DEPTH_BOUNDS); |
| RADV_CMP_COPY(depth_bounds.max, RADV_DYNAMIC_DEPTH_BOUNDS); |
| |
| RADV_CMP_COPY(stencil_compare_mask.front, RADV_DYNAMIC_STENCIL_COMPARE_MASK); |
| RADV_CMP_COPY(stencil_compare_mask.back, RADV_DYNAMIC_STENCIL_COMPARE_MASK); |
| |
| RADV_CMP_COPY(stencil_write_mask.front, RADV_DYNAMIC_STENCIL_WRITE_MASK); |
| RADV_CMP_COPY(stencil_write_mask.back, RADV_DYNAMIC_STENCIL_WRITE_MASK); |
| |
| RADV_CMP_COPY(stencil_reference.front, RADV_DYNAMIC_STENCIL_REFERENCE); |
| RADV_CMP_COPY(stencil_reference.back, RADV_DYNAMIC_STENCIL_REFERENCE); |
| |
| RADV_CMP_COPY(line_stipple.factor, RADV_DYNAMIC_LINE_STIPPLE); |
| RADV_CMP_COPY(line_stipple.pattern, RADV_DYNAMIC_LINE_STIPPLE); |
| |
| RADV_CMP_COPY(cull_mode, RADV_DYNAMIC_CULL_MODE); |
| RADV_CMP_COPY(front_face, RADV_DYNAMIC_FRONT_FACE); |
| RADV_CMP_COPY(primitive_topology, RADV_DYNAMIC_PRIMITIVE_TOPOLOGY); |
| RADV_CMP_COPY(depth_test_enable, RADV_DYNAMIC_DEPTH_TEST_ENABLE); |
| RADV_CMP_COPY(depth_write_enable, RADV_DYNAMIC_DEPTH_WRITE_ENABLE); |
| RADV_CMP_COPY(depth_compare_op, RADV_DYNAMIC_DEPTH_COMPARE_OP); |
| RADV_CMP_COPY(depth_bounds_test_enable, RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE); |
| RADV_CMP_COPY(stencil_test_enable, RADV_DYNAMIC_STENCIL_TEST_ENABLE); |
| |
| RADV_CMP_COPY(stencil_op.front.fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.pass_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.depth_fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.front.compare_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.pass_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.depth_fail_op, RADV_DYNAMIC_STENCIL_OP); |
| RADV_CMP_COPY(stencil_op.back.compare_op, RADV_DYNAMIC_STENCIL_OP); |
| |
| RADV_CMP_COPY(fragment_shading_rate.size.width, RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.size.height, RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.combiner_ops[0], RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| RADV_CMP_COPY(fragment_shading_rate.combiner_ops[1], RADV_DYNAMIC_FRAGMENT_SHADING_RATE); |
| |
| RADV_CMP_COPY(depth_bias_enable, RADV_DYNAMIC_DEPTH_BIAS_ENABLE); |
| |
| RADV_CMP_COPY(primitive_restart_enable, RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE); |
| |
| RADV_CMP_COPY(rasterizer_discard_enable, RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); |
| |
| RADV_CMP_COPY(logic_op, RADV_DYNAMIC_LOGIC_OP); |
| |
| RADV_CMP_COPY(color_write_enable, RADV_DYNAMIC_COLOR_WRITE_ENABLE); |
| |
| RADV_CMP_COPY(patch_control_points, RADV_DYNAMIC_PATCH_CONTROL_POINTS); |
| |
| RADV_CMP_COPY(polygon_mode, RADV_DYNAMIC_POLYGON_MODE); |
| |
| RADV_CMP_COPY(tess_domain_origin, RADV_DYNAMIC_TESS_DOMAIN_ORIGIN); |
| |
| RADV_CMP_COPY(logic_op_enable, RADV_DYNAMIC_LOGIC_OP_ENABLE); |
| |
| RADV_CMP_COPY(stippled_line_enable, RADV_DYNAMIC_LINE_STIPPLE_ENABLE); |
| |
| RADV_CMP_COPY(alpha_to_coverage_enable, RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE); |
| |
| RADV_CMP_COPY(sample_mask, RADV_DYNAMIC_SAMPLE_MASK); |
| |
| RADV_CMP_COPY(depth_clip_enable, RADV_DYNAMIC_DEPTH_CLIP_ENABLE); |
| |
| RADV_CMP_COPY(conservative_rast_mode, RADV_DYNAMIC_CONSERVATIVE_RAST_MODE); |
| |
| RADV_CMP_COPY(depth_clip_negative_one_to_one, RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE); |
| |
| RADV_CMP_COPY(provoking_vertex_mode, RADV_DYNAMIC_PROVOKING_VERTEX_MODE); |
| |
| RADV_CMP_COPY(depth_clamp_enable, RADV_DYNAMIC_DEPTH_CLAMP_ENABLE); |
| |
| #undef RADV_CMP_COPY |
| |
| cmd_buffer->state.dirty |= dest_mask; |
| } |
| |
| bool |
| radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) |
| { |
| return cmd_buffer->qf == RADV_QUEUE_COMPUTE && |
| cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; |
| } |
| |
| enum amd_ip_type |
| radv_queue_family_to_ring(struct radv_physical_device *physical_device, |
| enum radv_queue_family f) |
| { |
| switch (f) { |
| case RADV_QUEUE_GENERAL: |
| return AMD_IP_GFX; |
| case RADV_QUEUE_COMPUTE: |
| return AMD_IP_COMPUTE; |
| case RADV_QUEUE_TRANSFER: |
| return AMD_IP_SDMA; |
| default: |
| unreachable("Unknown queue family"); |
| } |
| } |
| |
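| /** |
|  * Emit a WRITE_DATA packet that writes "count" dwords from "data" to the given VA using |
|  * the selected engine (e.g. V_370_ME or V_370_PFP). |
|  */ |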
| static void |
| radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, |
| unsigned count, const uint32_t *data) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| |
| radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); |
| |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); |
| radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| radeon_emit_array(cs, data, count); |
| } |
| |
| static void |
| radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, |
| unsigned size) |
| { |
| uint32_t *zeroes = alloca(size); |
| memset(zeroes, 0, size); |
| radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes); |
| } |
| |
| static void |
| radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) |
| { |
| struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); |
| |
| list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) |
| { |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); |
| list_del(&up->list); |
| free(up); |
| } |
| |
| if (cmd_buffer->upload.upload_bo) |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo); |
| |
| if (cmd_buffer->cs) |
| cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); |
| if (cmd_buffer->ace_internal.cs) |
| cmd_buffer->device->ws->cs_destroy(cmd_buffer->ace_internal.cs); |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { |
| struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set; |
| free(set->mapped_ptr); |
| if (set->layout) |
| vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->layout->vk); |
| vk_object_base_finish(&set->base); |
| } |
| |
| vk_object_base_finish(&cmd_buffer->meta_push_descriptors.base); |
| |
| vk_command_buffer_finish(&cmd_buffer->vk); |
| vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); |
| } |
| |
| static VkResult |
| radv_create_cmd_buffer(struct vk_command_pool *pool, |
| struct vk_command_buffer **cmd_buffer_out) |
| { |
| struct radv_device *device = container_of(pool->base.device, struct radv_device, vk); |
| |
| struct radv_cmd_buffer *cmd_buffer; |
| unsigned ring; |
| cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (cmd_buffer == NULL) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| VkResult result = |
| vk_command_buffer_init(pool, &cmd_buffer->vk, &radv_cmd_buffer_ops, 0); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); |
| return result; |
| } |
| |
| cmd_buffer->device = device; |
| |
| cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->queue_family_index); |
| |
| ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); |
| |
| cmd_buffer->cs = device->ws->cs_create(device->ws, ring); |
| if (!cmd_buffer->cs) { |
| radv_destroy_cmd_buffer(&cmd_buffer->vk); |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
| vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET); |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) |
| vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, |
| VK_OBJECT_TYPE_DESCRIPTOR_SET); |
| |
| *cmd_buffer_out = &cmd_buffer->vk; |
| |
| list_inithead(&cmd_buffer->upload.list); |
| |
| return VK_SUCCESS; |
| } |
| |
| void |
| radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer) |
| { |
| memset(&cmd_buffer->state.render, 0, sizeof(cmd_buffer->state.render)); |
| } |
| |
| static void |
| radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, |
| UNUSED VkCommandBufferResetFlags flags) |
| { |
| struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk); |
| |
| vk_command_buffer_reset(&cmd_buffer->vk); |
| |
| cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); |
| if (cmd_buffer->ace_internal.cs) |
| cmd_buffer->device->ws->cs_reset(cmd_buffer->ace_internal.cs); |
| |
| list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) |
| { |
| cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); |
| list_del(&up->list); |
| free(up); |
| } |
| |
| cmd_buffer->push_constant_stages = 0; |
| cmd_buffer->scratch_size_per_wave_needed = 0; |
| cmd_buffer->scratch_waves_wanted = 0; |
| cmd_buffer->compute_scratch_size_per_wave_needed = 0; |
| cmd_buffer->compute_scratch_waves_wanted = 0; |
| cmd_buffer->esgs_ring_size_needed = 0; |
| cmd_buffer->gsvs_ring_size_needed = 0; |
| cmd_buffer->tess_rings_needed = false; |
| cmd_buffer->task_rings_needed = false; |
| cmd_buffer->mesh_scratch_ring_needed = false; |
| cmd_buffer->gds_needed = false; |
| cmd_buffer->gds_oa_needed = false; |
| cmd_buffer->sample_positions_needed = false; |
| cmd_buffer->ace_internal.sem.gfx2ace_value = 0; |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = 0; |
| cmd_buffer->ace_internal.sem.va = 0; |
| |
| if (cmd_buffer->upload.upload_bo) |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo); |
| cmd_buffer->upload.offset = 0; |
| |
| memset(cmd_buffer->vertex_binding_buffers, 0, sizeof(struct radv_buffer *) * cmd_buffer->used_vertex_bindings); |
| cmd_buffer->used_vertex_bindings = 0; |
| |
| for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { |
| cmd_buffer->descriptors[i].dirty = 0; |
| cmd_buffer->descriptors[i].valid = 0; |
| cmd_buffer->descriptors[i].push_dirty = false; |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { |
| uint32_t pred_value = 0; |
| uint32_t pred_offset; |
| if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset)) |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| cmd_buffer->mec_inv_pred_emitted = false; |
| cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && |
| cmd_buffer->qf == RADV_QUEUE_GENERAL) { |
| unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends; |
| unsigned fence_offset, eop_bug_offset; |
| void *fence_ptr; |
| |
| radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr); |
| memset(fence_ptr, 0, 8); |
| |
| cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); |
| cmd_buffer->gfx9_fence_va += fence_offset; |
| |
| radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8); |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { |
| /* Allocate a buffer for the EOP bug on GFX9. */ |
| radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr); |
| memset(fence_ptr, 0, 16 * num_db); |
| cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); |
| cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; |
| |
| radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db); |
| } |
| } |
| |
| radv_cmd_buffer_reset_rendering(cmd_buffer); |
| |
| cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; |
| } |
| |
| const struct vk_command_buffer_ops radv_cmd_buffer_ops = { |
| .create = radv_create_cmd_buffer, |
| .reset = radv_reset_cmd_buffer, |
| .destroy = radv_destroy_cmd_buffer, |
| }; |
| |
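| /** |
|  * Grow the upload buffer so it can hold at least "min_needed" bytes. The old BO (if any) |
|  * is kept on the upload list so it stays alive until the command buffer is reset or |
|  * destroyed. |
|  */ |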
| static bool |
| radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed) |
| { |
| uint64_t new_size; |
| struct radeon_winsys_bo *bo = NULL; |
| struct radv_cmd_buffer_upload *upload; |
| struct radv_device *device = cmd_buffer->device; |
| |
| new_size = MAX2(min_needed, 16 * 1024); |
| new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); |
| |
| VkResult result = |
| device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws), |
| RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | |
| RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, |
| RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); |
| |
| if (result != VK_SUCCESS) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, result); |
| return false; |
| } |
| |
| radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); |
| if (cmd_buffer->upload.upload_bo) { |
| upload = malloc(sizeof(*upload)); |
| |
| if (!upload) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| device->ws->buffer_destroy(device->ws, bo); |
| return false; |
| } |
| |
| memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); |
| list_add(&upload->list, &cmd_buffer->upload.list); |
| } |
| |
| cmd_buffer->upload.upload_bo = bo; |
| cmd_buffer->upload.size = new_size; |
| cmd_buffer->upload.offset = 0; |
| cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); |
| |
| if (!cmd_buffer->upload.map) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| return false; |
| } |
| |
| return true; |
| } |
| |
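| /** |
|  * Suballocate "size" bytes from the upload buffer, returning the offset and a CPU pointer |
|  * to the allocation. The upload buffer is resized if it runs out of space. |
|  */ |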
| bool |
| radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, |
| unsigned *out_offset, void **ptr) |
| { |
| assert(size % 4 == 0); |
| |
| struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; |
| |
| /* Align to the scalar cache line size if it results in this allocation |
|     * being placed in fewer of them. |
| */ |
| unsigned offset = cmd_buffer->upload.offset; |
| unsigned line_size = rad_info->gfx_level >= GFX10 ? 64 : 32; |
| unsigned gap = align(offset, line_size) - offset; |
| if ((size & (line_size - 1)) > gap) |
| offset = align(offset, line_size); |
| |
| if (offset + size > cmd_buffer->upload.size) { |
| if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) |
| return false; |
| offset = 0; |
| } |
| |
| *out_offset = offset; |
| *ptr = cmd_buffer->upload.map + offset; |
| |
| cmd_buffer->upload.offset = offset + size; |
| return true; |
| } |
| |
| bool |
| radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, |
| unsigned *out_offset) |
| { |
| uint8_t *ptr; |
| |
| if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr)) |
| return false; |
| assert(ptr); |
| |
| memcpy(ptr, data, size); |
| return true; |
| } |
| |
| void |
| radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) |
| va += 4; |
| |
| ++cmd_buffer->state.trace_id; |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id); |
| |
| radeon_check_space(cmd_buffer->device->ws, cs, 2); |
| |
| radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); |
| radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); |
| } |
| |
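| /** |
|  * Propagate barrier state from the main (GFX) cmdbuf to the internal ACE (compute) cmdbuf |
|  * used for task shaders, and bump the GFX->ACE semaphore when task shaders must be blocked. |
|  */ |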
| static void |
| radv_ace_internal_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask, |
| VkPipelineStageFlags2 dst_stage_mask) |
| { |
| /* Update flush bits from the main cmdbuf, except the stage flush. */ |
| cmd_buffer->ace_internal.flush_bits |= |
| cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH; |
| |
| /* Add stage flush only when necessary. */ |
| if (src_stage_mask & |
| (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | |
| VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) |
| cmd_buffer->ace_internal.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; |
| |
| /* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */ |
| if (src_stage_mask & |
| (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | |
| VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) |
| dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT : 0; |
| |
| /* Increment the GFX/ACE semaphore when task shaders are blocked. */ |
| if (dst_stage_mask & |
| (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | |
| VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT)) |
| cmd_buffer->ace_internal.sem.gfx2ace_value++; |
| } |
| |
| static void |
| radv_ace_internal_cache_flush(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| const uint32_t flush_bits = cmd_buffer->ace_internal.flush_bits; |
| enum rgp_flush_bits sqtt_flush_bits = 0; |
| |
| si_cs_emit_cache_flush(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, |
| true, flush_bits, &sqtt_flush_bits, 0); |
| |
| cmd_buffer->ace_internal.flush_bits = 0; |
| } |
| |
| static uint64_t |
| radv_ace_internal_sem_create(struct radv_cmd_buffer *cmd_buffer) |
| { |
|    /* DWORD 0: GFX->ACE semaphore (GFX blocks ACE, i.e. ACE waits for GFX) |
| * DWORD 1: ACE->GFX semaphore |
| */ |
| uint64_t sem_init = 0; |
| uint32_t va_off = 0; |
| if (!radv_cmd_buffer_upload_data(cmd_buffer, sizeof(uint64_t), &sem_init, &va_off)) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return 0; |
| } |
| |
| return radv_buffer_get_va(cmd_buffer->upload.upload_bo) + va_off; |
| } |
| |
| static bool |
| radv_ace_internal_sem_dirty(const struct radv_cmd_buffer *cmd_buffer) |
| { |
| return cmd_buffer->ace_internal.sem.gfx2ace_value != |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value; |
| } |
| |
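| /** |
|  * Write the current GFX->ACE semaphore value from the GFX cmdbuf if it changed since the |
|  * last emission. Returns true when a new value was emitted. |
|  */ |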
| ALWAYS_INLINE static bool |
| radv_flush_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) |
| { |
| if (!radv_ace_internal_sem_dirty(cmd_buffer)) |
| return false; |
| |
| if (!cmd_buffer->ace_internal.sem.va) { |
| cmd_buffer->ace_internal.sem.va = radv_ace_internal_sem_create(cmd_buffer); |
| if (!cmd_buffer->ace_internal.sem.va) |
| return false; |
| } |
| |
|    /* GFX writes a value to the semaphore which ACE can wait for. */ |
| si_cs_emit_write_event_eop( |
| cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, |
| radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, |
| EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->ace_internal.sem.va, |
| cmd_buffer->ace_internal.sem.gfx2ace_value, cmd_buffer->gfx9_eop_bug_va); |
| |
| cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = cmd_buffer->ace_internal.sem.gfx2ace_value; |
| return true; |
| } |
| |
| ALWAYS_INLINE static void |
| radv_wait_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(cmd_buffer->ace_internal.sem.va); |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| radeon_check_space(cmd_buffer->device->ws, ace_cs, 7); |
| |
| /* ACE waits for the semaphore which GFX wrote. */ |
| radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->ace_internal.sem.va, |
| cmd_buffer->ace_internal.sem.gfx2ace_value, 0xffffffff); |
| } |
| |
| static struct radeon_cmdbuf * |
| radv_ace_internal_create(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(!cmd_buffer->ace_internal.cs); |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *ace_cs = device->ws->cs_create(device->ws, AMD_IP_COMPUTE); |
| |
| if (!ace_cs) |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| return ace_cs; |
| } |
| |
| static VkResult |
| radv_ace_internal_finalize(struct radv_cmd_buffer *cmd_buffer) |
| { |
| assert(cmd_buffer->ace_internal.cs); |
| struct radv_device *device = cmd_buffer->device; |
| struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; |
| |
| /* Emit pending cache flush. */ |
| radv_ace_internal_cache_flush(cmd_buffer); |
| |
| /* Clear the ACE semaphore if it exists. |
| * This is necessary in case the same cmd buffer is submitted again in the future. |
| */ |
| if (cmd_buffer->ace_internal.sem.va) { |
| struct radeon_cmdbuf *main_cs = cmd_buffer->cs; |
| uint64_t gfx2ace_va = cmd_buffer->ace_internal.sem.va; |
| uint64_t ace2gfx_va = cmd_buffer->ace_internal.sem.va + 4; |
| |
| /* ACE: write 1 to the ACE->GFX semaphore. */ |
| si_cs_emit_write_event_eop(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, |
| true, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, |
| EOP_DATA_SEL_VALUE_32BIT, ace2gfx_va, 1, |
| cmd_buffer->gfx9_eop_bug_va); |
| |
| /* Wait for ACE to finish, otherwise we may risk writing 0 to the semaphore |
| * when ACE is still waiting for it. This may not happen in practice, but |
| * better safe than sorry. |
| */ |
| radv_cp_wait_mem(main_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace2gfx_va, 1, 0xffffffff); |
| |
| /* GFX: clear GFX->ACE and ACE->GFX semaphores. */ |
| radv_emit_clear_data(cmd_buffer, V_370_ME, gfx2ace_va, 8); |
| } |
| |
| device->ws->cs_add_buffers(ace_cs, cmd_buffer->cs); |
| return device->ws->cs_finalize(ace_cs); |
| } |
| |
| static void |
| radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags) |
| { |
| if (unlikely(cmd_buffer->device->thread_trace.bo)) { |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); |
| } |
| |
| if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { |
| enum rgp_flush_bits sqtt_flush_bits = 0; |
| assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); |
| |
| radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); |
| |
| /* Force wait for graphics or compute engines to be idle. */ |
| si_cs_emit_cache_flush(cmd_buffer->cs, |
| cmd_buffer->device->physical_device->rad_info.gfx_level, |
| &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, |
| radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits, |
| cmd_buffer->gfx9_eop_bug_va); |
| |
| if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && |
| radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TASK)) { |
| /* Force wait for compute engines to be idle on the internal cmdbuf. */ |
| si_cs_emit_cache_flush(cmd_buffer->ace_internal.cs, |
| cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, |
| true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); |
| } |
| } |
| |
| if (unlikely(cmd_buffer->device->trace_bo)) |
| radv_cmd_buffer_trace_emit(cmd_buffer); |
| } |
| |
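| /** |
|  * Write the bound pipeline pointer into the trace BO for debugging. |
|  */ |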
| static void |
| radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| enum amd_ip_type ring; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| |
| ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); |
| |
| switch (ring) { |
| case AMD_IP_GFX: |
| va += 8; |
| break; |
| case AMD_IP_COMPUTE: |
| va += 16; |
| break; |
| default: |
| assert(!"invalid IP type"); |
| } |
| |
| uint64_t pipeline_address = (uintptr_t)pipeline; |
| data[0] = pipeline_address; |
| data[1] = pipeline_address >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| static void |
| radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| va += 24; |
| |
| data[0] = vb_ptr; |
| data[1] = vb_ptr >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| static void |
| radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader_part *prolog) |
| { |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[2]; |
| uint64_t va; |
| |
| va = radv_buffer_get_va(device->trace_bo); |
| va += 32; |
| |
| uint64_t prolog_address = (uintptr_t)prolog; |
| data[0] = prolog_address; |
| data[1] = prolog_address >> 32; |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); |
| } |
| |
| void |
| radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, |
| struct radv_descriptor_set *set, unsigned idx) |
| { |
| struct radv_descriptor_state *descriptors_state = |
| radv_get_descriptors_state(cmd_buffer, bind_point); |
| |
| descriptors_state->sets[idx] = set; |
| |
| descriptors_state->valid |= (1u << idx); /* active descriptors */ |
| descriptors_state->dirty |= (1u << idx); |
| } |
| |
| static void |
| radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) |
| { |
| struct radv_descriptor_state *descriptors_state = |
| radv_get_descriptors_state(cmd_buffer, bind_point); |
| struct radv_device *device = cmd_buffer->device; |
| uint32_t data[MAX_SETS * 2] = {0}; |
| uint64_t va; |
| va = radv_buffer_get_va(device->trace_bo) + 40; |
| |
| u_foreach_bit(i, descriptors_state->valid) |
| { |
| struct radv_descriptor_set *set = descriptors_state->sets[i]; |
| data[i * 2] = (uint64_t)(uintptr_t)set; |
| data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; |
| } |
| |
| radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data); |
| } |
| |
| struct radv_userdata_info * |
| radv_lookup_user_sgpr(const struct radv_pipeline *pipeline, gl_shader_stage stage, int idx) |
| { |
| struct radv_shader *shader = radv_get_shader(pipeline, stage); |
| return &shader->info.user_sgprs_locs.shader_data[idx]; |
| } |
| |
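| /** |
|  * Emit the given VA into the user SGPR reserved for the given userdata slot, if the shader |
|  * actually uses that slot. |
|  */ |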
| static void |
| radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, |
| uint64_t va) |
| { |
| struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); |
| uint32_t base_reg = pipeline->user_data_0[stage]; |
| if (loc->sgpr_idx == -1) |
| return; |
| |
| assert(loc->num_sgprs == 1); |
| |
| radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false); |
| } |
| |
| static void |
| radv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, |
| struct radv_descriptor_state *descriptors_state, |
| gl_shader_stage stage) |
| { |
| uint32_t sh_base = pipeline->user_data_0[stage]; |
| struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs; |
| unsigned mask = locs->descriptor_sets_enabled; |
| |
| mask &= descriptors_state->dirty & descriptors_state->valid; |
| |
| while (mask) { |
| int start, count; |
| |
| u_bit_scan_consecutive_range(&mask, &start, &count); |
| |
| struct radv_userdata_info *loc = &locs->descriptor_sets[start]; |
| unsigned sh_offset = sh_base + loc->sgpr_idx * 4; |
| |
| radv_emit_shader_pointer_head(cs, sh_offset, count, true); |
| for (int i = 0; i < count; i++) { |
| struct radv_descriptor_set *set = descriptors_state->sets[start + i]; |
| |
| radv_emit_shader_pointer_body(device, cs, set->header.va, true); |
| } |
| } |
| } |
| |
| /** |
| * Convert the user sample locations to hardware sample locations (the values |
| * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). |
| */ |
| static void |
| radv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y, |
| VkOffset2D *sample_locs) |
| { |
| uint32_t x_offset = x % state->grid_size.width; |
| uint32_t y_offset = y % state->grid_size.height; |
| uint32_t num_samples = (uint32_t)state->per_pixel; |
| VkSampleLocationEXT *user_locs; |
| uint32_t pixel_offset; |
| |
| pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; |
| |
| assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); |
| user_locs = &state->locations[pixel_offset]; |
| |
| for (uint32_t i = 0; i < num_samples; i++) { |
| float shifted_pos_x = user_locs[i].x - 0.5; |
| float shifted_pos_y = user_locs[i].y - 0.5; |
| |
| int32_t scaled_pos_x = floorf(shifted_pos_x * 16); |
| int32_t scaled_pos_y = floorf(shifted_pos_y * 16); |
| |
| sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); |
| sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); |
| } |
| } |
| |
| /** |
| * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample |
| * locations. |
| */ |
| static void |
| radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, |
| uint32_t *sample_locs_pixel) |
| { |
| for (uint32_t i = 0; i < num_samples; i++) { |
| uint32_t sample_reg_idx = i / 4; |
| uint32_t sample_loc_idx = i % 4; |
| int32_t pos_x = sample_locs[i].x; |
| int32_t pos_y = sample_locs[i].y; |
| |
| uint32_t shift_x = 8 * sample_loc_idx; |
| uint32_t shift_y = shift_x + 4; |
| |
| sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; |
| sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; |
| } |
| } |
| |
| /** |
| * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware |
| * sample locations. |
| */ |
| static uint64_t |
| radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, |
| uint32_t num_samples) |
| { |
| uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); |
| uint32_t sample_mask = num_samples - 1; |
| uint32_t *distances = alloca(num_samples * sizeof(*distances)); |
| uint64_t centroid_priority = 0; |
| |
| /* Compute the distances from center for each sample. */ |
| for (int i = 0; i < num_samples; i++) { |
| distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y); |
| } |
| |
| /* Compute the centroid priorities by looking at the distances array. */ |
| for (int i = 0; i < num_samples; i++) { |
| uint32_t min_idx = 0; |
| |
| for (int j = 1; j < num_samples; j++) { |
| if (distances[j] < distances[min_idx]) |
| min_idx = j; |
| } |
| |
| centroid_priorities[i] = min_idx; |
| distances[min_idx] = 0xffffffff; |
| } |
| |
| /* Compute the final centroid priority. */ |
| for (int i = 0; i < 8; i++) { |
| centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4); |
| } |
| |
| return centroid_priority << 32 | centroid_priority; |
| } |
| |
| /** |
| * Emit the sample locations that are specified with VK_EXT_sample_locations. |
| */ |
| static void |
| radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location; |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| uint32_t num_samples = (uint32_t)sample_location->per_pixel; |
| unsigned pa_sc_aa_config = pipeline->ms.pa_sc_aa_config; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint32_t sample_locs_pixel[4][2] = {0}; |
| VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ |
| uint32_t max_sample_dist = 0; |
| uint64_t centroid_priority; |
| |
| if (!cmd_buffer->state.dynamic.sample_location.count) |
| return; |
| |
| /* Convert the user sample locations to hardware sample locations. */ |
| radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]); |
| radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]); |
| radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]); |
| radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]); |
| |
| /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ |
| for (uint32_t i = 0; i < 4; i++) { |
| radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]); |
| } |
| |
| /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */ |
| centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples); |
| |
| /* Compute the maximum sample distance from the specified locations. */ |
| for (unsigned i = 0; i < 4; ++i) { |
| for (uint32_t j = 0; j < num_samples; j++) { |
| VkOffset2D offset = sample_locs[i][j]; |
| max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y))); |
| } |
| } |
| |
| /* Emit the specified user sample locations. */ |
| switch (num_samples) { |
| case 2: |
| case 4: |
| radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, |
| sample_locs_pixel[0][0]); |
| radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, |
| sample_locs_pixel[1][0]); |
| radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, |
| sample_locs_pixel[2][0]); |
| radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, |
| sample_locs_pixel[3][0]); |
| break; |
| case 8: |
| radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, |
| sample_locs_pixel[0][0]); |
| radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, |
| sample_locs_pixel[1][0]); |
| radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, |
| sample_locs_pixel[2][0]); |
| radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, |
| sample_locs_pixel[3][0]); |
| radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, |
| sample_locs_pixel[0][1]); |
| radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, |
| sample_locs_pixel[1][1]); |
| radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, |
| sample_locs_pixel[2][1]); |
| radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, |
| sample_locs_pixel[3][1]); |
| break; |
| default: |
| unreachable("invalid number of samples"); |
| } |
| |
| /* Emit the maximum sample distance and the centroid priority. */ |
| pa_sc_aa_config &= C_028BE0_MAX_SAMPLE_DIST; |
| pa_sc_aa_config |= S_028BE0_MAX_SAMPLE_DIST(max_sample_dist); |
| |
| radeon_set_context_reg(cs, R_028BE0_PA_SC_AA_CONFIG, pa_sc_aa_config); |
| |
| radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); |
| radeon_emit(cs, centroid_priority); |
| radeon_emit(cs, centroid_priority >> 32); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
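| /** |
|  * Emit inline push constants directly into the user SGPRs reserved for them. |
|  */ |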
| static void |
| radv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, |
| struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, |
| uint32_t *values) |
| { |
| struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); |
| uint32_t base_reg = pipeline->user_data_0[stage]; |
| if (loc->sgpr_idx == -1) |
| return; |
| |
| radeon_check_space(device->ws, cs, 2 + loc->num_sgprs); |
| |
| radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, loc->num_sgprs); |
| radeon_emit_array(cs, values, loc->num_sgprs); |
| } |
| |
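| /** |
|  * Re-emit the default sample locations when the sample count changes between pipelines. |
|  */ |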
| static void |
| radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_graphics_pipeline *pipeline) |
| { |
| int num_samples = pipeline->ms.num_samples; |
| struct radv_graphics_pipeline *old_pipeline = cmd_buffer->state.emitted_graphics_pipeline; |
| |
| if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions) |
| cmd_buffer->sample_positions_needed = true; |
| |
| if (old_pipeline && num_samples == old_pipeline->ms.num_samples) |
| return; |
| |
| radv_emit_default_sample_locations(cmd_buffer->cs, num_samples); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| static void |
| radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_graphics_pipeline *pipeline) |
| { |
| const struct radv_graphics_pipeline *old_pipeline = cmd_buffer->state.emitted_graphics_pipeline; |
| |
| if (pipeline->base.device->physical_device->rad_info.gfx_level < GFX9) |
| return; |
| |
| if (old_pipeline && |
| old_pipeline->binning.pa_sc_binner_cntl_0 == |
| pipeline->binning.pa_sc_binner_cntl_0) |
| return; |
| |
| bool binning_flush = false; |
| if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 || |
| cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 || |
| cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 || |
| cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { |
| binning_flush = !old_pipeline || |
| G_028C44_BINNING_MODE(old_pipeline->binning.pa_sc_binner_cntl_0) != |
| G_028C44_BINNING_MODE(pipeline->binning.pa_sc_binner_cntl_0); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, |
| pipeline->binning.pa_sc_binner_cntl_0 | |
| S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush)); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| static void |
| radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader) |
| { |
| uint64_t va; |
| |
| if (!shader) |
| return; |
| |
| va = radv_shader_get_va(shader); |
| |
| si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); |
| } |
| |
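| /** |
|  * Prefetch shader binaries and vertex buffer descriptors into L2. With "first_stage_only", |
|  * only the first stages (VS/MS and the VBO descriptors) are prefetched so draws can start |
|  * as soon as possible. |
|  */ |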
| static void |
| radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, |
| struct radv_graphics_pipeline *pipeline, bool first_stage_only) |
| { |
| struct radv_cmd_state *state = &cmd_buffer->state; |
| uint32_t mask = state->prefetch_L2_mask; |
| |
| /* Fast prefetch path for starting draws as soon as possible. */ |
| if (first_stage_only) |
| mask &= RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS | RADV_PREFETCH_MS; |
| |
| if (mask & RADV_PREFETCH_VS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_VERTEX]); |
| |
| if (mask & RADV_PREFETCH_MS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_MESH]); |
| |
| if (mask & RADV_PREFETCH_VBO_DESCRIPTORS) |
| si_cp_dma_prefetch(cmd_buffer, state->vb_va, pipeline->vb_desc_alloc_size); |
| |
| if (mask & RADV_PREFETCH_TCS) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]); |
| |
| if (mask & RADV_PREFETCH_TES) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]); |
| |
| if (mask & RADV_PREFETCH_GS) { |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_GEOMETRY]); |
| if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.gs_copy_shader); |
| } |
| |
| if (mask & RADV_PREFETCH_PS) { |
| radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_FRAGMENT]); |
| if (pipeline->ps_epilog) { |
| struct radv_shader_part *ps_epilog = pipeline->ps_epilog; |
| |
| si_cp_dma_prefetch(cmd_buffer, ps_epilog->va, ps_epilog->code_size); |
| } |
| } |
| |
| state->prefetch_L2_mask &= ~mask; |
| } |
| |
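| /** |
|  * Emit the RB+ registers (per-MRT downconvert formats and blend optimization controls), |
|  * derived from the bound color attachments and the pipeline export formats. |
|  */ |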
| static void |
| radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) |
| { |
| if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed) |
| return; |
| |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_rendering_state *render = &cmd_buffer->state.render; |
| |
| unsigned sx_ps_downconvert = 0; |
| unsigned sx_blend_opt_epsilon = 0; |
| unsigned sx_blend_opt_control = 0; |
| |
| for (unsigned i = 0; i < render->color_att_count; i++) { |
| unsigned format, swap; |
| bool has_alpha, has_rgb; |
| if (render->color_att[i].iview == NULL) { |
| /* We don't set the DISABLE bits, because the HW can't have holes, |
| * so the SPI color format is set to 32-bit 1-component. */ |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); |
| continue; |
| } |
| |
| struct radv_color_buffer_info *cb = &render->color_att[i].cb; |
| |
| format = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 |
| ? G_028C70_FORMAT_GFX11(cb->cb_color_info) |
| : G_028C70_FORMAT_GFX6(cb->cb_color_info); |
| swap = G_028C70_COMP_SWAP(cb->cb_color_info); |
| has_alpha = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 |
| ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib) |
| : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib); |
| |
| uint32_t spi_format = (pipeline->col_format_non_compacted >> (i * 4)) & 0xf; |
| uint32_t colormask = (pipeline->cb_target_mask >> (i * 4)) & 0xf; |
| |
| if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32) |
| has_rgb = !has_alpha; |
| else |
| has_rgb = true; |
| |
| /* Check the colormask and export format. */ |
| if (!(colormask & 0x7)) |
| has_rgb = false; |
| if (!(colormask & 0x8)) |
| has_alpha = false; |
| |
| if (spi_format == V_028714_SPI_SHADER_ZERO) { |
| has_rgb = false; |
| has_alpha = false; |
| } |
| |
| /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha |
| * optimization, even though it has no alpha. */ |
| if (has_rgb && format == V_028C70_COLOR_5_9_9_9) |
| has_alpha = true; |
| |
| /* Disable value checking for disabled channels. */ |
| if (!has_rgb) |
| sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); |
| if (!has_alpha) |
| sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); |
| |
| /* Enable down-conversion for 32bpp and smaller formats. */ |
| switch (format) { |
| case V_028C70_COLOR_8: |
| case V_028C70_COLOR_8_8: |
| case V_028C70_COLOR_8_8_8_8: |
|          /* For 1- and 2-channel formats, use the superset thereof. */ |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || |
| spi_format == V_028714_SPI_SHADER_UINT16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); |
| |
| if (G_028C70_NUMBER_TYPE(cb->cb_color_info) != V_028C70_NUMBER_SRGB) |
| sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_5_6_5: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_1_5_5_5: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_4_4_4_4: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_32: |
| if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); |
| else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); |
| break; |
| |
| case V_028C70_COLOR_16: |
| case V_028C70_COLOR_16_16: |
| /* For 1-channel formats, use the superset thereof. */ |
| if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || |
| spi_format == V_028714_SPI_SHADER_UINT16_ABGR || |
| spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { |
| if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); |
| else |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); |
| } |
| break; |
| |
| case V_028C70_COLOR_10_11_11: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); |
| break; |
| |
| case V_028C70_COLOR_2_10_10_10: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); |
| sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); |
| } |
| break; |
| case V_028C70_COLOR_5_9_9_9: |
| if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) |
| sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); |
| break; |
| } |
| } |
| |
| /* Do not set the DISABLE bits for the unused attachments, as that |
| * breaks dual source blending in SkQP and does not seem to improve |
| * performance. */ |
| |
| if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert && |
| sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon && |
| sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control) |
| return; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); |
| radeon_emit(cmd_buffer->cs, sx_ps_downconvert); |
| radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); |
| radeon_emit(cmd_buffer->cs, sx_blend_opt_control); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| |
| cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert; |
| cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon; |
| cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control; |
| } |
| |
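| /** |
|  * Emit the address of the PS epilog into the fragment shader's AC_UD_PS_EPILOG_PC user SGPR. |
|  */ |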
| static void |
| radv_emit_ps_epilog(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_shader *ps_shader = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; |
| struct radv_shader_part *ps_epilog = pipeline->ps_epilog; |
| |
| if (!ps_epilog) |
| return; |
| |
|    /* The main shader must not use fewer VGPRs than the epilog, otherwise shared VGPRs might not |
| * work. |
| */ |
| assert(G_00B848_VGPRS(ps_shader->config.rsrc1) >= G_00B848_VGPRS(ps_epilog->rsrc1)); |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, ps_epilog->bo); |
| |
| assert((ps_epilog->va >> 32) == cmd_buffer->device->physical_device->rad_info.address32_hi); |
| |
| struct radv_userdata_info *loc = |
| &ps_shader->info.user_sgprs_locs.shader_data[AC_UD_PS_EPILOG_PC]; |
| uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_FRAGMENT]; |
| assert(loc->sgpr_idx != -1); |
| assert(loc->num_sgprs == 1); |
| radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| ps_epilog->va, false); |
| } |
| |
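| /** |
|  * Emit the register state of the bound graphics pipeline and flag the pipeline-dependent |
|  * dynamic state as dirty when the pipeline changed. |
|  */ |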
| static void |
| radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| const struct radv_device *device = cmd_buffer->device; |
| |
| if (cmd_buffer->state.emitted_graphics_pipeline == pipeline) |
| return; |
| |
| radv_update_multisample_state(cmd_buffer, pipeline); |
| radv_update_binning_state(cmd_buffer, pipeline); |
| |
| cmd_buffer->scratch_size_per_wave_needed = |
| MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave); |
| cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->base.max_waves); |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | |
| RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP | |
| RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS | |
| RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | |
| RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | |
| RADV_CMD_DIRTY_DYNAMIC_POLYGON_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE | |
| RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | |
| RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim)) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->cb_color_control != pipeline->cb_color_control) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP | |
| RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->cb_target_mask != pipeline->cb_target_mask) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->vgt_tf_param != pipeline->vgt_tf_param) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_TESS_DOMAIN_ORIGIN; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->ms.pa_sc_mode_cntl_0 != pipeline->ms.pa_sc_mode_cntl_0) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE; |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->ms.pa_sc_aa_config != pipeline->ms.pa_sc_aa_config || |
| cmd_buffer->state.emitted_graphics_pipeline->ms.db_eqaa != pipeline->ms.db_eqaa) |
| cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE; |
| |
| radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); |
| |
| if (pipeline->has_ngg_culling && |
| pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY && |
| !cmd_buffer->state.last_nggc_settings) { |
| /* The already emitted RSRC2 contains the LDS required for NGG culling. |
| * Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage. |
| * API GS always needs LDS, so this isn't useful there. |
| */ |
| struct radv_shader *v = pipeline->base.shaders[pipeline->last_vgt_api_stage]; |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, |
| (v->config.rsrc2 & C_00B22C_LDS_SIZE) | |
| S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling)); |
| } |
| |
| if (!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.cdw != pipeline->base.ctx_cs.cdw || |
| cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs_hash != pipeline->base.ctx_cs_hash || |
| memcmp(cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.buf, |
| pipeline->base.ctx_cs.cdw * 4)) { |
| radeon_emit_array(cmd_buffer->cs, pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.cdw); |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| if (device->pbb_allowed) { |
| struct radv_binning_settings *settings = &device->physical_device->binning_settings; |
| |
| if ((!cmd_buffer->state.emitted_graphics_pipeline || |
| cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] != |
| cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) && |
| (settings->context_states_per_bin > 1 || settings->persistent_states_per_bin > 1)) { |
| /* Break the batch on PS changes. */ |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); |
| } |
| } |
| |
| radv_emit_ps_epilog(cmd_buffer); |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo); |
| |
| /* With graphics pipeline libraries, binaries are uploaded from a library and hold their own |
| * pointer to the slab BO, so each shader BO has to be added individually. |
| */ |
| for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { |
| struct radv_shader *shader = pipeline->base.shaders[s]; |
| |
| if (!shader || !shader->bo) |
| continue; |
| |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo); |
| } |
| |
| if (pipeline->base.gs_copy_shader && pipeline->base.gs_copy_shader->bo) { |
| radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.gs_copy_shader->bo); |
| } |
| |
| if (unlikely(cmd_buffer->device->trace_bo)) |
| radv_save_pipeline(cmd_buffer, &pipeline->base); |
| |
| cmd_buffer->state.emitted_graphics_pipeline = pipeline; |
| |
| cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; |
| } |
| |
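| /* Determine the depth clamp mode from the dynamic depth clamp/clip state and whether |
| * VK_EXT_depth_range_unrestricted is enabled. |
| */ |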
| static enum radv_depth_clamp_mode |
| radv_get_depth_clamp_mode(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_device *device = cmd_buffer->device; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| enum radv_depth_clamp_mode mode; |
| |
| mode = RADV_DEPTH_CLAMP_MODE_VIEWPORT; |
| if (!d->depth_clamp_enable) { |
| /* For optimal performance, depth clamping should always be enabled except if the application |
| * disables clamping explicitly or uses depth values outside of the [0.0, 1.0] range. |
| */ |
| if (!d->depth_clip_enable || device->vk.enabled_extensions.EXT_depth_range_unrestricted) { |
| mode = RADV_DEPTH_CLAMP_MODE_DISABLED; |
| } else { |
| mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE; |
| } |
| } |
| |
| return mode; |
| } |
| |
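| /* Emit the viewport transforms (PA_CL_VPORT_*) and the per-viewport depth ranges |
| * (PA_SC_VPORT_ZMIN/ZMAX), taking the depth clamp mode into account. |
| */ |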
| static void |
| radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| const struct radv_viewport_state *viewport = &cmd_buffer->state.dynamic.viewport; |
| enum radv_depth_clamp_mode depth_clamp_mode = radv_get_depth_clamp_mode(cmd_buffer); |
| int i; |
| const unsigned count = viewport->count; |
| |
| assert(count); |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, count * 6); |
| |
| for (i = 0; i < count; i++) { |
| radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[0])); |
| radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[0])); |
| radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[1])); |
| radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[1])); |
| |
| double scale_z, translate_z; |
| if (d->depth_clip_negative_one_to_one) { |
| scale_z = viewport->xform[i].scale[2] * 0.5f; |
| translate_z = (viewport->xform[i].translate[2] + viewport->viewports[i].maxDepth) * 0.5f; |
| } else { |
| scale_z = viewport->xform[i].scale[2]; |
| translate_z = viewport->xform[i].translate[2]; |
| } |
| radeon_emit(cmd_buffer->cs, fui(scale_z)); |
| radeon_emit(cmd_buffer->cs, fui(translate_z)); |
| } |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, count * 2); |
| for (i = 0; i < count; i++) { |
| float zmin, zmax; |
| |
| if (depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE) { |
| zmin = 0.0f; |
| zmax = 1.0f; |
| } else { |
| zmin = MIN2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth); |
| zmax = MAX2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth); |
| } |
| |
| radeon_emit(cmd_buffer->cs, fui(zmin)); |
| radeon_emit(cmd_buffer->cs, fui(zmax)); |
| } |
| } |
| |
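| /* Write the dynamic scissor rectangles into the given command stream (the viewports are |
| * passed along because the scissors are clamped against them). |
| */ |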
| void |
| radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs) |
| { |
| uint32_t count = cmd_buffer->state.dynamic.scissor.count; |
| |
| si_write_scissors(cs, count, cmd_buffer->state.dynamic.scissor.scissors, |
| cmd_buffer->state.dynamic.viewport.viewports); |
| } |
| |
| static void |
| radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) |
| { |
| radv_write_scissors(cmd_buffer, cmd_buffer->cs); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = false; |
| } |
| |
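| /* Emit the discard rectangles (VK_EXT_discard_rectangles) as hardware clip rectangles. */ |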
| static void |
| radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) |
| { |
| if (!cmd_buffer->state.dynamic.discard_rectangle.count) |
| return; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, |
| cmd_buffer->state.dynamic.discard_rectangle.count * 2); |
| for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) { |
| VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i]; |
| radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); |
| radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) | |
| S_028214_BR_Y(rect.offset.y + rect.extent.height)); |
| } |
| } |
| |
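| /* Emit the dynamic line width, converted to the fixed-point encoding that |
| * PA_SU_LINE_CNTL expects. |
| */ |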
| static void |
| radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer) |
| { |
| unsigned width = cmd_buffer->state.dynamic.line_width * 8; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, |
| S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF))); |
| } |
| |
| static void |
| radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); |
| radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); |
| } |
| |
| static void |
| radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2); |
| radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) | |
| S_028430_STENCILMASK(d->stencil_compare_mask.front) | |
| S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | |
| S_028430_STENCILOPVAL(1)); |
| radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | |
| S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | |
| S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | |
| S_028434_STENCILOPVAL_BF(1)); |
| } |
| |
| static void |
| radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, 2); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.min)); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.max)); |
| } |
| |
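| /* Emit the dynamic depth bias state (clamp, slope scale and constant offset) for both |
| * front and back faces; the slope factor is pre-scaled by 16 to match the fixed-point |
| * encoding of the register. |
| */ |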
| static void |
| radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned slope = fui(d->depth_bias.slope * 16.0f); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ |
| radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */ |
| radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ |
| radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */ |
| } |
| |
| static void |
| radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t auto_reset_cntl = 1; |
| |
| if (d->primitive_topology == V_008958_DI_PT_LINESTRIP) |
| auto_reset_cntl = 2; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE, |
| S_028A0C_LINE_PATTERN(d->line_stipple.pattern) | |
| S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) | |
| S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl)); |
| } |
| |
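| /* Build the PA_SU_SC_MODE_CNTL value from the dynamic cull mode, front face, depth bias, |
| * polygon mode and provoking vertex state. |
| */ |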
| uint32_t |
| radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer) |
| { |
| enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; |
| const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned pa_su_sc_mode_cntl; |
| |
| pa_su_sc_mode_cntl = S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)) | |
| S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)) | |
| S_028814_FACE(d->front_face) | |
| S_028814_POLY_OFFSET_FRONT_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_OFFSET_BACK_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_OFFSET_PARA_ENABLE(d->depth_bias_enable) | |
| S_028814_POLY_MODE(d->polygon_mode != V_028814_X_DRAW_TRIANGLES) | |
| S_028814_POLYMODE_FRONT_PTYPE(d->polygon_mode) | |
| S_028814_POLYMODE_BACK_PTYPE(d->polygon_mode) | |
| S_028814_PROVOKING_VTX_LAST(d->provoking_vertex_mode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT); |
| |
| if (gfx_level >= GFX10) { |
| pa_su_sc_mode_cntl |= |
| S_028814_KEEP_TOGETHER_ENABLE(d->polygon_mode != V_028814_X_DRAW_TRIANGLES); |
| } |
| |
| return pa_su_sc_mode_cntl; |
| } |
| |
| static void |
| radv_emit_culling(struct radv_cmd_buffer *cmd_buffer) |
| { |
| unsigned pa_su_sc_mode_cntl = radv_get_pa_su_sc_mode_cntl(cmd_buffer); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); |
| } |
| |
| static void |
| radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| assert(!cmd_buffer->state.mesh_shading); |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { |
| radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, |
| R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology); |
| } else { |
| radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology); |
| } |
| } |
| |
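| /* Emit DB_DEPTH_CONTROL from the dynamic depth/stencil test state. */ |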
| static void |
| radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, |
| S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0) | |
| S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0) | |
| S_028800_ZFUNC(d->depth_compare_op) | |
| S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0) | |
| S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0) | |
| S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0) | |
| S_028800_STENCILFUNC(d->stencil_op.front.compare_op) | |
| S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op)); |
| } |
| |
| static void |
| radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg( |
| cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, |
| S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) | |
| S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) | |
| S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) | |
| S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) | |
| S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) | |
| S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op))); |
| } |
| |
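| /* Emit the per-draw VRS rate and the combiner modes (vertex/primitive/HTILE) used to |
| * determine the final fragment shading rate on GFX10.3+. |
| */ |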
| static void |
| radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1; |
| uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1; |
| uint32_t pa_cl_vrs_cntl = pipeline->vrs.pa_cl_vrs_cntl; |
| uint32_t pipeline_comb_mode = d->fragment_shading_rate.combiner_ops[0]; |
| uint32_t htile_comb_mode = d->fragment_shading_rate.combiner_ops[1]; |
| |
| assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3); |
| |
| if (!cmd_buffer->state.render.vrs_att.iview) { |
| /* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we |
| * can cheat by tweaking the different combiner modes. |
| */ |
| switch (htile_comb_mode) { |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR: |
| /* The result of min(A, 1x1) is always 1x1. */ |
| FALLTHROUGH; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR: |
| /* Force the per-draw VRS rate to 1x1. */ |
| rate_x = rate_y = 0; |
| |
| /* As the result of min(A, 1x1) or replace(A, 1x1) is always 1x1, set the vertex rate |
| * combiner mode to passthrough. |
| */ |
| pipeline_comb_mode = V_028848_VRS_COMB_MODE_PASSTHRU; |
| break; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR: |
| /* The result of max(A, 1x1) is always A. */ |
| FALLTHROUGH; |
| case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR: |
| /* Nothing to do here because the SAMPLE_ITER combiner mode should already be passthrough. */ |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* Emit per-draw VRS rate which is the first combiner. */ |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, |
| S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); |
| |
| /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the |
| * draw rate and the vertex rate. |
| */ |
| if (cmd_buffer->state.mesh_shading) { |
| pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) | |
| S_028848_PRIMITIVE_RATE_COMBINER_MODE(pipeline_comb_mode); |
| } else { |
| pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(pipeline_comb_mode) | |
| S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); |
| } |
| |
| /* HTILE_RATE_COMBINER_MODE controls the combiner mode between the primitive rate and the HTILE |
| * rate. |
| */ |
| pa_cl_vrs_cntl |= S_028848_HTILE_RATE_COMBINER_MODE(htile_comb_mode); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl); |
| } |
| |
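| /* Emit the primitive restart enable; the register moved from the context space to the |
| * uconfig space on GFX9+. |
| */ |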
| static void |
| radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { |
| radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } else { |
| radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, |
| d->primitive_restart_enable); |
| } |
| } |
| |
| static void |
| radv_emit_clipping(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, |
| S_028810_DX_RASTERIZATION_KILL(d->rasterizer_discard_enable) | |
| S_028810_ZCLIP_NEAR_DISABLE(!d->depth_clip_enable) | |
| S_028810_ZCLIP_FAR_DISABLE(!d->depth_clip_enable) | |
| S_028810_DX_CLIP_SPACE_DEF(!d->depth_clip_negative_one_to_one) | |
| S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); |
| } |
| |
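| /* Emit CB_COLOR_CONTROL with the dynamic logic op; when the logic op is disabled, ROP3 is |
| * set to COPY. On RB+ chips, dual-quad mode is disabled while a logic op is enabled. |
| */ |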
| static void |
| radv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) |
| { |
| unsigned cb_color_control = cmd_buffer->state.graphics_pipeline->cb_color_control; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| if (d->logic_op_enable) { |
| cb_color_control |= S_028808_ROP3(d->logic_op); |
| } else { |
| cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.has_rbplus) { |
| cb_color_control |= S_028808_DISABLE_DUAL_QUAD(d->logic_op_enable); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, cb_color_control); |
| } |
| |
| static void |
| radv_emit_color_write_enable(struct radv_cmd_buffer *cmd_buffer) |
| { |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, |
| pipeline->cb_target_mask & d->color_write_enable); |
| } |
| |
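| /* Emit the tessellation state that depends on the dynamic patch control points: |
| * VGT_LS_HS_CONFIG, the HS/LS LDS size and the related user SGPRs. |
| */ |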
| static void |
| radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned ls_hs_config, base_reg; |
| struct radv_userdata_info *loc; |
| |
| ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) | |
| S_028B58_HS_NUM_INPUT_CP(d->patch_control_points) | |
| S_028B58_HS_NUM_OUTPUT_CP(tcs->info.tcs.tcs_vertices_out); |
| |
| if (pdevice->rad_info.gfx_level >= GFX7) { |
| radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); |
| } else { |
| radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); |
| } |
| |
| if (pdevice->rad_info.gfx_level >= GFX9) { |
| unsigned hs_rsrc2 = tcs->config.rsrc2; |
| |
| if (pdevice->rad_info.gfx_level >= GFX10) { |
| hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(cmd_buffer->state.tess_lds_size); |
| } else { |
| hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(cmd_buffer->state.tess_lds_size); |
| } |
| |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); |
| } else { |
| struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX]; |
| unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size); |
| |
| radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); |
| } |
| |
| /* Emit user SGPRs for dynamic patch control points. */ |
| loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT); |
| if (loc->sgpr_idx == -1) |
| return; |
| assert(loc->num_sgprs == 1); |
| |
| base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_CTRL]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| (cmd_buffer->state.tess_num_patches << 6) | d->patch_control_points); |
| |
| loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_TESS_EVAL, AC_UD_TES_NUM_PATCHES); |
| assert(loc->sgpr_idx != -1 && loc->num_sgprs == 1); |
| |
| base_reg = pipeline->base.user_data_0[MESA_SHADER_TESS_EVAL]; |
| radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, |
| cmd_buffer->state.tess_num_patches); |
| } |
| |
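| /* Emit the conservative rasterization state (GFX9+) and the MSAA registers that have to |
| * be adjusted when it is enabled. |
| */ |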
| static void |
| radv_emit_conservative_rast_mode(struct radv_cmd_buffer *cmd_buffer) |
| { |
| const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; |
| struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; |
| struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; |
| unsigned pa_sc_aa_config = pipeline->ms.pa_sc_aa_config; |
| unsigned db_eqaa = pipeline->ms.db_eqaa; |
| |
| if (pdevice->rad_info.gfx_level >= GFX9) { |
| uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); |
| |
| if (d->conservative_rast_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { |
| pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | |
| S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); |
| |
| if (d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { |
| pa_sc_conservative_rast |= |
| S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | |
| S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | |
| S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); |
| } else { |
| assert(d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); |
| pa_sc_conservative_rast |= |
| S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | |
| S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | |
| S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); |
| } |
| |
| /* Adjust MSAA state if conservative rasterization is enabled. */ |
| pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1); |
| db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | |
| S_028804_OVERRASTERIZATION_AMOUNT(4); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, |
| pa_sc_conservative_rast); |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028BE0_PA_SC_AA_CONFIG, pa_sc_aa_config); |
| radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, db_eqaa); |
| } |
| |
| static void |
| radv_emit_depth_clamp_enable(struct radv_cmd_buffer *cmd_buffer) |
| { |
| enum radv_depth_clamp_mode mode = radv_get_depth_clamp_mode(cmd_buffer); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_02800C_DB_RENDER_OVERRIDE, |
| S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | |
| S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | |
| S_02800C_DISABLE_VIEWPORT_CLAMP(mode == RADV_DEPTH_CLAMP_MODE_DISABLED)); |
| } |
| |
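| /* Emit the framebuffer state of one color attachment, disabling DCC/FMASK compression |
| * when the image layout doesn't allow it. |
| */ |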
| static void |
| radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, |
| struct radv_color_buffer_info *cb, struct radv_image_view *iview, |
| VkImageLayout layout) |
| { |
| bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8; |
| uint32_t cb_fdcc_control = cb->cb_dcc_control; |
| uint32_t cb_color_info = cb->cb_color_info; |
| struct radv_image *image = iview->image; |
| |
| if (!radv_layout_dcc_compressed( |
| cmd_buffer->device, image, iview->vk.base_mip_level, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| cb_fdcc_control &= C_028C78_FDCC_ENABLE; |
| } else { |
| cb_color_info &= C_028C70_DCC_ENABLE; |
| } |
| } |
| |
| if (!radv_layout_fmask_compressed( |
| cmd_buffer->device, image, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| cb_color_info &= C_028C70_COMPRESSION; |
| } |
| |
| if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) || |
| radv_is_dcc_decompress_pipeline(cmd_buffer))) { |
| /* If this bit is set, the FMASK decompression operation |
| * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). |
| */ |
| cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY; |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ |
| radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */ |
| radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_base); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); |
| radeon_emit(cmd_buffer->cs, cb_color_info); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); |
| radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); |
| radeon_emit(cmd_buffer->cs, 0); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, |
| cb->cb_color_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, |
| cb->cb_color_cmask >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, |
| cb->cb_color_fmask >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, |
| cb->cb_dcc_base >> 32); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, |
| cb->cb_color_attrib2); |
| radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, |
| cb->cb_color_attrib3); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_base); |
| radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32)); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); |
| radeon_emit(cmd_buffer->cs, cb_color_info); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); |
| radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); |
| radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32)); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); |
| radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32)); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2); |
| radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); |
| radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32)); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, |
| cb->cb_mrt_epitch); |
| } else { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_base); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_slice); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_view); |
| radeon_emit(cmd_buffer->cs, cb_color_info); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); |
| radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); |
| radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); |
| |
| if (is_vi) { /* DCC BASE */ |
| radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, |
| cb->cb_dcc_base); |
| } |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 |
| ? G_028C78_FDCC_ENABLE(cb_fdcc_control) |
| : G_028C70_DCC_ENABLE(cb_color_info)) { |
| /* Drawing with DCC enabled also compresses colorbuffers. */ |
| VkImageSubresourceRange range = { |
| .aspectMask = iview->vk.aspects, |
| .baseMipLevel = iview->vk.base_mip_level, |
| .levelCount = iview->vk.level_count, |
| .baseArrayLayer = iview->vk.base_array_layer, |
| .layerCount = iview->vk.layer_count, |
| }; |
| |
| radv_update_dcc_metadata(cmd_buffer, image, &range, true); |
| } |
| } |
| |
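| /* Clear DB_Z_INFO.ZRANGE_PRECISION to work around the TC-compat zrange bug, optionally |
| * guarded by a COND_EXEC packet when the last fast clear value isn't known. |
| */ |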
| static void |
| radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, |
| const struct radv_image_view *iview, VkImageLayout layout, |
| bool requires_cond_exec) |
| { |
| const struct radv_image *image = iview->image; |
| uint32_t db_z_info = ds->db_z_info; |
| uint32_t db_z_info_reg; |
| |
| if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || |
| !radv_image_is_tc_compat_htile(image)) |
| return; |
| |
| if (!radv_layout_is_htile_compressed( |
| cmd_buffer->device, image, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| db_z_info &= C_028040_TILE_SURFACE_ENABLE; |
| } |
| |
| db_z_info &= C_028040_ZRANGE_PRECISION; |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { |
| db_z_info_reg = R_028038_DB_Z_INFO; |
| } else { |
| db_z_info_reg = R_028040_DB_Z_INFO; |
| } |
| |
| /* When we don't know the last fast clear value we need to emit a |
| * conditional packet that will eventually skip the following |
| * SET_CONTEXT_REG packet. |
| */ |
| if (requires_cond_exec) { |
| uint64_t va = radv_get_tc_compat_zrange_va(image, iview->vk.base_mip_level); |
| |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); |
| radeon_emit(cmd_buffer->cs, va); |
| radeon_emit(cmd_buffer->cs, va >> 32); |
| radeon_emit(cmd_buffer->cs, 0); |
| radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); |
| } |
| |
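| /* Emit the framebuffer depth/stencil state, disabling HTILE compression when the image |
| * layout doesn't allow it. |
| */ |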
| static void |
| radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, |
| struct radv_image_view *iview, VkImageLayout layout) |
| { |
| const struct radv_image *image = iview->image; |
| uint32_t db_z_info = ds->db_z_info; |
| uint32_t db_stencil_info = ds->db_stencil_info; |
| uint32_t db_htile_surface = ds->db_htile_surface; |
| |
| if (!radv_layout_is_htile_compressed( |
| cmd_buffer->device, image, layout, |
| radv_image_queue_family_mask(image, cmd_buffer->qf, |
| cmd_buffer->qf))) { |
| db_z_info &= C_028040_TILE_SURFACE_ENABLE; |
| db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); |
| } |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3 && |
| !cmd_buffer->state.render.vrs_att.iview) { |
| db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING; |
| } |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view); |
| radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface); |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { |
| radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); |
| radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); |
| |
| if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6); |
| } else { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); |
| radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); |
| } |
| radeon_emit(cmd_buffer->cs, db_z_info); |
| radeon_emit(cmd_buffer->cs, db_stencil_info); |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base); |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base); |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5); |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); |
| radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); |
| } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); |
| radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); |
| radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32)); |
| radeon_emit(cmd_buffer->cs, ds->db_depth_size); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); |
| radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ |
| radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ |
| radeon_emit(cmd_buffer->cs, |
| S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ |
| radeon_emit(cmd_buffer->cs, |
| S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ |
| radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ |
| radeon_emit(cmd_buffer->cs, |
| S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ |
| radeon_emit(cmd_buffer->cs, |
| S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); |
| radeon_emit(cmd_buffer->cs, ds->db_z_info2); |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_info2); |
| } else { |
| radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); |
| |
| radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9); |
| radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ |
| radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */ |
| radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */ |
| radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */ |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */ |
| radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */ |
| radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */ |
| radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ |
| radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ |
| } |
| |
| /* Update the ZRANGE_PRECISION value for the TC-compat bug. */ |
| radv_update_zrange_precision(cmd_buffer, ds, iview, layout, true); |
| |
| radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, |
| ds->pa_su_poly_offset_db_fmt_cntl); |
| } |
| |
| /** |
| * Update the fast clear depth/stencil values if the image is bound as a |
| * depth/stencil buffer. |
| */ |
| static void |
| radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, |
| const struct radv_image_view *iview, |
| VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) |
| { |
| const struct radv_image *image = iview->image; |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| |
| if (cmd_buffer->state.render.ds_att.iview == NULL || |
| cmd_buffer->state.render.ds_att.iview->image != image) |
| return; |
| |
| if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); |
| radeon_emit(cs, ds_clear_value.stencil); |
| radeon_emit(cs, fui(ds_clear_value.depth)); |
| } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { |
| radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(ds_clear_value.depth)); |
| } else { |
| assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); |
| radeon_set_context_reg(cs, R_028028_DB_STENCIL_CLEAR, ds_clear_value.stencil); |
| } |
| |
| /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is |
| * only needed when clearing Z to 0.0. |
| */ |
| if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) { |
| radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.render.ds_att.ds, iview, |
| cmd_buffer->state.render.ds_att.layout, false); |
| } |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| /** |
| * Set the clear depth/stencil values to the image's metadata. |
| */ |
| static void |
| radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| const VkImageSubresourceRange *range, |
| VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint32_t level_count = radv_get_levelCount(image, range); |
| |
| if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel); |
| |
| /* Use the fastest way when both aspects are used. */ |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating)); |
| radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| |
| for (uint32_t l = 0; l < level_count; l++) { |
| radeon_emit(cs, ds_clear_value.stencil); |
| radeon_emit(cs, fui(ds_clear_value.depth)); |
| } |
| } else { |
| /* Otherwise we need one WRITE_DATA packet per level. */ |
| for (uint32_t l = 0; l < level_count; l++) { |
| uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l); |
| unsigned value; |
| |
| if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { |
| value = fui(ds_clear_value.depth); |
| va += 4; |
| } else { |
| assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); |
| value = ds_clear_value.stencil; |
| } |
| |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); |
| radeon_emit(cs, |
| S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| radeon_emit(cs, value); |
| } |
| } |
| } |
| |
| /** |
| * Update the TC-compat metadata value for this image. |
| */ |
| static void |
| radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| const VkImageSubresourceRange *range, uint32_t value) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| |
| if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug) |
| return; |
| |
| uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel); |
| uint32_t level_count = radv_get_levelCount(image, range); |
| |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating)); |
| radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| |
| for (uint32_t l = 0; l < level_count; l++) |
| radeon_emit(cs, value); |
| } |
| |
| static void |
| radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, |
| const struct radv_image_view *iview, |
| VkClearDepthStencilValue ds_clear_value) |
| { |
| VkImageSubresourceRange range = { |
| .aspectMask = iview->vk.aspects, |
| .baseMipLevel = iview->vk.base_mip_level, |
| .levelCount = iview->vk.level_count, |
| .baseArrayLayer = iview->vk.base_array_layer, |
| .layerCount = iview->vk.layer_count, |
| }; |
| uint32_t cond_val; |
| |
| /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last |
| * depth clear value is 0.0f. |
| */ |
| cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; |
| |
| radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val); |
| } |
| |
| /** |
| * Update the clear depth/stencil values for this image. |
| */ |
| void |
| radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, |
| const struct radv_image_view *iview, |
| VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) |
| { |
| VkImageSubresourceRange range = { |
| .aspectMask = iview->vk.aspects, |
| .baseMipLevel = iview->vk.base_mip_level, |
| .levelCount = iview->vk.level_count, |
| .baseArrayLayer = iview->vk.base_array_layer, |
| .layerCount = iview->vk.layer_count, |
| }; |
| struct radv_image *image = iview->image; |
| |
| assert(radv_htile_enabled(image, range.baseMipLevel)); |
| |
| radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects); |
| |
| if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { |
| radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value); |
| } |
| |
| radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects); |
| } |
| |
| /** |
| * Load the clear depth/stencil values from the image's metadata. |
| */ |
| static void |
| radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| const struct radv_image *image = iview->image; |
| VkImageAspectFlags aspects = vk_format_aspects(image->vk.format); |
| uint64_t va = radv_get_ds_clear_value_va(image, iview->vk.base_mip_level); |
| unsigned reg_offset = 0, reg_count = 0; |
| |
| assert(radv_image_has_htile(image)); |
| |
| if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| ++reg_count; |
| } else { |
| ++reg_offset; |
| va += 4; |
| } |
| if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) |
| ++reg_count; |
| |
| uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; |
| |
| if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { |
| radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); |
| radeon_emit(cs, reg_count); |
| } else { |
| radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); |
| radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | |
| (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| radeon_emit(cs, reg >> 2); |
| radeon_emit(cs, 0); |
| |
| radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); |
| radeon_emit(cs, 0); |
| } |
| } |
| |
| /* |
| * With DCC, some clear colors don't require CMASK elimination before the image is |
| * used as a texture. This writes a predicate value that determines whether the |
| * fast-clear eliminate pass is required. |
| */ |
| void |
| radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| const VkImageSubresourceRange *range, bool value) |
| { |
| if (!image->fce_pred_offset) |
| return; |
| |
| uint64_t pred_val = value; |
| uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel); |
| uint32_t level_count = radv_get_levelCount(image, range); |
| uint32_t count = 2 * level_count; |
| |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); |
| radeon_emit(cmd_buffer->cs, |
| S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cmd_buffer->cs, va); |
| radeon_emit(cmd_buffer->cs, va >> 32); |
| |
| for (uint32_t l = 0; l < level_count; l++) { |
| radeon_emit(cmd_buffer->cs, pred_val); |
| radeon_emit(cmd_buffer->cs, pred_val >> 32); |
| } |
| } |
| |
| /** |
| * Update the DCC predicate to reflect the compression state. |
| */ |
| void |
| radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| const VkImageSubresourceRange *range, bool value) |
| { |
| if (image->dcc_pred_offset == 0) |
| return; |
| |
| uint64_t pred_val = value; |
| uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel); |
| uint32_t level_count = radv_get_levelCount(image, range); |
| uint32_t count = 2 * level_count; |
| |
| assert(radv_dcc_enabled(image, range->baseMipLevel)); |
| |
| radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); |
| radeon_emit(cmd_buffer->cs, |
| S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cmd_buffer->cs, va); |
| radeon_emit(cmd_buffer->cs, va >> 32); |
| |
| for (uint32_t l = 0; l < level_count; l++) { |
| radeon_emit(cmd_buffer->cs, pred_val); |
| radeon_emit(cmd_buffer->cs, pred_val >> 32); |
| } |
| } |
| |
| /** |
| * Update the fast clear color values if the image is bound as a color buffer. |
| */ |
| static void |
| radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| int cb_idx, uint32_t color_values[2]) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| |
| if (cb_idx >= cmd_buffer->state.render.color_att_count || |
| cmd_buffer->state.render.color_att[cb_idx].iview == NULL || |
| cmd_buffer->state.render.color_att[cb_idx].iview->image != image) |
| return; |
| |
| radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); |
| radeon_emit(cs, color_values[0]); |
| radeon_emit(cs, color_values[1]); |
| |
| cmd_buffer->state.context_roll_without_scissor_emitted = true; |
| } |
| |
| /** |
| * Set the clear color values to the image's metadata. |
| */ |
| static void |
| radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, |
| const VkImageSubresourceRange *range, uint32_t color_values[2]) |
| { |
| struct radeon_cmdbuf *cs = cmd_buffer->cs; |
| uint32_t level_count = radv_get_levelCount(image, range); |
| uint32_t count = 2 * level_count; |
| |
| assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)); |
| |
| if (radv_image_has_clear_value(image)) { |
| uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); |
| |
| radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating)); |
| radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); |
| radeon_emit(cs, va); |
| radeon_emit(cs, va >> 32); |
| |
| for (uint32_t l = 0; l < level_count; l++) { |
| radeon_emit(cs, color_values[0]); |
| radeon_emit(cs, color_values[1]); |
| } |
| } else { |
| /* Without clear value metadata, only the default all-zero clear color can be set. */ |
| assert(color_values[0] == 0 && color_values[1] == 0); |
| } |
| } |
| |
| /** |
| * Update the clear color values for this image. |
| */ |
| void |
| radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, |
| const struct radv_image_view *iview, int cb_idx, |
| uint32_t color_values[2]) |
| { |
| struct radv_image *image = iview->image; |
| VkImageSubresourceRange range = { |
| .aspectMask = iview->vk.aspects, |
| .baseMipLevel = iview->vk.base_mip_level, |
| .levelCount = iview->vk.level_count, |
| .baseArrayLayer = iview->vk.base_array_layer, |
| .layerCount = iview->vk.layer_count, |
| }; |
| |
| assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->vk.base_mip_level)); |
| |
| /* Do not need to update the clear value for images that are fast cleared with the comp-to-single |
| * mode because the hardware gets the value from the image directly. |
| |