| /* |
| * Copyright © 2021 Raspberry Pi Ltd |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "v3dv_private.h" |
| #include "broadcom/common/v3d_macros.h" |
| #include "broadcom/cle/v3dx_pack.h" |
| #include "broadcom/compiler/v3d_compiler.h" |
| |
| static uint8_t |
| blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants, |
| bool *needs_dual_src) |
| { |
| switch (factor) { |
| case VK_BLEND_FACTOR_ZERO: |
| case VK_BLEND_FACTOR_ONE: |
| case VK_BLEND_FACTOR_SRC_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: |
| case VK_BLEND_FACTOR_DST_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: |
| case VK_BLEND_FACTOR_SRC_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: |
| case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: |
| return factor; |
| case VK_BLEND_FACTOR_CONSTANT_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: |
| case VK_BLEND_FACTOR_CONSTANT_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: |
| *needs_constants = true; |
| return factor; |
| case VK_BLEND_FACTOR_DST_ALPHA: |
| return dst_alpha_one ? V3D_BLEND_FACTOR_ONE : |
| V3D_BLEND_FACTOR_DST_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: |
| return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO : |
| V3D_BLEND_FACTOR_INV_DST_ALPHA; |
| |
| /* For dual source blending we need to fallback to software as the hardware |
| * has no support for it. |
| */ |
| case VK_BLEND_FACTOR_SRC1_COLOR: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: |
| case VK_BLEND_FACTOR_SRC1_ALPHA: |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: |
| assert(needs_dual_src); |
| *needs_dual_src = true; |
| return VK_BLEND_FACTOR_ZERO; |
| default: |
| unreachable("Unknown blend factor."); |
| } |
| } |
| |
| static void |
| pack_blend(struct v3dv_pipeline *pipeline, |
| const VkPipelineColorBlendStateCreateInfo *cb_info) |
| { |
| /* By default, we are not enabling blending and all color channel writes are |
| * enabled. Color write enables are independent of whether blending is |
| * enabled or not. |
| * |
| * Vulkan specifies color write masks so that bits set correspond to |
| * enabled channels. Our hardware does it the other way around. |
| */ |
| pipeline->blend.enables = 0; |
| pipeline->blend.color_write_masks = 0; /* All channels enabled */ |
| |
| if (!cb_info) |
| return; |
| |
| const struct vk_render_pass_state *ri = &pipeline->rendering_info; |
| if (ri->color_attachment_count == 0) |
| return; |
| |
| assert(ri->color_attachment_count == cb_info->attachmentCount); |
| pipeline->blend.needs_color_constants = false; |
| uint32_t color_write_masks = 0; |
| |
| bool needs_dual_src = false; |
| for (uint32_t i = 0; i < ri->color_attachment_count; i++) { |
| const VkPipelineColorBlendAttachmentState *b_state = |
| &cb_info->pAttachments[i]; |
| |
| const VkFormat vk_format = ri->color_attachment_formats[i]; |
| if (vk_format == VK_FORMAT_UNDEFINED) |
| continue; |
| |
| color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i); |
| |
| if (!b_state->blendEnable) |
| continue; |
| |
| const struct v3dv_format *format = v3dX(get_format)(vk_format); |
| |
| /* We only do blending with render pass attachments, so we should not have |
| * multiplanar images here |
| */ |
| assert(format->plane_count == 1); |
| bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1); |
| |
| uint8_t rt_mask = 1 << i; |
| pipeline->blend.enables |= rt_mask; |
| |
| v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) { |
| config.render_target_mask = rt_mask; |
| |
| config.color_blend_mode = b_state->colorBlendOp; |
| config.color_blend_dst_factor = |
| blend_factor(b_state->dstColorBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants, |
| &needs_dual_src); |
| config.color_blend_src_factor = |
| blend_factor(b_state->srcColorBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants, |
| &needs_dual_src); |
| |
| config.alpha_blend_mode = b_state->alphaBlendOp; |
| config.alpha_blend_dst_factor = |
| blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants, |
| &needs_dual_src); |
| config.alpha_blend_src_factor = |
| blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one, |
| &pipeline->blend.needs_color_constants, |
| &needs_dual_src); |
| } |
| } |
| |
| /* We may want to fallback to software in other cases in the future such |
| * as for formats not supported by the blend hardware. |
| */ |
| pipeline->blend.use_software = V3D_DBG(SOFT_BLEND) || needs_dual_src; |
| pipeline->blend.color_write_masks = color_write_masks; |
| } |
| |
| /* This requires that pack_blend() had been called before so we can set |
| * the overall blend enable bit in the CFG_BITS packet. |
| */ |
| static void |
| pack_cfg_bits(struct v3dv_pipeline *pipeline, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info, |
| const VkPipelineRasterizationStateCreateInfo *rs_info, |
| const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info, |
| const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info, |
| const VkPipelineMultisampleStateCreateInfo *ms_info) |
| { |
| assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS)); |
| |
| pipeline->msaa = |
| ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; |
| |
| v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) { |
| /* This is required to pass line rasterization tests in CTS while |
| * exposing, at least, a minimum of 4-bits of subpixel precision |
| * (the minimum requirement). |
| */ |
| if (ls_info && |
| ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) |
| config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT; |
| else |
| config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS; |
| |
| if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) { |
| config.direct3d_wireframe_triangles_mode = true; |
| config.direct3d_point_fill_mode = |
| rs_info->polygonMode == VK_POLYGON_MODE_POINT; |
| } |
| |
| /* diamond-exit rasterization does not support oversample */ |
| config.rasterizer_oversample_mode = |
| (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS && |
| pipeline->msaa) ? 1 : 0; |
| |
| /* From the Vulkan spec: |
| * |
| * "Provoking Vertex: |
| * |
| * The vertex in a primitive from which flat shaded attribute |
| * values are taken. This is generally the “first” vertex in the |
| * primitive, and depends on the primitive topology." |
| * |
| * First vertex is the Direct3D style for provoking vertex. OpenGL uses |
| * the last vertex by default. |
| */ |
| if (pv_info) { |
| config.direct3d_provoking_vertex = |
| pv_info->provokingVertexMode == |
| VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT; |
| } else { |
| config.direct3d_provoking_vertex = true; |
| } |
| |
| config.blend_enable = pipeline->blend.enables != 0 && |
| !pipeline->blend.use_software; |
| |
| #if V3D_VERSION >= 71 |
| /* From the Vulkan spec: |
| * |
| * "depthClampEnable controls whether to clamp the fragment’s depth |
| * values as described in Depth Test. If the pipeline is not created |
| * with VkPipelineRasterizationDepthClipStateCreateInfoEXT present |
| * then enabling depth clamp will also disable clipping primitives to |
| * the z planes of the frustrum as described in Primitive Clipping. |
| * Otherwise depth clipping is controlled by the state set in |
| * VkPipelineRasterizationDepthClipStateCreateInfoEXT." |
| */ |
| bool z_clamp_enable = rs_info && rs_info->depthClampEnable; |
| bool z_clip_enable = false; |
| const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info = |
| rs_info ? vk_find_struct_const(rs_info->pNext, |
| PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) : |
| NULL; |
| if (clip_info) |
| z_clip_enable = clip_info->depthClipEnable; |
| else if (!z_clamp_enable) |
| z_clip_enable = true; |
| |
| if (z_clip_enable) { |
| config.z_clipping_mode = pipeline->negative_one_to_one ? |
| V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE; |
| } else { |
| config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE; |
| } |
| |
| config.z_clamp_mode = z_clamp_enable; |
| #endif |
| }; |
| } |
| |
| uint32_t |
| v3dX(translate_stencil_op)(VkStencilOp op) |
| { |
| switch (op) { |
| case VK_STENCIL_OP_KEEP: |
| return V3D_STENCIL_OP_KEEP; |
| case VK_STENCIL_OP_ZERO: |
| return V3D_STENCIL_OP_ZERO; |
| case VK_STENCIL_OP_REPLACE: |
| return V3D_STENCIL_OP_REPLACE; |
| case VK_STENCIL_OP_INCREMENT_AND_CLAMP: |
| return V3D_STENCIL_OP_INCR; |
| case VK_STENCIL_OP_DECREMENT_AND_CLAMP: |
| return V3D_STENCIL_OP_DECR; |
| case VK_STENCIL_OP_INVERT: |
| return V3D_STENCIL_OP_INVERT; |
| case VK_STENCIL_OP_INCREMENT_AND_WRAP: |
| return V3D_STENCIL_OP_INCWRAP; |
| case VK_STENCIL_OP_DECREMENT_AND_WRAP: |
| return V3D_STENCIL_OP_DECWRAP; |
| default: |
| unreachable("bad stencil op"); |
| } |
| } |
| |
| static void |
| pack_single_stencil_cfg(struct v3dv_pipeline *pipeline, |
| uint8_t *stencil_cfg, |
| bool is_front, |
| bool is_back, |
| const VkStencilOpState *stencil_state, |
| const struct vk_graphics_pipeline_state *state) |
| { |
| /* From the Vulkan spec: |
| * |
| * "Reference is an integer reference value that is used in the unsigned |
| * stencil comparison. The reference value used by stencil comparison |
| * must be within the range [0,2^s-1] , where s is the number of bits in |
| * the stencil framebuffer attachment, otherwise the reference value is |
| * considered undefined." |
| * |
| * In our case, 's' is always 8, so we clamp to that to prevent our packing |
| * functions to assert in debug mode if they see larger values. |
| */ |
| v3dvx_pack(stencil_cfg, STENCIL_CFG, config) { |
| config.front_config = is_front; |
| config.back_config = is_back; |
| config.stencil_write_mask = stencil_state->writeMask & 0xff; |
| config.stencil_test_mask = stencil_state->compareMask & 0xff; |
| config.stencil_test_function = stencil_state->compareOp; |
| config.stencil_pass_op = |
| v3dX(translate_stencil_op)(stencil_state->passOp); |
| config.depth_test_fail_op = |
| v3dX(translate_stencil_op)(stencil_state->depthFailOp); |
| config.stencil_test_fail_op = |
| v3dX(translate_stencil_op)(stencil_state->failOp); |
| config.stencil_ref_value = stencil_state->reference & 0xff; |
| } |
| } |
| |
| static void |
| pack_stencil_cfg(struct v3dv_pipeline *pipeline, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info, |
| const struct vk_graphics_pipeline_state *state) |
| { |
| assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG)); |
| |
| if (!ds_info || !ds_info->stencilTestEnable) |
| return; |
| |
| const struct vk_render_pass_state *ri = &pipeline->rendering_info; |
| if (ri->stencil_attachment_format == VK_FORMAT_UNDEFINED) |
| return; |
| |
| const bool any_dynamic_stencil_states = |
| BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) || |
| BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) || |
| BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) || |
| BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || |
| BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP); |
| |
| /* If front != back or we have dynamic stencil state we can't emit a single |
| * packet for both faces. |
| */ |
| bool needs_front_and_back = false; |
| if ((any_dynamic_stencil_states) || |
| memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front))) { |
| needs_front_and_back = true; |
| } |
| |
| /* If the front and back configurations are the same we can emit both with |
| * a single packet. |
| */ |
| pipeline->emit_stencil_cfg[0] = true; |
| if (!needs_front_and_back) { |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0], |
| true, true, &ds_info->front, state); |
| } else { |
| pipeline->emit_stencil_cfg[1] = true; |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0], |
| true, false, &ds_info->front, state); |
| pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1], |
| false, true, &ds_info->back, state); |
| } |
| } |
| |
| |
| /* FIXME: Now that we are passing the vk_graphics_pipeline_state we could |
| * avoid passing all those parameters. But doing that we would need to change |
| * all the code that uses the VkXXX structures, and use instead the equivalent |
| * vk_xxx |
| */ |
| void |
| v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline, |
| const VkPipelineColorBlendStateCreateInfo *cb_info, |
| const VkPipelineDepthStencilStateCreateInfo *ds_info, |
| const VkPipelineRasterizationStateCreateInfo *rs_info, |
| const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info, |
| const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info, |
| const VkPipelineMultisampleStateCreateInfo *ms_info, |
| const struct vk_graphics_pipeline_state *state) |
| { |
| pack_blend(pipeline, cb_info); |
| pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info); |
| pack_stencil_cfg(pipeline, ds_info, state); |
| } |
| |
| static void |
| pack_shader_state_record(struct v3dv_pipeline *pipeline) |
| { |
| /* To siplify the code we ignore here GL_SHADER_STATE_RECORD_DRAW_INDEX |
| * used with 2712D0, since we know that has the same size as the regular |
| * version. |
| */ |
| assert(sizeof(pipeline->shader_state_record) >= |
| cl_packet_length(GL_SHADER_STATE_RECORD)); |
| |
| struct v3d_fs_prog_data *prog_data_fs = |
| pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; |
| |
| struct v3d_vs_prog_data *prog_data_vs = |
| pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; |
| |
| struct v3d_vs_prog_data *prog_data_vs_bin = |
| pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs; |
| |
| bool point_size_in_shaded_vertex_data; |
| if (!pipeline->has_gs) { |
| struct v3d_vs_prog_data *prog_data_vs = |
| pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; |
| point_size_in_shaded_vertex_data = prog_data_vs->writes_psiz; |
| } else { |
| struct v3d_gs_prog_data *prog_data_gs = |
| pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs; |
| point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz; |
| } |
| |
| /* Note: we are not packing addresses, as we need the job (see |
| * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this |
| * point as they depend on dynamic info that can be set after create the |
| * pipeline (like viewport), . Would need to be filled later, so we are |
| * doing a partial prepacking. |
| */ |
| #if V3D_VERSION >= 71 |
| /* 2712D0 (V3D 7.1.10) has included draw index and base vertex, shuffling all |
| * the fields in the packet. Since the versioning framework doesn't handle |
| * revision numbers, the XML has a different shader state record packet |
| * including the new fields and we device at run time which packet we need |
| * to emit. |
| */ |
| if (v3d_device_has_draw_index(&pipeline->device->devinfo)) { |
| v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD_DRAW_INDEX, shader) { |
| shader.enable_clipping = true; |
| shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data; |
| shader.fragment_shader_does_z_writes = prog_data_fs->writes_z; |
| shader.turn_off_early_z_test = prog_data_fs->disable_ez; |
| shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = |
| prog_data_fs->uses_center_w; |
| shader.enable_sample_rate_shading = |
| pipeline->sample_rate_shading || |
| (pipeline->msaa && prog_data_fs->force_per_sample_msaa); |
| shader.any_shader_reads_hardware_written_primitive_id = false; |
| shader.do_scoreboard_wait_on_first_thread_switch = |
| prog_data_fs->lock_scoreboard_on_first_thrsw; |
| shader.disable_implicit_point_line_varyings = |
| !prog_data_fs->uses_implicit_point_line_varyings; |
| shader.number_of_varyings_in_fragment_shader = prog_data_fs->num_inputs; |
| shader.coordinate_shader_input_vpm_segment_size = prog_data_vs_bin->vpm_input_size; |
| shader.vertex_shader_input_vpm_segment_size = prog_data_vs->vpm_input_size; |
| shader.coordinate_shader_output_vpm_segment_size = prog_data_vs_bin->vpm_output_size; |
| shader.vertex_shader_output_vpm_segment_size = prog_data_vs->vpm_output_size; |
| shader.min_coord_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg_bin.As; |
| shader.min_vertex_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg.As; |
| shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg_bin.Ve; |
| shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg.Ve; |
| shader.coordinate_shader_4_way_threadable = prog_data_vs_bin->base.threads == 4; |
| shader.vertex_shader_4_way_threadable = prog_data_vs->base.threads == 4; |
| shader.fragment_shader_4_way_threadable = prog_data_fs->base.threads == 4; |
| shader.coordinate_shader_start_in_final_thread_section = prog_data_vs_bin->base.single_seg; |
| shader.vertex_shader_start_in_final_thread_section = prog_data_vs->base.single_seg; |
| shader.fragment_shader_start_in_final_thread_section = prog_data_fs->base.single_seg; |
| shader.vertex_id_read_by_coordinate_shader = prog_data_vs_bin->uses_vid; |
| shader.base_instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_biid; |
| shader.instance_id_read_by_coordinate_shader = prog_data_vs_bin->uses_iid; |
| shader.vertex_id_read_by_vertex_shader = prog_data_vs->uses_vid; |
| shader.base_instance_id_read_by_vertex_shader = prog_data_vs->uses_biid; |
| shader.instance_id_read_by_vertex_shader = prog_data_vs->uses_iid; |
| } |
| return; |
| } |
| #endif |
| |
| v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) { |
| shader.enable_clipping = true; |
| shader.point_size_in_shaded_vertex_data = point_size_in_shaded_vertex_data; |
| |
| /* Must be set if the shader modifies Z, discards, or modifies |
| * the sample mask. For any of these cases, the fragment |
| * shader needs to write the Z value (even just discards). |
| */ |
| shader.fragment_shader_does_z_writes = prog_data_fs->writes_z; |
| |
| /* Set if the EZ test must be disabled (due to shader side |
| * effects and the early_z flag not being present in the |
| * shader). |
| */ |
| shader.turn_off_early_z_test = prog_data_fs->disable_ez; |
| |
| shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = |
| prog_data_fs->uses_center_w; |
| |
| /* The description for gl_SampleID states that if a fragment shader reads |
| * it, then we should automatically activate per-sample shading. However, |
| * the Vulkan spec also states that if a framebuffer has no attachments: |
| * |
| * "The subpass continues to use the width, height, and layers of the |
| * framebuffer to define the dimensions of the rendering area, and the |
| * rasterizationSamples from each pipeline’s |
| * VkPipelineMultisampleStateCreateInfo to define the number of |
| * samples used in rasterization multisample rasterization." |
| * |
| * So in this scenario, if the pipeline doesn't enable multiple samples |
| * but the fragment shader accesses gl_SampleID we would be requested |
| * to do per-sample shading in single sample rasterization mode, which |
| * is pointless, so just disable it in that case. |
| */ |
| shader.enable_sample_rate_shading = |
| pipeline->sample_rate_shading || |
| (pipeline->msaa && prog_data_fs->force_per_sample_msaa); |
| |
| shader.any_shader_reads_hardware_written_primitive_id = false; |
| |
| shader.do_scoreboard_wait_on_first_thread_switch = |
| prog_data_fs->lock_scoreboard_on_first_thrsw; |
| shader.disable_implicit_point_line_varyings = |
| !prog_data_fs->uses_implicit_point_line_varyings; |
| |
| shader.number_of_varyings_in_fragment_shader = |
| prog_data_fs->num_inputs; |
| |
| /* Note: see previous note about addresses */ |
| /* shader.coordinate_shader_code_address */ |
| /* shader.vertex_shader_code_address */ |
| /* shader.fragment_shader_code_address */ |
| |
| #if V3D_VERSION == 42 |
| shader.coordinate_shader_propagate_nans = true; |
| shader.vertex_shader_propagate_nans = true; |
| shader.fragment_shader_propagate_nans = true; |
| |
| /* FIXME: Use combined input/output size flag in the common case (also |
| * on v3d, see v3dx_draw). |
| */ |
| shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = |
| prog_data_vs_bin->separate_segments; |
| shader.vertex_shader_has_separate_input_and_output_vpm_blocks = |
| prog_data_vs->separate_segments; |
| shader.coordinate_shader_input_vpm_segment_size = |
| prog_data_vs_bin->separate_segments ? |
| prog_data_vs_bin->vpm_input_size : 1; |
| shader.vertex_shader_input_vpm_segment_size = |
| prog_data_vs->separate_segments ? |
| prog_data_vs->vpm_input_size : 1; |
| #endif |
| |
| /* On V3D 7.1 there isn't a specific flag to set if we are using |
| * shared/separate segments or not. We just set the value of |
| * vpm_input_size to 0, and set output to the max needed. That should be |
| * already properly set on prog_data_vs_bin |
| */ |
| #if V3D_VERSION == 71 |
| shader.coordinate_shader_input_vpm_segment_size = |
| prog_data_vs_bin->vpm_input_size; |
| shader.vertex_shader_input_vpm_segment_size = |
| prog_data_vs->vpm_input_size; |
| #endif |
| |
| shader.coordinate_shader_output_vpm_segment_size = |
| prog_data_vs_bin->vpm_output_size; |
| shader.vertex_shader_output_vpm_segment_size = |
| prog_data_vs->vpm_output_size; |
| |
| /* Note: see previous note about addresses */ |
| /* shader.coordinate_shader_uniforms_address */ |
| /* shader.vertex_shader_uniforms_address */ |
| /* shader.fragment_shader_uniforms_address */ |
| |
| shader.min_coord_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg_bin.As; |
| shader.min_vertex_shader_input_segments_required_in_play = |
| pipeline->vpm_cfg.As; |
| |
| shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg_bin.Ve; |
| shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = |
| pipeline->vpm_cfg.Ve; |
| |
| shader.coordinate_shader_4_way_threadable = |
| prog_data_vs_bin->base.threads == 4; |
| shader.vertex_shader_4_way_threadable = |
| prog_data_vs->base.threads == 4; |
| shader.fragment_shader_4_way_threadable = |
| prog_data_fs->base.threads == 4; |
| |
| shader.coordinate_shader_start_in_final_thread_section = |
| prog_data_vs_bin->base.single_seg; |
| shader.vertex_shader_start_in_final_thread_section = |
| prog_data_vs->base.single_seg; |
| shader.fragment_shader_start_in_final_thread_section = |
| prog_data_fs->base.single_seg; |
| |
| shader.vertex_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_vid; |
| shader.base_instance_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_biid; |
| shader.instance_id_read_by_coordinate_shader = |
| prog_data_vs_bin->uses_iid; |
| shader.vertex_id_read_by_vertex_shader = |
| prog_data_vs->uses_vid; |
| shader.base_instance_id_read_by_vertex_shader = |
| prog_data_vs->uses_biid; |
| shader.instance_id_read_by_vertex_shader = |
| prog_data_vs->uses_iid; |
| |
| /* Note: see previous note about addresses */ |
| /* shader.address_of_default_attribute_values */ |
| } |
| } |
| |
| static void |
| pack_vcm_cache_size(struct v3dv_pipeline *pipeline) |
| { |
| assert(sizeof(pipeline->vcm_cache_size) == |
| cl_packet_length(VCM_CACHE_SIZE)); |
| |
| v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) { |
| vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc; |
| vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc; |
| } |
| } |
| |
| /* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */ |
| static uint8_t |
| get_attr_type(const struct util_format_description *desc) |
| { |
| uint32_t r_size = desc->channel[0].size; |
| uint8_t attr_type = ATTRIBUTE_FLOAT; |
| |
| switch (desc->channel[0].type) { |
| case UTIL_FORMAT_TYPE_FLOAT: |
| if (r_size == 32) { |
| attr_type = ATTRIBUTE_FLOAT; |
| } else { |
| assert(r_size == 16); |
| attr_type = ATTRIBUTE_HALF_FLOAT; |
| } |
| break; |
| |
| case UTIL_FORMAT_TYPE_SIGNED: |
| case UTIL_FORMAT_TYPE_UNSIGNED: |
| switch (r_size) { |
| case 32: |
| attr_type = ATTRIBUTE_INT; |
| break; |
| case 16: |
| attr_type = ATTRIBUTE_SHORT; |
| break; |
| case 10: |
| attr_type = ATTRIBUTE_INT2_10_10_10; |
| break; |
| case 8: |
| attr_type = ATTRIBUTE_BYTE; |
| break; |
| default: |
| mesa_loge("format %s unsupported\n", desc->name); |
| abort(); |
| } |
| break; |
| |
| default: |
| mesa_loge("format %s unsupported\n", desc->name); |
| abort(); |
| } |
| |
| return attr_type; |
| } |
| |
| static void |
| pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline, |
| uint32_t index, |
| const VkVertexInputAttributeDescription *vi_desc) |
| { |
| const uint32_t packet_length = |
| cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); |
| |
| const struct util_format_description *desc = |
| vk_format_description(vi_desc->format); |
| |
| uint32_t binding = vi_desc->binding; |
| |
| v3dvx_pack(&pipeline->vertex_attrs[index * packet_length], |
| GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { |
| |
| /* vec_size == 0 means 4 */ |
| attr.vec_size = desc->nr_channels & 3; |
| attr.signed_int_type = (desc->channel[0].type == |
| UTIL_FORMAT_TYPE_SIGNED); |
| attr.normalized_int_type = desc->channel[0].normalized; |
| attr.read_as_int_uint = desc->channel[0].pure_integer; |
| |
| attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor, |
| V3D_MAX_VERTEX_ATTRIB_DIVISOR); |
| attr.type = get_attr_type(desc); |
| } |
| } |
| |
| void |
| v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, |
| const VkPipelineVertexInputStateCreateInfo *vi_info, |
| const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info) |
| { |
| pack_shader_state_record(pipeline); |
| pack_vcm_cache_size(pipeline); |
| |
| pipeline->vb_count = vi_info->vertexBindingDescriptionCount; |
| for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { |
| const VkVertexInputBindingDescription *desc = |
| &vi_info->pVertexBindingDescriptions[i]; |
| |
| pipeline->vb[desc->binding].instance_divisor = desc->inputRate; |
| } |
| |
| if (vd_info) { |
| for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) { |
| const VkVertexInputBindingDivisorDescriptionEXT *desc = |
| &vd_info->pVertexBindingDivisors[i]; |
| |
| pipeline->vb[desc->binding].instance_divisor = desc->divisor; |
| } |
| } |
| |
| pipeline->va_count = 0; |
| struct v3d_vs_prog_data *prog_data_vs = |
| pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; |
| |
| for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { |
| const VkVertexInputAttributeDescription *desc = |
| &vi_info->pVertexAttributeDescriptions[i]; |
| uint32_t location = desc->location + VERT_ATTRIB_GENERIC0; |
| |
| /* We use a custom driver_location_map instead of |
| * nir_find_variable_with_location because if we were able to get the |
| * shader variant from the cache, we would not have the nir shader |
| * available. |
| */ |
| uint32_t driver_location = |
| prog_data_vs->driver_location_map[location]; |
| |
| if (driver_location != -1) { |
| assert(driver_location < MAX_VERTEX_ATTRIBS); |
| pipeline->va[driver_location].offset = desc->offset; |
| pipeline->va[driver_location].binding = desc->binding; |
| pipeline->va[driver_location].vk_format = desc->format; |
| |
| pack_shader_state_attribute_record(pipeline, driver_location, desc); |
| |
| pipeline->va_count++; |
| } |
| } |
| } |
| |
#if V3D_VERSION == 42
/*
 * Returns true if any of the first va_count vertex attributes of the
 * pipeline has an integer format.
 */
static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
{
   bool found = false;

   /* Stop scanning as soon as we find an integer attribute */
   for (uint8_t i = 0; i < pipeline->va_count && !found; i++)
      found = vk_format_is_int(pipeline->va[i].vk_format);

   return found;
}
#endif
| |
| bool |
| v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline) |
| { |
| #if V3D_VERSION == 42 |
| return pipeline_has_integer_vertex_attrib(pipeline); |
| #endif |
| |
| return false; |
| } |
| |
| /* @pipeline can be NULL. In that case we assume the most common case. For |
| * example, for v42 we assume in that case that all the attributes have a |
| * float format (we only create an all-float BO once and we reuse it with all |
| * float pipelines), otherwise we look at the actual type of each attribute |
| * used with the specific pipeline passed in. |
| */ |
| struct v3dv_bo * |
| v3dX(create_default_attribute_values)(struct v3dv_device *device, |
| struct v3dv_pipeline *pipeline) |
| { |
| #if V3D_VERSION >= 71 |
| return NULL; |
| #endif |
| |
| uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4; |
| struct v3dv_bo *bo; |
| |
| bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true); |
| |
| if (!bo) { |
| mesa_loge("failed to allocate memory for the default " |
| "attribute values\n"); |
| return NULL; |
| } |
| |
| bool ok = v3dv_bo_map(device, bo, size); |
| if (!ok) { |
| mesa_loge("failed to map default attribute values buffer\n"); |
| return NULL; |
| } |
| |
| uint32_t *attrs = bo->map; |
| uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0; |
| for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) { |
| attrs[i * 4 + 0] = 0; |
| attrs[i * 4 + 1] = 0; |
| attrs[i * 4 + 2] = 0; |
| VkFormat attr_format = |
| pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED; |
| if (i < va_count && vk_format_is_int(attr_format)) { |
| attrs[i * 4 + 3] = 1; |
| } else { |
| attrs[i * 4 + 3] = fui(1.0); |
| } |
| } |
| |
| v3dv_bo_unmap(device, bo); |
| |
| return bo; |
| } |