| /* |
| * Copyright © 2018 Valve Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| */ |
| |
| #include "nir.h" |
| |
| /* This pass computes for each ssa definition if it is uniform. |
| * That is, the variable has the same value for all invocations |
| * of the group. |
| * |
| * This divergence analysis pass expects the shader to be in LCSSA-form. |
| * |
| * This algorithm implements "The Simple Divergence Analysis" from |
| * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira. |
| * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS), |
| * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2> |
| */ |
| |
| static bool |
| visit_cf_list(bool *divergent, struct exec_list *list, |
| nir_divergence_options options, gl_shader_stage stage); |
| |
| static bool |
| visit_alu(bool *divergent, nir_alu_instr *instr) |
| { |
| if (divergent[instr->dest.dest.ssa.index]) |
| return false; |
| |
| unsigned num_src = nir_op_infos[instr->op].num_inputs; |
| |
| for (unsigned i = 0; i < num_src; i++) { |
| if (divergent[instr->src[i].src.ssa->index]) { |
| divergent[instr->dest.dest.ssa.index] = true; |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| static bool |
| visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr, |
| nir_divergence_options options, gl_shader_stage stage) |
| { |
| if (!nir_intrinsic_infos[instr->intrinsic].has_dest) |
| return false; |
| |
| if (divergent[instr->dest.ssa.index]) |
| return false; |
| |
| bool is_divergent = false; |
| switch (instr->intrinsic) { |
| /* Intrinsics which are always uniform */ |
| case nir_intrinsic_shader_clock: |
| case nir_intrinsic_ballot: |
| case nir_intrinsic_read_invocation: |
| case nir_intrinsic_read_first_invocation: |
| case nir_intrinsic_vote_any: |
| case nir_intrinsic_vote_all: |
| case nir_intrinsic_vote_feq: |
| case nir_intrinsic_vote_ieq: |
| case nir_intrinsic_load_work_dim: |
| case nir_intrinsic_load_work_group_id: |
| case nir_intrinsic_load_num_work_groups: |
| case nir_intrinsic_load_local_group_size: |
| case nir_intrinsic_load_subgroup_id: |
| case nir_intrinsic_load_num_subgroups: |
| case nir_intrinsic_load_subgroup_size: |
| case nir_intrinsic_load_subgroup_eq_mask: |
| case nir_intrinsic_load_subgroup_ge_mask: |
| case nir_intrinsic_load_subgroup_gt_mask: |
| case nir_intrinsic_load_subgroup_le_mask: |
| case nir_intrinsic_load_subgroup_lt_mask: |
| case nir_intrinsic_first_invocation: |
| case nir_intrinsic_load_base_instance: |
| case nir_intrinsic_load_base_vertex: |
| case nir_intrinsic_load_first_vertex: |
| case nir_intrinsic_load_draw_id: |
| case nir_intrinsic_load_is_indexed_draw: |
| case nir_intrinsic_load_viewport_scale: |
| case nir_intrinsic_load_alpha_ref_float: |
| case nir_intrinsic_load_user_clip_plane: |
| case nir_intrinsic_load_viewport_x_scale: |
| case nir_intrinsic_load_viewport_y_scale: |
| case nir_intrinsic_load_viewport_z_scale: |
| case nir_intrinsic_load_viewport_offset: |
| case nir_intrinsic_load_viewport_z_offset: |
| case nir_intrinsic_load_blend_const_color_a_float: |
| case nir_intrinsic_load_blend_const_color_b_float: |
| case nir_intrinsic_load_blend_const_color_g_float: |
| case nir_intrinsic_load_blend_const_color_r_float: |
| case nir_intrinsic_load_blend_const_color_rgba: |
| case nir_intrinsic_load_blend_const_color_aaaa8888_unorm: |
| case nir_intrinsic_load_blend_const_color_rgba8888_unorm: |
| is_divergent = false; |
| break; |
| |
| /* Intrinsics with divergence depending on shader stage and hardware */ |
| case nir_intrinsic_load_input: |
| is_divergent = divergent[instr->src[0].ssa->index]; |
| if (stage == MESA_SHADER_FRAGMENT) |
| is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); |
| else if (stage == MESA_SHADER_TESS_EVAL) |
| is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup); |
| else |
| is_divergent = true; |
| break; |
| case nir_intrinsic_load_output: |
| assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT); |
| is_divergent = divergent[instr->src[0].ssa->index]; |
| if (stage == MESA_SHADER_TESS_CTRL) |
| is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); |
| else |
| is_divergent = true; |
| break; |
| case nir_intrinsic_load_layer_id: |
| case nir_intrinsic_load_front_face: |
| assert(stage == MESA_SHADER_FRAGMENT); |
| is_divergent = !(options & nir_divergence_single_prim_per_subgroup); |
| break; |
| case nir_intrinsic_load_view_index: |
| assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL); |
| if (options & nir_divergence_view_index_uniform) |
| is_divergent = false; |
| else if (stage == MESA_SHADER_FRAGMENT) |
| is_divergent = !(options & nir_divergence_single_prim_per_subgroup); |
| break; |
| case nir_intrinsic_load_fs_input_interp_deltas: |
| assert(stage == MESA_SHADER_FRAGMENT); |
| is_divergent = divergent[instr->src[0].ssa->index]; |
| is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); |
| break; |
| case nir_intrinsic_load_primitive_id: |
| if (stage == MESA_SHADER_FRAGMENT) |
| is_divergent = !(options & nir_divergence_single_prim_per_subgroup); |
| else if (stage == MESA_SHADER_TESS_CTRL) |
| is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); |
| else if (stage == MESA_SHADER_TESS_EVAL) |
| is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); |
| else |
| unreachable("Invalid stage for load_primitive_id"); |
| break; |
| case nir_intrinsic_load_tess_level_inner: |
| case nir_intrinsic_load_tess_level_outer: |
| if (stage == MESA_SHADER_TESS_CTRL) |
| is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); |
| else if (stage == MESA_SHADER_TESS_EVAL) |
| is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); |
| else |
| unreachable("Invalid stage for load_primitive_tess_level_*"); |
| break; |
| case nir_intrinsic_load_patch_vertices_in: |
| if (stage == MESA_SHADER_TESS_EVAL) |
| is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); |
| else |
| assert(stage == MESA_SHADER_TESS_CTRL); |
| break; |
| |
| /* Clustered reductions are uniform if cluster_size == subgroup_size or |
| * the source is uniform and the operation is invariant. |
| * Inclusive scans are uniform if |
| * the source is uniform and the operation is invariant |
| */ |
| case nir_intrinsic_reduce: |
| if (nir_intrinsic_cluster_size(instr) == 0) |
| return false; |
| /* fallthrough */ |
| case nir_intrinsic_inclusive_scan: { |
| nir_op op = nir_intrinsic_reduction_op(instr); |
| is_divergent = divergent[instr->src[0].ssa->index]; |
| if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin && |
| op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax && |
| op != nir_op_iand && op != nir_op_ior) |
| is_divergent = true; |
| break; |
| } |
| |
| /* Intrinsics with divergence depending on sources */ |
| case nir_intrinsic_ballot_bitfield_extract: |
| case nir_intrinsic_ballot_find_lsb: |
| case nir_intrinsic_ballot_find_msb: |
| case nir_intrinsic_ballot_bit_count_reduce: |
| case nir_intrinsic_shuffle: |
| case nir_intrinsic_shuffle_xor: |
| case nir_intrinsic_shuffle_up: |
| case nir_intrinsic_shuffle_down: |
| case nir_intrinsic_quad_broadcast: |
| case nir_intrinsic_quad_swap_horizontal: |
| case nir_intrinsic_quad_swap_vertical: |
| case nir_intrinsic_quad_swap_diagonal: |
| case nir_intrinsic_load_deref: |
| case nir_intrinsic_load_ubo: |
| case nir_intrinsic_load_ssbo: |
| case nir_intrinsic_load_shared: |
| case nir_intrinsic_load_global: |
| case nir_intrinsic_load_uniform: |
| case nir_intrinsic_load_push_constant: |
| case nir_intrinsic_load_constant: |
| case nir_intrinsic_load_sample_pos_from_id: |
| case nir_intrinsic_load_kernel_input: |
| case nir_intrinsic_image_load: |
| case nir_intrinsic_image_deref_load: |
| case nir_intrinsic_bindless_image_load: |
| case nir_intrinsic_image_samples: |
| case nir_intrinsic_image_deref_samples: |
| case nir_intrinsic_bindless_image_samples: |
| case nir_intrinsic_get_buffer_size: |
| case nir_intrinsic_image_size: |
| case nir_intrinsic_image_deref_size: |
| case nir_intrinsic_bindless_image_size: |
| case nir_intrinsic_copy_deref: |
| case nir_intrinsic_deref_buffer_array_length: |
| case nir_intrinsic_vulkan_resource_index: |
| case nir_intrinsic_vulkan_resource_reindex: |
| case nir_intrinsic_load_vulkan_descriptor: |
| case nir_intrinsic_atomic_counter_read: |
| case nir_intrinsic_atomic_counter_read_deref: |
| case nir_intrinsic_quad_swizzle_amd: |
| case nir_intrinsic_masked_swizzle_amd: { |
| unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; |
| for (unsigned i = 0; i < num_srcs; i++) { |
| if (divergent[instr->src[i].ssa->index]) { |
| is_divergent = true; |
| break; |
| } |
| } |
| break; |
| } |
| |
| /* Intrinsics which are always divergent */ |
| case nir_intrinsic_load_color0: |
| case nir_intrinsic_load_color1: |
| case nir_intrinsic_load_param: |
| case nir_intrinsic_load_sample_id: |
| case nir_intrinsic_load_sample_id_no_per_sample: |
| case nir_intrinsic_load_sample_mask_in: |
| case nir_intrinsic_load_interpolated_input: |
| case nir_intrinsic_load_barycentric_pixel: |
| case nir_intrinsic_load_barycentric_centroid: |
| case nir_intrinsic_load_barycentric_sample: |
| case nir_intrinsic_load_barycentric_at_sample: |
| case nir_intrinsic_load_barycentric_at_offset: |
| case nir_intrinsic_interp_deref_at_offset: |
| case nir_intrinsic_interp_deref_at_sample: |
| case nir_intrinsic_interp_deref_at_centroid: |
| case nir_intrinsic_load_tess_coord: |
| case nir_intrinsic_load_point_coord: |
| case nir_intrinsic_load_frag_coord: |
| case nir_intrinsic_load_sample_pos: |
| case nir_intrinsic_load_vertex_id_zero_base: |
| case nir_intrinsic_load_vertex_id: |
| case nir_intrinsic_load_per_vertex_input: |
| case nir_intrinsic_load_per_vertex_output: |
| case nir_intrinsic_load_instance_id: |
| case nir_intrinsic_load_invocation_id: |
| case nir_intrinsic_load_local_invocation_id: |
| case nir_intrinsic_load_local_invocation_index: |
| case nir_intrinsic_load_global_invocation_id: |
| case nir_intrinsic_load_global_invocation_index: |
| case nir_intrinsic_load_subgroup_invocation: |
| case nir_intrinsic_load_helper_invocation: |
| case nir_intrinsic_is_helper_invocation: |
| case nir_intrinsic_load_scratch: |
| case nir_intrinsic_deref_atomic_add: |
| case nir_intrinsic_deref_atomic_imin: |
| case nir_intrinsic_deref_atomic_umin: |
| case nir_intrinsic_deref_atomic_imax: |
| case nir_intrinsic_deref_atomic_umax: |
| case nir_intrinsic_deref_atomic_and: |
| case nir_intrinsic_deref_atomic_or: |
| case nir_intrinsic_deref_atomic_xor: |
| case nir_intrinsic_deref_atomic_exchange: |
| case nir_intrinsic_deref_atomic_comp_swap: |
| case nir_intrinsic_deref_atomic_fadd: |
| case nir_intrinsic_deref_atomic_fmin: |
| case nir_intrinsic_deref_atomic_fmax: |
| case nir_intrinsic_deref_atomic_fcomp_swap: |
| case nir_intrinsic_ssbo_atomic_add: |
| case nir_intrinsic_ssbo_atomic_imin: |
| case nir_intrinsic_ssbo_atomic_umin: |
| case nir_intrinsic_ssbo_atomic_imax: |
| case nir_intrinsic_ssbo_atomic_umax: |
| case nir_intrinsic_ssbo_atomic_and: |
| case nir_intrinsic_ssbo_atomic_or: |
| case nir_intrinsic_ssbo_atomic_xor: |
| case nir_intrinsic_ssbo_atomic_exchange: |
| case nir_intrinsic_ssbo_atomic_comp_swap: |
| case nir_intrinsic_ssbo_atomic_fadd: |
| case nir_intrinsic_ssbo_atomic_fmax: |
| case nir_intrinsic_ssbo_atomic_fmin: |
| case nir_intrinsic_ssbo_atomic_fcomp_swap: |
| case nir_intrinsic_image_deref_atomic_add: |
| case nir_intrinsic_image_deref_atomic_min: |
| case nir_intrinsic_image_deref_atomic_max: |
| case nir_intrinsic_image_deref_atomic_and: |
| case nir_intrinsic_image_deref_atomic_or: |
| case nir_intrinsic_image_deref_atomic_xor: |
| case nir_intrinsic_image_deref_atomic_exchange: |
| case nir_intrinsic_image_deref_atomic_comp_swap: |
| case nir_intrinsic_image_deref_atomic_fadd: |
| case nir_intrinsic_image_atomic_add: |
| case nir_intrinsic_image_atomic_min: |
| case nir_intrinsic_image_atomic_max: |
| case nir_intrinsic_image_atomic_and: |
| case nir_intrinsic_image_atomic_or: |
| case nir_intrinsic_image_atomic_xor: |
| case nir_intrinsic_image_atomic_exchange: |
| case nir_intrinsic_image_atomic_comp_swap: |
| case nir_intrinsic_image_atomic_fadd: |
| case nir_intrinsic_bindless_image_atomic_add: |
| case nir_intrinsic_bindless_image_atomic_min: |
| case nir_intrinsic_bindless_image_atomic_max: |
| case nir_intrinsic_bindless_image_atomic_and: |
| case nir_intrinsic_bindless_image_atomic_or: |
| case nir_intrinsic_bindless_image_atomic_xor: |
| case nir_intrinsic_bindless_image_atomic_exchange: |
| case nir_intrinsic_bindless_image_atomic_comp_swap: |
| case nir_intrinsic_bindless_image_atomic_fadd: |
| case nir_intrinsic_shared_atomic_add: |
| case nir_intrinsic_shared_atomic_imin: |
| case nir_intrinsic_shared_atomic_umin: |
| case nir_intrinsic_shared_atomic_imax: |
| case nir_intrinsic_shared_atomic_umax: |
| case nir_intrinsic_shared_atomic_and: |
| case nir_intrinsic_shared_atomic_or: |
| case nir_intrinsic_shared_atomic_xor: |
| case nir_intrinsic_shared_atomic_exchange: |
| case nir_intrinsic_shared_atomic_comp_swap: |
| case nir_intrinsic_shared_atomic_fadd: |
| case nir_intrinsic_shared_atomic_fmin: |
| case nir_intrinsic_shared_atomic_fmax: |
| case nir_intrinsic_shared_atomic_fcomp_swap: |
| case nir_intrinsic_global_atomic_add: |
| case nir_intrinsic_global_atomic_imin: |
| case nir_intrinsic_global_atomic_umin: |
| case nir_intrinsic_global_atomic_imax: |
| case nir_intrinsic_global_atomic_umax: |
| case nir_intrinsic_global_atomic_and: |
| case nir_intrinsic_global_atomic_or: |
| case nir_intrinsic_global_atomic_xor: |
| case nir_intrinsic_global_atomic_exchange: |
| case nir_intrinsic_global_atomic_comp_swap: |
| case nir_intrinsic_global_atomic_fadd: |
| case nir_intrinsic_global_atomic_fmin: |
| case nir_intrinsic_global_atomic_fmax: |
| case nir_intrinsic_global_atomic_fcomp_swap: |
| case nir_intrinsic_atomic_counter_add: |
| case nir_intrinsic_atomic_counter_min: |
| case nir_intrinsic_atomic_counter_max: |
| case nir_intrinsic_atomic_counter_and: |
| case nir_intrinsic_atomic_counter_or: |
| case nir_intrinsic_atomic_counter_xor: |
| case nir_intrinsic_atomic_counter_inc: |
| case nir_intrinsic_atomic_counter_pre_dec: |
| case nir_intrinsic_atomic_counter_post_dec: |
| case nir_intrinsic_atomic_counter_exchange: |
| case nir_intrinsic_atomic_counter_comp_swap: |
| case nir_intrinsic_atomic_counter_add_deref: |
| case nir_intrinsic_atomic_counter_min_deref: |
| case nir_intrinsic_atomic_counter_max_deref: |
| case nir_intrinsic_atomic_counter_and_deref: |
| case nir_intrinsic_atomic_counter_or_deref: |
| case nir_intrinsic_atomic_counter_xor_deref: |
| case nir_intrinsic_atomic_counter_inc_deref: |
| case nir_intrinsic_atomic_counter_pre_dec_deref: |
| case nir_intrinsic_atomic_counter_post_dec_deref: |
| case nir_intrinsic_atomic_counter_exchange_deref: |
| case nir_intrinsic_atomic_counter_comp_swap_deref: |
| case nir_intrinsic_exclusive_scan: |
| case nir_intrinsic_ballot_bit_count_exclusive: |
| case nir_intrinsic_ballot_bit_count_inclusive: |
| case nir_intrinsic_write_invocation_amd: |
| case nir_intrinsic_mbcnt_amd: |
| is_divergent = true; |
| break; |
| |
| default: |
| #ifdef NDEBUG |
| is_divergent = true; |
| break; |
| #else |
| nir_print_instr(&instr->instr, stderr); |
| unreachable("\nNIR divergence analysis: Unhandled intrinsic."); |
| #endif |
| } |
| |
| divergent[instr->dest.ssa.index] = is_divergent; |
| return is_divergent; |
| } |
| |
| static bool |
| visit_tex(bool *divergent, nir_tex_instr *instr) |
| { |
| if (divergent[instr->dest.ssa.index]) |
| return false; |
| |
| bool is_divergent = false; |
| |
| for (unsigned i = 0; i < instr->num_srcs; i++) { |
| switch (instr->src[i].src_type) { |
| case nir_tex_src_sampler_deref: |
| case nir_tex_src_sampler_handle: |
| case nir_tex_src_sampler_offset: |
| is_divergent |= divergent[instr->src[i].src.ssa->index] && |
| instr->sampler_non_uniform; |
| break; |
| case nir_tex_src_texture_deref: |
| case nir_tex_src_texture_handle: |
| case nir_tex_src_texture_offset: |
| is_divergent |= divergent[instr->src[i].src.ssa->index] && |
| instr->texture_non_uniform; |
| break; |
| default: |
| is_divergent |= divergent[instr->src[i].src.ssa->index]; |
| break; |
| } |
| } |
| |
| divergent[instr->dest.ssa.index] = is_divergent; |
| return is_divergent; |
| } |
| |
| static bool |
| visit_phi(bool *divergent, nir_phi_instr *instr) |
| { |
| /* There are 3 types of phi instructions: |
| * (1) gamma: represent the joining point of different paths |
| * created by an “if-then-else” branch. |
| * The resulting value is divergent if the branch condition |
| * or any of the source values is divergent. |
| * |
| * (2) mu: which only exist at loop headers, |
| * merge initial and loop-carried values. |
| * The resulting value is divergent if any source value |
| * is divergent or a divergent loop continue condition |
| * is associated with a different ssa-def. |
| * |
| * (3) eta: represent values that leave a loop. |
| * The resulting value is divergent if the source value is divergent |
| * or any loop exit condition is divergent for a value which is |
| * not loop-invariant. |
| * (note: there should be no phi for loop-invariant variables.) |
| */ |
| |
| if (divergent[instr->dest.ssa.index]) |
| return false; |
| |
| nir_foreach_phi_src(src, instr) { |
| /* if any source value is divergent, the resulting value is divergent */ |
| if (divergent[src->src.ssa->index]) { |
| divergent[instr->dest.ssa.index] = true; |
| return true; |
| } |
| } |
| |
| nir_cf_node *prev = nir_cf_node_prev(&instr->instr.block->cf_node); |
| |
| if (!prev) { |
| /* mu: if no predecessor node exists, the phi must be at a loop header */ |
| nir_loop *loop = nir_cf_node_as_loop(instr->instr.block->cf_node.parent); |
| prev = nir_cf_node_prev(&loop->cf_node); |
| nir_ssa_def* same = NULL; |
| bool all_same = true; |
| |
| /* first, check if all loop-carried values are from the same ssa-def */ |
| nir_foreach_phi_src(src, instr) { |
| if (src->pred == nir_cf_node_as_block(prev)) |
| continue; |
| if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef) |
| continue; |
| if (!same) |
| same = src->src.ssa; |
| else if (same != src->src.ssa) |
| all_same = false; |
| } |
| |
| /* if all loop-carried values are the same, the resulting value is uniform */ |
| if (all_same) |
| return false; |
| |
| /* check if the loop-carried values come from different ssa-defs |
| * and the corresponding condition is divergent. */ |
| nir_foreach_phi_src(src, instr) { |
| /* skip the loop preheader */ |
| if (src->pred == nir_cf_node_as_block(prev)) |
| continue; |
| |
| /* skip the unconditional back-edge */ |
| if (src->pred == nir_loop_last_block(loop)) |
| continue; |
| |
| /* if the value is undef, we don't need to check the condition */ |
| if (src->src.ssa->parent_instr->type == nir_instr_type_ssa_undef) |
| continue; |
| |
| nir_cf_node *current = src->pred->cf_node.parent; |
| /* check recursively the conditions if any is divergent */ |
| while (current->type != nir_cf_node_loop) { |
| assert (current->type == nir_cf_node_if); |
| nir_if *if_node = nir_cf_node_as_if(current); |
| if (divergent[if_node->condition.ssa->index]) { |
| divergent[instr->dest.ssa.index] = true; |
| return true; |
| } |
| current = current->parent; |
| } |
| assert(current == &loop->cf_node); |
| } |
| |
| } else if (prev->type == nir_cf_node_if) { |
| /* if only one of the incoming values is defined, the resulting value is uniform */ |
| unsigned defined_srcs = 0; |
| nir_foreach_phi_src(src, instr) { |
| if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) |
| defined_srcs++; |
| } |
| if (defined_srcs <= 1) |
| return false; |
| |
| /* gamma: check if the condition is divergent */ |
| nir_if *if_node = nir_cf_node_as_if(prev); |
| if (divergent[if_node->condition.ssa->index]) { |
| divergent[instr->dest.ssa.index] = true; |
| return true; |
| } |
| |
| } else { |
| /* eta: the predecessor must be a loop */ |
| assert(prev->type == nir_cf_node_loop); |
| |
| /* Check if any loop exit condition is divergent: |
| * That is any break happens under divergent condition or |
| * a break is preceeded by a divergent continue |
| */ |
| nir_foreach_phi_src(src, instr) { |
| nir_cf_node *current = src->pred->cf_node.parent; |
| |
| /* check recursively the conditions if any is divergent */ |
| while (current->type != nir_cf_node_loop) { |
| assert(current->type == nir_cf_node_if); |
| nir_if *if_node = nir_cf_node_as_if(current); |
| if (divergent[if_node->condition.ssa->index]) { |
| divergent[instr->dest.ssa.index] = true; |
| return true; |
| } |
| current = current->parent; |
| } |
| assert(current == prev); |
| |
| /* check if any divergent continue happened before the break */ |
| nir_foreach_block_in_cf_node(block, prev) { |
| if (block == src->pred) |
| break; |
| if (!nir_block_ends_in_jump(block)) |
| continue; |
| |
| nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); |
| if (jump->type != nir_jump_continue) |
| continue; |
| |
| current = block->cf_node.parent; |
| bool is_divergent = false; |
| while (current != prev) { |
| /* the continue belongs to an inner loop */ |
| if (current->type == nir_cf_node_loop) { |
| is_divergent = false; |
| break; |
| } |
| assert(current->type == nir_cf_node_if); |
| nir_if *if_node = nir_cf_node_as_if(current); |
| is_divergent |= divergent[if_node->condition.ssa->index]; |
| current = current->parent; |
| } |
| |
| if (is_divergent) { |
| divergent[instr->dest.ssa.index] = true; |
| return true; |
| } |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| static bool |
| visit_load_const(bool *divergent, nir_load_const_instr *instr) |
| { |
| return false; |
| } |
| |
| static bool |
| visit_ssa_undef(bool *divergent, nir_ssa_undef_instr *instr) |
| { |
| return false; |
| } |
| |
| static bool |
| nir_variable_mode_is_uniform(nir_variable_mode mode) { |
| switch (mode) { |
| case nir_var_uniform: |
| case nir_var_mem_ubo: |
| case nir_var_mem_ssbo: |
| case nir_var_mem_shared: |
| case nir_var_mem_global: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| static bool |
| nir_variable_is_uniform(nir_variable *var, nir_divergence_options options, |
| gl_shader_stage stage) |
| { |
| if (nir_variable_mode_is_uniform(var->data.mode)) |
| return true; |
| |
| if (stage == MESA_SHADER_FRAGMENT && |
| (options & nir_divergence_single_prim_per_subgroup) && |
| var->data.mode == nir_var_shader_in && |
| var->data.interpolation == INTERP_MODE_FLAT) |
| return true; |
| |
| if (stage == MESA_SHADER_TESS_CTRL && |
| (options & nir_divergence_single_patch_per_tcs_subgroup) && |
| var->data.mode == nir_var_shader_out && var->data.patch) |
| return true; |
| |
| if (stage == MESA_SHADER_TESS_EVAL && |
| (options & nir_divergence_single_patch_per_tes_subgroup) && |
| var->data.mode == nir_var_shader_in && var->data.patch) |
| return true; |
| |
| return false; |
| } |
| |
| static bool |
| visit_deref(bool *divergent, nir_deref_instr *deref, |
| nir_divergence_options options, gl_shader_stage stage) |
| { |
| if (divergent[deref->dest.ssa.index]) |
| return false; |
| |
| bool is_divergent = false; |
| switch (deref->deref_type) { |
| case nir_deref_type_var: |
| is_divergent = !nir_variable_is_uniform(deref->var, options, stage); |
| break; |
| case nir_deref_type_array: |
| case nir_deref_type_ptr_as_array: |
| is_divergent = divergent[deref->arr.index.ssa->index]; |
| /* fallthrough */ |
| case nir_deref_type_struct: |
| case nir_deref_type_array_wildcard: |
| is_divergent |= divergent[deref->parent.ssa->index]; |
| break; |
| case nir_deref_type_cast: |
| is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) || |
| divergent[deref->parent.ssa->index]; |
| break; |
| } |
| |
| divergent[deref->dest.ssa.index] = is_divergent; |
| return is_divergent; |
| } |
| |
| static bool |
| visit_block(bool *divergent, nir_block *block, nir_divergence_options options, |
| gl_shader_stage stage) |
| { |
| bool has_changed = false; |
| |
| nir_foreach_instr(instr, block) { |
| switch (instr->type) { |
| case nir_instr_type_alu: |
| has_changed |= visit_alu(divergent, nir_instr_as_alu(instr)); |
| break; |
| case nir_instr_type_intrinsic: |
| has_changed |= visit_intrinsic(divergent, nir_instr_as_intrinsic(instr), |
| options, stage); |
| break; |
| case nir_instr_type_tex: |
| has_changed |= visit_tex(divergent, nir_instr_as_tex(instr)); |
| break; |
| case nir_instr_type_phi: |
| has_changed |= visit_phi(divergent, nir_instr_as_phi(instr)); |
| break; |
| case nir_instr_type_load_const: |
| has_changed |= visit_load_const(divergent, nir_instr_as_load_const(instr)); |
| break; |
| case nir_instr_type_ssa_undef: |
| has_changed |= visit_ssa_undef(divergent, nir_instr_as_ssa_undef(instr)); |
| break; |
| case nir_instr_type_deref: |
| has_changed |= visit_deref(divergent, nir_instr_as_deref(instr), |
| options, stage); |
| break; |
| case nir_instr_type_jump: |
| break; |
| case nir_instr_type_call: |
| case nir_instr_type_parallel_copy: |
| unreachable("NIR divergence analysis: Unsupported instruction type."); |
| } |
| } |
| |
| return has_changed; |
| } |
| |
| static bool |
| visit_if(bool *divergent, nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage) |
| { |
| return visit_cf_list(divergent, &if_stmt->then_list, options, stage) | |
| visit_cf_list(divergent, &if_stmt->else_list, options, stage); |
| } |
| |
| static bool |
| visit_loop(bool *divergent, nir_loop *loop, nir_divergence_options options, gl_shader_stage stage) |
| { |
| bool has_changed = false; |
| bool repeat = true; |
| |
| /* TODO: restructure this and the phi handling more efficiently */ |
| while (repeat) { |
| repeat = visit_cf_list(divergent, &loop->body, options, stage); |
| has_changed |= repeat; |
| } |
| |
| return has_changed; |
| } |
| |
| static bool |
| visit_cf_list(bool *divergent, struct exec_list *list, |
| nir_divergence_options options, gl_shader_stage stage) |
| { |
| bool has_changed = false; |
| |
| foreach_list_typed(nir_cf_node, node, node, list) { |
| switch (node->type) { |
| case nir_cf_node_block: |
| has_changed |= visit_block(divergent, nir_cf_node_as_block(node), |
| options, stage); |
| break; |
| case nir_cf_node_if: |
| has_changed |= visit_if(divergent, nir_cf_node_as_if(node), |
| options, stage); |
| break; |
| case nir_cf_node_loop: |
| has_changed |= visit_loop(divergent, nir_cf_node_as_loop(node), |
| options, stage); |
| break; |
| case nir_cf_node_function: |
| unreachable("NIR divergence analysis: Unsupported cf_node type."); |
| } |
| } |
| |
| return has_changed; |
| } |
| |
| |
| bool* |
| nir_divergence_analysis(nir_shader *shader, nir_divergence_options options) |
| { |
| nir_function_impl *impl = nir_shader_get_entrypoint(shader); |
| bool *t = rzalloc_array(shader, bool, impl->ssa_alloc); |
| |
| visit_cf_list(t, &impl->body, options, shader->info.stage); |
| |
| return t; |
| } |