| /* |
| * Copyright © 2021 Google, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "nir.h" |
| #include "nir_builder.h" |
| |
| /* |
| * This pass tries to reduce the bitsize of phi instructions by either |
| * moving narrowing conversions from the phi's consumers to the phi's |
| * sources, if all the uses of the phi are equivalent narrowing |
| * instructions. In other words, convert: |
| * |
| * vec1 32 ssa_124 = load_const (0x00000000) |
| * ... |
| * loop { |
| * ... |
| * vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53 |
| * vec1 16 ssa_8 = i2imp ssa_155 |
| * ... |
| * vec1 32 ssa_53 = i2i32 ssa_52 |
| * } |
| * |
| * into: |
| * |
| * vec1 32 ssa_124 = load_const (0x00000000) |
| * vec1 16 ssa_156 = i2imp ssa_124 |
| * ... |
| * loop { |
| * ... |
| * vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157 |
| * ... |
| * vec1 32 ssa_53 = i2i32 ssa_52 |
| * vec1 16 ssa_157 = i2i16 ssa_53 |
| * } |
| * |
| * Or failing that, tries to push widening conversion of phi srcs to |
| * the phi def. In this case, since load_const is frequently one |
| * of the phi sources this pass checks if can be narrowed without a |
| * loss of precision: |
| * |
| * vec1 32 ssa_0 = load_const (0x00000000) |
| * ... |
| * loop { |
| * ... |
| * vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19 |
| * ... |
| * vec1 16 ssa_18 = iadd ssa_21, ssa_3 |
| * vec1 32 ssa_19 = i2i32 ssa_18 |
| * } |
| * |
| * into: |
| * |
| * vec1 32 ssa_0 = load_const (0x00000000) |
| * vec1 16 ssa_22 = i2i16 ssa_0 |
| * ... |
| * loop { |
| * ... |
| * vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18 |
| * vec1 32 ssa_23 = i2i32 ssa_8 |
| * ... |
| * vec1 16 ssa_18 = iadd ssa_21, ssa_3 |
| * } |
| * |
| * Note that either transformations can convert x2ymp into x2y16, which |
| * is normally done later in nir_opt_algebraic_late(), losing the option |
| * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts |
| * cannot see through phis. |
| */ |
| |
| #define INVALID_OP nir_num_opcodes |
| |
| /** |
| * Get the corresponding exact conversion for a x2ymp conversion |
| */ |
| static nir_op |
| concrete_conversion(nir_op op) |
| { |
| switch (op) { |
| case nir_op_i2imp: return nir_op_i2i16; |
| case nir_op_i2fmp: return nir_op_i2f16; |
| case nir_op_u2fmp: return nir_op_u2f16; |
| case nir_op_f2fmp: return nir_op_f2f16; |
| case nir_op_f2imp: return nir_op_f2i16; |
| case nir_op_f2ump: return nir_op_f2u16; |
| default: return op; |
| } |
| } |
| |
| static nir_op |
| narrowing_conversion_op(nir_instr *instr, nir_op current_op) |
| { |
| if (instr->type != nir_instr_type_alu) |
| return INVALID_OP; |
| |
| nir_op op = nir_instr_as_alu(instr)->op; |
| switch (op) { |
| case nir_op_i2imp: |
| case nir_op_i2i16: |
| case nir_op_i2fmp: |
| case nir_op_i2f16: |
| case nir_op_u2fmp: |
| case nir_op_u2f16: |
| case nir_op_f2fmp: |
| case nir_op_f2f16: |
| case nir_op_f2imp: |
| case nir_op_f2i16: |
| case nir_op_f2ump: |
| case nir_op_f2u16: |
| case nir_op_f2f16_rtne: |
| case nir_op_f2f16_rtz: |
| break; |
| default: |
| return INVALID_OP; |
| } |
| |
| /* If we've already picked a conversion op from a previous phi use, |
| * make sure it is compatible with the current use |
| */ |
| if (current_op != INVALID_OP) { |
| if (current_op != op) { |
| /* If we have different conversions, but one can be converted |
| * to the other, then let's do that: |
| */ |
| if (concrete_conversion(current_op) == concrete_conversion(op)) { |
| op = concrete_conversion(op); |
| } else { |
| return INVALID_OP; |
| } |
| } |
| } |
| |
| return op; |
| } |
| |
| static nir_op |
| widening_conversion_op(nir_instr *instr, unsigned *bit_size) |
| { |
| if (instr->type != nir_instr_type_alu) |
| return INVALID_OP; |
| |
| nir_alu_instr *alu = nir_instr_as_alu(instr); |
| switch (alu->op) { |
| case nir_op_i2i32: |
| case nir_op_i2f32: |
| case nir_op_u2f32: |
| case nir_op_f2f32: |
| case nir_op_f2i32: |
| case nir_op_f2u32: |
| break; |
| default: |
| return INVALID_OP; |
| } |
| |
| *bit_size = nir_src_bit_size(alu->src[0].src); |
| |
| /* We also need to check that the conversion's dest was actually |
| * wider: |
| */ |
| if (nir_dest_bit_size(alu->dest.dest) <= *bit_size) |
| return INVALID_OP; |
| |
| return alu->op; |
| } |
| |
| static nir_alu_type |
| op_to_type(nir_op op) |
| { |
| return nir_alu_type_get_base_type(nir_op_infos[op].output_type); |
| } |
| |
| /* Try to move narrowing instructions consuming the phi into the phi's |
| * sources to reduce the phi's precision: |
| */ |
| static bool |
| try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi) |
| { |
| nir_op op = INVALID_OP; |
| |
| assert(phi->dest.is_ssa); |
| |
| /* If the phi has already been narrowed, nothing more to do: */ |
| if (phi->dest.ssa.bit_size != 32) |
| return false; |
| |
| /* Are the only uses of the phi conversion instructions, and |
| * are they all the same conversion? |
| */ |
| nir_foreach_use (use, &phi->dest.ssa) { |
| op = narrowing_conversion_op(use->parent_instr, op); |
| |
| /* Not a (compatible) narrowing conversion: */ |
| if (op == INVALID_OP) |
| return false; |
| } |
| |
| /* an if_uses means the phi is used directly in a conditional, ie. |
| * without a conversion |
| */ |
| if (!list_is_empty(&phi->dest.ssa.if_uses)) |
| return false; |
| |
| /* If the phi has no uses, then nothing to do: */ |
| if (op == INVALID_OP) |
| return false; |
| |
| /* construct replacement phi instruction: */ |
| nir_phi_instr *new_phi = nir_phi_instr_create(b->shader); |
| nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, |
| phi->dest.ssa.num_components, |
| nir_alu_type_get_type_size(nir_op_infos[op].output_type), |
| NULL); |
| |
| /* Push the conversion into the new phi sources: */ |
| nir_foreach_phi_src (src, phi) { |
| assert(src->src.is_ssa); |
| |
| /* insert new conversion instr in block of original phi src: */ |
| b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr); |
| nir_ssa_def *old_src = src->src.ssa; |
| nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL); |
| |
| /* and add corresponding phi_src to the new_phi: */ |
| nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src)); |
| } |
| |
| /* And finally rewrite the original uses of the original phi uses to |
| * directly use the new phi, skipping the conversion out of the orig |
| * phi |
| */ |
| nir_foreach_use (use, &phi->dest.ssa) { |
| /* We've previously established that all the uses were alu |
| * conversion ops. Turn them into movs instead. |
| */ |
| nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr); |
| alu->op = nir_op_mov; |
| } |
| nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa); |
| |
| /* And finally insert the new phi after all sources are in place: */ |
| b->cursor = nir_after_instr(&phi->instr); |
| nir_builder_instr_insert(b, &new_phi->instr); |
| |
| return true; |
| } |
| |
| static bool |
| can_convert_load_const(nir_load_const_instr *lc, nir_op op) |
| { |
| nir_alu_type type = op_to_type(op); |
| |
| /* Note that we only handle phi's with bit_size == 32: */ |
| assert(lc->def.bit_size == 32); |
| |
| for (unsigned i = 0; i < lc->def.num_components; i++) { |
| switch (type) { |
| case nir_type_int: |
| if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32) |
| return false; |
| break; |
| case nir_type_uint: |
| if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32) |
| return false; |
| break; |
| case nir_type_float: |
| if (lc->value[i].f32 != _mesa_half_to_float( |
| _mesa_float_to_half(lc->value[i].f32))) |
| return false; |
| break; |
| default: |
| unreachable("bad type"); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* Check all the phi sources to see if they are the same widening op, in |
| * which case we can push the widening op to the other side of the phi |
| */ |
| static nir_op |
| find_widening_op(nir_phi_instr *phi, unsigned *bit_size) |
| { |
| nir_op op = INVALID_OP; |
| |
| bool has_load_const = false; |
| *bit_size = 0; |
| |
| nir_foreach_phi_src (src, phi) { |
| assert(src->src.is_ssa); |
| |
| nir_instr *instr = src->src.ssa->parent_instr; |
| if (instr->type == nir_instr_type_load_const) { |
| has_load_const = true; |
| continue; |
| } |
| |
| unsigned src_bit_size; |
| nir_op src_op = widening_conversion_op(instr, &src_bit_size); |
| |
| /* Not a widening conversion: */ |
| if (src_op == INVALID_OP) |
| return INVALID_OP; |
| |
| /* If it is a widening conversion, it needs to be the same op as |
| * other phi sources: |
| */ |
| if ((op != INVALID_OP) && (op != src_op)) |
| return INVALID_OP; |
| |
| if (*bit_size && (*bit_size != src_bit_size)) |
| return INVALID_OP; |
| |
| op = src_op; |
| *bit_size = src_bit_size; |
| } |
| |
| if ((op == INVALID_OP) || !has_load_const) |
| return op; |
| |
| /* If we could otherwise move widening sources, but load_const is |
| * one of the phi sources (and does not have a widening conversion, |
| * but could have a narrowing->widening sequence inserted without |
| * loss of precision), then we could insert a narrowing->widening |
| * sequence to make the rest of the transformation possible: |
| */ |
| nir_foreach_phi_src (src, phi) { |
| assert(src->src.is_ssa); |
| |
| nir_instr *instr = src->src.ssa->parent_instr; |
| if (instr->type != nir_instr_type_load_const) |
| continue; |
| |
| if (!can_convert_load_const(nir_instr_as_load_const(instr), op)) |
| return INVALID_OP; |
| } |
| |
| return op; |
| } |
| |
| /* Try to move widening conversions into the phi to the phi's output |
| * to reduce the phi's precision: |
| */ |
| static bool |
| try_move_widening_src(nir_builder *b, nir_phi_instr *phi) |
| { |
| assert(phi->dest.is_ssa); |
| |
| /* If the phi has already been narrowed, nothing more to do: */ |
| if (phi->dest.ssa.bit_size != 32) |
| return false; |
| |
| unsigned bit_size; |
| nir_op op = find_widening_op(phi, &bit_size); |
| |
| if (op == INVALID_OP) |
| return false; |
| |
| /* construct replacement phi instruction: */ |
| nir_phi_instr *new_phi = nir_phi_instr_create(b->shader); |
| nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, |
| phi->dest.ssa.num_components, |
| bit_size, NULL); |
| |
| /* Remove the widening conversions from the phi sources: */ |
| nir_foreach_phi_src (src, phi) { |
| assert(src->src.is_ssa); |
| |
| nir_instr *instr = src->src.ssa->parent_instr; |
| nir_ssa_def *new_src; |
| |
| b->cursor = nir_after_instr(instr); |
| |
| if (instr->type == nir_instr_type_load_const) { |
| /* if the src is a load_const, we've already verified that it |
| * is safe to insert a narrowing conversion to make the rest |
| * of this transformation legal: |
| */ |
| nir_load_const_instr *lc = nir_instr_as_load_const(instr); |
| |
| if (op_to_type(op) == nir_type_float) { |
| new_src = nir_f2f16(b, &lc->def); |
| } else { |
| new_src = nir_i2i16(b, &lc->def); |
| } |
| } else { |
| /* at this point we know the sources source is a conversion: */ |
| nir_alu_instr *alu = nir_instr_as_alu(instr); |
| |
| /* The conversion we are stripping off could have had a swizzle, |
| * so replace it with a mov if necessary: |
| */ |
| unsigned num_comp = nir_dest_num_components(alu->dest.dest); |
| new_src = nir_mov_alu(b, alu->src[0], num_comp); |
| } |
| |
| /* add corresponding phi_src to the new_phi: */ |
| nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src)); |
| } |
| |
| /* And insert the new phi after all sources are in place: */ |
| b->cursor = nir_after_instr(&phi->instr); |
| nir_builder_instr_insert(b, &new_phi->instr); |
| |
| /* And finally add back the widening conversion after the phi, |
| * and re-write the original phi's uses |
| */ |
| b->cursor = nir_after_instr_and_phis(&new_phi->instr); |
| nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL); |
| |
| nir_ssa_def_rewrite_uses(&phi->dest.ssa, def); |
| |
| return true; |
| } |
| |
| static bool |
| lower_phi(nir_builder *b, nir_phi_instr *phi) |
| { |
| bool progress = try_move_narrowing_dst(b, phi); |
| if (!progress) |
| progress = try_move_widening_src(b, phi); |
| return progress; |
| } |
| |
| bool |
| nir_opt_phi_precision(nir_shader *shader) |
| { |
| bool progress = false; |
| |
| /* If 8b or 16b bit_sizes are not used, no point to run this pass: */ |
| unsigned bit_sizes_used = shader->info.bit_sizes_float | |
| shader->info.bit_sizes_int; |
| |
| if (!bit_sizes_used) { |
| nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader)); |
| bit_sizes_used = shader->info.bit_sizes_float | |
| shader->info.bit_sizes_int; |
| } |
| |
| if (!(bit_sizes_used & (8 | 16))) |
| return false; |
| |
| nir_foreach_function(function, shader) { |
| if (!function->impl) |
| continue; |
| |
| nir_builder b; |
| nir_builder_init(&b, function->impl); |
| |
| nir_foreach_block (block, function->impl) { |
| nir_foreach_instr_safe (instr, block) { |
| if (instr->type != nir_instr_type_phi) |
| break; |
| |
| progress |= lower_phi(&b, nir_instr_as_phi(instr)); |
| } |
| } |
| |
| if (progress) { |
| nir_metadata_preserve(function->impl, |
| nir_metadata_block_index | |
| nir_metadata_dominance); |
| } else { |
| nir_metadata_preserve(function->impl, nir_metadata_all); |
| } |
| } |
| |
| return progress; |
| } |
| |