| /* |
| * Copyright © 2022 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "intel_nir.h" |
| #include "compiler/nir/nir_builder.h" |
| |
/**
 * Implement a peephole pass to convert 32-bit integer multiplications to
 * imul_32x16 or umul_32x16 when one source is known to fit in 16 bits.
 */
| |
/**
 * State shared by every invocation of the per-instruction callback.
 */
struct pass_data {
   /* Hash table handed to the range analysis helpers; created and destroyed
    * by the pass entry point.
    */
   struct hash_table *range_ht;
};
| |
| static void |
| replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val, |
| nir_op new_opcode) |
| { |
| assert(small_val == 0 || small_val == 1); |
| |
| b->cursor = nir_before_instr(&imul->instr); |
| |
| nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode); |
| |
| nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val]); |
| nir_alu_src_copy(&imul_32x16->src[1], &imul->src[small_val]); |
| |
| nir_def_init(&imul_32x16->instr, &imul_32x16->def, |
| imul->def.num_components, 32); |
| |
| nir_def_rewrite_uses(&imul->def, |
| &imul_32x16->def); |
| |
| nir_builder_instr_insert(b, &imul_32x16->instr); |
| |
| nir_instr_remove(&imul->instr); |
| nir_instr_free(&imul->instr); |
| } |
| |
/**
 * Classification of the unary operations found at the root of an expression
 * by signed_integer_range_analysis.
 *
 * The negation and absolute-value entries are independent bits so they can be
 * combined and toggled.  invalid_root is numerically larger than every valid
 * combination; the caller uses it as the initial "nothing chosen yet" value
 * when picking the smallest root.
 */
enum root_operation {
   non_unary       = 0x00,
   integer_neg     = 0x01,
   integer_abs     = 0x02,
   integer_neg_abs = integer_neg | integer_abs,
   invalid_root    = 255
};
| |
| static enum root_operation |
| signed_integer_range_analysis(nir_shader *shader, struct hash_table *range_ht, |
| nir_scalar scalar, int *lo, int *hi) |
| { |
| if (nir_scalar_is_const(scalar)) { |
| *lo = nir_scalar_as_int(scalar); |
| *hi = *lo; |
| return non_unary; |
| } |
| |
| if (nir_scalar_is_alu(scalar)) { |
| switch (nir_scalar_alu_op(scalar)) { |
| case nir_op_iabs: |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 0), |
| lo, hi); |
| |
| if (*lo == INT32_MIN) { |
| *hi = INT32_MAX; |
| } else { |
| const int32_t a = abs(*lo); |
| const int32_t b = abs(*hi); |
| |
| *lo = MIN2(a, b); |
| *hi = MAX2(a, b); |
| } |
| |
| /* Absolute value wipes out any inner negations, and it is redundant |
| * with any inner absolute values. |
| */ |
| return integer_abs; |
| |
| case nir_op_ineg: { |
| const enum root_operation root = |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 0), |
| lo, hi); |
| |
| if (*lo == INT32_MIN) { |
| *hi = INT32_MAX; |
| } else { |
| const int32_t a = -(*lo); |
| const int32_t b = -(*hi); |
| |
| *lo = MIN2(a, b); |
| *hi = MAX2(a, b); |
| } |
| |
| /* Negation of a negation cancels out, but negation of absolute value |
| * must preserve the integer_abs bit. |
| */ |
| return root ^ integer_neg; |
| } |
| |
| case nir_op_imax: { |
| int src0_lo, src0_hi; |
| int src1_lo, src1_hi; |
| |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 0), |
| &src0_lo, &src0_hi); |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 1), |
| &src1_lo, &src1_hi); |
| |
| *lo = MAX2(src0_lo, src1_lo); |
| *hi = MAX2(src0_hi, src1_hi); |
| |
| return non_unary; |
| } |
| |
| case nir_op_imin: { |
| int src0_lo, src0_hi; |
| int src1_lo, src1_hi; |
| |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 0), |
| &src0_lo, &src0_hi); |
| signed_integer_range_analysis(shader, range_ht, |
| nir_scalar_chase_alu_src(scalar, 1), |
| &src1_lo, &src1_hi); |
| |
| *lo = MIN2(src0_lo, src1_lo); |
| *hi = MIN2(src0_hi, src1_hi); |
| |
| return non_unary; |
| } |
| |
| default: |
| break; |
| } |
| } |
| |
| /* Any value with the sign-bit set is problematic. Consider the case when |
| * bound is 0x80000000. As an unsigned value, this means the value must be |
| * in the range [0, 0x80000000]. As a signed value, it means the value must |
| * be in the range [0, INT_MAX] or it must be INT_MIN. |
| * |
| * If bound is -2, it means the value is either in the range [INT_MIN, -2] |
| * or it is in the range [0, INT_MAX]. |
| * |
| * This function only returns a single, contiguous range. The union of the |
| * two ranges for any value of bound with the sign-bit set is [INT_MIN, |
| * INT_MAX]. |
| */ |
| const int32_t bound = nir_unsigned_upper_bound(shader, range_ht, |
| scalar, NULL); |
| if (bound < 0) { |
| *lo = INT32_MIN; |
| *hi = INT32_MAX; |
| } else { |
| *lo = 0; |
| *hi = bound; |
| } |
| |
| return non_unary; |
| } |
| |
| static bool |
| intel_nir_opt_peephole_imul32x16_instr(nir_builder *b, |
| nir_instr *instr, |
| void *cb_data) |
| { |
| struct pass_data *d = (struct pass_data *) cb_data; |
| struct hash_table *range_ht = d->range_ht; |
| |
| if (instr->type != nir_instr_type_alu) |
| return false; |
| |
| nir_alu_instr *imul = nir_instr_as_alu(instr); |
| if (imul->op != nir_op_imul) |
| return false; |
| |
| if (imul->def.bit_size != 32) |
| return false; |
| |
| nir_op new_opcode = nir_num_opcodes; |
| |
| unsigned i; |
| for (i = 0; i < 2; i++) { |
| if (!nir_src_is_const(imul->src[i].src)) |
| continue; |
| |
| int64_t lo = INT64_MAX; |
| int64_t hi = INT64_MIN; |
| |
| for (unsigned comp = 0; comp < imul->def.num_components; comp++) { |
| int64_t v = nir_src_comp_as_int(imul->src[i].src, comp); |
| |
| if (v < lo) |
| lo = v; |
| |
| if (v > hi) |
| hi = v; |
| } |
| |
| if (lo >= INT16_MIN && hi <= INT16_MAX) { |
| new_opcode = nir_op_imul_32x16; |
| break; |
| } else if (lo >= 0 && hi <= UINT16_MAX) { |
| new_opcode = nir_op_umul_32x16; |
| break; |
| } |
| } |
| |
| if (new_opcode != nir_num_opcodes) { |
| replace_imul_instr(b, imul, i, new_opcode); |
| return true; |
| } |
| |
| if (imul->def.num_components > 1) |
| return false; |
| |
| const nir_scalar imul_scalar = { &imul->def, 0 }; |
| int idx = -1; |
| enum root_operation prev_root = invalid_root; |
| |
| for (i = 0; i < 2; i++) { |
| /* All constants were previously processed. There is nothing more to |
| * learn from a constant here. |
| */ |
| if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const) |
| continue; |
| |
| nir_scalar scalar = nir_scalar_chase_alu_src(imul_scalar, i); |
| int lo = INT32_MIN; |
| int hi = INT32_MAX; |
| |
| const enum root_operation root = |
| signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi); |
| |
| /* Copy propagation (in the backend) has trouble handling cases like |
| * |
| * mov(8) g60<1>D -g59<8,8,1>D |
| * mul(8) g61<1>D g63<8,8,1>D g60<16,8,2>W |
| * |
| * If g59 had absolute value instead of negation, even improved copy |
| * propagation would not be able to make progress. |
| * |
| * In cases where both sources to the integer multiplication can fit in |
| * 16-bits, choose the source that does not have a source modifier. |
| */ |
| if (root < prev_root) { |
| if (lo >= INT16_MIN && hi <= INT16_MAX) { |
| new_opcode = nir_op_imul_32x16; |
| idx = i; |
| prev_root = root; |
| |
| if (root == non_unary) |
| break; |
| } else if (lo >= 0 && hi <= UINT16_MAX) { |
| new_opcode = nir_op_umul_32x16; |
| idx = i; |
| prev_root = root; |
| |
| if (root == non_unary) |
| break; |
| } |
| } |
| } |
| |
| if (new_opcode == nir_num_opcodes) { |
| assert(idx == -1); |
| assert(prev_root == invalid_root); |
| return false; |
| } |
| |
| assert(idx != -1); |
| assert(prev_root != invalid_root); |
| |
| replace_imul_instr(b, imul, idx, new_opcode); |
| return true; |
| } |
| |
| bool |
| intel_nir_opt_peephole_imul32x16(nir_shader *shader) |
| { |
| struct pass_data cb_data; |
| |
| cb_data.range_ht = _mesa_pointer_hash_table_create(NULL); |
| |
| bool progress = nir_shader_instructions_pass(shader, |
| intel_nir_opt_peephole_imul32x16_instr, |
| nir_metadata_control_flow, |
| &cb_data); |
| |
| _mesa_hash_table_destroy(cb_data.range_ht, NULL); |
| |
| return progress; |
| } |
| |