src/intel/compiler/intel_nir_opt_peephole_imul32x16.c - third_party/mesa - Git at Google

 /*
  * Copyright © 2022 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 #include "intel_nir.h"
 #include "compiler/nir/nir_builder.h"

 /**
  * Implement a peephole pass to convert integer multiplications to imul32x16.
  */

 struct pass_data {
    struct hash_table *range_ht;
 };

 static void
 replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val,
                    nir_op new_opcode)
 {
    assert(small_val == 0 || small_val == 1);

    b->cursor = nir_before_instr(&imul->instr);

    nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode);

    nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val]);
    nir_alu_src_copy(&imul_32x16->src[1], &imul->src[small_val]);

    nir_def_init(&imul_32x16->instr, &imul_32x16->def,
                 imul->def.num_components, 32);

    nir_def_rewrite_uses(&imul->def,
                             &imul_32x16->def);

    nir_builder_instr_insert(b, &imul_32x16->instr);

    nir_instr_remove(&imul->instr);
    nir_instr_free(&imul->instr);
 }

 enum root_operation {
    non_unary = 0,
    integer_neg = 1 << 0,
    integer_abs = 1 << 1,
    integer_neg_abs = integer_neg | integer_abs,
    invalid_root = 255
 };

 static enum root_operation
 signed_integer_range_analysis(nir_shader *shader, struct hash_table *range_ht,
                               nir_scalar scalar, int *lo, int *hi)
 {
    if (nir_scalar_is_const(scalar)) {
       *lo = nir_scalar_as_int(scalar);
       *hi = *lo;
       return non_unary;
    }

    if (nir_scalar_is_alu(scalar)) {
       switch (nir_scalar_alu_op(scalar)) {
       case nir_op_iabs:
          signed_integer_range_analysis(shader, range_ht,
                                        nir_scalar_chase_alu_src(scalar, 0),
                                        lo, hi);

          if (*lo == INT32_MIN) {
             *hi = INT32_MAX;
          } else {
             const int32_t a = abs(*lo);
             const int32_t b = abs(*hi);

             *lo = MIN2(a, b);
             *hi = MAX2(a, b);
          }

          /* Absolute value wipes out any inner negations, and it is redundant
           * with any inner absolute values.
           */
          return integer_abs;

       case nir_op_ineg: {
          const enum root_operation root =
             signed_integer_range_analysis(shader, range_ht,
                                           nir_scalar_chase_alu_src(scalar, 0),
                                           lo, hi);

          if (*lo == INT32_MIN) {
             *hi = INT32_MAX;
          } else {
             const int32_t a = -(*lo);
             const int32_t b = -(*hi);

             *lo = MIN2(a, b);
             *hi = MAX2(a, b);
          }

          /* Negation of a negation cancels out, but negation of absolute value
           * must preserve the integer_abs bit.
           */
          return root ^ integer_neg;
       }

       case nir_op_imax: {
          int src0_lo, src0_hi;
          int src1_lo, src1_hi;

          signed_integer_range_analysis(shader, range_ht,
                                        nir_scalar_chase_alu_src(scalar, 0),
                                        &src0_lo, &src0_hi);
          signed_integer_range_analysis(shader, range_ht,
                                        nir_scalar_chase_alu_src(scalar, 1),
                                        &src1_lo, &src1_hi);

          *lo = MAX2(src0_lo, src1_lo);
          *hi = MAX2(src0_hi, src1_hi);

          return non_unary;
       }

       case nir_op_imin: {
          int src0_lo, src0_hi;
          int src1_lo, src1_hi;

          signed_integer_range_analysis(shader, range_ht,
                                        nir_scalar_chase_alu_src(scalar, 0),
                                        &src0_lo, &src0_hi);
          signed_integer_range_analysis(shader, range_ht,
                                        nir_scalar_chase_alu_src(scalar, 1),
                                        &src1_lo, &src1_hi);

          *lo = MIN2(src0_lo, src1_lo);
          *hi = MIN2(src0_hi, src1_hi);

          return non_unary;
       }

       default:
          break;
       }
    }

    /* Any value with the sign-bit set is problematic. Consider the case when
     * bound is 0x80000000. As an unsigned value, this means the value must be
     * in the range [0, 0x80000000]. As a signed value, it means the value must
     * be in the range [0, INT_MAX] or it must be INT_MIN.
     *
     * If bound is -2, it means the value is either in the range [INT_MIN, -2]
     * or it is in the range [0, INT_MAX].
     *
     * This function only returns a single, contiguous range. The union of the
     * two ranges for any value of bound with the sign-bit set is [INT_MIN,
     * INT_MAX].
     */
    const int32_t bound = nir_unsigned_upper_bound(shader, range_ht,
                                                      scalar, NULL);
    if (bound < 0) {
       *lo = INT32_MIN;
       *hi = INT32_MAX;
    } else {
       *lo = 0;
       *hi = bound;
    }

    return non_unary;
 }

 static bool
 intel_nir_opt_peephole_imul32x16_instr(nir_builder *b,
                                        nir_instr *instr,
                                        void *cb_data)
 {
    struct pass_data *d = (struct pass_data *) cb_data;
    struct hash_table *range_ht = d->range_ht;

    if (instr->type != nir_instr_type_alu)
       return false;

    nir_alu_instr *imul = nir_instr_as_alu(instr);
    if (imul->op != nir_op_imul)
       return false;

    if (imul->def.bit_size != 32)
       return false;

    nir_op new_opcode = nir_num_opcodes;

    unsigned i;
    for (i = 0; i < 2; i++) {
       if (!nir_src_is_const(imul->src[i].src))
          continue;

       int64_t lo = INT64_MAX;
       int64_t hi = INT64_MIN;

       for (unsigned comp = 0; comp < imul->def.num_components; comp++) {
          int64_t v = nir_src_comp_as_int(imul->src[i].src, comp);

          if (v < lo)
             lo = v;

          if (v > hi)
             hi = v;
       }

       if (lo >= INT16_MIN && hi <= INT16_MAX) {
          new_opcode = nir_op_imul_32x16;
          break;
       } else if (lo >= 0 && hi <= UINT16_MAX) {
          new_opcode = nir_op_umul_32x16;
          break;
       }
    }

    if (new_opcode != nir_num_opcodes) {
       replace_imul_instr(b, imul, i, new_opcode);
       return true;
    }

    if (imul->def.num_components > 1)
       return false;

    const nir_scalar imul_scalar = { &imul->def, 0 };
    int idx = -1;
    enum root_operation prev_root = invalid_root;

    for (i = 0; i < 2; i++) {
       /* All constants were previously processed.  There is nothing more to
        * learn from a constant here.
        */
       if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const)
          continue;

       nir_scalar scalar = nir_scalar_chase_alu_src(imul_scalar, i);
       int lo = INT32_MIN;
       int hi = INT32_MAX;

       const enum root_operation root =
          signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi);

       /* Copy propagation (in the backend) has trouble handling cases like
        *
        *    mov(8)          g60<1>D         -g59<8,8,1>D
        *    mul(8)          g61<1>D         g63<8,8,1>D     g60<16,8,2>W
        *
        * If g59 had absolute value instead of negation, even improved copy
        * propagation would not be able to make progress.
        *
        * In cases where both sources to the integer multiplication can fit in
        * 16-bits, choose the source that does not have a source modifier.
        */
       if (root < prev_root) {
          if (lo >= INT16_MIN && hi <= INT16_MAX) {
             new_opcode = nir_op_imul_32x16;
             idx = i;
             prev_root = root;

             if (root == non_unary)
                break;
          } else if (lo >= 0 && hi <= UINT16_MAX) {
             new_opcode = nir_op_umul_32x16;
             idx = i;
             prev_root = root;

             if (root == non_unary)
                break;
          }
       }
    }

    if (new_opcode == nir_num_opcodes) {
       assert(idx == -1);
       assert(prev_root == invalid_root);
       return false;
    }

    assert(idx != -1);
    assert(prev_root != invalid_root);

    replace_imul_instr(b, imul, idx, new_opcode);
    return true;
 }

 bool
 intel_nir_opt_peephole_imul32x16(nir_shader *shader)
 {
    struct pass_data cb_data;

    cb_data.range_ht = _mesa_pointer_hash_table_create(NULL);

    bool progress = nir_shader_instructions_pass(shader,
                                                 intel_nir_opt_peephole_imul32x16_instr,
                                                 nir_metadata_control_flow,
                                                 &cb_data);

    _mesa_hash_table_destroy(cb_data.range_ht, NULL);

    return progress;
 }
	/*
	* Copyright © 2022 Intel Corporation
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	#include "intel_nir.h"
	#include "compiler/nir/nir_builder.h"

	/**
	* Implement a peephole pass to convert integer multiplications to imul32x16.
	*/

	struct pass_data {
	struct hash_table *range_ht;
	};

	static void
	replace_imul_instr(nir_builder b, nir_alu_instr imul, unsigned small_val,
	nir_op new_opcode)
	{
	assert(small_val == 0 \|\| small_val == 1);

	b->cursor = nir_before_instr(&imul->instr);

	nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode);

	nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val]);
	nir_alu_src_copy(&imul_32x16->src[1], &imul->src[small_val]);

	nir_def_init(&imul_32x16->instr, &imul_32x16->def,
	imul->def.num_components, 32);

	nir_def_rewrite_uses(&imul->def,
	&imul_32x16->def);

	nir_builder_instr_insert(b, &imul_32x16->instr);

	nir_instr_remove(&imul->instr);
	nir_instr_free(&imul->instr);
	}

	enum root_operation {
	non_unary = 0,
	integer_neg = 1 << 0,
	integer_abs = 1 << 1,
	integer_neg_abs = integer_neg \| integer_abs,
	invalid_root = 255
	};

	static enum root_operation
	signed_integer_range_analysis(nir_shader shader, struct hash_table range_ht,
	nir_scalar scalar, int lo, int hi)
	{
	if (nir_scalar_is_const(scalar)) {
	*lo = nir_scalar_as_int(scalar);
	hi = lo;
	return non_unary;
	}

	if (nir_scalar_is_alu(scalar)) {
	switch (nir_scalar_alu_op(scalar)) {
	case nir_op_iabs:
	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 0),
	lo, hi);

	if (*lo == INT32_MIN) {
	*hi = INT32_MAX;
	} else {
	const int32_t a = abs(*lo);
	const int32_t b = abs(*hi);

	*lo = MIN2(a, b);
	*hi = MAX2(a, b);
	}

	/* Absolute value wipes out any inner negations, and it is redundant
	* with any inner absolute values.
	*/
	return integer_abs;

	case nir_op_ineg: {
	const enum root_operation root =
	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 0),
	lo, hi);

	if (*lo == INT32_MIN) {
	*hi = INT32_MAX;
	} else {
	const int32_t a = -(*lo);
	const int32_t b = -(*hi);

	*lo = MIN2(a, b);
	*hi = MAX2(a, b);
	}

	/* Negation of a negation cancels out, but negation of absolute value
	* must preserve the integer_abs bit.
	*/
	return root ^ integer_neg;
	}

	case nir_op_imax: {
	int src0_lo, src0_hi;
	int src1_lo, src1_hi;

	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 0),
	&src0_lo, &src0_hi);
	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 1),
	&src1_lo, &src1_hi);

	*lo = MAX2(src0_lo, src1_lo);
	*hi = MAX2(src0_hi, src1_hi);

	return non_unary;
	}

	case nir_op_imin: {
	int src0_lo, src0_hi;
	int src1_lo, src1_hi;

	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 0),
	&src0_lo, &src0_hi);
	signed_integer_range_analysis(shader, range_ht,
	nir_scalar_chase_alu_src(scalar, 1),
	&src1_lo, &src1_hi);

	*lo = MIN2(src0_lo, src1_lo);
	*hi = MIN2(src0_hi, src1_hi);

	return non_unary;
	}

	default:
	break;
	}
	}

	/* Any value with the sign-bit set is problematic. Consider the case when
	* bound is 0x80000000. As an unsigned value, this means the value must be
	* in the range [0, 0x80000000]. As a signed value, it means the value must
	* be in the range [0, INT_MAX] or it must be INT_MIN.
	*
	* If bound is -2, it means the value is either in the range [INT_MIN, -2]
	* or it is in the range [0, INT_MAX].
	*
	* This function only returns a single, contiguous range. The union of the
	* two ranges for any value of bound with the sign-bit set is [INT_MIN,
	* INT_MAX].
	*/
	const int32_t bound = nir_unsigned_upper_bound(shader, range_ht,
	scalar, NULL);
	if (bound < 0) {
	*lo = INT32_MIN;
	*hi = INT32_MAX;
	} else {
	*lo = 0;
	*hi = bound;
	}

	return non_unary;
	}

	static bool
	intel_nir_opt_peephole_imul32x16_instr(nir_builder *b,
	nir_instr *instr,
	void *cb_data)
	{
	struct pass_data d = (struct pass_data ) cb_data;
	struct hash_table *range_ht = d->range_ht;

	if (instr->type != nir_instr_type_alu)
	return false;

	nir_alu_instr *imul = nir_instr_as_alu(instr);
	if (imul->op != nir_op_imul)
	return false;

	if (imul->def.bit_size != 32)
	return false;

	nir_op new_opcode = nir_num_opcodes;

	unsigned i;
	for (i = 0; i < 2; i++) {
	if (!nir_src_is_const(imul->src[i].src))
	continue;

	int64_t lo = INT64_MAX;
	int64_t hi = INT64_MIN;

	for (unsigned comp = 0; comp < imul->def.num_components; comp++) {
	int64_t v = nir_src_comp_as_int(imul->src[i].src, comp);

	if (v < lo)
	lo = v;

	if (v > hi)
	hi = v;
	}

	if (lo >= INT16_MIN && hi <= INT16_MAX) {
	new_opcode = nir_op_imul_32x16;
	break;
	} else if (lo >= 0 && hi <= UINT16_MAX) {
	new_opcode = nir_op_umul_32x16;
	break;
	}
	}

	if (new_opcode != nir_num_opcodes) {
	replace_imul_instr(b, imul, i, new_opcode);
	return true;
	}

	if (imul->def.num_components > 1)
	return false;

	const nir_scalar imul_scalar = { &imul->def, 0 };
	int idx = -1;
	enum root_operation prev_root = invalid_root;

	for (i = 0; i < 2; i++) {
	/* All constants were previously processed. There is nothing more to
	* learn from a constant here.
	*/
	if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const)
	continue;

	nir_scalar scalar = nir_scalar_chase_alu_src(imul_scalar, i);
	int lo = INT32_MIN;
	int hi = INT32_MAX;

	const enum root_operation root =
	signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi);

	/* Copy propagation (in the backend) has trouble handling cases like
	*
	* mov(8) g60<1>D -g59<8,8,1>D
	* mul(8) g61<1>D g63<8,8,1>D g60<16,8,2>W
	*
	* If g59 had absolute value instead of negation, even improved copy
	* propagation would not be able to make progress.
	*
	* In cases where both sources to the integer multiplication can fit in
	* 16-bits, choose the source that does not have a source modifier.
	*/
	if (root < prev_root) {
	if (lo >= INT16_MIN && hi <= INT16_MAX) {
	new_opcode = nir_op_imul_32x16;
	idx = i;
	prev_root = root;

	if (root == non_unary)
	break;
	} else if (lo >= 0 && hi <= UINT16_MAX) {
	new_opcode = nir_op_umul_32x16;
	idx = i;
	prev_root = root;

	if (root == non_unary)
	break;
	}
	}
	}

	if (new_opcode == nir_num_opcodes) {
	assert(idx == -1);
	assert(prev_root == invalid_root);
	return false;
	}

	assert(idx != -1);
	assert(prev_root != invalid_root);

	replace_imul_instr(b, imul, idx, new_opcode);
	return true;
	}

	bool
	intel_nir_opt_peephole_imul32x16(nir_shader *shader)
	{
	struct pass_data cb_data;

	cb_data.range_ht = _mesa_pointer_hash_table_create(NULL);

	bool progress = nir_shader_instructions_pass(shader,
	intel_nir_opt_peephole_imul32x16_instr,
	nir_metadata_control_flow,
	&cb_data);

	_mesa_hash_table_destroy(cb_data.range_ht, NULL);

	return progress;
	}