| /* |
| * Copyright © 2010 Intel Corporation |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "brw_shader.h" |
| #include "brw_builder.h" |
| #include "util/half_float.h" |
| |
| static uint64_t |
| src_as_uint(const brw_reg &src) |
| { |
| assert(src.file == IMM); |
| |
| switch (src.type) { |
| case BRW_TYPE_W: |
| return (uint64_t)(int16_t)(src.ud & 0xffff); |
| |
| case BRW_TYPE_UW: |
| return (uint64_t)(uint16_t)(src.ud & 0xffff); |
| |
| case BRW_TYPE_D: |
| return (uint64_t)src.d; |
| |
| case BRW_TYPE_UD: |
| return (uint64_t)src.ud; |
| |
| case BRW_TYPE_Q: |
| return src.d64; |
| |
| case BRW_TYPE_UQ: |
| return src.u64; |
| |
| default: |
| unreachable("Invalid integer type."); |
| } |
| } |
| |
| static double |
| src_as_float(const brw_reg &src) |
| { |
| assert(src.file == IMM); |
| |
| switch (src.type) { |
| case BRW_TYPE_HF: |
| return _mesa_half_to_float((uint16_t)src.d); |
| |
| case BRW_TYPE_F: |
| return src.f; |
| |
| case BRW_TYPE_DF: |
| return src.df; |
| |
| default: |
| unreachable("Invalid float type."); |
| } |
| } |
| |
| static brw_reg |
| brw_imm_for_type(uint64_t value, enum brw_reg_type type) |
| { |
| switch (type) { |
| case BRW_TYPE_W: |
| return brw_imm_w(value); |
| |
| case BRW_TYPE_UW: |
| return brw_imm_uw(value); |
| |
| case BRW_TYPE_D: |
| return brw_imm_d(value); |
| |
| case BRW_TYPE_UD: |
| return brw_imm_ud(value); |
| |
| case BRW_TYPE_Q: |
| return brw_imm_d(value); |
| |
| case BRW_TYPE_UQ: |
| return brw_imm_uq(value); |
| |
| default: |
| unreachable("Invalid integer type."); |
| } |
| } |
| |
| /** |
| * Converts a MAD to an ADD by folding the multiplicand sources. |
| */ |
| static void |
| fold_multiplicands_of_MAD(brw_inst *inst) |
| { |
| assert(inst->opcode == BRW_OPCODE_MAD); |
| assert (inst->src[1].file == IMM && |
| inst->src[2].file == IMM && |
| !brw_type_is_vector_imm(inst->src[1].type) && |
| !brw_type_is_vector_imm(inst->src[2].type)); |
| |
| if (brw_type_is_int(inst->src[1].type)) { |
| const uint64_t imm1 = src_as_uint(inst->src[1]); |
| const uint64_t imm2 = src_as_uint(inst->src[2]); |
| |
| brw_reg product = brw_imm_ud(imm1 * imm2); |
| |
| inst->src[1] = retype(product, |
| brw_type_larger_of(inst->src[1].type, |
| inst->src[2].type)); |
| } else { |
| const double product = src_as_float(inst->src[1]) * |
| src_as_float(inst->src[2]); |
| |
| switch (brw_type_larger_of(inst->src[1].type, |
| inst->src[2].type)) { |
| case BRW_TYPE_HF: |
| inst->src[1] = retype(brw_imm_w(_mesa_float_to_half(product)), |
| BRW_TYPE_HF); |
| break; |
| |
| case BRW_TYPE_F: |
| inst->src[1] = brw_imm_f(product); |
| break; |
| |
| case BRW_TYPE_DF: |
| unreachable("float64 should be impossible."); |
| break; |
| |
| default: |
| unreachable("Invalid float type."); |
| } |
| } |
| |
| inst->opcode = BRW_OPCODE_ADD; |
| inst->resize_sources(2); |
| } |
| |
| bool |
| brw_opt_constant_fold_instruction(const intel_device_info *devinfo, brw_inst *inst) |
| { |
| brw_reg result; |
| |
| result.file = BAD_FILE; |
| |
| switch (inst->opcode) { |
| case BRW_OPCODE_ADD: |
| if (inst->src[0].file != IMM || inst->src[1].file != IMM) |
| break; |
| |
| if (brw_type_is_int(inst->src[0].type)) { |
| const uint64_t src0 = src_as_uint(inst->src[0]); |
| const uint64_t src1 = src_as_uint(inst->src[1]); |
| |
| result = brw_imm_for_type(src0 + src1, inst->dst.type); |
| } else { |
| assert(inst->src[0].type == BRW_TYPE_F); |
| result = brw_imm_f(inst->src[0].f + inst->src[1].f); |
| } |
| |
| break; |
| |
| case BRW_OPCODE_ADD3: |
| if (inst->src[0].file == IMM && |
| inst->src[1].file == IMM && |
| inst->src[2].file == IMM) { |
| const uint64_t src0 = src_as_uint(inst->src[0]); |
| const uint64_t src1 = src_as_uint(inst->src[1]); |
| const uint64_t src2 = src_as_uint(inst->src[2]); |
| |
| result = brw_imm_for_type(src0 + src1 + src2, inst->dst.type); |
| } |
| |
| break; |
| |
| case BRW_OPCODE_AND: |
| if (inst->src[0].file == IMM && inst->src[1].file == IMM) { |
| const uint64_t src0 = src_as_uint(inst->src[0]); |
| const uint64_t src1 = src_as_uint(inst->src[1]); |
| |
| result = brw_imm_for_type(src0 & src1, inst->dst.type); |
| break; |
| } |
| |
| break; |
| |
| case BRW_OPCODE_MAD: |
| if (inst->src[0].file == IMM && |
| inst->src[1].file == IMM && |
| inst->src[2].file == IMM && |
| !brw_type_is_vector_imm(inst->src[0].type) && |
| !brw_type_is_vector_imm(inst->src[1].type) && |
| !brw_type_is_vector_imm(inst->src[2].type)) { |
| fold_multiplicands_of_MAD(inst); |
| assert(inst->opcode == BRW_OPCODE_ADD); |
| |
| ASSERTED bool folded = brw_opt_constant_fold_instruction(devinfo, inst); |
| assert(folded); |
| |
| return true; |
| } |
| |
| break; |
| |
| case BRW_OPCODE_MUL: |
| if (brw_type_is_float(inst->src[1].type)) |
| break; |
| |
| /* From the BDW PRM, Vol 2a, "mul - Multiply": |
| * |
| * "When multiplying integer datatypes, if src0 is DW and src1 |
| * is W, irrespective of the destination datatype, the |
| * accumulator maintains full 48-bit precision." |
| * ... |
| * "When multiplying integer data types, if one of the sources |
| * is a DW, the resulting full precision data is stored in |
| * the accumulator." |
| * |
| * There are also similar notes in earlier PRMs. |
| * |
| * The MOV instruction can copy the bits of the source, but it |
| * does not clear the higher bits of the accumulator. So, because |
| * we might use the full accumulator in the MUL/MACH macro, we |
| * shouldn't replace such MULs with MOVs. |
| */ |
| if ((brw_type_size_bytes(inst->src[0].type) == 4 || |
| brw_type_size_bytes(inst->src[1].type) == 4) && |
| (inst->dst.is_accumulator() || |
| inst->writes_accumulator_implicitly(devinfo))) |
| break; |
| |
| if (inst->src[0].is_zero() || inst->src[1].is_zero()) { |
| result = brw_imm_d(0); |
| break; |
| } |
| |
| if (inst->src[0].file == IMM && inst->src[1].file == IMM) { |
| const uint64_t src0 = src_as_uint(inst->src[0]); |
| const uint64_t src1 = src_as_uint(inst->src[1]); |
| |
| result = brw_imm_for_type(src0 * src1, inst->dst.type); |
| break; |
| } |
| break; |
| |
| case BRW_OPCODE_OR: |
| if (inst->src[0].file == IMM && inst->src[1].file == IMM) { |
| const uint64_t src0 = src_as_uint(inst->src[0]); |
| const uint64_t src1 = src_as_uint(inst->src[1]); |
| |
| result = brw_imm_for_type(src0 | src1, inst->dst.type); |
| break; |
| } |
| |
| break; |
| |
| case BRW_OPCODE_SHL: |
| if (inst->src[0].file == IMM && inst->src[1].file == IMM) { |
| /* It's not currently possible to generate this, and this constant |
| * folding does not handle it. |
| */ |
| assert(!inst->saturate); |
| |
| switch (brw_type_size_bytes(inst->src[0].type)) { |
| case 2: |
| result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f))); |
| break; |
| case 4: |
| result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f)); |
| break; |
| case 8: |
| result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f)); |
| break; |
| default: |
| /* Just in case a future platform re-enables B or UB types. */ |
| unreachable("Invalid source size."); |
| } |
| |
| result = retype(result, inst->dst.type); |
| } |
| break; |
| |
| case SHADER_OPCODE_BROADCAST: |
| if (inst->src[0].file == IMM) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->force_writemask_all = true; |
| inst->resize_sources(1); |
| |
| /* The destination of BROADCAST will always be is_scalar, so the |
| * allocation will always be REG_SIZE * reg_unit. Adjust the |
| * exec_size to match. |
| */ |
| inst->exec_size = 8 * reg_unit(devinfo); |
| assert(inst->size_written == inst->dst.component_size(inst->exec_size)); |
| |
| return true; |
| } |
| break; |
| |
| case SHADER_OPCODE_SHUFFLE: |
| if (inst->src[0].file == IMM) |
| result = inst->src[0]; |
| |
| break; |
| |
| case FS_OPCODE_DDX_COARSE: |
| case FS_OPCODE_DDX_FINE: |
| case FS_OPCODE_DDY_COARSE: |
| case FS_OPCODE_DDY_FINE: |
| if (is_uniform(inst->src[0]) || inst->src[0].is_scalar) |
| result = retype(brw_imm_uq(0), inst->dst.type); |
| |
| break; |
| |
| default: |
| break; |
| } |
| |
| if (result.file != BAD_FILE) { |
| assert(result.file == IMM); |
| |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->src[0] = result; |
| inst->resize_sources(1); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool |
| brw_opt_algebraic(brw_shader &s) |
| { |
| const intel_device_info *devinfo = s.devinfo; |
| bool progress = false; |
| |
| foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) { |
| if (brw_opt_constant_fold_instruction(devinfo, inst)) { |
| progress = true; |
| continue; |
| } |
| |
| switch (inst->opcode) { |
| case BRW_OPCODE_ADD: |
| if (brw_type_is_int(inst->src[1].type) && |
| inst->src[1].is_zero()) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->resize_sources(1); |
| progress = true; |
| } |
| |
| break; |
| |
| case BRW_OPCODE_ADD3: { |
| const unsigned num_imm = (inst->src[0].file == IMM) + |
| (inst->src[1].file == IMM) + |
| (inst->src[2].file == IMM); |
| |
| /* If there is more than one immediate value, fold the values and |
| * convert the instruction to either ADD or MOV. |
| */ |
| assert(num_imm < 3); |
| if (num_imm == 2) { |
| uint64_t sum = 0; |
| brw_reg src; |
| |
| for (unsigned i = 0; i < 3; i++) { |
| if (inst->src[i].file == IMM) { |
| sum += src_as_uint(inst->src[i]); |
| } else { |
| assert(src.file == BAD_FILE); |
| src = inst->src[i]; |
| } |
| } |
| |
| assert(src.file != BAD_FILE); |
| |
| if (uint32_t(sum) == 0) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->src[0] = src; |
| inst->resize_sources(1); |
| } else { |
| inst->opcode = BRW_OPCODE_ADD; |
| inst->src[0] = src; |
| inst->src[1] = brw_imm_ud(sum); |
| inst->resize_sources(2); |
| } |
| |
| progress = true; |
| } else if (num_imm == 1) { |
| /* If there is a single constant, and that constant is zero, |
| * convert the instruction to regular ADD. |
| */ |
| for (unsigned i = 0; i < 3; i++) { |
| if (inst->src[i].is_zero()) { |
| inst->opcode = BRW_OPCODE_ADD; |
| inst->src[i] = inst->src[2]; |
| inst->resize_sources(2); |
| progress = true; |
| break; |
| } |
| } |
| } |
| |
| break; |
| } |
| |
| case BRW_OPCODE_MOV: |
| if ((inst->conditional_mod == BRW_CONDITIONAL_Z || |
| inst->conditional_mod == BRW_CONDITIONAL_NZ) && |
| inst->dst.is_null() && |
| (inst->src[0].abs || inst->src[0].negate)) { |
| inst->src[0].abs = false; |
| inst->src[0].negate = false; |
| progress = true; |
| break; |
| } |
| |
| if (inst->src[0].file != IMM) |
| break; |
| |
| if (inst->saturate) { |
| /* Full mixed-type saturates don't happen. However, we can end up |
| * with things like: |
| * |
| * mov.sat(8) g21<1>DF -1F |
| * |
| * Other mixed-size-but-same-base-type cases may also be possible. |
| */ |
| if (inst->dst.type != inst->src[0].type && |
| inst->dst.type != BRW_TYPE_DF && |
| inst->src[0].type != BRW_TYPE_F) |
| unreachable("unimplemented: saturate mixed types"); |
| |
| if (brw_reg_saturate_immediate(&inst->src[0])) { |
| inst->saturate = false; |
| progress = true; |
| } |
| } |
| break; |
| |
| case BRW_OPCODE_MUL: |
| if (brw_type_is_int(inst->src[0].type)){ |
| /* From the BDW PRM, Vol 2a, "mul - Multiply": |
| * |
| * "When multiplying integer datatypes, if src0 is DW and src1 |
| * is W, irrespective of the destination datatype, the |
| * accumulator maintains full 48-bit precision." |
| * ... |
| * "When multiplying integer data types, if one of the sources |
| * is a DW, the resulting full precision data is stored in the |
| * accumulator." |
| * |
| * There are also similar notes in earlier PRMs. |
| * |
| * The MOV instruction can copy the bits of the source, but it |
| * does not clear the higher bits of the accumulator. So, because |
| * we might use the full accumulator in the MUL/MACH macro, we |
| * shouldn't replace such MULs with MOVs. |
| */ |
| if ((brw_type_size_bytes(inst->src[0].type) == 4 || |
| brw_type_size_bytes(inst->src[1].type) == 4) && |
| (inst->dst.is_accumulator() || |
| inst->writes_accumulator_implicitly(devinfo))) |
| break; |
| |
| for (unsigned i = 0; i < 2; i++) { |
| /* a * 1 = a */ |
| if (inst->src[i].is_one()) { |
| inst->opcode = BRW_OPCODE_MOV; |
| } else if (inst->src[i].is_negative_one()) { |
| /* a * -1 = -a */ |
| inst->opcode = BRW_OPCODE_MOV; |
| |
| /* If the source other than the -1 is immediate, just |
| * toggling the negation flag will not work. Due to the |
| * previous call to brw_constant_fold_instruction, this |
| * should not be possible. |
| */ |
| assert(inst->src[1 - i].file != IMM); |
| inst->src[1 - i].negate = !inst->src[1 - i].negate; |
| } |
| |
| if (inst->opcode == BRW_OPCODE_MOV) { |
| /* If the literal 1 was src0, put the old src1 in src0. */ |
| if (i == 0) |
| inst->src[0] = inst->src[1]; |
| |
| inst->resize_sources(1); |
| progress = true; |
| break; |
| } |
| } |
| } |
| break; |
| |
| case BRW_OPCODE_NOT: |
| /* not.nz null, g17 |
| * |
| * becomes |
| * |
| * mov.z null, g17 |
| * |
| * These are equivalent, but the latter is easier for cmod prop. |
| */ |
| if (inst->dst.is_null() && |
| inst->conditional_mod != BRW_CONDITIONAL_NONE) { |
| assert(!inst->src[0].abs); |
| |
| if (!inst->src[0].negate) |
| inst->conditional_mod = brw_negate_cmod(inst->conditional_mod); |
| |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->src[0].negate = false; |
| progress = true; |
| } |
| break; |
| |
| case BRW_OPCODE_OR: |
| if (inst->src[0].equals(inst->src[1]) || inst->src[1].is_zero()) { |
| /* On Gfx8+, the OR instruction can have a source modifier that |
| * performs logical not on the operand. Cases of 'OR r0, ~r1, 0' |
| * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV. |
| */ |
| if (inst->src[0].negate) { |
| inst->opcode = BRW_OPCODE_NOT; |
| inst->src[0].negate = false; |
| } else { |
| inst->opcode = BRW_OPCODE_MOV; |
| } |
| inst->resize_sources(1); |
| progress = true; |
| break; |
| } |
| break; |
| case BRW_OPCODE_CMP: |
| if ((inst->conditional_mod == BRW_CONDITIONAL_Z || |
| inst->conditional_mod == BRW_CONDITIONAL_NZ) && |
| inst->src[1].is_zero() && |
| (inst->src[0].abs || inst->src[0].negate)) { |
| inst->src[0].abs = false; |
| inst->src[0].negate = false; |
| progress = true; |
| break; |
| } |
| break; |
| case BRW_OPCODE_SEL: |
| /* Floating point SEL.CMOD may flush denorms to zero. We don't have |
| * enough information at this point in compilation to know whether or |
| * not it is safe to remove that. |
| * |
| * Integer SEL or SEL without a conditional modifier is just a fancy |
| * MOV. Those are always safe to eliminate. |
| */ |
| if (inst->src[0].equals(inst->src[1]) && |
| (!brw_type_is_float(inst->dst.type) || |
| inst->conditional_mod == BRW_CONDITIONAL_NONE)) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->predicate = BRW_PREDICATE_NONE; |
| inst->predicate_inverse = false; |
| inst->conditional_mod = BRW_CONDITIONAL_NONE; |
| inst->resize_sources(1); |
| progress = true; |
| } else if (inst->saturate && inst->src[1].file == IMM) { |
| switch (inst->conditional_mod) { |
| case BRW_CONDITIONAL_LE: |
| case BRW_CONDITIONAL_L: |
| switch (inst->src[1].type) { |
| case BRW_TYPE_F: |
| if (inst->src[1].f >= 1.0f) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->conditional_mod = BRW_CONDITIONAL_NONE; |
| inst->resize_sources(1); |
| progress = true; |
| } |
| break; |
| default: |
| break; |
| } |
| break; |
| case BRW_CONDITIONAL_GE: |
| case BRW_CONDITIONAL_G: |
| switch (inst->src[1].type) { |
| case BRW_TYPE_F: |
| if (inst->src[1].f <= 0.0f) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->conditional_mod = BRW_CONDITIONAL_NONE; |
| inst->resize_sources(1); |
| progress = true; |
| } |
| break; |
| default: |
| break; |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| break; |
| case BRW_OPCODE_CSEL: |
| if (brw_type_is_float(inst->dst.type)) { |
| /* This transformation can both clean up spurious modifiers |
| * (making assembly dumps easier to read) and convert GE with -abs |
| * to LE with abs. See abs handling below. |
| */ |
| if (inst->src[2].negate) { |
| inst->conditional_mod = brw_swap_cmod(inst->conditional_mod); |
| inst->src[2].negate = false; |
| progress = true; |
| } |
| |
| if (inst->src[2].abs) { |
| switch (inst->conditional_mod) { |
| case BRW_CONDITIONAL_Z: |
| case BRW_CONDITIONAL_NZ: |
| inst->src[2].abs = false; |
| progress = true; |
| break; |
| |
| case BRW_CONDITIONAL_LE: |
| /* Converting to Z can help constant propagation into src0 |
| * and src1. |
| */ |
| inst->conditional_mod = BRW_CONDITIONAL_Z; |
| inst->src[2].abs = false; |
| progress = true; |
| break; |
| |
| default: |
| /* GE or L conditions with absolute value could be used to |
| * implement isnan(x) in CSEL. Transforming G with absolute |
| * value to NZ is **not** NaN safe. |
| */ |
| break; |
| } |
| } |
| } else if (brw_type_is_sint(inst->src[2].type)) { |
| /* Integer transformations are more challenging than floating |
| * point transformations due to INT_MIN == -(INT_MIN) == |
| * abs(INT_MIN). |
| */ |
| if (inst->src[2].negate && inst->src[2].abs) { |
| switch (inst->conditional_mod) { |
| case BRW_CONDITIONAL_GE: |
| inst->src[2].negate = false; |
| inst->src[2].abs = false; |
| inst->conditional_mod = BRW_CONDITIONAL_Z; |
| progress = true; |
| break; |
| case BRW_CONDITIONAL_L: |
| inst->src[2].negate = false; |
| inst->src[2].abs = false; |
| inst->conditional_mod = BRW_CONDITIONAL_NZ; |
| progress = true; |
| break; |
| case BRW_CONDITIONAL_G: |
| /* This is a contradtion. -abs(x) cannot be > 0. */ |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->src[0] = inst->src[1]; |
| inst->resize_sources(1); |
| progress = true; |
| break; |
| case BRW_CONDITIONAL_LE: |
| /* This is a tautology. -abs(x) must be <= 0. */ |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->resize_sources(1); |
| progress = true; |
| break; |
| case BRW_CONDITIONAL_Z: |
| case BRW_CONDITIONAL_NZ: |
| inst->src[2].negate = false; |
| inst->src[2].abs = false; |
| progress = true; |
| break; |
| default: |
| unreachable("Impossible icsel condition."); |
| } |
| } |
| } |
| break; |
| case BRW_OPCODE_MAD: |
| if (inst->src[1].file == IMM && |
| inst->src[2].file == IMM && |
| !brw_type_is_vector_imm(inst->src[1].type) && |
| !brw_type_is_vector_imm(inst->src[2].type)) { |
| fold_multiplicands_of_MAD(inst); |
| |
| /* This could result in (x + 0). For floats, we want to leave this |
| * as an ADD so that a subnormal x will get flushed to zero. |
| */ |
| assert(inst->opcode == BRW_OPCODE_ADD); |
| progress = true; |
| break; |
| } |
| |
| if (inst->src[1].is_one()) { |
| inst->opcode = BRW_OPCODE_ADD; |
| inst->src[1] = inst->src[2]; |
| inst->resize_sources(2); |
| progress = true; |
| } else if (inst->src[2].is_one()) { |
| inst->opcode = BRW_OPCODE_ADD; |
| inst->resize_sources(2); |
| progress = true; |
| } |
| break; |
| case SHADER_OPCODE_BROADCAST: |
| if (is_uniform(inst->src[0])) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->force_writemask_all = true; |
| |
| /* The destination of BROADCAST will always be is_scalar, so the |
| * allocation will always be REG_SIZE * reg_unit. Adjust the |
| * exec_size to match. |
| */ |
| inst->exec_size = 8 * reg_unit(devinfo); |
| assert(inst->size_written == inst->dst.component_size(inst->exec_size)); |
| inst->resize_sources(1); |
| progress = true; |
| } else if (inst->src[1].file == IMM) { |
| inst->opcode = BRW_OPCODE_MOV; |
| /* It's possible that the selected component will be too large and |
| * overflow the register. This can happen if someone does a |
| * readInvocation() from GLSL or SPIR-V and provides an OOB |
| * invocationIndex. If this happens and we some how manage |
| * to constant fold it in and get here, then component() may cause |
| * us to start reading outside of the VGRF which will lead to an |
| * assert later. Instead, just let it wrap around if it goes over |
| * exec_size. |
| */ |
| const unsigned comp = inst->src[1].ud & (inst->exec_size - 1); |
| inst->src[0] = component(inst->src[0], comp); |
| inst->force_writemask_all = true; |
| inst->exec_size = 8 * reg_unit(devinfo); |
| assert(inst->size_written == inst->dst.component_size(inst->exec_size)); |
| inst->resize_sources(1); |
| progress = true; |
| } |
| break; |
| |
| case SHADER_OPCODE_SHUFFLE: |
| if (is_uniform(inst->src[0])) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->resize_sources(1); |
| progress = true; |
| } else if (inst->src[1].file == IMM) { |
| inst->opcode = BRW_OPCODE_MOV; |
| inst->src[0] = component(inst->src[0], |
| inst->src[1].ud); |
| inst->resize_sources(1); |
| progress = true; |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| /* Ensure that the correct source has the immediate value. 2-source |
| * instructions must have the immediate in src[1]. On Gfx12 and later, |
| * some 3-source instructions can have the immediate in src[0] or |
| * src[2]. It's complicated, so don't mess with 3-source instructions |
| * here. |
| */ |
| if (progress && inst->sources == 2 && inst->is_commutative()) { |
| if (inst->src[0].file == IMM) { |
| brw_reg tmp = inst->src[1]; |
| inst->src[1] = inst->src[0]; |
| inst->src[0] = tmp; |
| } |
| } |
| } |
| |
| if (progress) |
| s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTION_DATA_FLOW | |
| BRW_DEPENDENCY_INSTRUCTION_DETAIL); |
| |
| return progress; |
| } |