| /* |
| * Copyright © 2024 Intel Corporation |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| #include "brw_shader.h" |
| #include "brw_cfg.h" |
| #include "brw_analysis.h" |
| #include "brw_builder.h" |
| |
| /* Duplicated from brw_def_analysis::fully_defines. */ |
| static bool |
| fully_defines(const brw_shader &s, brw_inst *inst) |
| { |
| return s.alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written && |
| !inst->is_partial_write(); |
| } |
| |
/**
 * Insert SHADER_OPCODE_LOAD_REG instructions for the non-def VGRF sources of
 * instructions whose destinations could otherwise become defs.
 *
 * For each candidate instruction (VGRF destination that is not already a def
 * and that the instruction fully defines), every VGRF source that is not a
 * def gets replaced by the result of a LOAD_REG copy of that source.  Before
 * emitting a new LOAD_REG, the current block is scanned backwards for an
 * equivalent one that can be reused.
 *
 * Returns true and invalidates instruction/variable analyses if any source
 * was rewritten.
 */
bool
brw_insert_load_reg(brw_shader &s)
{
   bool progress = false;

   const brw_def_analysis &defs = s.def_analysis.require();

   foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) {
      /* These should not exist yet. */
      assert(inst->opcode != SHADER_OPCODE_LOAD_REG);

      /* These opcodes may have the right source and destination patterns to
       * have their sources replaced by load_reg, but these instructions are
       * special and / or weird. They should not be modified.
       */
      if (inst->opcode == SHADER_OPCODE_UNDEF ||
          inst->opcode == BRW_OPCODE_DPAS) {
         continue;
      }

      /* If the destination is non-VGRF adding load_reg instructions will not
       * help. If the destination is already SSA, nothing needs to be done.
       */
      if (inst->dst.file != VGRF || defs.get(inst->dst) != NULL)
         continue;

      /* If there is a source that would cause def_analysis::update_for_reads
       * to mark the def as invalid, adding load_reg for the sources will not
       * help.
       */
      if (inst->reads_accumulator_implicitly())
         continue;

      /* Explicit reads of address, accumulator, or flag registers would
       * likewise prevent the destination from becoming a def.
       */
      bool bad_source = false;
      for (int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == ARF &&
             (inst->src[i].nr == BRW_ARF_ADDRESS ||
              inst->src[i].nr == BRW_ARF_ACCUMULATOR ||
              inst->src[i].nr == BRW_ARF_FLAG)) {
            bad_source = true;
            break;
         }
      }

      if (bad_source)
         continue;

      /* If the instruction does not fully define the destination, adding
       * load_reg instructions will not help.
       */
      if (!fully_defines(s, inst))
         continue;

      if (inst->exec_size < 8)
         continue;

      assert(inst->exec_size == 8 || inst->exec_size == 16 ||
             inst->exec_size == 32);

      /* Mask used below to detect VGRF allocations that are not an even
       * multiple of the SIMD size (in registers).
       */
      const unsigned mask = (inst->exec_size / 8) - 1;

      /* Replace any non-SSA sources with load_reg of the source. */
      const brw_builder bld = brw_builder(inst);
      for (int i = 0; i < inst->sources; i++) {
         /* LOAD_REG only operates on VGRF sources. If the source is not VGRF,
          * skip it.
          */
         if (inst->src[i].file != VGRF)
            continue;

         /* The source is already a def, so don't add a LOAD_REG. */
         if (defs.get(inst->src[i]) != NULL)
            continue;

         /* Cases of stride != 1 are difficult to handle correctly. For
          * example, when stride is 0, the source may have been written by
          * NoMask instruction that cannot be seen from here. In this case,
          * emitting a non-NoMask LOAD_REG may not actually copy the value
          * that the instruction is trying to read.
          *
          * This means that is_scalar sources in larger exec sizes are not
          * handled. Since enough information is available in the source, this
          * could be added later.
          */
         if (inst->src[i].stride != 1)
            continue;

         /* If the size of the VGRF allocation is not an even multiple of
          * the SIMD size, don't emit a load_reg. This can occur for sparse
          * texture loads. These will have SIMD-size values for the texture
          * data and a single SIMD1 register for the residency information.
          */
         if ((s.alloc.sizes[inst->src[i].nr] & mask) != 0)
            continue;

         /* Use a bit-identical unsigned type of the same size so that the
          * LOAD_REG is a plain copy with no type conversion.
          */
         brw_reg_type t =
            brw_type_with_size(BRW_TYPE_UD,
                               brw_type_size_bits(inst->src[i].type));
         brw_reg old_src = brw_vgrf(inst->src[i].nr, t);
         brw_reg new_src;

         /* Since the sources of a LOAD_REG will likely not be defs,
          * none of the existing optimizations passes will eliminate
          * redundant LOAD_REG instructions. Search back though this
          * block to find a LOAD_REG of the same value to avoid emitting
          * too many redundant instructions.
          */
         foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
            /* The source register is overwritten here; any earlier LOAD_REG
             * of it would hold a stale value, so stop searching.
             */
            if (scan_inst->dst.file == old_src.file &&
                scan_inst->dst.nr == old_src.nr) {
               break;
            }

            /* Only reuse a LOAD_REG that copies the same value under the
             * same execution conditions (exec size and NoMask state).
             */
            if (scan_inst->opcode == SHADER_OPCODE_LOAD_REG &&
                scan_inst->exec_size == inst->exec_size &&
                scan_inst->force_writemask_all == inst->force_writemask_all &&
                old_src.equals(scan_inst->src[0])) {
               new_src = scan_inst->dst;
               break;
            }
         }

         /* No reusable LOAD_REG was found (new_src is still default
          * constructed, presumably BAD_FILE — TODO confirm), so emit one.
          */
         if (new_src.file == BAD_FILE)
            new_src = bld.LOAD_REG(old_src);

         /* Only the register number is redirected; the source keeps its
          * original type, region, and modifiers.
          */
         inst->src[i].nr = new_src.nr;
         progress = true;
      }
   }

   if (progress)
      s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
                            BRW_DEPENDENCY_VARIABLES);

   return progress;
}
| |
| bool |
| brw_lower_load_reg(brw_shader &s) |
| { |
| bool progress = false; |
| |
| foreach_block_and_inst_safe(block, brw_inst, inst, s.cfg) { |
| if (inst->opcode == SHADER_OPCODE_LOAD_REG) { |
| const brw_builder ibld = brw_builder(inst); |
| |
| const unsigned bytes = inst->size_written; |
| const unsigned type_bytes = brw_type_size_bytes(inst->dst.type); |
| const unsigned bytes_per_mov = inst->exec_size * type_bytes; |
| |
| for (unsigned i = 0; i < bytes; i += bytes_per_mov) { |
| ibld.MOV(byte_offset(inst->dst, i), |
| byte_offset(inst->src[0], i)); |
| } |
| |
| inst->remove(); |
| progress = true; |
| } |
| } |
| |
| if (progress) |
| s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS | |
| BRW_DEPENDENCY_VARIABLES); |
| |
| return progress; |
| } |