| /* |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| /** @file |
| * |
| * Implements a pass that validates various invariants of the IR. The current |
| * pass only validates that uses of GRFs are sane. More can be added later. |
| */ |
| |
| #include "brw_shader.h" |
| #include "brw_cfg.h" |
| #include "brw_eu.h" |
| |
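| /* Validation assert helpers. On failure they print the shader stage and the |
|  * offending instruction to stderr along with the failed condition, then |
|  * abort(). They assume a brw_shader `s` and a brw_inst `inst` are in scope |
|  * at the point of use. |
|  */ |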
| #define fsv_assert(assertion) \ |
| { \ |
| if (!(assertion)) { \ |
| fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", \ |
| _mesa_shader_stage_to_abbrev(s.stage)); \ |
| brw_print_instruction(s, inst, stderr); \ |
| fprintf(stderr, "%s:%d: '%s' failed\n", __FILE__, __LINE__, #assertion); \ |
| abort(); \ |
| } \ |
| } |
| |
| #define fsv_assert_eq(A, B) \ |
| { \ |
| uintptr_t a = uintptr_t(A); \ |
| uintptr_t b = uintptr_t(B); \ |
| if (a != b) { \ |
| fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", \ |
| _mesa_shader_stage_to_abbrev(s.stage)); \ |
| brw_print_instruction(s, inst, stderr); \ |
| fprintf(stderr, "%s:%d: A == B failed\n", __FILE__, __LINE__); \ |
| fprintf(stderr, " A = %s = %" PRIuPTR "\n", #A, a); \ |
| fprintf(stderr, " B = %s = %" PRIuPTR "\n", #B, b); \ |
| abort(); \ |
| } \ |
| } |
| |
| #define fsv_assert_ne(A, B) \ |
| { \ |
| unsigned a = (A); \ |
| unsigned b = (B); \ |
| if (a == b) { \ |
| fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", \ |
| _mesa_shader_stage_to_abbrev(s.stage)); \ |
| brw_print_instruction(s, inst, stderr); \ |
| fprintf(stderr, "%s:%d: A != B failed\n", __FILE__, __LINE__); \ |
| fprintf(stderr, " A = %s = %u\n", #A, a); \ |
| fprintf(stderr, " B = %s = %u\n", #B, b); \ |
| abort(); \ |
| } \ |
| } |
| |
| #define fsv_assert_lte(A, B) \ |
| { \ |
| unsigned a = (A); \ |
| unsigned b = (B); \ |
| if (a > b) { \ |
| fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", \ |
| _mesa_shader_stage_to_abbrev(s.stage)); \ |
| brw_print_instruction(s, inst, stderr); \ |
| fprintf(stderr, "%s:%d: A <= B failed\n", __FILE__, __LINE__); \ |
| fprintf(stderr, " A = %s = %u\n", #A, a); \ |
| fprintf(stderr, " B = %s = %u\n", #B, b); \ |
| abort(); \ |
| } \ |
| } |
| |
| #ifndef NDEBUG |
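| /* Helpers for matching unsigned and signed dword immediate operands. */ |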
| static inline bool |
| is_ud_imm(const brw_reg ®) |
| { |
| return reg.file == IMM && reg.type == BRW_TYPE_UD; |
| } |
| |
| static inline bool |
| is_d_imm(const brw_reg ®) |
| { |
| return reg.file == IMM && reg.type == BRW_TYPE_D; |
| } |
| |
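| /* Check the invariants of the logical memory instructions (load, store, |
|  * atomic): immediate control sources, data size and type consistency, |
|  * offset alignment, and per-opcode source requirements. |
|  */ |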
| static void |
| validate_memory_logical(const brw_shader &s, const brw_inst *inst) |
| { |
| const intel_device_info *devinfo = s.devinfo; |
| |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS])); |
| fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS])); |
| fsv_assert(is_d_imm(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET])); |
| |
| enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud; |
| enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud; |
| bool transpose = flags & MEMORY_FLAG_TRANSPOSE; |
| bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS; |
| enum memory_logical_mode mode = |
| (memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud; |
| |
| enum lsc_data_size data_size = |
| (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud; |
| unsigned data_size_B = lsc_data_size_bytes(data_size); |
| |
| if (!devinfo->has_lsc) { |
| fsv_assert(data_size == LSC_DATA_SIZE_D8U32 || |
| data_size == LSC_DATA_SIZE_D16U32 || |
| data_size == LSC_DATA_SIZE_D32 || |
| data_size == LSC_DATA_SIZE_D64); |
| |
| if (transpose) { |
| const unsigned min_alignment = |
| mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4; |
| fsv_assert(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud >= min_alignment); |
| } |
| } |
| |
| fsv_assert(!transpose || !include_helpers); |
| fsv_assert(!transpose || lsc_opcode_has_transpose(op)); |
| |
| if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT) |
| fsv_assert(inst->src[MEMORY_LOGICAL_BINDING].file == BAD_FILE); |
| |
| if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) { |
| fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == |
| inst->components_read(MEMORY_LOGICAL_DATA1)); |
| |
| fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].type == |
| inst->src[MEMORY_LOGICAL_DATA1].type); |
| } |
| |
| if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) { |
| fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == |
| inst->components_read(MEMORY_LOGICAL_DATA0)); |
| |
| fsv_assert(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type) == |
| data_size_B); |
| } |
| |
| if (inst->dst.file != BAD_FILE) |
| fsv_assert(brw_type_size_bytes(inst->dst.type) == data_size_B); |
| |
| /* TGM messages cannot have a base offset */ |
| if (mode == MEMORY_MODE_TYPED) |
| fsv_assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d == 0); |
| |
| /* Offset must be DWord aligned */ |
| fsv_assert((inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d % 4) == 0); |
| |
| switch (inst->opcode) { |
| case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: |
| fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK || |
| op == LSC_OP_LOAD_CMASK_MSRT); |
| fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE); |
| fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE); |
| break; |
| case SHADER_OPCODE_MEMORY_STORE_LOGICAL: |
| fsv_assert(lsc_opcode_is_store(op)); |
| fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE); |
| fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE); |
| break; |
| case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: |
| fsv_assert(lsc_opcode_is_atomic(op)); |
| fsv_assert((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE) |
| == (lsc_op_num_data_values(op) < 1)); |
| fsv_assert((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE) |
| == (lsc_op_num_data_values(op) < 2)); |
| fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == 1); |
| fsv_assert(!include_helpers); |
| break; |
| default: |
| unreachable("invalid opcode"); |
| } |
| } |
| |
| static const char * |
| brw_shader_phase_to_string(enum brw_shader_phase phase) |
| { |
| switch (phase) { |
| case BRW_SHADER_PHASE_INITIAL: return "INITIAL"; |
| case BRW_SHADER_PHASE_AFTER_NIR: return "AFTER_NIR"; |
| case BRW_SHADER_PHASE_AFTER_OPT_LOOP: return "AFTER_OPT_LOOP"; |
| case BRW_SHADER_PHASE_AFTER_EARLY_LOWERING: return "AFTER_EARLY_LOWERING"; |
| case BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING: return "AFTER_MIDDLE_LOWERING"; |
| case BRW_SHADER_PHASE_AFTER_LATE_LOWERING: return "AFTER_LATE_LOWERING"; |
| case BRW_SHADER_PHASE_AFTER_REGALLOC: return "AFTER_REGALLOC"; |
| case BRW_SHADER_PHASE_INVALID: break; |
| } |
| unreachable("invalid_phase"); |
| return NULL; |
| } |
| |
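| /* Verify that an instruction is still allowed in the shader's current |
|  * compilation phase: most logical opcodes must have been lowered by the end |
|  * of early lowering, and LOAD_PAYLOAD by the end of middle lowering. |
|  */ |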
| static void |
| brw_validate_instruction_phase(const brw_shader &s, brw_inst *inst) |
| { |
| enum brw_shader_phase invalid_from = BRW_SHADER_PHASE_INVALID; |
| |
| switch (inst->opcode) { |
| case FS_OPCODE_FB_WRITE_LOGICAL: |
| case FS_OPCODE_FB_READ_LOGICAL: |
| case SHADER_OPCODE_TEX_LOGICAL: |
| case SHADER_OPCODE_TXD_LOGICAL: |
| case SHADER_OPCODE_TXF_LOGICAL: |
| case SHADER_OPCODE_TXL_LOGICAL: |
| case SHADER_OPCODE_TXS_LOGICAL: |
| case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: |
| case FS_OPCODE_TXB_LOGICAL: |
| case SHADER_OPCODE_TXF_CMS_W_LOGICAL: |
| case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: |
| case SHADER_OPCODE_TXF_MCS_LOGICAL: |
| case SHADER_OPCODE_LOD_LOGICAL: |
| case SHADER_OPCODE_TG4_LOGICAL: |
| case SHADER_OPCODE_TG4_BIAS_LOGICAL: |
| case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: |
| case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: |
| case SHADER_OPCODE_TG4_OFFSET_LOGICAL: |
| case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: |
| case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: |
| case SHADER_OPCODE_SAMPLEINFO_LOGICAL: |
| case SHADER_OPCODE_GET_BUFFER_SIZE: |
| case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: |
| case SHADER_OPCODE_MEMORY_STORE_LOGICAL: |
| case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: |
| case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: |
| case FS_OPCODE_INTERPOLATE_AT_SAMPLE: |
| case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: |
| case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: |
| case SHADER_OPCODE_BTD_SPAWN_LOGICAL: |
| case SHADER_OPCODE_BTD_RETIRE_LOGICAL: |
| case RT_OPCODE_TRACE_RAY_LOGICAL: |
| case SHADER_OPCODE_URB_READ_LOGICAL: |
| case SHADER_OPCODE_URB_WRITE_LOGICAL: |
| case SHADER_OPCODE_REDUCE: |
| case SHADER_OPCODE_INCLUSIVE_SCAN: |
| case SHADER_OPCODE_EXCLUSIVE_SCAN: |
| case SHADER_OPCODE_VOTE_ANY: |
| case SHADER_OPCODE_VOTE_ALL: |
| case SHADER_OPCODE_VOTE_EQUAL: |
| case SHADER_OPCODE_BALLOT: |
| case SHADER_OPCODE_QUAD_SWAP: |
| case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL: |
| case SHADER_OPCODE_READ_FROM_CHANNEL: |
| case SHADER_OPCODE_LOAD_REG: |
| invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING; |
| break; |
| |
| case SHADER_OPCODE_LOAD_PAYLOAD: |
| invalid_from = BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING; |
| break; |
| |
| default: |
| /* Nothing to do. */ |
| break; |
| } |
| |
| assert(s.phase < BRW_SHADER_PHASE_INVALID); |
| if (s.phase >= invalid_from) { |
| fprintf(stderr, "INVALID INSTRUCTION IN PHASE: %s\n", |
| brw_shader_phase_to_string(s.phase)); |
| brw_print_instruction(s, inst, stderr); |
| abort(); |
| } |
| } |
| |
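| /* Top-level IR validation entry point: validates the CFG, then walks every |
|  * instruction checking per-opcode invariants, register region restrictions, |
|  * and address register usage within each block. |
|  */ |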
| void |
| brw_validate(const brw_shader &s) |
| { |
| const intel_device_info *devinfo = s.devinfo; |
| |
| if (!s.cfg) |
| return; |
| |
| s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage)); |
| |
| foreach_block(block, s.cfg) { |
| /* Track the last used address register. Usage of the address register |
| * in the IR should be limited to within a block, otherwise we would |
| * be unable to schedule some instructions without spilling the address |
| * register to a VGRF. |
| * |
| * Another pattern we stick to when using the address register in the IR |
| * is that we write and read the register in pairs of instructions. |
| */ |
| uint32_t last_used_address_register[16] = {}; |
| |
| foreach_inst_in_block (brw_inst, inst, block) { |
| brw_validate_instruction_phase(s, inst); |
| |
| switch (inst->opcode) { |
| case SHADER_OPCODE_SEND: |
| fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1])); |
| break; |
| |
| case SHADER_OPCODE_SEND_GATHER: |
| fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1])); |
| fsv_assert(devinfo->ver >= 30); |
| break; |
| |
| case BRW_OPCODE_MOV: |
| fsv_assert(inst->sources == 1); |
| break; |
| |
| case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: |
| case SHADER_OPCODE_MEMORY_STORE_LOGICAL: |
| case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: |
| validate_memory_logical(s, inst); |
| break; |
| |
| case SHADER_OPCODE_MEMORY_FENCE: |
| case SHADER_OPCODE_INTERLOCK: |
| fsv_assert(inst->exec_size == 1); |
| fsv_assert(inst->force_writemask_all); |
| fsv_assert(inst->sources == 2); |
| fsv_assert(is_ud_imm(inst->src[1])); /* commit enable */ |
| break; |
| |
| case SHADER_OPCODE_LOAD_REG: { |
| fsv_assert_eq(inst->sources, 1); |
| fsv_assert_eq(s.alloc.sizes[inst->dst.nr] * REG_SIZE, inst->size_written); |
| fsv_assert(!inst->is_partial_write()); |
| fsv_assert_lte(inst->src[0].stride, 1); |
| |
| /* For example, if file == UNIFORM, stride will be zero and offset |
| * may be non-zero. |
| */ |
| if (inst->src[0].stride != 0) |
| fsv_assert_eq(inst->src[0].offset, 0); |
| |
| const brw_def_analysis &defs = s.def_analysis.require(); |
| fsv_assert_eq(inst, defs.get(inst->dst)); |
| |
| break; |
| } |
| |
| default: |
| break; |
| } |
| |
| /* On Xe2, the "write the accumulator in addition to the explicit |
| * destination" bit no longer exists. Try to catch uses of this |
| * feature earlier in the process. |
| */ |
| if (devinfo->ver >= 20 && inst->writes_accumulator) { |
| fsv_assert(inst->dst.is_accumulator() || |
| inst->opcode == BRW_OPCODE_ADDC || |
| inst->opcode == BRW_OPCODE_MACH || |
| inst->opcode == BRW_OPCODE_SUBB); |
| } |
| |
| if (inst->is_3src(s.compiler)) { |
| const unsigned integer_sources = |
| brw_type_is_int(inst->src[0].type) + |
| brw_type_is_int(inst->src[1].type) + |
| brw_type_is_int(inst->src[2].type); |
| const unsigned float_sources = |
| brw_type_is_float_or_bfloat(inst->src[0].type) + |
| brw_type_is_float_or_bfloat(inst->src[1].type) + |
| brw_type_is_float_or_bfloat(inst->src[2].type); |
| |
| fsv_assert((integer_sources == 3 && float_sources == 0) || |
| (integer_sources == 0 && float_sources == 3)); |
| |
| if (devinfo->ver >= 10) { |
| for (unsigned i = 0; i < 3; i++) { |
| if (inst->src[i].file == IMM) |
| continue; |
| |
| switch (inst->src[i].vstride) { |
| case BRW_VERTICAL_STRIDE_0: |
| case BRW_VERTICAL_STRIDE_4: |
| case BRW_VERTICAL_STRIDE_8: |
| case BRW_VERTICAL_STRIDE_16: |
| break; |
| |
| case BRW_VERTICAL_STRIDE_1: |
| fsv_assert_lte(12, devinfo->ver); |
| break; |
| |
| case BRW_VERTICAL_STRIDE_2: |
| fsv_assert_lte(devinfo->ver, 11); |
| break; |
| |
| default: |
| fsv_assert(!"invalid vstride"); |
| break; |
| } |
| } |
| } else if (s.grf_used != 0) { |
| /* Only perform the pre-Gfx10 checks after register allocation |
| * has occurred. |
| * |
| * Many passes (e.g., constant copy propagation) will |
| * generate invalid 3-source instructions with the |
| * expectation that later passes (e.g., combine constants) will |
| * fix them. |
| */ |
| for (unsigned i = 0; i < 3; i++) { |
| fsv_assert_ne(inst->src[i].file, IMM); |
| |
| /* A stride of 1 (the usual case) or 0, with a special |
| * "repctrl" bit, is allowed. The repctrl bit doesn't work |
| * for 64-bit datatypes, so if the source type is 64-bit |
| * then only a stride of 1 is allowed. From the Broadwell |
| * PRM, Volume 7 "3D Media GPGPU", page 944: |
| * |
| * This is applicable to 32b datatypes and 16b datatype. |
| * 64b datatypes cannot use the replicate control. |
| */ |
| const unsigned stride_in_bytes = byte_stride(inst->src[i]); |
| const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type); |
| if (stride_in_bytes == 0) { |
| /* If the source is_scalar, then the stride will be |
| * converted to <4;4,1> in brw_lower_scalar_fp64_MAD |
| * after SIMD splitting. |
| */ |
| if (!inst->src[i].is_scalar) |
| fsv_assert_lte(size_in_bytes, 4); |
| } else { |
| fsv_assert_eq(stride_in_bytes, size_in_bytes); |
| } |
| } |
| } |
| } |
| |
| if (inst->dst.file == VGRF) { |
| fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst), |
| s.alloc.sizes[inst->dst.nr]); |
| if (inst->exec_size > 1) |
| fsv_assert_ne(inst->dst.stride, 0); |
| } else if (inst->dst.is_address()) { |
| fsv_assert(inst->dst.nr != 0); |
| } |
| |
| bool read_address_reg = false; |
| for (unsigned i = 0; i < inst->sources; i++) { |
| if (inst->src[i].file == VGRF) { |
| fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i), |
| s.alloc.sizes[inst->src[i].nr]); |
| } else if (inst->src[i].is_address()) { |
| fsv_assert(inst->src[i].nr != 0); |
| for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) { |
| fsv_assert_eq(inst->src[i].nr, |
| last_used_address_register[inst->src[i].address_slot(hw)]); |
| } |
| read_address_reg = true; |
| } |
| } |
| |
| /* Accumulator Registers, bspec 47251: |
| * |
| * "When destination is accumulator with offset 0, destination |
| * horizontal stride must be 1." |
| */ |
| if (intel_needs_workaround(devinfo, 14014617373) && |
| inst->dst.is_accumulator() && |
| phys_subnr(devinfo, inst->dst) == 0) { |
| fsv_assert_eq(inst->dst.hstride, 1); |
| } |
| |
| if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) { |
| /* Wa_22016140776: |
| * |
| * Scalar broadcast on HF math (packed or unpacked) must not be |
| * used. Compiler must use a mov instruction to expand the |
| * scalar value to a vector before using in a HF (packed or |
| * unpacked) math operation. |
| * |
| * Since copy propagation knows about this restriction, nothing |
| * should be able to generate these invalid source strides. Detect |
| * potential problems sooner rather than later. |
| */ |
| for (unsigned i = 0; i < inst->sources; i++) { |
| fsv_assert(inst->src[i].is_scalar || |
| !is_uniform(inst->src[i]) || |
| inst->src[i].type != BRW_TYPE_HF); |
| } |
| } |
| |
| /* Update the last used address register. */ |
| if (read_address_reg) { |
| /* When an instruction only reads the address register, we assume |
| * the read parts are never going to be used again. |
| */ |
| for (unsigned i = 0; i < inst->sources; i++) { |
| if (!inst->src[i].is_address()) |
| continue; |
| for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) |
| last_used_address_register[inst->src[i].address_slot(hw)] = 0; |
| } |
| } |
| if (inst->dst.is_address()) { |
| /* Record which address-register slots are written by this instruction. */ |
| for (unsigned hw = 0; hw < inst->size_written; hw += 2) |
| last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr; |
| } else if (inst->uses_address_register_implicitly()) { |
| /* If the instruction is making use of the address register, |
| * discard the entire thing. |
| */ |
| memset(last_used_address_register, 0, |
| sizeof(last_used_address_register)); |
| } |
| } |
| } |
| } |
| #endif |