| /* |
| * Copyright © 2010 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "brw_cfg.h" |
| #include "brw_eu.h" |
| #include "brw_fs.h" |
| #include "brw_nir.h" |
| #include "brw_private.h" |
| #include "dev/intel_debug.h" |
| #include "util/macros.h" |
| |
| bool |
| fs_reg_saturate_immediate(fs_reg *reg) |
| { |
| union { |
| unsigned ud; |
| int d; |
| float f; |
| double df; |
| } imm, sat_imm = { 0 }; |
| |
| const unsigned size = type_sz(reg->type); |
| |
| /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise |
| * irrelevant, so just check the size of the type and copy from/to an |
| * appropriately sized field. |
| */ |
| if (size < 8) |
| imm.ud = reg->ud; |
| else |
| imm.df = reg->df; |
| |
| switch (reg->type) { |
| case BRW_REGISTER_TYPE_UD: |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UW: |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| /* Nothing to do. */ |
| return false; |
| case BRW_REGISTER_TYPE_F: |
| sat_imm.f = SATURATE(imm.f); |
| break; |
| case BRW_REGISTER_TYPE_DF: |
| sat_imm.df = SATURATE(imm.df); |
| break; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_V: |
| case BRW_REGISTER_TYPE_UV: |
| case BRW_REGISTER_TYPE_VF: |
| unreachable("unimplemented: saturate vector immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| unreachable("unimplemented: saturate HF immediate"); |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| if (size < 8) { |
| if (imm.ud != sat_imm.ud) { |
| reg->ud = sat_imm.ud; |
| return true; |
| } |
| } else { |
| if (imm.df != sat_imm.df) { |
| reg->df = sat_imm.df; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool |
| fs_reg_negate_immediate(fs_reg *reg) |
| { |
| switch (reg->type) { |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| reg->d = -reg->d; |
| return true; |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UW: { |
| uint16_t value = -(int16_t)reg->ud; |
| reg->ud = value | (uint32_t)value << 16; |
| return true; |
| } |
| case BRW_REGISTER_TYPE_F: |
| reg->f = -reg->f; |
| return true; |
| case BRW_REGISTER_TYPE_VF: |
| reg->ud ^= 0x80808080; |
| return true; |
| case BRW_REGISTER_TYPE_DF: |
| reg->df = -reg->df; |
| return true; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| reg->d64 = -reg->d64; |
| return true; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_UV: |
| case BRW_REGISTER_TYPE_V: |
| assert(!"unimplemented: negate UV/V immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| reg->ud ^= 0x80008000; |
| return true; |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| return false; |
| } |
| |
| bool |
| fs_reg_abs_immediate(fs_reg *reg) |
| { |
| switch (reg->type) { |
| case BRW_REGISTER_TYPE_D: |
| reg->d = abs(reg->d); |
| return true; |
| case BRW_REGISTER_TYPE_W: { |
| uint16_t value = abs((int16_t)reg->ud); |
| reg->ud = value | (uint32_t)value << 16; |
| return true; |
| } |
| case BRW_REGISTER_TYPE_F: |
| reg->f = fabsf(reg->f); |
| return true; |
| case BRW_REGISTER_TYPE_DF: |
| reg->df = fabs(reg->df); |
| return true; |
| case BRW_REGISTER_TYPE_VF: |
| reg->ud &= ~0x80808080; |
| return true; |
| case BRW_REGISTER_TYPE_Q: |
| reg->d64 = imaxabs(reg->d64); |
| return true; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_UD: |
| case BRW_REGISTER_TYPE_UW: |
| case BRW_REGISTER_TYPE_UV: |
| /* Presumably the absolute value modifier on an unsigned source is a |
| * nop, but it would be nice to confirm. |
| */ |
| assert(!"unimplemented: abs unsigned immediate"); |
| case BRW_REGISTER_TYPE_V: |
| assert(!"unimplemented: abs V immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| reg->ud &= ~0x80008000; |
| return true; |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| return false; |
| } |
| |
| bool |
| fs_reg::is_zero() const |
| { |
| if (file != IMM) |
| return false; |
| |
| assert(type_sz(type) > 1); |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_HF: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; |
| case BRW_REGISTER_TYPE_F: |
| return f == 0; |
| case BRW_REGISTER_TYPE_DF: |
| return df == 0; |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UW: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 0; |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| return d == 0; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| return u64 == 0; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_reg::is_one() const |
| { |
| if (file != IMM) |
| return false; |
| |
| assert(type_sz(type) > 1); |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_HF: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 0x3c00; |
| case BRW_REGISTER_TYPE_F: |
| return f == 1.0f; |
| case BRW_REGISTER_TYPE_DF: |
| return df == 1.0; |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UW: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 1; |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| return d == 1; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| return u64 == 1; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_reg::is_negative_one() const |
| { |
| if (file != IMM) |
| return false; |
| |
| assert(type_sz(type) > 1); |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_HF: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 0xbc00; |
| case BRW_REGISTER_TYPE_F: |
| return f == -1.0; |
| case BRW_REGISTER_TYPE_DF: |
| return df == -1.0; |
| case BRW_REGISTER_TYPE_W: |
| assert((d & 0xffff) == ((d >> 16) & 0xffff)); |
| return (d & 0xffff) == 0xffff; |
| case BRW_REGISTER_TYPE_D: |
| return d == -1; |
| case BRW_REGISTER_TYPE_Q: |
| return d64 == -1; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_reg::is_null() const |
| { |
| return file == ARF && nr == BRW_ARF_NULL; |
| } |
| |
| |
| bool |
| fs_reg::is_accumulator() const |
| { |
| return file == ARF && nr == BRW_ARF_ACCUMULATOR; |
| } |
| |
| bool |
| fs_inst::is_commutative() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_AND: |
| case BRW_OPCODE_OR: |
| case BRW_OPCODE_XOR: |
| case BRW_OPCODE_ADD: |
| case BRW_OPCODE_ADD3: |
| case SHADER_OPCODE_MULH: |
| return true; |
| |
| case BRW_OPCODE_MUL: |
| /* Integer multiplication of dword and word sources is not actually |
| * commutative. The DW source must be first. |
| */ |
| return !brw_reg_type_is_integer(src[0].type) || |
| type_sz(src[0].type) == type_sz(src[1].type); |
| |
| case BRW_OPCODE_SEL: |
| /* MIN and MAX are commutative. */ |
| if (conditional_mod == BRW_CONDITIONAL_GE || |
| conditional_mod == BRW_CONDITIONAL_L) { |
| return true; |
| } |
| FALLTHROUGH; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::is_3src(const struct brw_compiler *compiler) const |
| { |
| return ::is_3src(&compiler->isa, opcode); |
| } |
| |
| bool |
| fs_inst::is_math() const |
| { |
| return (opcode == SHADER_OPCODE_RCP || |
| opcode == SHADER_OPCODE_RSQ || |
| opcode == SHADER_OPCODE_SQRT || |
| opcode == SHADER_OPCODE_EXP2 || |
| opcode == SHADER_OPCODE_LOG2 || |
| opcode == SHADER_OPCODE_SIN || |
| opcode == SHADER_OPCODE_COS || |
| opcode == SHADER_OPCODE_INT_QUOTIENT || |
| opcode == SHADER_OPCODE_INT_REMAINDER || |
| opcode == SHADER_OPCODE_POW); |
| } |
| |
| bool |
| fs_inst::is_control_flow_begin() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_DO: |
| case BRW_OPCODE_IF: |
| case BRW_OPCODE_ELSE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::is_control_flow_end() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_ELSE: |
| case BRW_OPCODE_WHILE: |
| case BRW_OPCODE_ENDIF: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::is_control_flow() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_DO: |
| case BRW_OPCODE_WHILE: |
| case BRW_OPCODE_IF: |
| case BRW_OPCODE_ELSE: |
| case BRW_OPCODE_ENDIF: |
| case BRW_OPCODE_BREAK: |
| case BRW_OPCODE_CONTINUE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::uses_indirect_addressing() const |
| { |
| switch (opcode) { |
| case SHADER_OPCODE_BROADCAST: |
| case SHADER_OPCODE_CLUSTER_BROADCAST: |
| case SHADER_OPCODE_MOV_INDIRECT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::can_do_saturate() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_ADD: |
| case BRW_OPCODE_ADD3: |
| case BRW_OPCODE_ASR: |
| case BRW_OPCODE_AVG: |
| case BRW_OPCODE_CSEL: |
| case BRW_OPCODE_DP2: |
| case BRW_OPCODE_DP3: |
| case BRW_OPCODE_DP4: |
| case BRW_OPCODE_DPH: |
| case BRW_OPCODE_DP4A: |
| case BRW_OPCODE_LINE: |
| case BRW_OPCODE_LRP: |
| case BRW_OPCODE_MAC: |
| case BRW_OPCODE_MAD: |
| case BRW_OPCODE_MATH: |
| case BRW_OPCODE_MOV: |
| case BRW_OPCODE_MUL: |
| case SHADER_OPCODE_MULH: |
| case BRW_OPCODE_PLN: |
| case BRW_OPCODE_RNDD: |
| case BRW_OPCODE_RNDE: |
| case BRW_OPCODE_RNDU: |
| case BRW_OPCODE_RNDZ: |
| case BRW_OPCODE_SEL: |
| case BRW_OPCODE_SHL: |
| case BRW_OPCODE_SHR: |
| case FS_OPCODE_LINTERP: |
| case SHADER_OPCODE_COS: |
| case SHADER_OPCODE_EXP2: |
| case SHADER_OPCODE_LOG2: |
| case SHADER_OPCODE_POW: |
| case SHADER_OPCODE_RCP: |
| case SHADER_OPCODE_RSQ: |
| case SHADER_OPCODE_SIN: |
| case SHADER_OPCODE_SQRT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::reads_accumulator_implicitly() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_MAC: |
| case BRW_OPCODE_MACH: |
| case BRW_OPCODE_SADA2: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| fs_inst::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const |
| { |
| return writes_accumulator || |
| (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln) || |
| (eot && intel_needs_workaround(devinfo, 14010017096)); |
| } |
| |
| bool |
| fs_inst::has_side_effects() const |
| { |
| switch (opcode) { |
| case SHADER_OPCODE_SEND: |
| return send_has_side_effects; |
| |
| case BRW_OPCODE_SYNC: |
| case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: |
| case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: |
| case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: |
| case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: |
| case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: |
| case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: |
| case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: |
| case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: |
| case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: |
| case SHADER_OPCODE_MEMORY_FENCE: |
| case SHADER_OPCODE_INTERLOCK: |
| case SHADER_OPCODE_URB_WRITE_LOGICAL: |
| case FS_OPCODE_FB_WRITE_LOGICAL: |
| case SHADER_OPCODE_BARRIER: |
| case SHADER_OPCODE_RND_MODE: |
| case SHADER_OPCODE_FLOAT_CONTROL_MODE: |
| case FS_OPCODE_SCHEDULING_FENCE: |
| case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: |
| case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: |
| case SHADER_OPCODE_BTD_SPAWN_LOGICAL: |
| case SHADER_OPCODE_BTD_RETIRE_LOGICAL: |
| case RT_OPCODE_TRACE_RAY_LOGICAL: |
| return true; |
| default: |
| return eot; |
| } |
| } |
| |
| bool |
| fs_inst::is_volatile() const |
| { |
| switch (opcode) { |
| case SHADER_OPCODE_SEND: |
| return send_is_volatile; |
| |
| case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: |
| case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: |
| case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: |
| case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: |
| case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: |
| case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| #ifndef NDEBUG |
| static bool |
| inst_is_in_block(const bblock_t *block, const fs_inst *inst) |
| { |
| const exec_node *n = inst; |
| |
| /* Find the tail sentinel. If the tail sentinel is the sentinel from the |
| * list header in the bblock_t, then this instruction is in that basic |
| * block. |
| */ |
| while (!n->is_tail_sentinel()) |
| n = n->get_next(); |
| |
| return n == &block->instructions.tail_sentinel; |
| } |
| #endif |
| |
| static void |
| adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) |
| { |
| for (bblock_t *block_iter = start_block->next(); |
| block_iter; |
| block_iter = block_iter->next()) { |
| block_iter->start_ip += ip_adjustment; |
| block_iter->end_ip += ip_adjustment; |
| } |
| } |
| |
| void |
| fs_inst::insert_after(bblock_t *block, fs_inst *inst) |
| { |
| assert(this != inst); |
| assert(block->end_ip_delta == 0); |
| |
| if (!this->is_head_sentinel()) |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| block->end_ip++; |
| |
| adjust_later_block_ips(block, 1); |
| |
| exec_node::insert_after(inst); |
| } |
| |
| void |
| fs_inst::insert_before(bblock_t *block, fs_inst *inst) |
| { |
| assert(this != inst); |
| assert(block->end_ip_delta == 0); |
| |
| if (!this->is_tail_sentinel()) |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| block->end_ip++; |
| |
| adjust_later_block_ips(block, 1); |
| |
| exec_node::insert_before(inst); |
| } |
| |
| void |
| fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates) |
| { |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| if (defer_later_block_ip_updates) { |
| block->end_ip_delta--; |
| } else { |
| assert(block->end_ip_delta == 0); |
| adjust_later_block_ips(block, -1); |
| } |
| |
| if (block->start_ip == block->end_ip) { |
| if (block->end_ip_delta != 0) { |
| adjust_later_block_ips(block, block->end_ip_delta); |
| block->end_ip_delta = 0; |
| } |
| |
| block->cfg->remove_block(block); |
| } else { |
| block->end_ip--; |
| } |
| |
| exec_node::remove(); |
| } |
| |
| extern "C" const unsigned * |
| brw_compile_tes(const struct brw_compiler *compiler, |
| brw_compile_tes_params *params) |
| { |
| const struct intel_device_info *devinfo = compiler->devinfo; |
| nir_shader *nir = params->base.nir; |
| const struct brw_tes_prog_key *key = params->key; |
| const struct intel_vue_map *input_vue_map = params->input_vue_map; |
| struct brw_tes_prog_data *prog_data = params->prog_data; |
| |
| const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TES); |
| |
| prog_data->base.base.stage = MESA_SHADER_TESS_EVAL; |
| prog_data->base.base.ray_queries = nir->info.ray_queries; |
| |
| nir->info.inputs_read = key->inputs_read; |
| nir->info.patch_inputs_read = key->patch_inputs_read; |
| |
| brw_nir_apply_key(nir, compiler, &key->base, 8); |
| brw_nir_lower_tes_inputs(nir, input_vue_map); |
| brw_nir_lower_vue_outputs(nir); |
| brw_postprocess_nir(nir, compiler, debug_enabled, |
| key->base.robust_flags); |
| |
| brw_compute_vue_map(devinfo, &prog_data->base.vue_map, |
| nir->info.outputs_written, |
| nir->info.separate_shader, 1); |
| |
| unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; |
| |
| assert(output_size_bytes >= 1); |
| if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) { |
| params->base.error_str = ralloc_strdup(params->base.mem_ctx, |
| "DS outputs exceed maximum size"); |
| return NULL; |
| } |
| |
| prog_data->base.clip_distance_mask = |
| ((1 << nir->info.clip_distance_array_size) - 1); |
| prog_data->base.cull_distance_mask = |
| ((1 << nir->info.cull_distance_array_size) - 1) << |
| nir->info.clip_distance_array_size; |
| |
| prog_data->include_primitive_id = |
| BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); |
| |
| /* URB entry sizes are stored as a multiple of 64 bytes. */ |
| prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; |
| |
| prog_data->base.urb_read_length = 0; |
| |
| STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); |
| STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL == |
| TESS_SPACING_FRACTIONAL_ODD - 1); |
| STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL == |
| TESS_SPACING_FRACTIONAL_EVEN - 1); |
| |
| prog_data->partitioning = |
| (enum intel_tess_partitioning) (nir->info.tess.spacing - 1); |
| |
| switch (nir->info.tess._primitive_mode) { |
| case TESS_PRIMITIVE_QUADS: |
| prog_data->domain = INTEL_TESS_DOMAIN_QUAD; |
| break; |
| case TESS_PRIMITIVE_TRIANGLES: |
| prog_data->domain = INTEL_TESS_DOMAIN_TRI; |
| break; |
| case TESS_PRIMITIVE_ISOLINES: |
| prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE; |
| break; |
| default: |
| unreachable("invalid domain shader primitive mode"); |
| } |
| |
| if (nir->info.tess.point_mode) { |
| prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT; |
| } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) { |
| prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE; |
| } else { |
| /* Hardware winding order is backwards from OpenGL */ |
| prog_data->output_topology = |
| nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW |
| : INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW; |
| } |
| |
| if (unlikely(debug_enabled)) { |
| fprintf(stderr, "TES Input "); |
| brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL); |
| fprintf(stderr, "TES Output "); |
| brw_print_vue_map(stderr, &prog_data->base.vue_map, |
| MESA_SHADER_TESS_EVAL); |
| } |
| |
| const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8; |
| fs_visitor v(compiler, ¶ms->base, &key->base, |
| &prog_data->base.base, nir, dispatch_width, |
| params->base.stats != NULL, debug_enabled); |
| if (!v.run_tes()) { |
| params->base.error_str = |
| ralloc_strdup(params->base.mem_ctx, v.fail_msg); |
| return NULL; |
| } |
| |
| assert(v.payload().num_regs % reg_unit(devinfo) == 0); |
| prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo); |
| |
| prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8; |
| |
| fs_generator g(compiler, ¶ms->base, |
| &prog_data->base.base, MESA_SHADER_TESS_EVAL); |
| if (unlikely(debug_enabled)) { |
| g.enable_debug(ralloc_asprintf(params->base.mem_ctx, |
| "%s tessellation evaluation shader %s", |
| nir->info.label ? nir->info.label |
| : "unnamed", |
| nir->info.name)); |
| } |
| |
| g.generate_code(v.cfg, dispatch_width, v.shader_stats, |
| v.performance_analysis.require(), params->base.stats); |
| |
| g.add_const_data(nir->constant_data, nir->constant_data_size); |
| |
| return g.get_assembly(); |
| } |