| /* |
| * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com> |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include "compiler/nir/nir_builder.h" |
| #include "bifrost_compile.h" |
| #include "bifrost_opts.h" |
| #include "bifrost_sched.h" |
| #include "compiler_defines.h" |
| #include "disassemble.h" |
| #include "bifrost_print.h" |
| |
| #define BI_DEBUG |
| |
/*
 * Type-size callback handed to nir_lower_io: reports the number of
 * attribute slots occupied by a GLSL type.
 *
 * The bindless flag exists only to satisfy the callback signature; it is
 * not consulted.
 */
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        (void) bindless;

        const int slots = glsl_count_attribute_slots(type, false);
        return slots;
}
| |
| static void |
| optimize_nir(nir_shader *nir) |
| { |
| bool progress; |
| |
| NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0); |
| NIR_PASS(progress, nir, nir_lower_regs_to_ssa); |
| |
| do { |
| progress = false; |
| |
| NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0); |
| |
| NIR_PASS(progress, nir, nir_lower_var_copies); |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_constant_folding); |
| |
| NIR_PASS(progress, nir, nir_lower_vars_to_ssa); |
| NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL); |
| NIR_PASS(progress, nir, nir_opt_if, true); |
| |
| } while (progress); |
| |
| NIR_PASS(progress, nir, nir_copy_prop); |
| NIR_PASS(progress, nir, nir_opt_dce); |
| } |
| |
| static unsigned |
| nir_src_index(compiler_context *ctx, nir_src *src) |
| { |
| if (src->is_ssa) |
| return src->ssa->index; |
| else |
| return ctx->func->impl->ssa_alloc + src->reg.reg->index; |
| } |
| |
| static unsigned |
| nir_dest_index(compiler_context *ctx, nir_dest *dst) |
| { |
| if (dst->is_ssa) |
| return dst->ssa.index; |
| else |
| return ctx->func->impl->ssa_alloc + dst->reg.reg->index; |
| } |
| |
| static unsigned |
| nir_alu_src_index(compiler_context *ctx, nir_alu_src *src) |
| { |
| return nir_src_index(ctx, &src->src); |
| } |
| |
| struct bifrost_instruction * |
| mir_alloc_ins(struct bifrost_instruction instr) |
| { |
| struct bifrost_instruction *heap_ins = malloc(sizeof(instr)); |
| memcpy(heap_ins, &instr, sizeof(instr)); |
| return heap_ins; |
| } |
| |
| static void |
| emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr) |
| { |
| list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions); |
| } |
| |
| static void |
| bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor) |
| { |
| assert(block->num_successors < ARRAY_SIZE(block->successors)); |
| block->successors[block->num_successors++] = successor; |
| } |
| |
| static void |
| emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr) |
| { |
| nir_ssa_def def = instr->def; |
| |
| float *v = ralloc_array(NULL, float, 1); |
| nir_const_load_to_arr(v, instr, f32); |
| _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v); |
| } |
| |
| static uint32_t |
| alloc_mir_temp(struct compiler_context *ctx) |
| { |
| return SSA_TEMP_VALUE(ctx->mir_temp++); |
| } |
| |
| static uint32_t |
| emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location) |
| { |
| // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12 |
| // ... |
| // ST_VAR.v4 T1, R12, R13, R14, R4 |
| |
| // R61-R62 is filled with information needed for varying interpolation |
| // This loads a vec3 with the information that ST_VAR needs to work |
| |
| uint32_t mir_temp_location = alloc_mir_temp(ctx); |
| // This instruction loads a vec3 starting from the initial register |
| struct bifrost_instruction instr = { |
| .op = op_ld_var_addr, |
| .dest_components = 3, |
| .ssa_args = { |
| .dest = mir_temp_location, |
| .src0 = SSA_FIXED_REGISTER(61), |
| .src1 = SSA_FIXED_REGISTER(62), |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .literal_args[0] = location, |
| }; |
| emit_mir_instruction(ctx, instr); |
| |
| return mir_temp_location; |
| } |
| |
| // XXX: Doesn't support duplicated values in the components! |
| // RA WILL fail! |
| static void |
| emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps) |
| { |
| assert(num_comps <= 4 && "Can't make a vector larger than 4 components"); |
| |
| // This instruction loads a vec3 starting from the initial register |
| struct bifrost_instruction instr = { |
| .op = op_create_vector, |
| .dest_components = num_comps, |
| .ssa_args = { |
| .dest = dest, |
| } |
| }; |
| |
| uint32_t *srcs[4] = { |
| &instr.ssa_args.src0, |
| &instr.ssa_args.src1, |
| &instr.ssa_args.src2, |
| &instr.ssa_args.src3, |
| }; |
| |
| for (unsigned i = 0; i < 4; ++i) { |
| if (i < num_comps) |
| *srcs[i] = comps[i]; |
| else |
| *srcs[i] = SSA_INVALID_VALUE; |
| } |
| emit_mir_instruction(ctx, instr); |
| } |
| |
| static uint32_t |
| emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element) |
| { |
| uint32_t mir_temp_location = alloc_mir_temp(ctx); |
| // This instruction loads a vec3 starting from the initial register |
| struct bifrost_instruction instr = { |
| .op = op_extract_element, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = mir_temp_location, |
| .src0 = ssa_vector, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .literal_args[0] = element, |
| }; |
| emit_mir_instruction(ctx, instr); |
| |
| return mir_temp_location; |
| } |
| static uint32_t |
| emit_movi(struct compiler_context *ctx, uint32_t literal) |
| { |
| uint32_t mir_temp_location = alloc_mir_temp(ctx); |
| // This instruction loads a vec3 starting from the initial register |
| struct bifrost_instruction instr = { |
| .op = op_movi, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = mir_temp_location, |
| .src0 = SSA_INVALID_VALUE, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .literal_args[0] = literal, |
| }; |
| emit_mir_instruction(ctx, instr); |
| |
| return mir_temp_location; |
| } |
| |
| static unsigned |
| nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src) |
| { |
| // NIR uses a combination of single channels plus swizzles to determine which component is pulled out of a source |
| for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) { |
| if (!nir_alu_instr_channel_used(nir_instr, src, c)) |
| continue; |
| // Pull the swizzle from this element that is active and use it as the source |
| unsigned element = nir_instr->src[src].swizzle[c]; |
| |
| // Create an op that extracts an element from a vector |
| return emit_extract_vector_element(ctx, nir_alu_src_index(ctx, &nir_instr->src[src]), element); |
| } |
| assert(0); |
| return 0; |
| } |
| |
| static void |
| emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr) |
| { |
| nir_const_value *const_offset; |
| unsigned offset, reg; |
| |
| switch (nir_instr->intrinsic) { |
| case nir_intrinsic_load_ubo: { |
| nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]); |
| const_offset = nir_src_as_const_value(nir_instr->src[1]); |
| assert (location && "no indirect ubo selection"); |
| assert (const_offset && "no indirect inputs"); |
| |
| enum bifrost_ir_ops op; |
| |
| // load_ubo <UBO binding>, <byte offset> |
| // ld_ubo <byte offset>, <UBO binding> |
| switch (nir_dest_num_components(nir_instr->dest)) { |
| case 1: |
| op = op_ld_ubo_v1; |
| break; |
| case 2: |
| op = op_ld_ubo_v2; |
| break; |
| case 3: |
| op = op_ld_ubo_v3; |
| break; |
| case 4: |
| op = op_ld_ubo_v4; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| reg = nir_dest_index(ctx, &nir_instr->dest); |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = nir_dest_num_components(nir_instr->dest), |
| .ssa_args = { |
| .dest = reg, |
| .src0 = SSA_INVALID_VALUE, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .literal_args[0] = nir_src_as_uint(nir_instr->src[1]), |
| .literal_args[1] = nir_src_as_uint(nir_instr->src[0]), |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| break; |
| } |
| case nir_intrinsic_store_ssbo: { |
| nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]); |
| const_offset = nir_src_as_const_value(nir_instr->src[2]); |
| assert (location && "no indirect ubo selection"); |
| assert (const_offset && "no indirect inputs"); |
| |
| // store_ssbo <Value>, <binding>, <offset> |
| // store_vN <Addr>, <Value> |
| reg = nir_src_index(ctx, &nir_instr->src[0]); |
| |
| enum bifrost_ir_ops op; |
| switch (nir_src_num_components(nir_instr->src[0])) { |
| case 1: |
| op = op_store_v1; |
| break; |
| case 2: |
| op = op_store_v2; |
| break; |
| case 3: |
| op = op_store_v3; |
| break; |
| case 4: |
| op = op_store_v4; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = 0, |
| .ssa_args = { |
| .dest = SSA_INVALID_VALUE, |
| .src0 = reg, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .literal_args[0] = nir_src_as_uint(nir_instr->src[2]), |
| }; |
| emit_mir_instruction(ctx, instr); |
| break; |
| } |
| case nir_intrinsic_load_uniform: |
| offset = nir_intrinsic_base(nir_instr); |
| |
| if (nir_src_is_const(nir_instr->src[0])) { |
| offset += nir_src_as_uint(nir_instr->src[0]); |
| } else { |
| assert(0 && "Can't handle indirect load_uniform"); |
| } |
| |
| reg = nir_dest_index(ctx, &nir_instr->dest); |
| |
| unsigned num_components = nir_dest_num_components(nir_instr->dest); |
| if (num_components == 1) { |
| struct bifrost_instruction instr = { |
| .op = op_mov, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = reg, |
| .src0 = SSA_FIXED_UREGISTER(offset), |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| }; |
| emit_mir_instruction(ctx, instr); |
| } else { |
| uint32_t comps[4]; |
| |
| for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) { |
| uint32_t temp_dest = alloc_mir_temp(ctx); |
| comps[i] = temp_dest; |
| struct bifrost_instruction instr = { |
| .op = op_mov, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = temp_dest, |
| .src0 = SSA_FIXED_UREGISTER(offset + (i * 4)), |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| }; |
| emit_mir_instruction(ctx, instr); |
| } |
| |
| emit_create_vector(ctx, reg, num_components, comps); |
| } |
| break; |
| |
| case nir_intrinsic_load_input: { |
| const_offset = nir_src_as_const_value(nir_instr->src[0]); |
| assert (const_offset && "no indirect inputs"); |
| |
| offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]); |
| |
| reg = nir_dest_index(ctx, &nir_instr->dest); |
| |
| enum bifrost_ir_ops op; |
| switch (nir_dest_num_components(nir_instr->dest)) { |
| case 1: |
| op = op_ld_attr_v1; |
| break; |
| case 2: |
| op = op_ld_attr_v2; |
| break; |
| case 3: |
| op = op_ld_attr_v3; |
| break; |
| case 4: |
| op = op_ld_attr_v4; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = nir_dest_num_components(nir_instr->dest), |
| .ssa_args = { |
| .dest = reg, |
| .src0 = offset, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| } |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| break; |
| } |
| case nir_intrinsic_store_output: { |
| const_offset = nir_src_as_const_value(nir_instr->src[1]); |
| assert(const_offset && "no indirect outputs"); |
| |
| offset = nir_intrinsic_base(nir_instr); |
| if (ctx->stage == MESA_SHADER_FRAGMENT) { |
| int comp = nir_intrinsic_component(nir_instr); |
| offset += comp; |
| // XXX: Once we support more than colour output then this will need to change |
| void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1); |
| |
| if (!entry) { |
| printf("WARNING: skipping fragment output\n"); |
| break; |
| } |
| |
| offset = (uintptr_t) (entry) - 1; |
| reg = nir_src_index(ctx, &nir_instr->src[0]); |
| |
| enum bifrost_ir_ops op; |
| switch (nir_src_num_components(nir_instr->src[0])) { |
| case 1: |
| op = op_store_v1; |
| break; |
| case 2: |
| op = op_store_v2; |
| break; |
| case 3: |
| op = op_store_v3; |
| break; |
| case 4: |
| op = op_store_v4; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| // XXX: All offsets aren't vec4 aligned. Will need to adjust this in the future |
| // XXX: This needs to offset correctly in to memory so the blend step can pick it up |
| uint32_t movi = emit_movi(ctx, offset * 16); |
| uint32_t movi2 = emit_movi(ctx, 0); |
| |
| uint32_t comps[2] = { |
| movi, movi2, |
| }; |
| uint32_t offset_val = alloc_mir_temp(ctx); |
| emit_create_vector(ctx, offset_val, 2, comps); |
| |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = 0, |
| .ssa_args = { |
| .dest = SSA_INVALID_VALUE, |
| .src0 = offset_val, |
| .src1 = reg, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| } |
| }; |
| emit_mir_instruction(ctx, instr); |
| } else if (ctx->stage == MESA_SHADER_VERTEX) { |
| int comp = nir_intrinsic_component(nir_instr); |
| offset += comp; |
| void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2); |
| |
| if (!entry) { |
| printf("WARNING: skipping varying\n"); |
| break; |
| } |
| |
| offset = (uintptr_t) (entry) - 1; |
| |
| reg = nir_src_index(ctx, &nir_instr->src[0]); |
| // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12 |
| // ... |
| // ST_VAR.v4 T1, R12, R13, R14, R4 |
| |
| offset = emit_ld_vary_addr_constant(ctx, offset); |
| enum bifrost_ir_ops op; |
| switch (nir_src_num_components(nir_instr->src[0])) { |
| case 1: |
| op = op_st_vary_v1; |
| break; |
| case 2: |
| op = op_st_vary_v2; |
| break; |
| case 3: |
| op = op_st_vary_v3; |
| break; |
| case 4: |
| op = op_st_vary_v4; |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = 0, |
| .ssa_args = { |
| .dest = SSA_INVALID_VALUE, |
| .src0 = offset, |
| .src1 = reg, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| } |
| }; |
| emit_mir_instruction(ctx, instr); |
| } else { |
| assert(0 && "Unknown store_output stage"); |
| } |
| break; |
| } |
| default: |
| printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name); |
| break; |
| } |
| } |
| |
| #define ALU_CASE(arguments, nir, name) \ |
| case nir_op_##nir: \ |
| argument_count = arguments; \ |
| op = op_##name; \ |
| break |
| #define ALU_CASE_MOD(arguments, nir, name, modifiers) \ |
| case nir_op_##nir: \ |
| argument_count = arguments; \ |
| op = op_##name; \ |
| src_modifiers = modifiers; \ |
| break |
| |
| static void |
| emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr) |
| { |
| unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest); |
| unsigned op = ~0U, argument_count; |
| unsigned src_modifiers = 0; |
| |
| switch (nir_instr->op) { |
| ALU_CASE(2, fmul, fmul_f32); |
| ALU_CASE(2, fadd, fadd_f32); |
| ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG)); |
| ALU_CASE(1, ftrunc, trunc); |
| ALU_CASE(1, fceil, ceil); |
| ALU_CASE(1, ffloor, floor); |
| ALU_CASE(1, fround_even, roundeven); |
| ALU_CASE(1, frcp, frcp_fast_f32); |
| ALU_CASE(2, fmax, max_f32); |
| ALU_CASE(2, fmin, min_f32); |
| ALU_CASE(2, iadd, add_i32); |
| ALU_CASE(2, isub, sub_i32); |
| ALU_CASE(2, imul, mul_i32); |
| ALU_CASE(2, iand, and_i32); |
| ALU_CASE(2, ior, or_i32); |
| ALU_CASE(2, ixor, xor_i32); |
| ALU_CASE(2, ishl, lshift_i32); |
| ALU_CASE(2, ushr, rshift_i32); |
| ALU_CASE(2, ishr, arshift_i32); |
| case nir_op_ineg: { |
| unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0); |
| printf("ineg 0x%08x\n", src0); |
| struct bifrost_instruction instr = { |
| .op = op_sub_i32, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = SSA_FIXED_CONST_0, |
| .src1 = src0, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| return; |
| |
| } |
| case nir_op_vec2: { |
| uint32_t comps[3] = { |
| nir_alu_src_index(ctx, &nir_instr->src[0]), |
| nir_alu_src_index(ctx, &nir_instr->src[1]), |
| }; |
| emit_create_vector(ctx, dest, 2, comps); |
| return; |
| break; |
| } |
| case nir_op_vec3: { |
| uint32_t comps[3] = { |
| nir_alu_src_index(ctx, &nir_instr->src[0]), |
| nir_alu_src_index(ctx, &nir_instr->src[1]), |
| nir_alu_src_index(ctx, &nir_instr->src[2]), |
| }; |
| emit_create_vector(ctx, dest, 3, comps); |
| return; |
| break; |
| } |
| case nir_op_vec4: { |
| uint32_t comps[4] = { |
| nir_alu_src_index(ctx, &nir_instr->src[0]), |
| nir_alu_src_index(ctx, &nir_instr->src[1]), |
| nir_alu_src_index(ctx, &nir_instr->src[2]), |
| nir_alu_src_index(ctx, &nir_instr->src[3]), |
| }; |
| emit_create_vector(ctx, dest, 4, comps); |
| return; |
| break; |
| } |
| case nir_op_fdiv: { |
| unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0); |
| unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1); |
| uint32_t mir_temp_location = alloc_mir_temp(ctx); |
| { |
| struct bifrost_instruction instr = { |
| .op = op_frcp_fast_f32, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = mir_temp_location, |
| .src0 = src1, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| }; |
| emit_mir_instruction(ctx, instr); |
| } |
| |
| struct bifrost_instruction instr = { |
| .op = op_fmul_f32, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = src0, |
| .src1 = src1, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .src_modifiers = src_modifiers, |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| return; |
| break; |
| } |
| case nir_op_umin: |
| case nir_op_imin: |
| case nir_op_umax: |
| case nir_op_imax: { |
| unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0); |
| unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1); |
| struct bifrost_instruction instr = { |
| .op = op_csel_i32, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = src0, |
| .src1 = src1, |
| .src2 = src0, |
| .src3 = src1, |
| }, |
| .src_modifiers = src_modifiers, |
| .literal_args[0] = 0, /* XXX: Comparison operator */ |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| return; |
| break; |
| } |
| case nir_op_umin3: |
| case nir_op_imin3: |
| case nir_op_umax3: |
| case nir_op_imax3: { |
| unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0); |
| unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1); |
| unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2); |
| |
| unsigned op = 0; |
| if (nir_instr->op == nir_op_umin3) |
| op = op_umin3_i32; |
| else if (nir_instr->op == nir_op_imin3) |
| op = op_imin3_i32; |
| else if (nir_instr->op == nir_op_umax3) |
| op = op_umax3_i32; |
| else if (nir_instr->op == nir_op_imax3) |
| op = op_imax3_i32; |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = src0, |
| .src1 = src1, |
| .src2 = src2, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .src_modifiers = src_modifiers, |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| |
| return; |
| break; |
| } |
| case nir_op_ine: { |
| uint32_t movi = emit_movi(ctx, ~0U); |
| unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]); |
| unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]); |
| struct bifrost_instruction instr = { |
| .op = op_csel_i32, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = src0, |
| .src1 = src1, |
| .src2 = movi, |
| .src3 = SSA_FIXED_CONST_0, |
| }, |
| .src_modifiers = src_modifiers, |
| .literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */ |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| return; |
| break; |
| } |
| default: |
| printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name); |
| return; |
| } |
| |
| unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0); |
| unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE; |
| unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE; |
| unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE; |
| |
| struct bifrost_instruction instr = { |
| .op = op, |
| .dest_components = 1, |
| .ssa_args = { |
| .dest = dest, |
| .src0 = src0, |
| .src1 = src1, |
| .src2 = src2, |
| .src3 = src3, |
| }, |
| .src_modifiers = src_modifiers, |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| } |
| |
| static void |
| emit_instr(struct compiler_context *ctx, struct nir_instr *instr) |
| { |
| switch (instr->type) { |
| case nir_instr_type_load_const: |
| emit_load_const(ctx, nir_instr_as_load_const(instr)); |
| break; |
| case nir_instr_type_intrinsic: |
| emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); |
| break; |
| case nir_instr_type_alu: |
| emit_alu(ctx, nir_instr_as_alu(instr)); |
| break; |
| case nir_instr_type_tex: |
| printf("Unhandled NIR inst tex\n"); |
| break; |
| case nir_instr_type_jump: |
| printf("Unhandled NIR inst jump\n"); |
| break; |
| case nir_instr_type_ssa_undef: |
| printf("Unhandled NIR inst ssa_undef\n"); |
| break; |
| default: |
| printf("Unhandled instruction type\n"); |
| break; |
| } |
| |
| } |
| |
| static bifrost_block * |
| emit_block(struct compiler_context *ctx, nir_block *block) |
| { |
| bifrost_block *this_block = calloc(sizeof(bifrost_block), 1); |
| list_addtail(&this_block->link, &ctx->blocks); |
| |
| ++ctx->block_count; |
| |
| /* Add this block to be a successor to the previous block */ |
| if (ctx->current_block) |
| bifrost_block_add_successor(ctx->current_block, this_block); |
| |
| /* Set up current block */ |
| list_inithead(&this_block->instructions); |
| ctx->current_block = this_block; |
| |
| nir_foreach_instr(instr, block) { |
| emit_instr(ctx, instr); |
| ++ctx->instruction_count; |
| } |
| |
| #ifdef BI_DEBUG |
| print_mir_block(this_block, false); |
| #endif |
| return this_block; |
| } |
| |
| void |
| emit_if(struct compiler_context *ctx, nir_if *nir_inst); |
| |
| static struct bifrost_block * |
| emit_cf_list(struct compiler_context *ctx, struct exec_list *list) |
| { |
| struct bifrost_block *start_block = NULL; |
| foreach_list_typed(nir_cf_node, node, node, list) { |
| switch (node->type) { |
| case nir_cf_node_block: { |
| bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node)); |
| |
| if (!start_block) |
| start_block = block; |
| |
| break; |
| } |
| |
| case nir_cf_node_if: |
| emit_if(ctx, nir_cf_node_as_if(node)); |
| break; |
| |
| default: |
| case nir_cf_node_loop: |
| case nir_cf_node_function: |
| assert(0); |
| break; |
| } |
| } |
| |
| return start_block; |
| } |
| |
| void |
| emit_if(struct compiler_context *ctx, nir_if *nir_inst) |
| { |
| |
| // XXX: Conditional branch instruction can do a variety of comparisons with the sources |
| // Merge the source instruction `ine` with our conditional branch |
| { |
| uint32_t movi = emit_movi(ctx, ~0U); |
| struct bifrost_instruction instr = { |
| .op = op_branch, |
| .dest_components = 0, |
| .ssa_args = { |
| .dest = SSA_INVALID_VALUE, |
| .src0 = nir_src_index(ctx, &nir_inst->condition), |
| .src1 = movi, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .src_modifiers = 0, |
| .literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */ |
| .literal_args[1] = 0, /* XXX: Branch target */ |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| } |
| |
| bifrost_instruction *true_branch = mir_last_instr_in_block(ctx->current_block); |
| |
| bifrost_block *true_block = emit_cf_list(ctx, &nir_inst->then_list); |
| |
| { |
| struct bifrost_instruction instr = { |
| .op = op_branch, |
| .dest_components = 0, |
| .ssa_args = { |
| .dest = SSA_INVALID_VALUE, |
| .src0 = SSA_INVALID_VALUE, |
| .src1 = SSA_INVALID_VALUE, |
| .src2 = SSA_INVALID_VALUE, |
| .src3 = SSA_INVALID_VALUE, |
| }, |
| .src_modifiers = 0, |
| .literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */ |
| .literal_args[1] = 0, /* XXX: Branch target */ |
| }; |
| |
| emit_mir_instruction(ctx, instr); |
| } |
| bifrost_instruction *true_exit_branch = mir_last_instr_in_block(ctx->current_block); |
| |
| unsigned false_idx = ctx->block_count; |
| unsigned inst_count = ctx->instruction_count; |
| |
| bifrost_block *false_block = emit_cf_list(ctx, &nir_inst->else_list); |
| |
| unsigned if_footer_idx = ctx->block_count; |
| assert(true_block); |
| assert(false_block); |
| |
| |
| if (ctx->instruction_count == inst_count) { |
| // If the else branch didn't have anything in it then we can remove the dead jump |
| mir_remove_instr(true_exit_branch); |
| } else { |
| true_exit_branch->literal_args[1] = if_footer_idx; |
| } |
| |
| true_branch->literal_args[1] = false_idx; |
| } |
| |
| int |
| bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program) |
| { |
| struct compiler_context ictx = { |
| .nir = nir, |
| .stage = nir->info.stage, |
| }; |
| |
| struct compiler_context *ctx = &ictx; |
| |
| ctx->mir_temp = 0; |
| |
| /* Initialize at a global (not block) level hash tables */ |
| ctx->ssa_constants = _mesa_hash_table_u64_create(NULL); |
| ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL); |
| |
| /* Assign actual uniform location, skipping over samplers */ |
| ctx->uniform_nir_to_bi = _mesa_hash_table_u64_create(NULL); |
| |
| nir_foreach_variable(var, &nir->uniforms) { |
| if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue; |
| |
| for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) { |
| int id = ctx->uniform_count++; |
| _mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1))); |
| } |
| } |
| |
| if (ctx->stage == MESA_SHADER_VERTEX) { |
| ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL); |
| nir_foreach_variable(var, &nir->outputs) { |
| if (var->data.location < VARYING_SLOT_VAR0) { |
| if (var->data.location == VARYING_SLOT_POS) |
| ctx->varying_count++; |
| _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1))); |
| |
| continue; |
| } |
| |
| for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) { |
| for (int comp = 0; comp < 4; ++comp) { |
| int id = comp + ctx->varying_count++; |
| _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1))); |
| } |
| } |
| } |
| |
| } else if (ctx->stage == MESA_SHADER_FRAGMENT) { |
| ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL); |
| nir_foreach_variable(var, &nir->outputs) { |
| if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) { |
| int id = ctx->outputs_count++; |
| printf("Driver location: %d with id %d\n", var->data.location + 1, id); |
| _mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1))); |
| } |
| } |
| } |
| |
| /* Optimisation passes */ |
| optimize_nir(nir); |
| |
| #ifdef BI_DEBUG |
| nir_print_shader(nir, stdout); |
| #endif |
| |
| /* Generate machine IR for shader */ |
| nir_foreach_function(func, nir) { |
| nir_builder _b; |
| ctx->b = &_b; |
| nir_builder_init(ctx->b, func->impl); |
| |
| list_inithead(&ctx->blocks); |
| ctx->block_count = 0; |
| ctx->func = func; |
| |
| emit_cf_list(ctx, &func->impl->body); |
| |
| break; // XXX: Once we support multi function shaders then implement |
| } |
| |
| util_dynarray_init(&program->compiled, NULL); |
| |
| // MIR pre-RA optimizations |
| |
| bool progress = false; |
| |
| do { |
| progress = false; |
| mir_foreach_block(ctx, block) { |
| // XXX: Not yet working |
| // progress |= bifrost_opt_branch_fusion(ctx, block); |
| } |
| } while (progress); |
| |
| schedule_program(ctx); |
| |
| #ifdef BI_DEBUG |
| nir_print_shader(nir, stdout); |
| disassemble_bifrost(program->compiled.data, program->compiled.size, false); |
| #endif |
| return 0; |
| } |