src/panfrost/bifrost/bifrost_compile.c - third_party/mesa - Git at Google

 /*
  * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */

 #include "compiler/nir/nir_builder.h"
 #include "bifrost_compile.h"
 #include "bifrost_opts.h"
 #include "bifrost_sched.h"
 #include "compiler_defines.h"
 #include "disassemble.h"
 #include "bifrost_print.h"

 #define BI_DEBUG

 /*
  * Type-size callback passed to nir_lower_io in optimize_nir(): returns the
  * result of glsl_count_attribute_slots() for `type`.  The `bindless`
  * argument is part of the callback signature but is not consulted; the
  * second argument to glsl_count_attribute_slots() is always false.
  */
 static int
 glsl_type_size(const struct glsl_type *type, bool bindless)
 {
         (void) bindless;

         int slot_count = glsl_count_attribute_slots(type, false);
         return slot_count;
 }

 /*
  * Run the NIR lowering/optimization pipeline used before MIR emission.
  *
  * I/O is lowered once up front and registers are converted to SSA, then a
  * fixed set of passes is repeated for as long as `progress` ends up set at
  * the end of an iteration.  A final copy-propagation + dead-code pass
  * cleans up after the loop.
  */
 static void
 optimize_nir(nir_shader *nir)
 {
         /* Written by the NIR_PASS invocations below; only read as the loop
          * condition.  NOTE(review): presumably NIR_PASS sets it to true when
          * the pass reports a change — confirm against the macro in nir.h. */
         bool progress;

         NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
         NIR_PASS(progress, nir, nir_lower_regs_to_ssa);

         do {
                 /* Reset each iteration so the loop ends once no pass reports progress */
                 progress = false;

                 NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

                 NIR_PASS(progress, nir, nir_lower_var_copies);
                 NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                 NIR_PASS(progress, nir, nir_copy_prop);
                 NIR_PASS(progress, nir, nir_opt_constant_folding);

                 /* nir_lower_vars_to_ssa is run a second time within the same iteration */
                 NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
                 NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL);
                 NIR_PASS(progress, nir, nir_opt_if, true);

         } while (progress);

         /* Final cleanup; the progress value written here is not read again */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
 }

 /*
  * Flat index for a NIR source.  SSA sources use the index of the SSA def
  * they refer to; register sources are numbered after that range by adding
  * the function impl's ssa_alloc to the register's own index.
  */
 static unsigned
 nir_src_index(compiler_context *ctx, nir_src *src)
 {
         if (!src->is_ssa)
                 return ctx->func->impl->ssa_alloc + src->reg.reg->index;

         return src->ssa->index;
 }

 /*
  * Flat index for a NIR destination, using the same numbering as
  * nir_src_index(): the SSA index for SSA destinations, otherwise
  * ssa_alloc plus the register index.
  */
 static unsigned
 nir_dest_index(compiler_context *ctx, nir_dest *dst)
 {
         return dst->is_ssa
                 ? dst->ssa.index
                 : ctx->func->impl->ssa_alloc + dst->reg.reg->index;
 }

 /*
  * Flat index of the nir_src wrapped by an ALU source.  The swizzle stored
  * in the nir_alu_src is not looked at here.
  */
 static unsigned
 nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
 {
         nir_src *wrapped = &src->src;

         return nir_src_index(ctx, wrapped);
 }

 struct bifrost_instruction *
 mir_alloc_ins(struct bifrost_instruction instr)
 {
         struct bifrost_instruction *heap_ins = malloc(sizeof(instr));
         memcpy(heap_ins, &instr, sizeof(instr));
         return heap_ins;
 }

 static void
 emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr)
 {
         list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions);
 }

 /*
  * Record `successor` in the next free slot of block->successors and bump
  * the successor count.  Asserts that a free slot is available.
  */
 static void
 bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor)
 {
         assert(block->num_successors < ARRAY_SIZE(block->successors));

         block->successors[block->num_successors] = successor;
         block->num_successors++;
 }

 /*
  * Record the value of a NIR load_const so later code can look it up by
  * SSA index.
  *
  * The constant is read through the f32 member into a ralloc'd float array
  * and stored in ctx->ssa_constants under the key (SSA index + 1).
  * NOTE(review): the +1 presumably keeps SSA index 0 from becoming key 0 —
  * confirm against the lookup side.
  */
 static void
 emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr)
 {
         const nir_ssa_def *def = &instr->def;

         /* nir_const_load_to_arr writes one element per component of the
          * constant (see its definition in nir.h), so the buffer has to hold
          * def->num_components floats; a single-float buffer overflows for
          * vector constants. */
         float *v = ralloc_array(NULL, float, def->num_components);
         nir_const_load_to_arr(v, instr, f32);
         _mesa_hash_table_u64_insert(ctx->ssa_constants, def->index + 1, v);
 }

 /*
  * Hand out a fresh MIR temporary: post-increments ctx->mir_temp and passes
  * the previous counter value through SSA_TEMP_VALUE().
  */
 static uint32_t
 alloc_mir_temp(struct compiler_context *ctx)
 {
         return SSA_TEMP_VALUE(ctx->mir_temp++);
 }

 static uint32_t
 emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location)
 {
         // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
         // ...
         // ST_VAR.v4 T1, R12, R13, R14, R4

         // R61-R62 is filled with information needed for varying interpolation
         // This loads a vec3 with the information that ST_VAR needs to work

         uint32_t mir_temp_location = alloc_mir_temp(ctx);
         // This instruction loads a vec3 starting from the initial register
         struct bifrost_instruction instr = {
                 .op = op_ld_var_addr,
                 .dest_components = 3,
                 .ssa_args = {
                         .dest = mir_temp_location,
                         .src0 = SSA_FIXED_REGISTER(61),
                         .src1 = SSA_FIXED_REGISTER(62),
                         .src2 = SSA_INVALID_VALUE,
                         .src3 = SSA_INVALID_VALUE,
                 },
                 .literal_args[0] = location,
         };
         emit_mir_instruction(ctx, instr);

         return mir_temp_location;
 }

 /*
  * Emit an op_create_vector that writes `dest` from the first `num_comps`
  * entries of `comps` (at most 4).  Source slots beyond num_comps are set
  * to SSA_INVALID_VALUE.
  *
  * XXX: Doesn't support duplicated values in the components!
  * RA WILL fail!
  */
 static void
 emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps)
 {
         assert(num_comps <= 4 && "Can't make a vector larger than 4 components");

         struct bifrost_instruction instr = {
                 .op = op_create_vector,
                 .dest_components = num_comps,
                 .ssa_args = {
                         .dest = dest,
                         /* comps[i] is only read for slots below num_comps */
                         .src0 = (num_comps > 0) ? comps[0] : SSA_INVALID_VALUE,
                         .src1 = (num_comps > 1) ? comps[1] : SSA_INVALID_VALUE,
                         .src2 = (num_comps > 2) ? comps[2] : SSA_INVALID_VALUE,
                         .src3 = (num_comps > 3) ? comps[3] : SSA_INVALID_VALUE,
                 },
         };

         emit_mir_instruction(ctx, instr);
 }

 static uint32_t
 emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element)
 {
         uint32_t mir_temp_location = alloc_mir_temp(ctx);
         // This instruction loads a vec3 starting from the initial register
         struct bifrost_instruction instr = {
                 .op = op_extract_element,
                 .dest_components = 1,
                 .ssa_args = {
                         .dest = mir_temp_location,
                         .src0 = ssa_vector,
                         .src1 = SSA_INVALID_VALUE,
                         .src2 = SSA_INVALID_VALUE,
                         .src3 = SSA_INVALID_VALUE,
                 },
                 .literal_args[0] = element,
         };
         emit_mir_instruction(ctx, instr);

         return mir_temp_location;
 }
 static uint32_t
 emit_movi(struct compiler_context *ctx, uint32_t literal)
 {
         uint32_t mir_temp_location = alloc_mir_temp(ctx);
         // This instruction loads a vec3 starting from the initial register
         struct bifrost_instruction instr = {
                 .op = op_movi,
                 .dest_components = 1,
                 .ssa_args = {
                         .dest = mir_temp_location,
                         .src0 = SSA_INVALID_VALUE,
                         .src1 = SSA_INVALID_VALUE,
                         .src2 = SSA_INVALID_VALUE,
                         .src3 = SSA_INVALID_VALUE,
                 },
                 .literal_args[0] = literal,
         };
         emit_mir_instruction(ctx, instr);

         return mir_temp_location;
 }

 /*
  * Produce a scalar MIR value for source number `src` of an ALU instruction.
  *
  * NIR uses a combination of single channels plus swizzles to determine
  * which component is pulled out of a source.  The first channel reported
  * as used by nir_alu_instr_channel_used() selects the swizzle entry, and
  * an extract-element op is emitted for that component.  Returns the
  * temporary holding the extracted value; asserts (and returns 0) if no
  * channel is in use.
  */
 static unsigned
 nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src)
 {
         unsigned chan = 0;

         /* Find the first channel that is active for this source */
         while (chan < NIR_MAX_VEC_COMPONENTS &&
                !nir_alu_instr_channel_used(nir_instr, src, chan))
                 chan++;

         if (chan == NIR_MAX_VEC_COMPONENTS) {
                 assert(0);
                 return 0;
         }

         /* The swizzle of the active channel names the source component */
         unsigned component = nir_instr->src[src].swizzle[chan];
         unsigned vector = nir_alu_src_index(ctx, &nir_instr->src[src]);

         return emit_extract_vector_element(ctx, vector, component);
 }

 /*
  * Translate one NIR intrinsic into MIR instructions appended to the
  * current block.
  *
  * Handled intrinsics: load_ubo, store_ssbo, load_uniform, load_input and
  * store_output (fragment and vertex stages only).  Anything else is
  * reported on stdout and otherwise ignored.  Indirect (non-constant)
  * offsets/bindings are not supported and are rejected with assert().
  *
  * NOTE(review): in every "switch on component count" below, `op` is left
  * unassigned in the default branch if assert() is compiled out.
  */
 static void
 emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr)
 {
         nir_const_value *const_offset;
         unsigned offset, reg;

         switch (nir_instr->intrinsic) {
         case nir_intrinsic_load_ubo: {
                 /* Both the binding (src[0]) and the offset (src[1]) must be constants */
                 nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]);
                 const_offset = nir_src_as_const_value(nir_instr->src[1]);
                 assert (location && "no indirect ubo selection");
                 assert (const_offset && "no indirect inputs");

                 enum bifrost_ir_ops op;

                 // load_ubo <UBO binding>, <byte offset>
                 // ld_ubo <byte offset>, <UBO binding>
                 switch (nir_dest_num_components(nir_instr->dest)) {
                 case 1:
                         op = op_ld_ubo_v1;
                         break;
                 case 2:
                         op = op_ld_ubo_v2;
                         break;
                 case 3:
                         op = op_ld_ubo_v3;
                         break;
                 case 4:
                         op = op_ld_ubo_v4;
                         break;
                 default:
                         assert(0);
                         break;
                 }

                 reg = nir_dest_index(ctx, &nir_instr->dest);
                 /* No SSA sources: offset and binding travel as literals,
                  * in the swapped order described above */
                 struct bifrost_instruction instr = {
                         .op = op,
                         .dest_components = nir_dest_num_components(nir_instr->dest),
                         .ssa_args = {
                                 .dest = reg,
                                 .src0 = SSA_INVALID_VALUE,
                                 .src1 = SSA_INVALID_VALUE,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                         .literal_args[0] = nir_src_as_uint(nir_instr->src[1]),
                         .literal_args[1] = nir_src_as_uint(nir_instr->src[0]),
                 };

                 emit_mir_instruction(ctx, instr);
                 break;
         }
         case nir_intrinsic_store_ssbo: {
                 /* Binding (src[1]) and offset (src[2]) must be constants */
                 nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]);
                 const_offset = nir_src_as_const_value(nir_instr->src[2]);
                 assert (location && "no indirect ubo selection");
                 assert (const_offset && "no indirect inputs");

                 // store_ssbo <Value>, <binding>, <offset>
                 // store_vN <Addr>, <Value>
                 reg = nir_src_index(ctx, &nir_instr->src[0]);

                 enum bifrost_ir_ops op;
                 switch (nir_src_num_components(nir_instr->src[0])) {
                 case 1:
                         op = op_store_v1;
                         break;
                 case 2:
                         op = op_store_v2;
                         break;
                 case 3:
                         op = op_store_v3;
                         break;
                 case 4:
                         op = op_store_v4;
                         break;
                 default:
                         assert(0);
                         break;
                 }

                 /* NOTE(review): the binding is only asserted to be constant;
                  * it is not encoded in the instruction, and the value lands
                  * in src0 although the comment above lists <Addr> first —
                  * confirm this is intended. */
                 struct bifrost_instruction instr = {
                         .op = op,
                         .dest_components = 0,
                         .ssa_args = {
                                 .dest = SSA_INVALID_VALUE,
                                 .src0 = reg,
                                 .src1 = SSA_INVALID_VALUE,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                         .literal_args[0] = nir_src_as_uint(nir_instr->src[2]),
                 };
                 emit_mir_instruction(ctx, instr);
                 break;
         }
         case nir_intrinsic_load_uniform:
                 /* Uniform location = intrinsic base + constant offset source */
                 offset = nir_intrinsic_base(nir_instr);

                 if (nir_src_is_const(nir_instr->src[0])) {
                         offset += nir_src_as_uint(nir_instr->src[0]);
                 } else {
                         assert(0 && "Can't handle indirect load_uniform");
                 }

                 reg = nir_dest_index(ctx, &nir_instr->dest);

                 unsigned num_components = nir_dest_num_components(nir_instr->dest);
                 if (num_components == 1) {
                         /* Scalar: a single mov from the fixed uniform register */
                         struct bifrost_instruction instr = {
                                 .op = op_mov,
                                 .dest_components = 1,
                                 .ssa_args = {
                                         .dest = reg,
                                         .src0 = SSA_FIXED_UREGISTER(offset),
                                         .src1 = SSA_INVALID_VALUE,
                                         .src2 = SSA_INVALID_VALUE,
                                         .src3 = SSA_INVALID_VALUE,
                                 },
                         };
                         emit_mir_instruction(ctx, instr);
                 } else {
                         /* Vector: one mov per component into a fresh temp,
                          * then combine the temps with a create_vector.
                          * NOTE(review): components step by 4 here while the
                          * scalar path uses `offset` unscaled — confirm the
                          * units of `offset`.  comps[] holds 4 entries; the
                          * <= 4 check only happens later in
                          * emit_create_vector(). */
                         uint32_t comps[4];

                         for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) {
                                 uint32_t temp_dest = alloc_mir_temp(ctx);
                                 comps[i] = temp_dest;
                                 struct bifrost_instruction instr = {
                                         .op = op_mov,
                                         .dest_components = 1,
                                         .ssa_args = {
                                                 .dest = temp_dest,
                                                 .src0 = SSA_FIXED_UREGISTER(offset + (i * 4)),
                                                 .src1 = SSA_INVALID_VALUE,
                                                 .src2 = SSA_INVALID_VALUE,
                                                 .src3 = SSA_INVALID_VALUE,
                                         },
                                 };
                                 emit_mir_instruction(ctx, instr);
                         }

                         emit_create_vector(ctx, reg, num_components, comps);
                 }
                 break;

         case nir_intrinsic_load_input: {
                 const_offset = nir_src_as_const_value(nir_instr->src[0]);
                 assert (const_offset && "no indirect inputs");

                 /* Attribute location = intrinsic base + constant offset source */
                 offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]);

                 reg = nir_dest_index(ctx, &nir_instr->dest);

                 enum bifrost_ir_ops op;
                 switch (nir_dest_num_components(nir_instr->dest)) {
                 case 1:
                         op = op_ld_attr_v1;
                         break;
                 case 2:
                         op = op_ld_attr_v2;
                         break;
                 case 3:
                         op = op_ld_attr_v3;
                         break;
                 case 4:
                         op = op_ld_attr_v4;
                         break;
                 default:
                         assert(0);
                         break;
                 }

                 /* NOTE(review): src0 receives the raw attribute offset rather
                  * than an SSA index, unlike the other cases — confirm how
                  * ld_attr consumers interpret src0. */
                 struct bifrost_instruction instr = {
                         .op = op,
                         .dest_components = nir_dest_num_components(nir_instr->dest),
                         .ssa_args = {
                                 .dest = reg,
                                 .src0 = offset,
                                 .src1 = SSA_INVALID_VALUE,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         }
                 };

                 emit_mir_instruction(ctx, instr);
                 break;
         }
         case nir_intrinsic_store_output: {
                 const_offset = nir_src_as_const_value(nir_instr->src[1]);
                 assert(const_offset && "no indirect outputs");

                 /* NOTE(review): the constant offset in src[1] is only checked
                  * for constness; its value is not added to `offset`. */
                 offset = nir_intrinsic_base(nir_instr);
                 if (ctx->stage == MESA_SHADER_FRAGMENT) {
                         int comp = nir_intrinsic_component(nir_instr);
                         offset += comp;
                         // XXX: Once we support more than colour output then this will need to change
                         void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1);

                         if (!entry) {
                                 printf("WARNING: skipping fragment output\n");
                                 break;
                         }

                         /* The table entry is decoded as (stored value - 1) */
                         offset = (uintptr_t) (entry) - 1;
                         reg = nir_src_index(ctx, &nir_instr->src[0]);

                         enum bifrost_ir_ops op;
                         switch (nir_src_num_components(nir_instr->src[0])) {
                         case 1:
                                 op = op_store_v1;
                                 break;
                         case 2:
                                 op = op_store_v2;
                                 break;
                         case 3:
                                 op = op_store_v3;
                                 break;
                         case 4:
                                 op = op_store_v4;
                                 break;
                         default:
                                 assert(0);
                                 break;
                         }

                         // XXX: All offsets aren't vec4 aligned. Will need to adjust this in the future
                         // XXX: This needs to offset correctly in to memory so the blend step can pick it up
                         /* Build a 2-component value {offset * 16, 0} used as src0 of the store */
                         uint32_t movi = emit_movi(ctx, offset * 16);
                         uint32_t movi2 = emit_movi(ctx, 0);

                         uint32_t comps[2] = {
                                 movi, movi2,
                         };
                         uint32_t offset_val = alloc_mir_temp(ctx);
                         emit_create_vector(ctx, offset_val, 2, comps);

                         struct bifrost_instruction instr = {
                                 .op = op,
                                 .dest_components = 0,
                                 .ssa_args = {
                                         .dest = SSA_INVALID_VALUE,
                                         .src0 = offset_val,
                                         .src1 = reg,
                                         .src2 = SSA_INVALID_VALUE,
                                         .src3 = SSA_INVALID_VALUE,
                                 }
                         };
                         emit_mir_instruction(ctx, instr);
                 } else if (ctx->stage == MESA_SHADER_VERTEX) {
                         int comp = nir_intrinsic_component(nir_instr);
                         offset += comp;
                         /* NOTE(review): the lookup key is biased by +2 here,
                          * while the decode below subtracts 1 — confirm against
                          * the code that fills varying_nir_to_bi. */
                         void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2);

                         if (!entry) {
                                 printf("WARNING: skipping varying\n");
                                 break;
                         }

                         offset = (uintptr_t) (entry) - 1;

                         reg = nir_src_index(ctx, &nir_instr->src[0]);
                         // LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
                         // ...
                         // ST_VAR.v4 T1, R12, R13, R14, R4

                         /* From here on `offset` holds the MIR temp produced by
                          * the ld_var_addr, not a location */
                         offset = emit_ld_vary_addr_constant(ctx, offset);
                         enum bifrost_ir_ops op;
                         switch (nir_src_num_components(nir_instr->src[0])) {
                         case 1:
                                 op = op_st_vary_v1;
                                 break;
                         case 2:
                                 op = op_st_vary_v2;
                                 break;
                         case 3:
                                 op = op_st_vary_v3;
                                 break;
                         case 4:
                                 op = op_st_vary_v4;
                                 break;
                         default:
                                 assert(0);
                                 break;
                         }

                         struct bifrost_instruction instr = {
                                 .op = op,
                                 .dest_components = 0,
                                 .ssa_args = {
                                         .dest = SSA_INVALID_VALUE,
                                         .src0 = offset,
                                         .src1 = reg,
                                         .src2 = SSA_INVALID_VALUE,
                                         .src3 = SSA_INVALID_VALUE,
                                 }
                         };
                         emit_mir_instruction(ctx, instr);
                 } else {
                         assert(0 && "Unknown store_output stage");
                 }
                 break;
         }
         default:
                 /* Unsupported intrinsics are reported and no MIR is emitted */
                 printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name);
                 break;
         }
 }

 /*
  * Helpers for the opcode switch in emit_alu().  Each expands to a complete
  * `case nir_op_<nir>:` that assigns the local variables `argument_count`
  * and `op` (to op_<name>) and then breaks out of the switch.
  * ALU_CASE_MOD additionally assigns the local `src_modifiers`.
  * They rely on those locals being declared at the expansion site.
  */
 #define ALU_CASE(arguments, nir, name) \
 	case nir_op_##nir: \
                 argument_count = arguments; \
 		op = op_##name; \
 		break
 #define ALU_CASE_MOD(arguments, nir, name, modifiers) \
 	case nir_op_##nir: \
                 argument_count = arguments; \
 		op = op_##name; \
                 src_modifiers = modifiers; \
 		break

 /*
  * Translate one NIR ALU instruction into MIR and append the result to the
  * current block.
  *
  * Opcodes listed through ALU_CASE/ALU_CASE_MOD map 1:1 onto a MIR op and
  * share the generic emission code at the bottom of the function: each
  * source is reduced to a scalar with nir_alu_src_index_scalar() and unused
  * source slots are set to SSA_INVALID_VALUE.  The remaining opcodes need
  * custom sequences and return directly from their case.  Unknown opcodes
  * are reported on stdout and no MIR is emitted.
  */
 static void
 emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr)
 {
         unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest);
         /* These three names are assigned by the ALU_CASE* macros */
         unsigned op = ~0U, argument_count = 0;
         unsigned src_modifiers = 0;

         switch (nir_instr->op) {
                 ALU_CASE(2, fmul, fmul_f32);
                 ALU_CASE(2, fadd, fadd_f32);
                 /* a - b is emitted as a + (-b) using a negate modifier on source 1 */
                 ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG));
                 ALU_CASE(1, ftrunc, trunc);
                 ALU_CASE(1, fceil, ceil);
                 ALU_CASE(1, ffloor, floor);
                 ALU_CASE(1, fround_even, roundeven);
                 ALU_CASE(1, frcp, frcp_fast_f32);
                 ALU_CASE(2, fmax, max_f32);
                 ALU_CASE(2, fmin, min_f32);
                 ALU_CASE(2, iadd, add_i32);
                 ALU_CASE(2, isub, sub_i32);
                 ALU_CASE(2, imul, mul_i32);
                 ALU_CASE(2, iand, and_i32);
                 ALU_CASE(2, ior, or_i32);
                 ALU_CASE(2, ixor, xor_i32);
                 ALU_CASE(2, ishl, lshift_i32);
                 ALU_CASE(2, ushr, rshift_i32);
                 ALU_CASE(2, ishr, arshift_i32);
         case nir_op_ineg: {
                 /* -x is emitted as 0 - x */
                 unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                 printf("ineg 0x%08x\n", src0);
                 struct bifrost_instruction instr = {
                         .op = op_sub_i32,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = dest,
                                 .src0 = SSA_FIXED_CONST_0,
                                 .src1 = src0,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                 };

                 emit_mir_instruction(ctx, instr);
                 return;
         }
         case nir_op_vec2: {
                 uint32_t comps[2] = {
                         nir_alu_src_index(ctx, &nir_instr->src[0]),
                         nir_alu_src_index(ctx, &nir_instr->src[1]),
                 };
                 emit_create_vector(ctx, dest, 2, comps);
                 return;
         }
         case nir_op_vec3: {
                 uint32_t comps[3] = {
                         nir_alu_src_index(ctx, &nir_instr->src[0]),
                         nir_alu_src_index(ctx, &nir_instr->src[1]),
                         nir_alu_src_index(ctx, &nir_instr->src[2]),
                 };
                 emit_create_vector(ctx, dest, 3, comps);
                 return;
         }
         case nir_op_vec4: {
                 uint32_t comps[4] = {
                         nir_alu_src_index(ctx, &nir_instr->src[0]),
                         nir_alu_src_index(ctx, &nir_instr->src[1]),
                         nir_alu_src_index(ctx, &nir_instr->src[2]),
                         nir_alu_src_index(ctx, &nir_instr->src[3]),
                 };
                 emit_create_vector(ctx, dest, 4, comps);
                 return;
         }
         case nir_op_fdiv: {
                 /* a / b is emitted as a * rcp(b) */
                 unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                 unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                 uint32_t reciprocal = alloc_mir_temp(ctx);

                 struct bifrost_instruction rcp = {
                         .op = op_frcp_fast_f32,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = reciprocal,
                                 .src0 = src1,
                                 .src1 = SSA_INVALID_VALUE,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                 };
                 emit_mir_instruction(ctx, rcp);

                 /* The multiply must consume the reciprocal temp; feeding it
                  * the raw divisor would compute a * b instead of a / b. */
                 struct bifrost_instruction mul = {
                         .op = op_fmul_f32,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = dest,
                                 .src0 = src0,
                                 .src1 = reciprocal,
                                 .src2 = SSA_INVALID_VALUE,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                         .src_modifiers = src_modifiers,
                 };

                 emit_mir_instruction(ctx, mul);
                 return;
         }
         case nir_op_umin:
         case nir_op_imin:
         case nir_op_umax:
         case nir_op_imax: {
                 /* NOTE(review): all four opcodes currently emit the same
                  * csel with comparison literal 0, so they are not yet
                  * distinguished from one another (see XXX below). */
                 unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                 unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                 struct bifrost_instruction instr = {
                         .op = op_csel_i32,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = dest,
                                 .src0 = src0,
                                 .src1 = src1,
                                 .src2 = src0,
                                 .src3 = src1,
                         },
                         .src_modifiers = src_modifiers,
                         .literal_args[0] = 0, /* XXX: Comparison operator */
                 };

                 emit_mir_instruction(ctx, instr);
                 return;
         }
         case nir_op_umin3:
         case nir_op_imin3:
         case nir_op_umax3:
         case nir_op_imax3: {
                 unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
                 unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
                 unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2);

                 /* Pick the three-operand MIR op matching the NIR opcode */
                 unsigned minmax3_op = 0;
                 if (nir_instr->op == nir_op_umin3)
                         minmax3_op = op_umin3_i32;
                 else if (nir_instr->op == nir_op_imin3)
                         minmax3_op = op_imin3_i32;
                 else if (nir_instr->op == nir_op_umax3)
                         minmax3_op = op_umax3_i32;
                 else if (nir_instr->op == nir_op_imax3)
                         minmax3_op = op_imax3_i32;

                 struct bifrost_instruction instr = {
                         .op = minmax3_op,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = dest,
                                 .src0 = src0,
                                 .src1 = src1,
                                 .src2 = src2,
                                 .src3 = SSA_INVALID_VALUE,
                         },
                         .src_modifiers = src_modifiers,
                 };

                 emit_mir_instruction(ctx, instr);
                 return;
         }
         case nir_op_ine: {
                 /* NOTE(review): this uses CSEL_IEQ with ~0 in src2 and 0 in
                  * src3, and takes the sources without the scalar extract
                  * used elsewhere — confirm the csel operand order yields
                  * "not equal" and that the vector index is intended. */
                 uint32_t movi = emit_movi(ctx, ~0U);
                 unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]);
                 unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]);
                 struct bifrost_instruction instr = {
                         .op = op_csel_i32,
                         .dest_components = 1,
                         .ssa_args = {
                                 .dest = dest,
                                 .src0 = src0,
                                 .src1 = src1,
                                 .src2 = movi,
                                 .src3 = SSA_FIXED_CONST_0,
                         },
                         .src_modifiers = src_modifiers,
                         .literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */
                 };

                 emit_mir_instruction(ctx, instr);
                 return;
         }
         default:
                 printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name);
                 return;
         }

         /* Generic path for the ALU_CASE* opcodes: extract as many scalar
          * sources as the opcode takes and leave the rest invalid. */
         unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
         unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE;
         unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE;
         unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE;

         struct bifrost_instruction instr = {
                 .op = op,
                 .dest_components = 1,
                 .ssa_args = {
                         .dest = dest,
                         .src0 = src0,
                         .src1 = src1,
                         .src2 = src2,
                         .src3 = src3,
                 },
                 .src_modifiers = src_modifiers,
         };

         emit_mir_instruction(ctx, instr);
 }

 /*
  * Dispatch a NIR instruction to the matching emitter.  load_const,
  * intrinsic and ALU instructions are translated; every other instruction
  * type only produces a diagnostic line on stdout.
  */
 static void
 emit_instr(struct compiler_context *ctx, struct nir_instr *instr)
 {
         const char *diagnostic;

         switch (instr->type) {
         case nir_instr_type_load_const:
                 emit_load_const(ctx, nir_instr_as_load_const(instr));
                 return;
         case nir_instr_type_intrinsic:
                 emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
                 return;
         case nir_instr_type_alu:
                 emit_alu(ctx, nir_instr_as_alu(instr));
                 return;
         case nir_instr_type_tex:
                 diagnostic = "Unhandled NIR inst tex\n";
                 break;
         case nir_instr_type_jump:
                 diagnostic = "Unhandled NIR inst jump\n";
                 break;
         case nir_instr_type_ssa_undef:
                 diagnostic = "Unhandled NIR inst ssa_undef\n";
                 break;
         default:
                 diagnostic = "Unhandled instruction type\n";
                 break;
         }

         /* Only the unsupported instruction types reach this point */
         printf("%s", diagnostic);
 }

 /*
  * Create a new bifrost_block for a NIR basic block and emit its instructions.
  *
  * The new block is appended to ctx->blocks, counted in ctx->block_count,
  * registered as a successor of the previous ctx->current_block (if there is
  * one) and then becomes ctx->current_block. ctx->instruction_count is bumped
  * once per NIR instruction visited, whether or not emit_instr() was able to
  * lower it.
  *
  * Returns the new block, or NULL if the allocation failed; in that case an
  * error is printed to stderr and ctx is left untouched.
  */
 static bifrost_block *
 emit_block(struct compiler_context *ctx, nir_block *block)
 {
         /* calloc takes (element count, element size), in that order */
         bifrost_block *this_block = calloc(1, sizeof(*this_block));

         if (!this_block) {
                 /* Bail out before list_addtail() dereferences the block */
                 fprintf(stderr, "Failed to allocate bifrost_block\n");
                 return NULL;
         }

         list_addtail(&this_block->link, &ctx->blocks);

         ++ctx->block_count;

         /* Add this block to be a successor to the previous block */
         if (ctx->current_block)
                 bifrost_block_add_successor(ctx->current_block, this_block);

         /* Set up current block */
         list_inithead(&this_block->instructions);
         ctx->current_block = this_block;

         nir_foreach_instr(instr, block) {
                 emit_instr(ctx, instr);
                 ++ctx->instruction_count;
         }

 #ifdef BI_DEBUG
         print_mir_block(this_block, false);
 #endif
         return this_block;
 }

 /* Forward declaration: emit_cf_list() below calls emit_if() for
  * nir_cf_node_if nodes, and emit_if() itself calls emit_cf_list() for its
  * then/else lists, so one of the two has to be declared ahead of its
  * definition. */
 void
 emit_if(struct compiler_context *ctx, nir_if *nir_inst);

 static struct bifrost_block *
 emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
 {
         struct bifrost_block *start_block = NULL;
         foreach_list_typed(nir_cf_node, node, node, list) {
                 switch (node->type) {
                 case nir_cf_node_block: {
                         bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node));

                         if (!start_block)
                                 start_block = block;

                         break;
                 }

                 case nir_cf_node_if:
                         emit_if(ctx, nir_cf_node_as_if(node));
                         break;

                 default:
                 case nir_cf_node_loop:
                 case nir_cf_node_function:
                         assert(0);
                         break;
                 }
         }

         return start_block;
 }

 /*
  * Emit MIR for a NIR if-node.
  *
  * Emission order:
  *   1. a conditional op_branch (BR_COND_EQ) comparing the if condition
  *      against a ~0U immediate, appended to the current block;
  *   2. the blocks of the then-list;
  *   3. an unconditional op_branch (BR_ALWAYS) at the end of the then-list;
  *   4. the blocks of the else-list.
  *
  * Both branches are emitted with a placeholder target of 0 in
  * literal_args[1] and patched with block indices once they are known.
  */
 void
 emit_if(struct compiler_context *ctx, nir_if *nir_inst)
 {
         // XXX: Conditional branch instruction can do a variety of comparisons with the sources
         // Merge the source instruction `ine` with our conditional branch
         uint32_t all_ones = emit_movi(ctx, ~0U);

         struct bifrost_instruction cond_branch = {
                 .op = op_branch,
                 .dest_components = 0,
                 .ssa_args = {
                         .dest = SSA_INVALID_VALUE,
                         .src0 = nir_src_index(ctx, &nir_inst->condition),
                         .src1 = all_ones,
                         .src2 = SSA_INVALID_VALUE,
                         .src3 = SSA_INVALID_VALUE,
                 },
                 .src_modifiers = 0,
                 .literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */
                 .literal_args[1] = 0, /* XXX: Branch target, patched at the end */
         };
         emit_mir_instruction(ctx, cond_branch);

         /* Keep a handle on the branch just emitted so its target can be patched */
         bifrost_instruction *entry_branch = mir_last_instr_in_block(ctx->current_block);

         bifrost_block *then_block = emit_cf_list(ctx, &nir_inst->then_list);

         /* Unconditional jump closing the then-list */
         struct bifrost_instruction skip_else = {
                 .op = op_branch,
                 .dest_components = 0,
                 .ssa_args = {
                         .dest = SSA_INVALID_VALUE,
                         .src0 = SSA_INVALID_VALUE,
                         .src1 = SSA_INVALID_VALUE,
                         .src2 = SSA_INVALID_VALUE,
                         .src3 = SSA_INVALID_VALUE,
                 },
                 .src_modifiers = 0,
                 .literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */
                 .literal_args[1] = 0, /* XXX: Branch target, patched below */
         };
         emit_mir_instruction(ctx, skip_else);

         bifrost_instruction *then_exit = mir_last_instr_in_block(ctx->current_block);

         /* Snapshot the counters right before the else-list is emitted */
         unsigned else_start_idx = ctx->block_count;
         unsigned instrs_before_else = ctx->instruction_count;

         bifrost_block *else_block = emit_cf_list(ctx, &nir_inst->else_list);

         /* Index the next emitted block will get, i.e. what follows the if */
         unsigned footer_idx = ctx->block_count;

         assert(then_block);
         assert(else_block);

         if (ctx->instruction_count != instrs_before_else) {
                 /* The else-list emitted instructions: jump over them */
                 then_exit->literal_args[1] = footer_idx;
         } else {
                 // If the else branch didn't have anything in it then we can remove the dead jump
                 mir_remove_instr(then_exit);
         }

         entry_branch->literal_args[1] = else_start_idx;
 }

 /*
  * Compile a NIR shader for Bifrost.
  *
  * Steps, in order:
  *   - build the NIR -> Bifrost index remap tables for uniforms and, depending
  *     on the stage, vertex varyings or fragment outputs;
  *   - run optimize_nir() on the shader;
  *   - emit MIR for the first function of the shader (only one is handled);
  *   - initialize program->compiled as an empty dynarray;
  *   - run the (currently empty) pre-RA MIR optimization loop;
  *   - call schedule_program().
  *
  * With BI_DEBUG defined the NIR shader is printed to stdout and
  * program->compiled is disassembled.
  *
  * Always returns 0.
  *
  * NOTE(review): none of the hash tables created here are destroyed in this
  * function and ctx is a stack local, so they look leaked on return --
  * confirm nothing downstream takes ownership before adding cleanup.
  */
 int
 bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program)
 {
         struct compiler_context ictx = {
                 .nir = nir,
                 .stage = nir->info.stage,
         };

         struct compiler_context *ctx = &ictx;

         /* ctx->b points at this builder for the remainder of the function,
          * including the schedule_program() call, so it must live at function
          * scope rather than inside the per-function loop body. */
         nir_builder builder;

         ctx->mir_temp = 0;

         /* Initialize at a global (not block) level hash tables */
         ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
         ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);

         /* Assign actual uniform location, skipping over samplers */
         ctx->uniform_nir_to_bi  = _mesa_hash_table_u64_create(NULL);

         /* Keys and values are stored with a +1 bias -- presumably so that a
          * zero key / NULL value never represents a valid entry (TODO confirm
          * against the lookup side). */
         nir_foreach_variable(var, &nir->uniforms) {
                 if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;

                 /* One table entry per matrix column */
                 for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                         int id = ctx->uniform_count++;
                         _mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
                 }
         }

         if (ctx->stage == MESA_SHADER_VERTEX) {
                 ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                 nir_foreach_variable(var, &nir->outputs) {
                         /* Slots below VARYING_SLOT_VAR0 all map to the stored
                          * value 1; only VARYING_SLOT_POS bumps varying_count. */
                         if (var->data.location < VARYING_SLOT_VAR0) {
                                 if (var->data.location == VARYING_SLOT_POS)
                                         ctx->varying_count++;
                                 _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1)));

                                 continue;
                         }

                         /* NOTE(review): id adds comp on top of a counter that
                          * is itself incremented per component, and the key
                          * (driver_location + col + comp) can repeat across
                          * columns -- confirm this is the intended numbering. */
                         for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
                                 for (int comp = 0; comp < 4; ++comp) {
                                         int id = comp + ctx->varying_count++;
                                         _mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1)));
                                 }
                         }
                 }

         } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
                 ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL);
                 nir_foreach_variable(var, &nir->outputs) {
                         /* Only colour outputs DATA0..DATA7 get an id; the key
                          * here is data.location, not driver_location. */
                         if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) {
                                 int id = ctx->outputs_count++;
                                 printf("Driver location: %d with id %d\n", var->data.location + 1, id);
                                 _mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1)));
                         }
                 }
         }

         /* Optimisation passes */
         optimize_nir(nir);

 #ifdef BI_DEBUG
         nir_print_shader(nir, stdout);
 #endif

         /* Generate machine IR for shader */
         nir_foreach_function(func, nir) {
                 ctx->b = &builder;
                 nir_builder_init(ctx->b, func->impl);

                 list_inithead(&ctx->blocks);
                 ctx->block_count = 0;
                 ctx->func = func;

                 emit_cf_list(ctx, &func->impl->body);

                 break; // XXX: Once we support multi function shaders then implement
         }

         util_dynarray_init(&program->compiled, NULL);

         // MIR pre-RA optimizations

         bool progress = false;

         /* No pass is enabled yet, so this loop runs exactly once */
         do {
                 progress = false;
                 mir_foreach_block(ctx, block) {
                         // XXX: Not yet working
 //                        progress |= bifrost_opt_branch_fusion(ctx, block);
                 }
         } while (progress);

         schedule_program(ctx);

 #ifdef BI_DEBUG
         nir_print_shader(nir, stdout);
         disassemble_bifrost(program->compiled.data, program->compiled.size, false);
 #endif
         return 0;
 }