/*
* Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler/nir/nir_builder.h"
#include "bifrost_compile.h"
#include "bifrost_opts.h"
#include "bifrost_sched.h"
#include "compiler_defines.h"
#include "disassemble.h"
#include "bifrost_print.h"
#define BI_DEBUG
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void
optimize_nir(nir_shader *nir)
{
bool progress;
NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
do {
progress = false;
NIR_PASS(progress, nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL);
NIR_PASS(progress, nir, nir_opt_if, true);
} while (progress);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_dce);
}
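/* NIR SSA values and registers are folded into one flat index space: SSA
 * defs keep their own index, while registers are offset past ssa_alloc so
 * the two ranges never collide. */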
static unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
if (src->is_ssa)
return src->ssa->index;
else
return ctx->func->impl->ssa_alloc + src->reg.reg->index;
}
static unsigned
nir_dest_index(compiler_context *ctx, nir_dest *dst)
{
if (dst->is_ssa)
return dst->ssa.index;
else
return ctx->func->impl->ssa_alloc + dst->reg.reg->index;
}
static unsigned
nir_alu_src_index(compiler_context *ctx, nir_alu_src *src)
{
return nir_src_index(ctx, &src->src);
}
struct bifrost_instruction *
mir_alloc_ins(struct bifrost_instruction instr)
{
struct bifrost_instruction *heap_ins = malloc(sizeof(instr));
memcpy(heap_ins, &instr, sizeof(instr));
return heap_ins;
}
static void
emit_mir_instruction(struct compiler_context *ctx, struct bifrost_instruction instr)
{
list_addtail(&(mir_alloc_ins(instr))->link, &ctx->current_block->instructions);
}
static void
bifrost_block_add_successor(bifrost_block *block, bifrost_block *successor)
{
assert(block->num_successors < ARRAY_SIZE(block->successors));
block->successors[block->num_successors++] = successor;
}
static void
emit_load_const(struct compiler_context *ctx, nir_load_const_instr *instr)
{
nir_ssa_def def = instr->def;
/* Allocate room for a full vector of components; nir_const_load_to_arr
 * writes one float per component, so a single-element array would
 * overflow for vector constants. */
float *v = ralloc_array(NULL, float, NIR_MAX_VEC_COMPONENTS);
nir_const_load_to_arr(v, instr, f32);
_mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
}
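/* MIR-level temporaries are drawn from a separate counter and tagged via
 * SSA_TEMP_VALUE so they stay distinct from NIR-derived indices. */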
static uint32_t
alloc_mir_temp(struct compiler_context *ctx)
{
return SSA_TEMP_VALUE(ctx->mir_temp++);
}
static uint32_t
emit_ld_vary_addr_constant(struct compiler_context *ctx, uint32_t location)
{
// LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
// ...
// ST_VAR.v4 T1, R12, R13, R14, R4
// R61-R62 is filled with information needed for varying interpolation
// This loads a vec3 with the information that ST_VAR needs to work
uint32_t mir_temp_location = alloc_mir_temp(ctx);
// This instruction loads a vec3 starting from the initial register
struct bifrost_instruction instr = {
.op = op_ld_var_addr,
.dest_components = 3,
.ssa_args = {
.dest = mir_temp_location,
.src0 = SSA_FIXED_REGISTER(61),
.src1 = SSA_FIXED_REGISTER(62),
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.literal_args[0] = location,
};
emit_mir_instruction(ctx, instr);
return mir_temp_location;
}
// XXX: Doesn't support duplicated values in the components!
// RA WILL fail!
static void
emit_create_vector(struct compiler_context *ctx, unsigned dest, unsigned num_comps, uint32_t *comps)
{
assert(num_comps <= 4 && "Can't make a vector larger than 4 components");
// Build a vector with up to four components from the given scalar sources
struct bifrost_instruction instr = {
.op = op_create_vector,
.dest_components = num_comps,
.ssa_args = {
.dest = dest,
}
};
uint32_t *srcs[4] = {
&instr.ssa_args.src0,
&instr.ssa_args.src1,
&instr.ssa_args.src2,
&instr.ssa_args.src3,
};
for (unsigned i = 0; i < 4; ++i) {
if (i < num_comps)
*srcs[i] = comps[i];
else
*srcs[i] = SSA_INVALID_VALUE;
}
emit_mir_instruction(ctx, instr);
}
static uint32_t
emit_extract_vector_element(struct compiler_context *ctx, unsigned ssa_vector, unsigned element)
{
uint32_t mir_temp_location = alloc_mir_temp(ctx);
// Extract a single component from a vector into a new scalar temporary
struct bifrost_instruction instr = {
.op = op_extract_element,
.dest_components = 1,
.ssa_args = {
.dest = mir_temp_location,
.src0 = ssa_vector,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.literal_args[0] = element,
};
emit_mir_instruction(ctx, instr);
return mir_temp_location;
}
static uint32_t
emit_movi(struct compiler_context *ctx, uint32_t literal)
{
uint32_t mir_temp_location = alloc_mir_temp(ctx);
// Load a 32-bit immediate into a new scalar temporary
struct bifrost_instruction instr = {
.op = op_movi,
.dest_components = 1,
.ssa_args = {
.dest = mir_temp_location,
.src0 = SSA_INVALID_VALUE,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.literal_args[0] = literal,
};
emit_mir_instruction(ctx, instr);
return mir_temp_location;
}
static unsigned
nir_alu_src_index_scalar(compiler_context *ctx, nir_alu_instr *nir_instr, unsigned src)
{
// NIR pairs a channel-use mask with per-channel swizzles; find the first used channel and take its swizzle entry as the source component
for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
if (!nir_alu_instr_channel_used(nir_instr, src, c))
continue;
// Pull the swizzle from this element that is active and use it as the source
unsigned element = nir_instr->src[src].swizzle[c];
// Create an op that extracts an element from a vector
return emit_extract_vector_element(ctx, nir_alu_src_index(ctx, &nir_instr->src[src]), element);
}
assert(0);
return 0;
}
static void
emit_intrinsic(struct compiler_context *ctx, nir_intrinsic_instr *nir_instr)
{
nir_const_value *const_offset;
unsigned offset, reg;
switch (nir_instr->intrinsic) {
case nir_intrinsic_load_ubo: {
nir_const_value *location = nir_src_as_const_value(nir_instr->src[0]);
const_offset = nir_src_as_const_value(nir_instr->src[1]);
assert(location && "no indirect ubo selection");
assert(const_offset && "no indirect offsets");
enum bifrost_ir_ops op;
// load_ubo <UBO binding>, <byte offset>
// ld_ubo <byte offset>, <UBO binding>
switch (nir_dest_num_components(nir_instr->dest)) {
case 1:
op = op_ld_ubo_v1;
break;
case 2:
op = op_ld_ubo_v2;
break;
case 3:
op = op_ld_ubo_v3;
break;
case 4:
op = op_ld_ubo_v4;
break;
default:
assert(0);
break;
}
reg = nir_dest_index(ctx, &nir_instr->dest);
struct bifrost_instruction instr = {
.op = op,
.dest_components = nir_dest_num_components(nir_instr->dest),
.ssa_args = {
.dest = reg,
.src0 = SSA_INVALID_VALUE,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.literal_args[0] = nir_src_as_uint(nir_instr->src[1]),
.literal_args[1] = nir_src_as_uint(nir_instr->src[0]),
};
emit_mir_instruction(ctx, instr);
break;
}
case nir_intrinsic_store_ssbo: {
nir_const_value *location = nir_src_as_const_value(nir_instr->src[1]);
const_offset = nir_src_as_const_value(nir_instr->src[2]);
assert(location && "no indirect ssbo selection");
assert(const_offset && "no indirect offsets");
// store_ssbo <Value>, <binding>, <offset>
// store_vN <Addr>, <Value>
reg = nir_src_index(ctx, &nir_instr->src[0]);
enum bifrost_ir_ops op;
switch (nir_src_num_components(nir_instr->src[0])) {
case 1:
op = op_store_v1;
break;
case 2:
op = op_store_v2;
break;
case 3:
op = op_store_v3;
break;
case 4:
op = op_store_v4;
break;
default:
assert(0);
break;
}
struct bifrost_instruction instr = {
.op = op,
.dest_components = 0,
.ssa_args = {
.dest = SSA_INVALID_VALUE,
.src0 = reg,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.literal_args[0] = nir_src_as_uint(nir_instr->src[2]),
};
emit_mir_instruction(ctx, instr);
break;
}
case nir_intrinsic_load_uniform:
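// Uniforms are read with moves from the uniform register file; a scalar
// load is a single mov, a vector load does one mov per component
// (stepping the offset by 4) and recombines them with emit_create_vector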
offset = nir_intrinsic_base(nir_instr);
if (nir_src_is_const(nir_instr->src[0])) {
offset += nir_src_as_uint(nir_instr->src[0]);
} else {
assert(0 && "Can't handle indirect load_uniform");
}
reg = nir_dest_index(ctx, &nir_instr->dest);
unsigned num_components = nir_dest_num_components(nir_instr->dest);
if (num_components == 1) {
struct bifrost_instruction instr = {
.op = op_mov,
.dest_components = 1,
.ssa_args = {
.dest = reg,
.src0 = SSA_FIXED_UREGISTER(offset),
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
};
emit_mir_instruction(ctx, instr);
} else {
uint32_t comps[4];
for (unsigned i = 0; i < nir_dest_num_components(nir_instr->dest); ++i) {
uint32_t temp_dest = alloc_mir_temp(ctx);
comps[i] = temp_dest;
struct bifrost_instruction instr = {
.op = op_mov,
.dest_components = 1,
.ssa_args = {
.dest = temp_dest,
.src0 = SSA_FIXED_UREGISTER(offset + (i * 4)),
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
};
emit_mir_instruction(ctx, instr);
}
emit_create_vector(ctx, reg, num_components, comps);
}
break;
case nir_intrinsic_load_input: {
const_offset = nir_src_as_const_value(nir_instr->src[0]);
assert (const_offset && "no indirect inputs");
offset = nir_intrinsic_base(nir_instr) + nir_src_as_uint(nir_instr->src[0]);
reg = nir_dest_index(ctx, &nir_instr->dest);
enum bifrost_ir_ops op;
switch (nir_dest_num_components(nir_instr->dest)) {
case 1:
op = op_ld_attr_v1;
break;
case 2:
op = op_ld_attr_v2;
break;
case 3:
op = op_ld_attr_v3;
break;
case 4:
op = op_ld_attr_v4;
break;
default:
assert(0);
break;
}
struct bifrost_instruction instr = {
.op = op,
.dest_components = nir_dest_num_components(nir_instr->dest),
.ssa_args = {
.dest = reg,
.src0 = offset,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
}
};
emit_mir_instruction(ctx, instr);
break;
}
case nir_intrinsic_store_output: {
const_offset = nir_src_as_const_value(nir_instr->src[1]);
assert(const_offset && "no indirect outputs");
offset = nir_intrinsic_base(nir_instr);
if (ctx->stage == MESA_SHADER_FRAGMENT) {
int comp = nir_intrinsic_component(nir_instr);
offset += comp;
// XXX: This will need to change once we support outputs other than colour
void *entry = _mesa_hash_table_u64_search(ctx->outputs_nir_to_bi, offset + FRAG_RESULT_DATA0 + 1);
if (!entry) {
printf("WARNING: skipping fragment output\n");
break;
}
offset = (uintptr_t) (entry) - 1;
reg = nir_src_index(ctx, &nir_instr->src[0]);
enum bifrost_ir_ops op;
switch (nir_src_num_components(nir_instr->src[0])) {
case 1:
op = op_store_v1;
break;
case 2:
op = op_store_v2;
break;
case 3:
op = op_store_v3;
break;
case 4:
op = op_store_v4;
break;
default:
assert(0);
break;
}
// XXX: Not all offsets are vec4 aligned; this will need adjusting in the future
// XXX: This needs to offset correctly into memory so the blend step can pick it up
uint32_t movi = emit_movi(ctx, offset * 16);
uint32_t movi2 = emit_movi(ctx, 0);
uint32_t comps[2] = {
movi, movi2,
};
uint32_t offset_val = alloc_mir_temp(ctx);
emit_create_vector(ctx, offset_val, 2, comps);
struct bifrost_instruction instr = {
.op = op,
.dest_components = 0,
.ssa_args = {
.dest = SSA_INVALID_VALUE,
.src0 = offset_val,
.src1 = reg,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
}
};
emit_mir_instruction(ctx, instr);
} else if (ctx->stage == MESA_SHADER_VERTEX) {
int comp = nir_intrinsic_component(nir_instr);
offset += comp;
void *entry = _mesa_hash_table_u64_search(ctx->varying_nir_to_bi, offset + 2);
if (!entry) {
printf("WARNING: skipping varying\n");
break;
}
offset = (uintptr_t) (entry) - 1;
reg = nir_src_index(ctx, &nir_instr->src[0]);
// LD_VAR_ADDR.f32 {R0, T1}, R61, R62, location:1, R12
// ...
// ST_VAR.v4 T1, R12, R13, R14, R4
offset = emit_ld_vary_addr_constant(ctx, offset);
enum bifrost_ir_ops op;
switch (nir_src_num_components(nir_instr->src[0])) {
case 1:
op = op_st_vary_v1;
break;
case 2:
op = op_st_vary_v2;
break;
case 3:
op = op_st_vary_v3;
break;
case 4:
op = op_st_vary_v4;
break;
default:
assert(0);
break;
}
struct bifrost_instruction instr = {
.op = op,
.dest_components = 0,
.ssa_args = {
.dest = SSA_INVALID_VALUE,
.src0 = offset,
.src1 = reg,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
}
};
emit_mir_instruction(ctx, instr);
} else {
assert(0 && "Unknown store_output stage");
}
break;
}
default:
printf ("Unhandled intrinsic %s\n", nir_intrinsic_infos[nir_instr->intrinsic].name);
break;
}
}
#define ALU_CASE(arguments, nir, name) \
case nir_op_##nir: \
argument_count = arguments; \
op = op_##name; \
break
#define ALU_CASE_MOD(arguments, nir, name, modifiers) \
case nir_op_##nir: \
argument_count = arguments; \
op = op_##name; \
src_modifiers = modifiers; \
break
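/* ALU_CASE_MOD maps a NIR op onto a base op plus source modifiers, e.g.
 * fsub(a, b) becomes fadd(a, -b) via a negate modifier on source 1. */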
static void
emit_alu(struct compiler_context *ctx, nir_alu_instr *nir_instr)
{
unsigned dest = nir_dest_index(ctx, &nir_instr->dest.dest);
unsigned op = ~0U, argument_count;
unsigned src_modifiers = 0;
switch (nir_instr->op) {
ALU_CASE(2, fmul, fmul_f32);
ALU_CASE(2, fadd, fadd_f32);
ALU_CASE_MOD(2, fsub, fadd_f32, SOURCE_MODIFIER(1, SRC_MOD_NEG));
ALU_CASE(1, ftrunc, trunc);
ALU_CASE(1, fceil, ceil);
ALU_CASE(1, ffloor, floor);
ALU_CASE(1, fround_even, roundeven);
ALU_CASE(1, frcp, frcp_fast_f32);
ALU_CASE(2, fmax, max_f32);
ALU_CASE(2, fmin, min_f32);
ALU_CASE(2, iadd, add_i32);
ALU_CASE(2, isub, sub_i32);
ALU_CASE(2, imul, mul_i32);
ALU_CASE(2, iand, and_i32);
ALU_CASE(2, ior, or_i32);
ALU_CASE(2, ixor, xor_i32);
ALU_CASE(2, ishl, lshift_i32);
ALU_CASE(2, ushr, rshift_i32);
ALU_CASE(2, ishr, arshift_i32);
case nir_op_ineg: {
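// ineg has no dedicated op; lower ineg(x) to (0 - x) using the fixed
// zero constant as the first subtraction operand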
unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
struct bifrost_instruction instr = {
.op = op_sub_i32,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = SSA_FIXED_CONST_0,
.src1 = src0,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
};
emit_mir_instruction(ctx, instr);
return;
}
case nir_op_vec2: {
uint32_t comps[2] = {
nir_alu_src_index(ctx, &nir_instr->src[0]),
nir_alu_src_index(ctx, &nir_instr->src[1]),
};
emit_create_vector(ctx, dest, 2, comps);
return;
}
case nir_op_vec3: {
uint32_t comps[3] = {
nir_alu_src_index(ctx, &nir_instr->src[0]),
nir_alu_src_index(ctx, &nir_instr->src[1]),
nir_alu_src_index(ctx, &nir_instr->src[2]),
};
emit_create_vector(ctx, dest, 3, comps);
return;
}
case nir_op_vec4: {
uint32_t comps[4] = {
nir_alu_src_index(ctx, &nir_instr->src[0]),
nir_alu_src_index(ctx, &nir_instr->src[1]),
nir_alu_src_index(ctx, &nir_instr->src[2]),
nir_alu_src_index(ctx, &nir_instr->src[3]),
};
emit_create_vector(ctx, dest, 4, comps);
return;
}
case nir_op_fdiv: {
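// There is no divide op; lower a / b to a * frcp_fast(b)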
unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
uint32_t mir_temp_location = alloc_mir_temp(ctx);
{
struct bifrost_instruction instr = {
.op = op_frcp_fast_f32,
.dest_components = 1,
.ssa_args = {
.dest = mir_temp_location,
.src0 = src1,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
};
emit_mir_instruction(ctx, instr);
}
struct bifrost_instruction instr = {
.op = op_fmul_f32,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = src0,
.src1 = mir_temp_location, // multiply by the reciprocal computed above
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.src_modifiers = src_modifiers,
};
emit_mir_instruction(ctx, instr);
return;
}
case nir_op_umin:
case nir_op_imin:
case nir_op_umax:
case nir_op_imax: {
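// min/max lower to a compare-select: csel picks between the two sources
// based on the comparison encoded in literal_args[0] (still XXX below)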
unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
struct bifrost_instruction instr = {
.op = op_csel_i32,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = src0,
.src1 = src1,
.src2 = src0,
.src3 = src1,
},
.src_modifiers = src_modifiers,
.literal_args[0] = 0, /* XXX: Comparison operator */
};
emit_mir_instruction(ctx, instr);
return;
}
case nir_op_umin3:
case nir_op_imin3:
case nir_op_umax3:
case nir_op_imax3: {
unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
unsigned src1 = nir_alu_src_index_scalar(ctx, nir_instr, 1);
unsigned src2 = nir_alu_src_index_scalar(ctx, nir_instr, 2);
unsigned op = 0;
if (nir_instr->op == nir_op_umin3)
op = op_umin3_i32;
else if (nir_instr->op == nir_op_imin3)
op = op_imin3_i32;
else if (nir_instr->op == nir_op_umax3)
op = op_umax3_i32;
else if (nir_instr->op == nir_op_imax3)
op = op_imax3_i32;
struct bifrost_instruction instr = {
.op = op,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = src0,
.src1 = src1,
.src2 = src2,
.src3 = SSA_INVALID_VALUE,
},
.src_modifiers = src_modifiers,
};
emit_mir_instruction(ctx, instr);
return;
}
case nir_op_ine: {
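// Build a boolean mask via csel_i32: compare the two operands and select
// between an all-ones immediate and the fixed zero constant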
uint32_t movi = emit_movi(ctx, ~0U);
unsigned src0 = nir_alu_src_index(ctx, &nir_instr->src[0]);
unsigned src1 = nir_alu_src_index(ctx, &nir_instr->src[1]);
struct bifrost_instruction instr = {
.op = op_csel_i32,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = src0,
.src1 = src1,
.src2 = movi,
.src3 = SSA_FIXED_CONST_0,
},
.src_modifiers = src_modifiers,
.literal_args[0] = CSEL_IEQ, /* XXX: Comparison operator */
};
emit_mir_instruction(ctx, instr);
return;
}
default:
printf("Unhandled ALU op %s\n", nir_op_infos[nir_instr->op].name);
return;
}
unsigned src0 = nir_alu_src_index_scalar(ctx, nir_instr, 0);
unsigned src1 = argument_count >= 2 ? nir_alu_src_index_scalar(ctx, nir_instr, 1) : SSA_INVALID_VALUE;
unsigned src2 = argument_count >= 3 ? nir_alu_src_index_scalar(ctx, nir_instr, 2) : SSA_INVALID_VALUE;
unsigned src3 = argument_count >= 4 ? nir_alu_src_index_scalar(ctx, nir_instr, 3) : SSA_INVALID_VALUE;
struct bifrost_instruction instr = {
.op = op,
.dest_components = 1,
.ssa_args = {
.dest = dest,
.src0 = src0,
.src1 = src1,
.src2 = src2,
.src3 = src3,
},
.src_modifiers = src_modifiers,
};
emit_mir_instruction(ctx, instr);
}
static void
emit_instr(struct compiler_context *ctx, struct nir_instr *instr)
{
switch (instr->type) {
case nir_instr_type_load_const:
emit_load_const(ctx, nir_instr_as_load_const(instr));
break;
case nir_instr_type_intrinsic:
emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
break;
case nir_instr_type_alu:
emit_alu(ctx, nir_instr_as_alu(instr));
break;
case nir_instr_type_tex:
printf("Unhandled NIR inst tex\n");
break;
case nir_instr_type_jump:
printf("Unhandled NIR inst jump\n");
break;
case nir_instr_type_ssa_undef:
printf("Unhandled NIR inst ssa_undef\n");
break;
default:
printf("Unhandled instruction type\n");
break;
}
}
static bifrost_block *
emit_block(struct compiler_context *ctx, nir_block *block)
{
bifrost_block *this_block = calloc(1, sizeof(bifrost_block));
list_addtail(&this_block->link, &ctx->blocks);
++ctx->block_count;
/* Add this block to be a successor to the previous block */
if (ctx->current_block)
bifrost_block_add_successor(ctx->current_block, this_block);
/* Set up current block */
list_inithead(&this_block->instructions);
ctx->current_block = this_block;
nir_foreach_instr(instr, block) {
emit_instr(ctx, instr);
++ctx->instruction_count;
}
#ifdef BI_DEBUG
print_mir_block(this_block, false);
#endif
return this_block;
}
void
emit_if(struct compiler_context *ctx, nir_if *nir_inst);
static struct bifrost_block *
emit_cf_list(struct compiler_context *ctx, struct exec_list *list)
{
struct bifrost_block *start_block = NULL;
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block: {
bifrost_block *block = emit_block(ctx, nir_cf_node_as_block(node));
if (!start_block)
start_block = block;
break;
}
case nir_cf_node_if:
emit_if(ctx, nir_cf_node_as_if(node));
break;
default:
case nir_cf_node_loop:
case nir_cf_node_function:
assert(0);
break;
}
}
return start_block;
}
void
emit_if(struct compiler_context *ctx, nir_if *nir_inst)
{
// XXX: Conditional branch instruction can do a variety of comparisons with the sources
// Merge the source instruction `ine` with our conditional branch
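// Lowering scheme: emit a conditional branch whose target is later
// patched to the else block, emit the then-list, then emit an
// unconditional branch patched to the if-footer (and removed again if
// the else side turns out to be empty).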
{
uint32_t movi = emit_movi(ctx, ~0U);
struct bifrost_instruction instr = {
.op = op_branch,
.dest_components = 0,
.ssa_args = {
.dest = SSA_INVALID_VALUE,
.src0 = nir_src_index(ctx, &nir_inst->condition),
.src1 = movi,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.src_modifiers = 0,
.literal_args[0] = BR_COND_EQ, /* XXX: Comparison Arg type */
.literal_args[1] = 0, /* XXX: Branch target */
};
emit_mir_instruction(ctx, instr);
}
bifrost_instruction *true_branch = mir_last_instr_in_block(ctx->current_block);
bifrost_block *true_block = emit_cf_list(ctx, &nir_inst->then_list);
{
struct bifrost_instruction instr = {
.op = op_branch,
.dest_components = 0,
.ssa_args = {
.dest = SSA_INVALID_VALUE,
.src0 = SSA_INVALID_VALUE,
.src1 = SSA_INVALID_VALUE,
.src2 = SSA_INVALID_VALUE,
.src3 = SSA_INVALID_VALUE,
},
.src_modifiers = 0,
.literal_args[0] = BR_ALWAYS, /* XXX: ALWAYS */
.literal_args[1] = 0, /* XXX: Branch target */
};
emit_mir_instruction(ctx, instr);
}
bifrost_instruction *true_exit_branch = mir_last_instr_in_block(ctx->current_block);
unsigned false_idx = ctx->block_count;
unsigned inst_count = ctx->instruction_count;
bifrost_block *false_block = emit_cf_list(ctx, &nir_inst->else_list);
unsigned if_footer_idx = ctx->block_count;
assert(true_block);
assert(false_block);
if (ctx->instruction_count == inst_count) {
// If the else branch didn't have anything in it then we can remove the dead jump
mir_remove_instr(true_exit_branch);
} else {
true_exit_branch->literal_args[1] = if_footer_idx;
}
true_branch->literal_args[1] = false_idx;
}
int
bifrost_compile_shader_nir(nir_shader *nir, struct bifrost_program *program)
{
struct compiler_context ictx = {
.nir = nir,
.stage = nir->info.stage,
};
struct compiler_context *ctx = &ictx;
ctx->mir_temp = 0;
/* Initialize hash tables at a global (not per-block) level */
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
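/* Keys and values in these tables are biased by one on insert and
 * un-biased on lookup, so a zero index or NULL value is never confused
 * with a missing entry. */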
/* Assign actual uniform location, skipping over samplers */
ctx->uniform_nir_to_bi = _mesa_hash_table_u64_create(NULL);
nir_foreach_variable(var, &nir->uniforms) {
if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
int id = ctx->uniform_count++;
_mesa_hash_table_u64_insert(ctx->uniform_nir_to_bi, var->data.driver_location + col + 1, (void *) ((uintptr_t) (id + 1)));
}
}
if (ctx->stage == MESA_SHADER_VERTEX) {
ctx->varying_nir_to_bi = _mesa_hash_table_u64_create(NULL);
nir_foreach_variable(var, &nir->outputs) {
if (var->data.location < VARYING_SLOT_VAR0) {
if (var->data.location == VARYING_SLOT_POS)
ctx->varying_count++;
_mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + 1, (void *) ((uintptr_t) (1)));
continue;
}
for (int col = 0; col < glsl_get_matrix_columns(var->type); ++col) {
for (int comp = 0; comp < 4; ++comp) {
int id = comp + ctx->varying_count++;
_mesa_hash_table_u64_insert(ctx->varying_nir_to_bi, var->data.driver_location + col + comp + 1, (void *) ((uintptr_t) (id + 1)));
}
}
}
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
ctx->outputs_nir_to_bi = _mesa_hash_table_u64_create(NULL);
nir_foreach_variable(var, &nir->outputs) {
if (var->data.location >= FRAG_RESULT_DATA0 && var->data.location <= FRAG_RESULT_DATA7) {
int id = ctx->outputs_count++;
printf("Driver location: %d with id %d\n", var->data.location + 1, id);
_mesa_hash_table_u64_insert(ctx->outputs_nir_to_bi, var->data.location + 1, (void *) ((uintptr_t) (id + 1)));
}
}
}
/* Optimisation passes */
optimize_nir(nir);
#ifdef BI_DEBUG
nir_print_shader(nir, stdout);
#endif
/* Generate machine IR for shader */
nir_foreach_function(func, nir) {
nir_builder _b;
ctx->b = &_b;
nir_builder_init(ctx->b, func->impl);
list_inithead(&ctx->blocks);
ctx->block_count = 0;
ctx->func = func;
emit_cf_list(ctx, &func->impl->body);
break; // XXX: Only the first function is compiled until multi-function shaders are supported
}
util_dynarray_init(&program->compiled, NULL);
// MIR pre-RA optimizations
bool progress = false;
do {
progress = false;
mir_foreach_block(ctx, block) {
// XXX: Not yet working
// progress |= bifrost_opt_branch_fusion(ctx, block);
}
} while (progress);
schedule_program(ctx);
#ifdef BI_DEBUG
nir_print_shader(nir, stdout);
disassemble_bifrost(program->compiled.data, program->compiled.size, false);
#endif
return 0;
}