blob: 1de4fa093f785462894609d75da08c1f20142cf9 [file] [log] [blame]
/*
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "nir.h"
#include "nir_builder.h"
/*
* Lower calls to functions prefixed "nir_*" to the NIR ALU instruction or
* intrinsic represented. This matches functions of the form:
*
* nir_[op name](__optional mangling suffix)
*
* These functions return a value if the instruction has a destination. They
* take all instruction sources as parameters, followed by parameters for each
* ordered intrinsic index if any.
*
* Mangling allows for multiple definitions of the same instruction with
* different vector lengths and bit sizes. This could be combined with
* __attribute__((overloadable)) for seamless overloads.
*
* In effect, this pass re-implements nir_builder dynamically. This exposes
* low-level hardware intrinsics to internal driver programs. It is intended for
* use with internal OpenCL but should theoretically work for GLSL too.
*/
/*
 * Lower a builtin call to the ALU instruction `op`.
 *
 * call->params[0] is the return deref and call->params[1..] are the ALU
 * sources, in order. The ALU result is stored through the return deref with
 * a write mask covering all of its components.
 */
static void
lower_builtin_alu(nir_builder *b, nir_call_instr *call, nir_op op)
{
   const nir_op_info *info = &nir_op_infos[op];

   /* This path always stores through params[0] and reads one parameter per
    * ALU input after it. Check the parameter count before indexing
    * call->params, mirroring the check done for intrinsics.
    */
   assert(call->num_params == 1u + info->num_inputs);

   /* Only the first info->num_inputs entries are filled in and consumed. */
   nir_def *srcs[NIR_ALU_MAX_INPUTS];
   for (unsigned s = 0; s < info->num_inputs; ++s)
      srcs[s] = call->params[1 + s].ssa;

   nir_def *res = nir_build_alu_src_arr(b, op, srcs);
   nir_store_deref(b, nir_src_as_deref(call->params[0]), res,
                   nir_component_mask(res->num_components));
}
/*
 * Lower a builtin call to the intrinsic `op`.
 *
 * The call parameters are laid out, in order, as: the return deref (only when
 * the intrinsic has a destination), one parameter per intrinsic source, then
 * one constant parameter per intrinsic index.
 */
static void
lower_builtin_intr(nir_builder *b, nir_call_instr *call, nir_intrinsic_op op)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[op];
   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, op);

   /* Parameter 0 is taken by the return deref when there is a destination */
   const unsigned first_src = info->has_dest ? 1 : 0;
   const unsigned first_index = first_src + info->num_srcs;

   assert(call->num_params == (first_index + info->num_indices));

   /* Intrinsic sources come first */
   for (unsigned s = 0; s < info->num_srcs; ++s)
      intr->src[s] = nir_src_for_ssa(call->params[first_src + s].ssa);

   /* Intrinsic indices follow the sources; each is read as a constant */
   for (unsigned i = 0; i < info->num_indices; ++i) {
      const uint64_t value = nir_src_as_uint(call->params[first_index + i]);
      const int slot = info->index_map[info->indices[i]] - 1;

      intr->const_index[slot] = value;
   }

   /* A source with src_components == 0 dictates num_components; the first
    * such source wins.
    */
   for (unsigned s = 0; s < info->num_srcs; ++s) {
      if (info->src_components[s] != 0)
         continue;

      intr->num_components = intr->src[s].ssa->num_components;
      break;
   }

   /* The instruction goes in ahead of any store_deref emitted below */
   nir_builder_instr_insert(b, &intr->instr);

   /* Nothing more to do without a destination */
   if (!info->has_dest)
      return;

   /* Route the destination through the return deref.
    *
    * NOTE(review): the component count is taken from glsl_get_length(),
    * clamped to at least 1 -- confirm this yields the vector width for vector
    * return types.
    */
   nir_deref_instr *ret = nir_src_as_deref(call->params[0]);
   const unsigned bit_size = glsl_get_bit_size(ret->type);
   const unsigned num_components = MAX2(glsl_get_length(ret->type), 1);

   nir_def_init(&intr->instr, &intr->def, num_components, bit_size);
   nir_store_deref(b, ret, &intr->def, nir_component_mask(num_components));

   /* If no source fixed num_components and the destination size is variable,
    * take it from the return type.
    */
   if (info->dest_components == 0 && intr->num_components == 0)
      intr->num_components = num_components;
}
/*
 * Return true if the first `len` characters of `name` are exactly the
 * NUL-terminated string `candidate`. `len` must not exceed strlen(name).
 */
static bool
builtin_name_matches(const char *name, size_t len, const char *candidate)
{
   /* If the first len characters agree, candidate is at least len long, so
    * candidate[len] is in bounds and tells us whether it ends there.
    */
   return strncmp(name, candidate, len) == 0 && candidate[len] == '\0';
}

/*
 * Per-instruction callback: lower one call to a "nir_*" builtin.
 *
 * Returns false (leaving the instruction alone) for anything that is not a
 * call to a function whose name starts with "nir_". Otherwise the call is
 * removed and replaced with the ALU instruction or intrinsic whose name
 * matches the callee name with the "nir_" prefix and any "__*" suffix
 * stripped, and true is returned. A reserved name that matches nothing is a
 * fatal error.
 */
static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
   static const char prefix[] = "nir_";
   const size_t prefix_len = sizeof(prefix) - 1;

   /* All builtins are exposed as function calls */
   if (instr->type != nir_instr_type_call)
      return false;

   nir_call_instr *call = nir_instr_as_call(instr);
   nir_function *func = call->callee;

   /* We reserve all functions prefixed nir_* as builtins needing lowering.
    * An unnamed function cannot be one of them; checking for NULL also keeps
    * us from handing a null pointer to strncmp.
    */
   if (func->name == NULL || strncmp(func->name, prefix, prefix_len) != 0)
      return false;

   /* Strip the nir_ prefix to give the name of an ALU opcode or intrinsic. Also
    * strip the __* suffix if present: we don't need mangling information, we
    * can recover vector lengths / bit sizes from the NIR. This implements a
    * crude form of function overloading.
    */
   const char *intr_name = func->name + prefix_len;
   const char *suffix = strstr(intr_name, "__");
   const size_t len =
      (suffix != NULL) ? (size_t)(suffix - intr_name) : strlen(intr_name);

   /* From this point on, we must not fail. Remove the call. */
   b->cursor = nir_instr_remove(&call->instr);

   /* Look for an ALU opcode */
   for (unsigned i = 0; i < ARRAY_SIZE(nir_op_infos); ++i) {
      if (builtin_name_matches(intr_name, len, nir_op_infos[i].name)) {
         lower_builtin_alu(b, call, (nir_op)i);
         return true;
      }
   }

   /* Look for an intrinsic */
   for (unsigned i = 0; i < ARRAY_SIZE(nir_intrinsic_infos); ++i) {
      if (builtin_name_matches(intr_name, len, nir_intrinsic_infos[i].name)) {
         lower_builtin_intr(b, call, (nir_intrinsic_op)i);
         return true;
      }
   }

   /* We must have matched something! */
   fprintf(stderr, "unknown opcode %s\n", func->name);
   unreachable("invalid nir opcode/intrinsic");
}
/*
 * Pass entry point: run the builtin-call lowering callback over the
 * instructions of shader `s`, preserving no metadata (nir_metadata_none).
 * Returns whatever nir_shader_instructions_pass reports.
 */
bool
nir_lower_calls_to_builtins(nir_shader *s)
{
   const bool progress =
      nir_shader_instructions_pass(s, lower, nir_metadata_none, NULL);

   return progress;
}