/*
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
#include "ac_binary.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
#include "nir/nir.h"
#include "nir/nir_deref.h"
#include "sid.h"
#include "util/bitscan.h"
#include "util/u_math.h"
#include <llvm/Config/llvm-config.h>
struct ac_nir_context {
struct ac_llvm_context ac;
struct ac_shader_abi *abi;
const struct ac_shader_args *args;
gl_shader_stage stage;
shader_info *info;
LLVMValueRef *ssa_defs;
struct ac_llvm_pointer scratch;
struct ac_llvm_pointer constant_data;
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *verified_interp;
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMBasicBlockRef break_block;
};
static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
const nir_instr *instr, bool image);
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
enum ac_descriptor_type desc_type, const nir_instr *instr,
LLVMValueRef index, bool image, bool write);
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
{
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
if (def->num_components > 1) {
type = LLVMVectorType(type, def->num_components);
}
return type;
}
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
assert(src.is_ssa);
return nir->ssa_defs[src.ssa->index];
}
static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned c_off)
{
LLVMValueRef ptr = get_src(ctx, src);
ptr = LLVMBuildAdd(ctx->ac.builder, ptr, LLVMConstInt(ctx->ac.i32, c_off, 0), "");
/* LDS is used here as an i8 pointer. */
return LLVMBuildGEP2(ctx->ac.builder, ctx->ac.i8, ctx->ac.lds.value, &ptr, 1, "");
}
static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
{
struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
return (LLVMBasicBlockRef)entry->data;
}
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
unsigned num_components)
{
LLVMValueRef value = get_src(ctx, src.src);
bool need_swizzle = false;
assert(value);
unsigned src_components = ac_get_llvm_num_components(value);
for (unsigned i = 0; i < num_components; ++i) {
assert(src.swizzle[i] < src_components);
if (src.swizzle[i] != i)
need_swizzle = true;
}
if (need_swizzle || num_components != src_components) {
LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
if (src_components > 1 && num_components == 1) {
value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
} else if (src_components == 1 && num_components > 1) {
LLVMValueRef values[] = {value, value, value, value};
value = ac_build_gather_values(&ctx->ac, values, num_components);
} else {
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
}
}
assert(!src.negate);
assert(!src.abs);
return value;
}
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
LLVMValueRef src0, LLVMValueRef src1)
{
src0 = ac_to_integer(ctx, src0);
src1 = ac_to_integer(ctx, src1);
return LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
}
static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
LLVMValueRef src0, LLVMValueRef src1)
{
src0 = ac_to_float(ctx, src0);
src1 = ac_to_float(ctx, src1);
return LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
}
static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
LLVMTypeRef result_type, LLVMValueRef src0)
{
char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
};
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}
static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
LLVMTypeRef result_type, LLVMValueRef src0)
{
if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
return emit_intrin_1f_param(ctx, intrin, result_type, src0);
LLVMTypeRef elem_type = LLVMGetElementType(result_type);
LLVMValueRef ret = LLVMGetUndef(result_type);
/* Scalarize the intrinsic, because vectors are not supported. */
for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
};
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
ret = LLVMBuildInsertElement(
ctx->builder, ret,
ac_build_intrinsic(ctx, name, elem_type, params, 1, AC_FUNC_ATTR_READNONE),
LLVMConstInt(ctx->i32, i, 0), "");
}
return ret;
}
static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
LLVMTypeRef result_type, LLVMValueRef src0,
LLVMValueRef src1)
{
char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
};
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}
static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
LLVMTypeRef result_type, LLVMValueRef src0,
LLVMValueRef src1, LLVMValueRef src2)
{
char name[64], type[64];
LLVMValueRef params[] = {
ac_to_float(ctx, src0),
ac_to_float(ctx, src1),
ac_to_float(ctx, src2),
};
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
assert(length < sizeof(name));
return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}
static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
LLVMValueRef src2)
{
LLVMTypeRef src1_type = LLVMTypeOf(src1);
LLVMTypeRef src2_type = LLVMTypeOf(src2);
if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
} else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
}
return LLVMBuildSelect(ctx->builder, src0, ac_to_integer_or_pointer(ctx, src1),
ac_to_integer_or_pointer(ctx, src2), "");
}
static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
}
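/* Return the carry/borrow bit of the add/sub-with-overflow intrinsic,
 * zero-extended to i32. (Descriptive note added for clarity.) */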
static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
LLVMValueRef src0, LLVMValueRef src1)
{
LLVMTypeRef ret_type;
LLVMTypeRef types[] = {ctx->i32, ctx->i1};
LLVMValueRef res;
LLVMValueRef params[] = {src0, src1};
ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);
res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
return res;
}
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 1);
switch (bitsize) {
case 16:
if (LLVMGetTypeKind(LLVMTypeOf(src0)) == LLVMVectorTypeKind) {
assert(LLVMGetVectorSize(LLVMTypeOf(src0)) == 2);
LLVMValueRef f[] = {
LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 0),
ctx->f16_1, ctx->f16_0, ""),
LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 1),
ctx->f16_1, ctx->f16_0, ""),
};
return ac_build_gather_values(ctx, f, 2);
}
return LLVMBuildSelect(ctx->builder, src0, ctx->f16_1, ctx->f16_0, "");
case 32:
return LLVMBuildSelect(ctx->builder, src0, ctx->f32_1, ctx->f32_0, "");
case 64:
return LLVMBuildSelect(ctx->builder, src0, ctx->f64_1, ctx->f64_0, "");
default:
unreachable("Unsupported bit size.");
}
}
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
src0 = ac_to_float(ctx, src0);
LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, "");
}
static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
switch (bitsize) {
case 8:
return LLVMBuildSelect(ctx->builder, src0, ctx->i8_1, ctx->i8_0, "");
case 16:
return LLVMBuildSelect(ctx->builder, src0, ctx->i16_1, ctx->i16_0, "");
case 32:
return LLVMBuildSelect(ctx->builder, src0, ctx->i32_1, ctx->i32_0, "");
case 64:
return LLVMBuildSelect(ctx->builder, src0, ctx->i64_1, ctx->i64_0, "");
default:
unreachable("Unsupported bit size.");
}
}
static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
return LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, "");
}
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
LLVMValueRef result;
LLVMValueRef cond = NULL;
src0 = ac_to_float(ctx, src0);
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
if (ctx->gfx_level >= GFX8) {
LLVMValueRef args[2];
/* Check if the result is a denormal - and flush to 0 if so. */
args[0] = result;
args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
cond =
ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
}
/* need to convert back up to f32 */
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
if (ctx->gfx_level >= GFX8)
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
else {
/* for GFX6-GFX7 */
* 0x38800000 is the smallest normal half-float value (2^-14) as a 32-bit float,
* so compare the result and flush to 0 if it's smaller.
*/
LLVMValueRef temp, cond2;
temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
cond = LLVMBuildFCmp(
ctx->builder, LLVMRealOGT,
LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
temp, "");
cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
}
return result;
}
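/* Return the high 32 bits of the unsigned 64-bit product src0 * src1. */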
static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef src1)
{
LLVMValueRef dst64, result;
src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
return result;
}
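/* Return the high 32 bits of the signed 64-bit product src0 * src1. */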
static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef src1)
{
LLVMValueRef dst64, result;
src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
return result;
}
static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
{
/* mask = ((1 << bits) - 1) << offset */
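/* e.g. bits = 4, offset = 8 -> ((1 << 4) - 1) << 8 = 0x00000f00 */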
return LLVMBuildShl(
ctx->builder,
LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
offset, "");
}
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
LLVMValueRef insert, LLVMValueRef base)
{
/* Calculate:
* (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
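* e.g. mask = 0x00ff, insert = 0xabcd, base = 0x1234 yields 0x12cd either way.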
*/
return LLVMBuildXor(
ctx->builder, base,
LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
}
static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
LLVMValueRef args[2]))
{
LLVMValueRef comp[2];
src0 = ac_to_float(ctx, src0);
comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
}
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
LLVMValueRef temps[2], val;
int i;
for (i = 0; i < 2; i++) {
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
}
return ac_build_gather_values(ctx, temps, 2);
}
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
{
unsigned mask;
int idx;
LLVMValueRef result;
if (op == nir_op_fddx_fine)
mask = AC_TID_MASK_LEFT;
else if (op == nir_op_fddy_fine)
mask = AC_TID_MASK_TOP;
else
mask = AC_TID_MASK_TOP_LEFT;
/* For DDX we want the next X pixel, for DDY the next Y pixel. */
if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
idx = 1;
else
idx = 2;
result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
return result;
}
struct waterfall_context {
LLVMBasicBlockRef phi_bb[2];
bool use_waterfall;
};
/* To deal with divergent descriptors we can create a loop that handles all
* lanes with the same descriptor on a given iteration (henceforth a
* waterfall loop).
*
* These helpers create the begin and end of the loop, leaving the caller
* to implement the body.
*
* params:
* - ctx is the usual nir context
* - wctx is a temporary struct containing some loop info. Can be left uninitialized.
* - value is the possibly divergent value for which we build the loop
* - divergent is whether value is actually divergent. If false we just pass
* things through.
*/
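/* Roughly, the emitted control flow is:
 *
 *    loop {
 *       scalar_value = readfirstlane(value);
 *       if (value == scalar_value) {
 *          ...caller-emitted body using scalar_value...
 *          break;   (emitted by exit_waterfall)
 *       }
 *    }
 */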
static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
LLVMValueRef value, bool divergent)
{
/* If the app claims the value is divergent but it is constant, we can
* end up with a dynamic index of NULL. */
if (!value)
divergent = false;
wctx->use_waterfall = divergent;
if (!divergent)
return value;
ac_build_bgnloop(&ctx->ac, 6000);
LLVMValueRef active = LLVMConstInt(ctx->ac.i1, 1, false);
LLVMValueRef scalar_value[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < ac_get_llvm_num_components(value); i++) {
LLVMValueRef comp = ac_llvm_extract_elem(&ctx->ac, value, i);
scalar_value[i] = ac_build_readlane(&ctx->ac, comp, NULL);
active = LLVMBuildAnd(ctx->ac.builder, active,
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, comp, scalar_value[i], ""), "");
}
wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
ac_build_ifcc(&ctx->ac, active, 6001);
return ac_build_gather_values(&ctx->ac, scalar_value, ac_get_llvm_num_components(value));
}
static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
LLVMValueRef value)
{
LLVMValueRef ret = NULL;
LLVMValueRef phi_src[2];
LLVMValueRef cc_phi_src[2] = {
LLVMConstInt(ctx->ac.i32, 0, false),
LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
};
if (!wctx->use_waterfall)
return value;
wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
ac_build_endif(&ctx->ac, 6001);
if (value) {
phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
phi_src[1] = value;
ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
}
/*
* By using the optimization barrier on the exit decision, we decouple
* the operations from the break, and hence avoid LLVM hoisting the
* operation into the break block.
*/
LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
ac_build_optimization_barrier(&ctx->ac, &cc, false);
LLVMValueRef active =
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
ac_build_ifcc(&ctx->ac, active, 6002);
ac_build_break(&ctx->ac);
ac_build_endif(&ctx->ac, 6002);
ac_build_endloop(&ctx->ac, 6000);
return ret;
}
static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[16], result = NULL;
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
switch (instr->op) {
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
case nir_op_unpack_32_2x16:
case nir_op_unpack_64_2x32:
case nir_op_unpack_64_4x16:
src_components = 1;
break;
case nir_op_pack_half_2x16:
case nir_op_pack_snorm_2x16:
case nir_op_pack_unorm_2x16:
case nir_op_pack_uint_2x16:
case nir_op_pack_sint_2x16:
case nir_op_pack_32_2x16:
case nir_op_pack_64_2x32:
src_components = 2;
break;
case nir_op_unpack_half_2x16:
src_components = 1;
break;
case nir_op_cube_face_coord_amd:
case nir_op_cube_face_index_amd:
src_components = 3;
break;
case nir_op_pack_32_4x8:
case nir_op_pack_64_4x16:
src_components = 4;
break;
default:
src_components = num_components;
break;
}
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
src[i] = get_alu_src(ctx, instr->src[i], src_components);
switch (instr->op) {
case nir_op_mov:
result = src[0];
break;
case nir_op_fneg:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
/* fneg will be optimized by the backend compiler with the sign
* bit removed via XOR. This is probably an LLVM bug.
*/
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
}
break;
case nir_op_ineg:
if (instr->no_unsigned_wrap)
result = LLVMBuildNUWNeg(ctx->ac.builder, src[0], "");
else if (instr->no_signed_wrap)
result = LLVMBuildNSWNeg(ctx->ac.builder, src[0], "");
else
result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
break;
case nir_op_inot:
result = LLVMBuildNot(ctx->ac.builder, src[0], "");
break;
case nir_op_iadd:
if (instr->no_unsigned_wrap)
result = LLVMBuildNUWAdd(ctx->ac.builder, src[0], src[1], "");
else if (instr->no_signed_wrap)
result = LLVMBuildNSWAdd(ctx->ac.builder, src[0], src[1], "");
else
result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_uadd_sat:
case nir_op_iadd_sat: {
char name[64], type[64];
ac_build_type_name_for_intr(def_type, type, sizeof(type));
snprintf(name, sizeof(name), "llvm.%cadd.sat.%s",
instr->op == nir_op_uadd_sat ? 'u' : 's', type);
result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 2, AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_usub_sat:
case nir_op_isub_sat: {
char name[64], type[64];
ac_build_type_name_for_intr(def_type, type, sizeof(type));
snprintf(name, sizeof(name), "llvm.%csub.sat.%s",
instr->op == nir_op_usub_sat ? 'u' : 's', type);
result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 2, AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_fadd:
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fsub:
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_isub:
if (instr->no_unsigned_wrap)
result = LLVMBuildNUWSub(ctx->ac.builder, src[0], src[1], "");
else if (instr->no_signed_wrap)
result = LLVMBuildNSWSub(ctx->ac.builder, src[0], src[1], "");
else
result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_imul:
if (instr->no_unsigned_wrap)
result = LLVMBuildNUWMul(ctx->ac.builder, src[0], src[1], "");
else if (instr->no_signed_wrap)
result = LLVMBuildNSWMul(ctx->ac.builder, src[0], src[1], "");
else
result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_imod:
result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_umod:
result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_irem:
result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_idiv:
result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_udiv:
result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fmul:
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fmulz:
assert(LLVM_VERSION_MAJOR >= 12);
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
src, 2, AC_FUNC_ATTR_READNONE);
break;
case nir_op_frcp:
/* For doubles, we need precise division to pass GLCTS. */
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");
} else {
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
ac_to_float_type(&ctx->ac, def_type), src[0]);
}
if (ctx->abi->clamp_div_by_zero)
result = ac_build_fmin(&ctx->ac, result,
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_iand:
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ior:
result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ixor:
result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ishl: {
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
LLVMTypeRef type = LLVMTypeOf(src[0]);
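/* NIR shifts use only the low bits of the shift amount, and LLVM shifts by
* >= the bit width are poison, so mask the amount here. */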
src[1] = LLVMBuildAnd(ctx->ac.builder, src[1],
LLVMConstInt(type, LLVMGetIntTypeWidth(type) - 1, false), "");
result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
break;
}
case nir_op_ishr: {
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
LLVMTypeRef type = LLVMTypeOf(src[0]);
src[1] = LLVMBuildAnd(ctx->ac.builder, src[1],
LLVMConstInt(type, LLVMGetIntTypeWidth(type) - 1, false), "");
result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
break;
}
case nir_op_ushr: {
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
LLVMTypeRef type = LLVMTypeOf(src[0]);
src[1] = LLVMBuildAnd(ctx->ac.builder, src[1],
LLVMConstInt(type, LLVMGetIntTypeWidth(type) - 1, false), "");
result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
break;
}
case nir_op_ilt:
result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
break;
case nir_op_ine:
result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
break;
case nir_op_ieq:
result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
break;
case nir_op_ige:
result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
break;
case nir_op_ult:
result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
break;
case nir_op_uge:
result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
break;
case nir_op_feq:
result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
break;
case nir_op_fneu:
result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
break;
case nir_op_flt:
result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
break;
case nir_op_fge:
result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
break;
case nir_op_fabs:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
/* fabs will be optimized by the backend compiler with the sign
* bit removed via AND.
*/
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
}
break;
case nir_op_fsat:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_fsat(&ctx->ac, src[0],
ac_to_float_type(&ctx->ac, def_type));
break;
case nir_op_iabs:
result = emit_iabs(&ctx->ac, src[0]);
break;
case nir_op_imax:
result = ac_build_imax(&ctx->ac, src[0], src[1]);
break;
case nir_op_imin:
result = ac_build_imin(&ctx->ac, src[0], src[1]);
break;
case nir_op_umax:
result = ac_build_umax(&ctx->ac, src[0], src[1]);
break;
case nir_op_umin:
result = ac_build_umin(&ctx->ac, src[0], src[1]);
break;
case nir_op_isign:
result = ac_build_isign(&ctx->ac, src[0]);
break;
case nir_op_fsign:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_fsign(&ctx->ac, src[0]);
break;
case nir_op_ffloor:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_ftrunc:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fceil:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fround_even:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_ffract:
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fsin:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fcos:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fsin_amd:
case nir_op_fcos_amd:
/* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */
if (ctx->ac.gfx_level < GFX9)
src[0] = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
ac_to_float_type(&ctx->ac, def_type), src[0]);
result =
emit_intrin_1f_param(&ctx->ac, instr->op == nir_op_fsin_amd ? "llvm.amdgcn.sin" : "llvm.amdgcn.cos",
ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fsqrt:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fexp2:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_flog2:
result =
emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frsq:
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
ac_to_float_type(&ctx->ac, def_type), src[0]);
if (ctx->abi->clamp_div_by_zero)
result = ac_build_fmin(&ctx->ac, result,
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
break;
case nir_op_frexp_exp:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
break;
case nir_op_frexp_sig:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
case nir_op_fpow:
if (instr->dest.dest.ssa.bit_size != 32) {
/* 16 and 64 bits */
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
ac_to_float_type(&ctx->ac, def_type), src[0]);
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
ac_to_float_type(&ctx->ac, def_type), result);
break;
}
if (LLVM_VERSION_MAJOR >= 12) {
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
ac_to_float_type(&ctx->ac, def_type), src[0]);
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
(LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
2, AC_FUNC_ATTR_READNONE);
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
ac_to_float_type(&ctx->ac, def_type), result);
break;
}
/* Older LLVM doesn't have fmul.legacy. */
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1]);
break;
case nir_op_fmax:
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1]);
if (ctx->ac.gfx_level < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
/* Only pre-GFX9 chips do not flush denorms. */
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
}
break;
case nir_op_fmin:
result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1]);
if (ctx->ac.gfx_level < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
/* Only pre-GFX9 chips do not flush denorms. */
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
}
break;
case nir_op_ffma:
/* FMA is slow on gfx6-8, so it shouldn't be used. */
assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.gfx_level >= GFX9);
result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
src[0], src[1], src[2]);
break;
case nir_op_ffmaz:
assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.gfx_level >= GFX10_3);
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
src[2] = ac_to_float(&ctx->ac, src[2]);
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fma.legacy", ctx->ac.f32,
src, 3, AC_FUNC_ATTR_READNONE);
break;
case nir_op_ldexp:
src[0] = ac_to_float(&ctx->ac, src[0]);
if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
AC_FUNC_ATTR_READNONE);
else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
AC_FUNC_ATTR_READNONE);
else
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
AC_FUNC_ATTR_READNONE);
break;
case nir_op_bfm:
result = emit_bfm(&ctx->ac, src[0], src[1]);
break;
case nir_op_bitfield_select:
result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
break;
case nir_op_ubfe:
result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
break;
case nir_op_ibfe:
result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
break;
case nir_op_bitfield_reverse:
result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
break;
case nir_op_bit_count:
result = ac_build_bit_count(&ctx->ac, src[0]);
break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
src[i] = ac_to_integer(&ctx->ac, src[i]);
result = ac_build_gather_values(&ctx->ac, src, num_components);
break;
case nir_op_f2i8:
case nir_op_f2i16:
case nir_op_f2imp:
case nir_op_f2i32:
case nir_op_f2i64:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_f2u8:
case nir_op_f2u16:
case nir_op_f2ump:
case nir_op_f2u32:
case nir_op_f2u64:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_i2f16:
case nir_op_i2fmp:
case nir_op_i2f32:
case nir_op_i2f64:
result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_u2f16:
case nir_op_u2fmp:
case nir_op_u2f32:
case nir_op_u2f64:
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f16_rtz:
case nir_op_f2f16:
case nir_op_f2fmp:
src[0] = ac_to_float(&ctx->ac, src[0]);
/* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
* all f32->f16 conversions have to round towards zero, because both scalar
* and vec2 down-conversions have to round equally.
*/
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
src[0] = ac_to_float(&ctx->ac, src[0]);
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
/* Fast path conversion. This only works if NIR is vectorized
* to a 16-bit vec2.
*/
if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
LLVMValueRef args[] = {
ac_llvm_extract_elem(&ctx->ac, src[0], 0),
ac_llvm_extract_elem(&ctx->ac, src[0], 1),
};
result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
break;
}
assert(ac_get_llvm_num_components(src[0]) == 1);
LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
} else {
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result =
LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
else
result =
LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
}
break;
case nir_op_f2f16_rtne:
case nir_op_f2f32:
case nir_op_f2f64:
src[0] = ac_to_float(&ctx->ac, src[0]);
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
else
result =
LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_u2u8:
case nir_op_u2u16:
case nir_op_u2u32:
case nir_op_u2u64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
else
result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_i2i8:
case nir_op_i2i16:
case nir_op_i2imp:
case nir_op_i2i32:
case nir_op_i2i64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
else
result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_bcsel:
result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
break;
case nir_op_find_lsb:
result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
break;
case nir_op_ufind_msb:
result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32, false);
break;
case nir_op_ifind_msb:
result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
break;
case nir_op_ufind_msb_rev:
result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32, true);
break;
case nir_op_ifind_msb_rev:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sffbh.i32", ctx->ac.i32, &src[0], 1,
AC_FUNC_ATTR_READNONE);
break;
case nir_op_uclz: {
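/* llvm.ctlz takes an i1 "is_zero_poison" flag; pass false so ctlz(0) is
* well-defined and returns 32. */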
LLVMValueRef params[2] = {
src[0],
ctx->ac.i1false,
};
result = ac_build_intrinsic(&ctx->ac, "llvm.ctlz.i32", ctx->ac.i32, params, 2, AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_uadd_carry:
result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
break;
case nir_op_usub_borrow:
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
break;
case nir_op_b2f16:
case nir_op_b2f32:
case nir_op_b2f64:
result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
case nir_op_f2b1:
result = emit_f2b(&ctx->ac, src[0]);
break;
case nir_op_b2i8:
case nir_op_b2i16:
case nir_op_b2i32:
case nir_op_b2i64:
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
case nir_op_i2b1:
case nir_op_b2b1: /* after loads */
result = emit_i2b(&ctx->ac, src[0]);
break;
case nir_op_b2b16: /* before stores */
result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i16, "");
break;
case nir_op_b2b32: /* before stores */
result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i32, "");
break;
case nir_op_fquantize2f16:
result = emit_f2f16(&ctx->ac, src[0]);
break;
case nir_op_umul_high:
result = emit_umul_high(&ctx->ac, src[0], src[1]);
break;
case nir_op_imul_high:
result = emit_imul_high(&ctx->ac, src[0], src[1]);
break;
case nir_op_pack_half_2x16:
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
break;
case nir_op_pack_half_2x16_split:
src[0] = ac_to_float(&ctx->ac, src[0]);
src[1] = ac_to_float(&ctx->ac, src[1]);
result = LLVMBuildBitCast(ctx->ac.builder,
ac_build_cvt_pkrtz_f16(&ctx->ac, src),
ctx->ac.i32, "");
break;
case nir_op_pack_snorm_2x16:
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
break;
case nir_op_pack_unorm_2x16:
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
break;
case nir_op_pack_uint_2x16: {
LLVMValueRef comp[2];
comp[0] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_1, "");
result = ac_build_cvt_pk_u16(&ctx->ac, comp, 16, false);
break;
}
case nir_op_pack_sint_2x16: {
LLVMValueRef comp[2];
comp[0] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_1, "");
result = ac_build_cvt_pk_i16(&ctx->ac, comp, 16, false);
break;
}
case nir_op_unpack_half_2x16:
result = emit_unpack_half_2x16(&ctx->ac, src[0]);
break;
case nir_op_unpack_half_2x16_split_x: {
assert(ac_get_llvm_num_components(src[0]) == 1);
LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
break;
}
case nir_op_unpack_half_2x16_split_y: {
assert(ac_get_llvm_num_components(src[0]) == 1);
LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
break;
}
case nir_op_fddx:
case nir_op_fddy:
case nir_op_fddx_fine:
case nir_op_fddy_fine:
case nir_op_fddx_coarse:
case nir_op_fddy_coarse:
result = emit_ddxy(ctx, instr->op, src[0]);
break;
case nir_op_unpack_64_4x16: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v4i16, "");
break;
}
case nir_op_pack_64_4x16: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.i64, "");
break;
}
case nir_op_unpack_64_2x32: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i32, "");
break;
}
case nir_op_unpack_64_2x32_split_x: {
assert(ac_get_llvm_num_components(src[0]) == 1);
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
break;
}
case nir_op_unpack_64_2x32_split_y: {
assert(ac_get_llvm_num_components(src[0]) == 1);
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
break;
}
case nir_op_pack_64_2x32: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.i64, "");
break;
}
case nir_op_pack_64_2x32_split: {
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
break;
}
case nir_op_pack_32_4x8:
case nir_op_pack_32_2x16: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.i32, "");
break;
}
case nir_op_pack_32_2x16_split: {
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
break;
}
case nir_op_unpack_32_2x16: {
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
ctx->ac.v2i16, "");
break;
}
case nir_op_unpack_32_2x16_split_x: {
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
break;
}
case nir_op_unpack_32_2x16_split_y: {
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
break;
}
case nir_op_cube_face_coord_amd: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef results[2];
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
AC_FUNC_ATTR_READNONE);
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
AC_FUNC_ATTR_READNONE);
LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
AC_FUNC_ATTR_READNONE);
results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
result = ac_build_gather_values(&ctx->ac, results, 2);
break;
}
case nir_op_cube_face_index_amd: {
src[0] = ac_to_float(&ctx->ac, src[0]);
LLVMValueRef in[3];
for (unsigned chan = 0; chan < 3; chan++)
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_extract_u8:
case nir_op_extract_i8:
case nir_op_extract_u16:
case nir_op_extract_i16: {
bool is_signed = instr->op == nir_op_extract_i16 || instr->op == nir_op_extract_i8;
unsigned size = instr->op == nir_op_extract_u8 || instr->op == nir_op_extract_i8 ? 8 : 16;
LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
result = LLVMBuildLShr(ctx->ac.builder, src[0], offset, "");
result = LLVMBuildTrunc(ctx->ac.builder, result, LLVMIntTypeInContext(ctx->ac.context, size), "");
if (is_signed)
result = LLVMBuildSExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
else
result = LLVMBuildZExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
break;
}
case nir_op_insert_u8:
case nir_op_insert_u16: {
unsigned size = instr->op == nir_op_insert_u8 ? 8 : 16;
LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
LLVMValueRef mask = LLVMConstInt(LLVMTypeOf(src[0]), u_bit_consecutive(0, size), false);
result = LLVMBuildShl(ctx->ac.builder, LLVMBuildAnd(ctx->ac.builder, src[0], mask, ""), offset, "");
break;
}
case nir_op_sdot_4x8_iadd:
case nir_op_sdot_4x8_iadd_sat: {
if (ctx->ac.gfx_level >= GFX11) {
result = ac_build_sudot_4x8(&ctx->ac, src[0], src[1], src[2],
instr->op == nir_op_sdot_4x8_iadd_sat, 0x3);
} else {
const char *name = "llvm.amdgcn.sdot4";
src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_4x8_iadd_sat, false);
result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
}
break;
}
case nir_op_sudot_4x8_iadd:
case nir_op_sudot_4x8_iadd_sat: {
result = ac_build_sudot_4x8(&ctx->ac, src[0], src[1], src[2],
instr->op == nir_op_sudot_4x8_iadd_sat, 0x1);
break;
}
case nir_op_udot_4x8_uadd:
case nir_op_udot_4x8_uadd_sat: {
const char *name = "llvm.amdgcn.udot4";
src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_udot_4x8_uadd_sat, false);
result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_sdot_2x16_iadd:
case nir_op_udot_2x16_uadd:
case nir_op_sdot_2x16_iadd_sat:
case nir_op_udot_2x16_uadd_sat: {
const char *name = instr->op == nir_op_sdot_2x16_iadd ||
instr->op == nir_op_sdot_2x16_iadd_sat
? "llvm.amdgcn.sdot2" : "llvm.amdgcn.udot2";
src[0] = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
src[1] = LLVMBuildBitCast(ctx->ac.builder, src[1], ctx->ac.v2i16, "");
src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_2x16_iadd_sat ||
instr->op == nir_op_udot_2x16_uadd_sat, false);
result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
break;
}
case nir_op_sad_u8x4:
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
(LLVMValueRef[]){src[0], src[1], src[2]}, 3,
AC_FUNC_ATTR_READNONE);
break;
default:
fprintf(stderr, "Unknown NIR alu instr: ");
nir_print_instr(&instr->instr, stderr);
fprintf(stderr, "\n");
return false;
}
if (result) {
assert(instr->dest.dest.is_ssa);
result = ac_to_integer_or_pointer(&ctx->ac, result);
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
return true;
}
static bool visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
{
LLVMValueRef values[16], value = NULL;
LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
for (unsigned i = 0; i < instr->def.num_components; ++i) {
switch (instr->def.bit_size) {
case 1:
values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
break;
case 8:
values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
break;
case 16:
values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
break;
case 32:
values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
break;
case 64:
values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
break;
default:
fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
return false;
}
}
if (instr->def.num_components > 1) {
value = LLVMConstVector(values, instr->def.num_components);
} else
value = values[0];
ctx->ssa_defs[instr->def.index] = value;
return true;
}
/* Gather4 should follow the same rules as bilinear filtering, but the hardware
* incorrectly forces nearest filtering if the texture format is integer.
* The only effect it has on Gather4, which always returns 4 texels for
* bilinear filtering, is that the final coordinates are off by 0.5 of
* the texel size.
*
* The workaround is to subtract 0.5 from the unnormalized coordinates,
* or (0.5 / size) from the normalized coordinates.
*
* However, cube textures with 8_8_8_8 data formats require a different
* workaround of overriding the num format to USCALED/SSCALED. This would lose
* precision in 32-bit data formats, so it needs to be applied dynamically at
* runtime. In this case, return an i1 value that indicates whether the
* descriptor was overridden (and hence a fixup of the sampler result is needed).
*/
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args,
const nir_tex_instr *instr)
{
nir_alu_type stype = nir_alu_type_get_base_type(instr->dest_type);
LLVMValueRef wa_8888 = NULL;
LLVMValueRef half_texel[2];
LLVMValueRef result;
assert(stype == nir_type_int || stype == nir_type_uint);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
LLVMValueRef formats;
LLVMValueRef data_format;
LLVMValueRef wa_formats;
formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
data_format =
LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
uint32_t wa_num_format = stype == nir_type_uint
? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
: S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
wa_formats = LLVMBuildAnd(ctx->builder, formats,
LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
wa_formats =
LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");
formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
args->resource =
LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
assert(!wa_8888);
half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
} else {
struct ac_image_args resinfo = {0};
LLVMBasicBlockRef bbs[2];
LLVMValueRef unnorm = NULL;
LLVMValueRef default_offset = ctx->f32_0;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
/* In Vulkan, whether the sampler uses unnormalized
* coordinates or not is a dynamic property of the
* sampler. Hence, to figure out whether or not we
* need to divide by the texture size, we need to test
* the sampler at runtime. This tests the bit set by
* radv_init_sampler().
*/
LLVMValueRef sampler0 =
LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
default_offset = LLVMConstReal(ctx->f32, -0.5);
}
bbs[0] = LLVMGetInsertBlock(ctx->builder);
if (wa_8888 || unnorm) {
assert(!(wa_8888 && unnorm));
LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
/* Skip the texture size query entirely if we don't need it. */
ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
bbs[1] = LLVMGetInsertBlock(ctx->builder);
}
/* Query the texture size. */
resinfo.dim = ac_get_sampler_dim(ctx->gfx_level, instr->sampler_dim, instr->is_array);
resinfo.opcode = ac_image_get_resinfo;
resinfo.dmask = 0xf;
resinfo.lod = ctx->i32_0;
resinfo.resource = args->resource;
resinfo.attributes = AC_FUNC_ATTR_READNONE;
LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
/* Compute -0.5 / size. */
for (unsigned c = 0; c < 2; c++) {
half_texel[c] =
LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
half_texel[c] =
LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
}
if (wa_8888 || unnorm) {
ac_build_endif(ctx, 2000);
for (unsigned c = 0; c < 2; c++) {
LLVMValueRef values[2] = {default_offset, half_texel[c]};
half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
}
}
}
for (unsigned c = 0; c < 2; c++) {
LLVMValueRef tmp;
tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
}
args->attributes = AC_FUNC_ATTR_READNONE;
result = ac_build_image_opcode(ctx, args);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
LLVMValueRef tmp, tmp2;
/* if the cube workaround is in place, f2i the result. */
for (unsigned c = 0; c < 4; c++) {
tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
if (stype == nir_type_uint)
tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
else
tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
result =
LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
}
}
return result;
}
static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
struct ac_image_args *args)
{
assert((!args->tfe || !args->d16) && "unsupported");
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
assert(instr->dest.is_ssa);
/* Buffers don't support A16. */
if (args->a16)
args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");
return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
util_last_bit(mask), 0, true,
instr->dest.ssa.bit_size == 16,
args->tfe);
}
args->opcode = ac_image_sample;
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
case nir_texop_samples_identical:
args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
? ac_image_load
: ac_image_load_mip;
args->level_zero = false;
break;
case nir_texop_txs:
case nir_texop_query_levels:
case nir_texop_texture_samples:
assert(!"should have been lowered");
break;
case nir_texop_tex:
if (ctx->stage != MESA_SHADER_FRAGMENT &&
(ctx->stage != MESA_SHADER_COMPUTE ||
ctx->info->cs.derivative_group == DERIVATIVE_GROUP_NONE)) {
assert(!args->lod);
args->level_zero = true;
}
break;
case nir_texop_tg4:
args->opcode = ac_image_gather4;
if (!args->lod && !args->bias)
args->level_zero = true;
break;
case nir_texop_lod:
args->opcode = ac_image_get_lod;
break;
case nir_texop_fragment_fetch_amd:
case nir_texop_fragment_mask_fetch_amd:
args->opcode = ac_image_load;
args->level_zero = false;
break;
default:
break;
}
/* Aldebaran doesn't have image_sample_lz, but image_sample behaves like lz. */
if (!ctx->ac.has_3d_cube_border_color_mipmap)
args->level_zero = false;
if (instr->op == nir_texop_tg4 && ctx->ac.gfx_level <= GFX8 &&
(instr->dest_type & (nir_type_int | nir_type_uint))) {
return lower_gather4_integer(&ctx->ac, args, instr);
}
/* Fixup for GFX9 which allocates 1D textures as 2D. */
if (instr->op == nir_texop_lod && ctx->ac.gfx_level == GFX9) {
if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
args->coords[1] = ctx->ac.i32_0;
}
}
args->attributes = AC_FUNC_ATTR_READNONE;
bool cs_derivs =
ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
/* Prevent texture instructions with implicit derivatives from being
* sunk into branches. */
switch (instr->op) {
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_lod:
args->attributes |= AC_FUNC_ATTR_CONVERGENT;
break;
default:
break;
}
}
return ac_build_image_opcode(&ctx->ac, args);
}
static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, addr;
LLVMValueRef src0 = get_src(ctx, instr->src[0]);
unsigned index = nir_intrinsic_base(instr);
addr = LLVMConstInt(ctx->ac.i32, index, 0);
addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
/* Load constant values from user SGPRs when possible, otherwise
* fall back to the default path that loads directly from memory.
*/
if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size >= 32) {
unsigned count = instr->dest.ssa.num_components;
unsigned offset = index;
if (instr->dest.ssa.bit_size == 64)
count *= 2;
offset += LLVMConstIntGetZExtValue(src0);
offset /= 4;
uint64_t mask = BITFIELD64_MASK(count) << offset;
if ((ctx->args->inline_push_const_mask | mask) == ctx->args->inline_push_const_mask &&
offset + count <= (sizeof(ctx->args->inline_push_const_mask) * 8u)) {
LLVMValueRef *const push_constants = alloca(count * sizeof(LLVMValueRef));
unsigned arg_index =
util_bitcount64(ctx->args->inline_push_const_mask & BITFIELD64_MASK(offset));
for (unsigned i = 0; i < count; i++)
push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[arg_index++]);
LLVMValueRef res = ac_build_gather_values(&ctx->ac, push_constants, count);
return instr->dest.ssa.bit_size == 64
? LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "")
: res;
}
}
struct ac_llvm_pointer pc = ac_get_ptr_arg(&ctx->ac, ctx->args, ctx->args->push_constants);
ptr = LLVMBuildGEP2(ctx->ac.builder, pc.t, pc.v, &addr, 1, "");
if (instr->dest.ssa.bit_size == 8) {
unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad2(ctx->ac.builder, vec_type, ptr, "");
LLVMValueRef params[3];
if (load_dwords > 1) {
LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
LLVMConstInt(ctx->ac.i32, 1, false), "");
params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
LLVMConstInt(ctx->ac.i32, 0, false), "");
} else {
res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
params[0] = ctx->ac.i32_0;
params[1] = res;
}
params[2] = addr;
res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
res = LLVMBuildTrunc(
ctx->ac.builder, res,
LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
if (instr->dest.ssa.num_components > 1)
res = LLVMBuildBitCast(ctx->ac.builder, res,
LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
return res;
} else if (instr->dest.ssa.bit_size == 16) {
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
LLVMValueRef res = LLVMBuildLoad2(ctx->ac.builder, vec_type, ptr, "");
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
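/* Bit 1 of the byte address tells whether the first 16-bit element starts in
 * the upper half of a dword; pick the matching shuffle of the loaded data. */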
LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
LLVMValueRef mask[] = {
LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
LLVMConstInt(ctx->ac.i32, 4, false)};
LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
LLVMValueRef shuffle_aligned =
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
LLVMValueRef shuffle_unaligned =
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
}
LLVMTypeRef ptr_type = get_def_type(ctx, &instr->dest.ssa);
ptr = ac_cast_ptr(&ctx->ac, ptr, ptr_type);
return LLVMBuildLoad2(ctx->ac.builder, ptr_type, ptr, "");
}
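/* Return the size of an SSBO by extracting dword 2 (num_records) from its
 * buffer descriptor. */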
static LLVMValueRef visit_get_ssbo_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
bool non_uniform = nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM;
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), false, non_uniform);
return LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, false), "");
}
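/* Extract "count" consecutive components starting at "start" from a vector;
 * returns a scalar for count == 1 and the source unchanged when the whole
 * vector is selected. */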
static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
unsigned start, unsigned count)
{
LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
LLVMConstInt(ctx->i32, 3, false)};
unsigned src_elements = ac_get_llvm_num_components(src);
if (count == src_elements) {
assert(start == 0);
return src;
} else if (count == 1) {
assert(start < src_elements);
return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
} else {
assert(start + count <= src_elements);
assert(count <= 4);
LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
}
}
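/* Derive the GLC/SLC cache policy bits for a buffer or image access from its
 * access qualifiers, plus the hardware workaround below. */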
static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
bool may_store_unaligned, bool writeonly_memory)
{
unsigned cache_policy = 0;
/* GFX6 has a TC L1 bug causing corruption of 8-bit/16-bit stores. All
 * store opcodes not aligned to a dword are affected. The only way to
 * get unaligned stores is through shader images.
 */
if (((may_store_unaligned && ctx->ac.gfx_level == GFX6) ||
/* If this is write-only, don't keep data in L1 to prevent
* evicting L1 cache lines that may be needed by other
* instructions.
*/
writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
cache_policy |= ac_glc;
}
if (access & ACCESS_STREAM_CACHE_POLICY)
cache_policy |= ac_slc | ac_glc;
return cache_policy;
}
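/* Start a waterfall loop over the SSBO index when the access is marked
 * non-uniform, so that the descriptor is loaded with a uniform index. */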
static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
const nir_intrinsic_instr *instr, nir_src src)
{
return enter_waterfall(ctx, wctx, get_src(ctx, src),
nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
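/* SSBO store: split the write mask into chunks that match the available
 * buffer store opcodes (byte, short, 1-4 dwords), with extra splitting for
 * the alignment restrictions handled below. */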
static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, "");
ac_build_ifcc(&ctx->ac, cond, 7000);
}
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
unsigned writemask = nir_intrinsic_write_mask(instr);
enum gl_access_qualifier access = nir_intrinsic_access(instr);
bool writeonly_memory = access & ACCESS_NON_READABLE;
unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
LLVMValueRef base_data = src_data;
base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
while (writemask) {
int start, count;
LLVMValueRef data, offset;
LLVMTypeRef data_type;
u_bit_scan_consecutive_range(&writemask, &start, &count);
if (count == 3 && elem_size_bytes != 4) {
writemask |= 1 << (start + 2);
count = 2;
}
int num_bytes = count * elem_size_bytes; /* count in bytes */
/* We can only store 4 dwords at a time; exceeding that can
 * only happen for 64-bit vectors. */
if (num_bytes > 16) {
writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
count = 2;
num_bytes = 16;
}
/* Check the alignment of 16-bit stores. */
if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
count = 1;
num_bytes = 2;
}
/* Due to alignment issues, split stores of 8-bit/16-bit
* vectors.
*/
if (ctx->ac.gfx_level == GFX6 && count > 1 && elem_size_bytes < 4) {
writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
count = 1;
num_bytes = elem_size_bytes;
}
data = extract_vector_range(&ctx->ac, base_data, start, count);
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
if (num_bytes == 1) {
ac_build_buffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
} else if (num_bytes == 2) {
ac_build_buffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
} else {
switch (num_bytes) {
case 16: /* v4f32 */
data_type = ctx->ac.v4f32;
break;
case 12: /* v3f32 */
data_type = ctx->ac.v3f32;
break;
case 8: /* v2f32 */
data_type = ctx->ac.v2f32;
break;
case 4: /* f32 */
data_type = ctx->ac.f32;
break;
default:
unreachable("Malformed vector store.");
}
data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
ctx->ac.i32_0, cache_policy);
}
}
exit_waterfall(ctx, &wctx, NULL);
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7000);
}
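/* 64-bit compare-and-swap for SSBOs and buffer images: rebuild a 64-bit
 * global address from the descriptor's base address and perform the cmpxchg
 * through global memory. With robust buffer access (or for images) the
 * operation is guarded by a bounds check against the descriptor size and
 * returns 0 when out of bounds. */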
static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
LLVMValueRef offset, LLVMValueRef compare,
LLVMValueRef exchange, bool image)
{
LLVMBasicBlockRef start_block = NULL, then_block = NULL;
if (ctx->abi->robust_buffer_access || image) {
LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
start_block = LLVMGetInsertBlock(ctx->ac.builder);
ac_build_ifcc(&ctx->ac, cond, -1);
then_block = LLVMGetInsertBlock(ctx->ac.builder);
}
if (image)
offset = LLVMBuildMul(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, 8, false), "");
LLVMValueRef ptr_parts[2] = {
ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
LLVMConstInt(ctx->ac.i32, 65535, 0), "")};
ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
"");
LLVMValueRef result =
ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
if (ctx->abi->robust_buffer_access || image) {
ac_build_endif(&ctx->ac, -1);
LLVMBasicBlockRef incoming_blocks[2] = {
start_block,
then_block,
};
LLVMValueRef incoming_values[2] = {
LLVMConstInt(ctx->ac.i64, 0, 0),
result,
};
LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
return ret;
} else {
return result;
}
}
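/* SSBO atomics, emitted as llvm.amdgcn.raw.buffer.atomic.* intrinsics inside
 * an optional waterfall loop. 64-bit comp_swap is handled separately through
 * global memory (see emit_ssbo_comp_swap_64). */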
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, "");
ac_build_ifcc(&ctx->ac, cond, 7001);
}
LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
const char *op;
char name[64], type[8];
LLVMValueRef params[6], descriptor;
LLVMValueRef result;
int arg_count = 0;
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
op = "add";
break;
case nir_intrinsic_ssbo_atomic_imin:
op = "smin";
break;
case nir_intrinsic_ssbo_atomic_umin:
op = "umin";
break;
case nir_intrinsic_ssbo_atomic_imax:
op = "smax";
break;
case nir_intrinsic_ssbo_atomic_umax:
op = "umax";
break;
case nir_intrinsic_ssbo_atomic_and:
op = "and";
break;
case nir_intrinsic_ssbo_atomic_or:
op = "or";
break;
case nir_intrinsic_ssbo_atomic_xor:
op = "xor";
break;
case nir_intrinsic_ssbo_atomic_exchange:
op = "swap";
break;
case nir_intrinsic_ssbo_atomic_comp_swap:
op = "cmpswap";
break;
case nir_intrinsic_ssbo_atomic_fmin:
op = "fmin";
break;
case nir_intrinsic_ssbo_atomic_fmax:
op = "fmax";
break;
default:
abort();
}
descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {
result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),
get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]), false);
} else {
LLVMValueRef data = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
}
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_fmin ||
instr->intrinsic == nir_intrinsic_ssbo_atomic_fmax) {
data = ac_to_float(&ctx->ac, data);
return_type = LLVMTypeOf(data);
}
params[arg_count++] = data;
params[arg_count++] = descriptor;
params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
params[arg_count++] = ctx->ac.i32_0; /* soffset */
params[arg_count++] = ctx->ac.i32_0; /* slc */
ac_build_type_name_for_intr(return_type, type, sizeof(type));
snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_fmin ||
instr->intrinsic == nir_intrinsic_ssbo_atomic_fmax) {
result = ac_to_integer(&ctx->ac, result);
}
}
result = exit_waterfall(ctx, &wctx, result);
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7001);
return result;
}
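/* SSBO load: split the result into chunks of at most 16 bytes, using
 * byte/short buffer loads when a sub-dword access is not dword-aligned, then
 * gather the individual components back into the destination vector. */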
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
int num_components = instr->num_components;
enum gl_access_qualifier access = nir_intrinsic_access(instr);
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
LLVMValueRef offset = get_src(ctx, instr->src[1]);
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false, false);
LLVMValueRef vindex = ctx->ac.i32_0;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
LLVMValueRef results[4];
for (int i = 0; i < num_components;) {
int num_elems = num_components - i;
if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
num_elems = 1;
if (num_elems * elem_size_bytes > 16)
num_elems = 16 / elem_size_bytes;
int load_bytes = num_elems * elem_size_bytes;
LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, offset, immoffset, "");
LLVMValueRef ret;
if (load_bytes == 1) {
ret = ac_build_buffer_load_byte(&ctx->ac, rsrc, voffset, ctx->ac.i32_0,
cache_policy);
} else if (load_bytes == 2) {
ret = ac_build_buffer_load_short(&ctx->ac, rsrc, voffset, ctx->ac.i32_0,
cache_policy);
} else {
int num_channels = util_next_power_of_two(load_bytes) / 4;
bool can_speculate = access & ACCESS_CAN_REORDER;
ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, voffset, ctx->ac.i32_0,
ctx->ac.f32, cache_policy, can_speculate, false);
}
LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
for (unsigned j = 0; j < num_elems; j++) {
results[i + j] =
LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
}
i += num_elems;
}
LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
return exit_waterfall(ctx, &wctx, ret);
}
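/* Start a waterfall loop over the UBO index when the access is marked
 * non-uniform. */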
static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
const nir_intrinsic_instr *instr)
{
return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
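/* Build the global-memory pointer for a load/store/atomic. The *_amd variants
 * carry a constant base plus an extra offset source that are added to the
 * 64-bit base address; the plain variants use the address operand directly. */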
static LLVMValueRef get_global_address(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr,
LLVMTypeRef type)
{
bool is_store = instr->intrinsic == nir_intrinsic_store_global ||
instr->intrinsic == nir_intrinsic_store_global_amd;
LLVMValueRef addr = get_src(ctx, instr->src[is_store ? 1 : 0]);
LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_GLOBAL);
if (nir_intrinsic_has_base(instr)) {
/* _amd variants */
uint32_t base = nir_intrinsic_base(instr);
unsigned num_src = nir_intrinsic_infos[instr->intrinsic].num_srcs;
LLVMValueRef offset = get_src(ctx, instr->src[num_src - 1]);
offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
LLVMTypeRef i8_ptr_type = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_GLOBAL);
addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, i8_ptr_type, "");
addr = LLVMBuildGEP2(ctx->ac.builder, ctx->ac.i8, addr, &offset, 1, "");
return LLVMBuildPointerCast(ctx->ac.builder, addr, ptr_type, "");
} else {
return LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
}
}
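/* Global memory load. Coherent/volatile loads are emitted with monotonic
 * atomic ordering and their natural alignment. */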
static LLVMValueRef visit_load_global(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
LLVMValueRef val;
LLVMValueRef addr = get_global_address(ctx, instr, result_type);
val = LLVMBuildLoad2(ctx->ac.builder, result_type, addr, "");
if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
LLVMSetAlignment(val, ac_get_type_size(result_type));
}
return val;
}
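/* Global memory store; coherent/volatile stores get monotonic atomic ordering
 * and natural alignment, matching the loads above. */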
static void visit_store_global(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, "");
ac_build_ifcc(&ctx->ac, cond, 7002);
}
LLVMValueRef data = get_src(ctx, instr->src[0]);
LLVMTypeRef type = LLVMTypeOf(data);
LLVMValueRef addr = get_global_address(ctx, instr, type);
LLVMValueRef val;
val = LLVMBuildStore(ctx->ac.builder, data, addr);
if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
LLVMSetAlignment(val, ac_get_type_size(type));
}
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7002);
}
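/* Global memory atomics: comp_swap maps to cmpxchg, float min/max use the
 * llvm.amdgcn.global.atomic.fmin/fmax intrinsics, and the remaining ops map
 * onto LLVM atomicrmw operations. */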
static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, "");
ac_build_ifcc(&ctx->ac, cond, 7002);
}
LLVMValueRef data = get_src(ctx, instr->src[1]);
LLVMAtomicRMWBinOp op;
LLVMValueRef result;
/* use "singlethread" sync scope to implement relaxed ordering */
const char *sync_scope = "singlethread-one-as";
if (instr->intrinsic == nir_intrinsic_global_atomic_fmin ||
instr->intrinsic == nir_intrinsic_global_atomic_fmax ||
instr->intrinsic == nir_intrinsic_global_atomic_fmin_amd ||
instr->intrinsic == nir_intrinsic_global_atomic_fmax_amd) {
data = ac_to_float(&ctx->ac, data);
}
LLVMTypeRef data_type = LLVMTypeOf(data);
LLVMValueRef addr = get_global_address(ctx, instr, data_type);
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap ||
instr->intrinsic == nir_intrinsic_global_atomic_comp_swap_amd) {
LLVMValueRef data1 = get_src(ctx, instr->src[2]);
result = ac_build_atomic_cmp_xchg(&ctx->ac, addr, data, data1, sync_scope);
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
} else if (instr->intrinsic == nir_intrinsic_global_atomic_fmin ||
instr->intrinsic == nir_intrinsic_global_atomic_fmax ||
instr->intrinsic == nir_intrinsic_global_atomic_fmin_amd ||
instr->intrinsic == nir_intrinsic_global_atomic_fmax_amd) {
const char *op = (instr->intrinsic == nir_intrinsic_global_atomic_fmin ||
                  instr->intrinsic == nir_intrinsic_global_atomic_fmin_amd) ? "fmin" : "fmax";
char name[64], type[8];
LLVMValueRef params[2];
int arg_count = 0;
params[arg_count++] = addr;
params[arg_count++] = data;
ac_build_type_name_for_intr(data_type, type, sizeof(type));
snprintf(name, sizeof(name), "llvm.amdgcn.global.atomic.%s.%s.p1%s.%s", op, type, type, type);
result = ac_build_intrinsic(&ctx->ac, name, data_type, params, arg_count, 0);
result = ac_to_integer(&ctx->ac, result);
} else {
switch (instr->intrinsic) {
case nir_intrinsic_global_atomic_add:
case nir_intrinsic_global_atomic_add_amd:
op = LLVMAtomicRMWBinOpAdd;
break;
case nir_intrinsic_global_atomic_umin:
case nir_intrinsic_global_atomic_umin_amd:
op = LLVMAtomicRMWBinOpUMin;
break;
case nir_intrinsic_global_atomic_umax:
case nir_intrinsic_global_atomic_umax_amd:
op = LLVMAtomicRMWBinOpUMax;
break;
case nir_intrinsic_global_atomic_imin:
case nir_intrinsic_global_atomic_imin_amd:
op = LLVMAtomicRMWBinOpMin;
break;
case nir_intrinsic_global_atomic_imax:
case nir_intrinsic_global_atomic_imax_amd:
op = LLVMAtomicRMWBinOpMax;
break;
case nir_intrinsic_global_atomic_and:
case nir_intrinsic_global_atomic_and_amd:
op = LLVMAtomicRMWBinOpAnd;
break;
case nir_intrinsic_global_atomic_or:
case nir_intrinsic_global_atomic_or_amd:
op = LLVMAtomicRMWBinOpOr;
break;
case nir_intrinsic_global_atomic_xor:
case nir_intrinsic_global_atomic_xor_amd:
op = LLVMAtomicRMWBinOpXor;
break;
case nir_intrinsic_global_atomic_exchange:
case nir_intrinsic_global_atomic_exchange_amd:
op = LLVMAtomicRMWBinOpXchg;
break;
default:
unreachable("Invalid global atomic operation");
}
result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope);
}
if (ctx->ac.postponed_kill)
ac_build_endif(&ctx->ac, 7002);
return result;
}
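/* UBO load: widen the access to whole dwords, do a single buffer load, and
 * bitcast/trim the result back to the original 8/16/64-bit type. */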
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
struct waterfall_context wctx;
LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
LLVMValueRef ret;
LLVMValueRef rsrc = rsrc_base;
LLVMValueRef offset = get_src(ctx, instr->src[1]);
int num_components = instr->num_components;
if (ctx->abi->load_ubo)
rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
/* Convert to a scalar 32-bit load. */
if (instr->dest.ssa.bit_size == 64)
num_components *= 2;
else if (instr->dest.ssa.bit_size == 16)
num_components = DIV_ROUND_UP(num_components, 2);
else if (instr->dest.ssa.bit_size == 8)
num_components = DIV_ROUND_UP(num_components, 4);
ret =
ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL,
ctx->ac.f32, 0, true, true);
/* Convert to the original type. */
if (instr->dest.ssa.bit_size == 64) {
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
LLVMVectorType(ctx->ac.i64, num_components / 2), "");
} else if (instr->dest.ssa.bit_size == 16) {
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
LLVMVectorType(ctx->ac.i16, num_components * 2), "");
} else if (instr->dest.ssa.bit_size == 8) {
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
LLVMVectorType(ctx->ac.i8, num_components * 4), "");
}
ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);
ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
return exit_waterfall(ctx, &wctx, ret);
}
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
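/* Store a shader output. Only direct (constant zero offset) 16-bit and 32-bit
 * stores are expected here; each enabled channel of the write mask is written
 * separately below. */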
static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
if (ctx->ac.postponed_kill) {
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, "");
ac_build_ifcc(&ctx->ac, cond, 7002);
}
unsigned base = nir_intrinsic_base(instr);
unsigned writemask = nir_intrinsic_write_mask(instr);
unsigned component = nir_intrinsic_component(instr);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
ASSERTED nir_src offset = *nir_get_io_offset_src(instr);
/* No indirect indexing is allowed here. */
assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
case 16:
case 32:
break;
case 64:
unreachable("64-bit IO should have been lowered to 32 bits");
return;
default:
unreachable("unhandled store_output bit size");
return;
}
writemask <<= component;
for (unsigned chan = 0; chan < 8; chan++) {
if (!(writemask & (1 << chan)))
continue;