/*
* Copyright 2024 Valve Corporation
* Copyright 2024 Alyssa Rosenzweig
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_defines.h"
#include "vulkan/vulkan_core.h"
#include "agx_abi.h"
#include "agx_nir_texture.h"
#include "hk_cmd_buffer.h"
#include "hk_descriptor_set.h"
#include "hk_descriptor_set_layout.h"
#include "hk_shader.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"
#include "vk_pipeline.h"
struct lower_descriptors_ctx {
const struct hk_descriptor_set_layout *set_layouts[HK_MAX_SETS];
bool clamp_desc_array_bounds;
nir_address_format ubo_addr_format;
nir_address_format ssbo_addr_format;
};
static const struct hk_descriptor_set_binding_layout *
get_binding_layout(uint32_t set, uint32_t binding,
const struct lower_descriptors_ctx *ctx)
{
assert(set < HK_MAX_SETS);
assert(ctx->set_layouts[set] != NULL);
const struct hk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
assert(binding < set_layout->binding_count);
return &set_layout->binding[binding];
}
static nir_def *
load_speculatable(nir_builder *b, unsigned num_components, unsigned bit_size,
nir_def *addr, unsigned align)
{
return nir_build_load_global_constant(b, num_components, bit_size, addr,
.align_mul = align,
.access = ACCESS_CAN_SPECULATE);
}
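/*
 * Load from the root descriptor table. Its GPU address comes from
 * nir_load_root_agx and member offsets from hk_root_descriptor_offset();
 * root loads are marked speculatable so NIR can hoist them freely.
 */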
static nir_def *
load_root(nir_builder *b, unsigned num_components, unsigned bit_size,
nir_def *offset, unsigned align)
{
nir_def *addr = nir_iadd(b, nir_load_root_agx(b), nir_u2u64(b, offset));
return load_speculatable(b, num_components, bit_size, addr, align);
}
static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
const struct lower_descriptors_ctx *ctx)
{
assert(load->intrinsic == nir_intrinsic_load_constant);
unreachable("todo: stick an address in the root descriptor or something");
uint32_t base = nir_intrinsic_base(load);
uint32_t range = nir_intrinsic_range(load);
b->cursor = nir_before_instr(&load->instr);
nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
nir_def *data = nir_load_ubo(
b, load->def.num_components, load->def.bit_size, nir_imm_int(b, 0),
offset, .align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load), .range_base = base,
.range = range);
nir_def_rewrite_uses(&load->def, data);
return true;
}
static nir_def *
load_descriptor_set_addr(nir_builder *b, uint32_t set,
UNUSED const struct lower_descriptors_ctx *ctx)
{
uint32_t set_addr_offset =
hk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
return load_root(b, 1, 64, nir_imm_int(b, set_addr_offset), 8);
}
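/*
 * A set's dynamic buffers start after those of all lower-numbered sets. If
 * every lower set layout is known here, the start folds to an immediate;
 * otherwise (independent set layouts) it is read from the per-set
 * set_dynamic_buffer_start array in the root table.
 */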
static nir_def *
load_dynamic_buffer_start(nir_builder *b, uint32_t set,
const struct lower_descriptors_ctx *ctx)
{
int dynamic_buffer_start_imm = 0;
for (uint32_t s = 0; s < set; s++) {
if (ctx->set_layouts[s] == NULL) {
dynamic_buffer_start_imm = -1;
break;
}
dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
}
if (dynamic_buffer_start_imm >= 0) {
return nir_imm_int(b, dynamic_buffer_start_imm);
} else {
uint32_t root_offset =
hk_root_descriptor_offset(set_dynamic_buffer_start) + set;
return nir_u2u32(b, load_root(b, 1, 8, nir_imm_int(b, root_offset), 1));
}
}
static nir_def *
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
uint32_t set, uint32_t binding, nir_def *index,
unsigned offset_B, const struct lower_descriptors_ctx *ctx)
{
const struct hk_descriptor_set_binding_layout *binding_layout =
get_binding_layout(set, binding, ctx);
if (ctx->clamp_desc_array_bounds)
index =
nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
/* Get the index in the root descriptor table dynamic_buffers array. */
nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
index = nir_iadd(b, index,
nir_iadd_imm(b, dynamic_buffer_start,
binding_layout->dynamic_buffer_index));
nir_def *root_desc_offset = nir_iadd_imm(
b, nir_imul_imm(b, index, sizeof(struct hk_buffer_address)),
hk_root_descriptor_offset(dynamic_buffers));
assert(num_components == 4 && bit_size == 32);
nir_def *desc = load_root(b, 4, 32, root_desc_offset, 16);
/* We know a priori that the .w component (offset) is zero */
return nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
}
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
nir_def *base_addr = nir_iadd_imm(
b, load_descriptor_set_addr(b, set, ctx), binding_layout->offset);
assert(binding_layout->stride == 1);
const uint32_t binding_size = binding_layout->array_size;
/* Convert it to nir_address_format_64bit_bounded_global */
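/* That is: vec4(base_addr_lo, base_addr_hi, size_B = binding_size, offset_B = 0) */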
assert(num_components == 4 && bit_size == 32);
return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
nir_unpack_64_2x32_split_y(b, base_addr),
nir_imm_int(b, binding_size), nir_imm_int(b, 0));
}
default: {
assert(binding_layout->stride > 0);
nir_def *desc_ubo_offset =
nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
binding_layout->offset + offset_B);
unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
desc_align_mul = MIN2(desc_align_mul, 16);
unsigned desc_align_offset = binding_layout->offset + offset_B;
desc_align_offset %= desc_align_mul;
nir_def *desc;
nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
desc = nir_load_global_constant_offset(
b, num_components, bit_size, set_addr, desc_ubo_offset,
.align_mul = desc_align_mul, .align_offset = desc_align_offset,
.access = ACCESS_CAN_SPECULATE);
if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
/* We know a priori that the .w component (offset) is zero */
assert(num_components == 4 && bit_size == 32);
desc = nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
}
return desc;
}
}
}
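/*
 * A lowerable index chain is zero or more vulkan_resource_reindex intrinsics
 * terminating in a vulkan_resource_index; anything else is left for the
 * bounds-checked SSBO path at the end of the file.
 */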
static bool
is_idx_intrin(nir_intrinsic_instr *intrin)
{
while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
intrin = nir_src_as_intrinsic(intrin->src[0]);
if (intrin == NULL)
return false;
}
return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
}
static nir_def *
load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
nir_def *index = nir_imm_int(b, 0);
while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
index = nir_iadd(b, index, intrin->src[1].ssa);
intrin = nir_src_as_intrinsic(intrin->src[0]);
}
assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
uint32_t set = nir_intrinsic_desc_set(intrin);
uint32_t binding = nir_intrinsic_binding(intrin);
index = nir_iadd(b, index, intrin->src[0].ssa);
return load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
}
static bool
try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
b->cursor = nir_before_instr(&intrin->instr);
nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
return false;
}
nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
nir_def_rewrite_uses(&intrin->def, desc);
return true;
}
static bool
_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
uint32_t root_table_offset)
{
b->cursor = nir_instr_remove(&intrin->instr);
assert((root_table_offset & 3) == 0 && "aligned");
nir_def *val = load_root(b, intrin->def.num_components, intrin->def.bit_size,
nir_imm_int(b, root_table_offset), 4);
nir_def_rewrite_uses(&intrin->def, val);
return true;
}
#define lower_sysval_to_root_table(b, intrin, member) \
_lower_sysval_to_root_table(b, intrin, hk_root_descriptor_offset(member))
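/*
 * For example, lower_sysval_to_root_table(b, intrin, draw.view_index)
 * replaces the intrinsic with a root-table load from
 * hk_root_descriptor_offset(draw.view_index).
 */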
static bool
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
const struct lower_descriptors_ctx *ctx)
{
const uint32_t push_region_offset = hk_root_descriptor_offset(push);
const uint32_t base = nir_intrinsic_base(load);
b->cursor = nir_before_instr(&load->instr);
nir_def *offset =
nir_iadd_imm(b, load->src[0].ssa, push_region_offset + base);
nir_def *val = load_root(b, load->def.num_components, load->def.bit_size,
offset, load->def.bit_size / 8);
nir_def_rewrite_uses(&load->def, val);
return true;
}
static void
get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
uint32_t *set, uint32_t *binding, nir_def **index)
{
if (deref->deref_type == nir_deref_type_array) {
*index = deref->arr.index.ssa;
deref = nir_deref_instr_parent(deref);
} else {
*index = nir_imm_int(b, 0);
}
assert(deref->deref_type == nir_deref_type_var);
nir_variable *var = deref->var;
*set = var->data.descriptor_set;
*binding = var->data.binding;
}
static nir_def *
load_resource_deref_desc(nir_builder *b, unsigned num_components,
unsigned bit_size, nir_deref_instr *deref,
unsigned offset_B,
const struct lower_descriptors_ctx *ctx)
{
uint32_t set, binding;
nir_def *index;
get_resource_deref_binding(b, deref, &set, &binding, &index);
return load_descriptor(b, num_components, bit_size, set, binding, index,
offset_B, ctx);
}
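/*
 * Image and texture descriptors hold byte offsets into the global texture
 * heap; nir_load_texture_handle_agx converts such an offset into the
 * bindless handle form the AGX backend consumes.
 */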
static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr,
const struct lower_descriptors_ctx *ctx)
{
b->cursor = nir_before_instr(&intr->instr);
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
/* Reads and queries use the texture descriptor; writes and atomics use the PBE. */
unsigned offs;
if (intr->intrinsic != nir_intrinsic_image_deref_load &&
intr->intrinsic != nir_intrinsic_image_deref_sparse_load &&
intr->intrinsic != nir_intrinsic_image_deref_size &&
intr->intrinsic != nir_intrinsic_image_deref_samples) {
offs = offsetof(struct hk_storage_image_descriptor, pbe_offset);
} else {
offs = offsetof(struct hk_storage_image_descriptor, tex_offset);
}
nir_def *offset = load_resource_deref_desc(b, 1, 32, deref, offs, ctx);
nir_rewrite_image_intrinsic(intr, nir_load_texture_handle_agx(b, offset),
true);
return true;
}
static VkQueryPipelineStatisticFlagBits
translate_pipeline_stat_bit(enum pipe_statistics_query_index pipe)
{
switch (pipe) {
case PIPE_STAT_QUERY_IA_VERTICES:
return VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT;
case PIPE_STAT_QUERY_IA_PRIMITIVES:
return VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
case PIPE_STAT_QUERY_VS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_GS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_GS_PRIMITIVES:
return VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT;
case PIPE_STAT_QUERY_C_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_C_PRIMITIVES:
return VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT;
case PIPE_STAT_QUERY_PS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_HS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT;
case PIPE_STAT_QUERY_DS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_CS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
case PIPE_STAT_QUERY_TS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT;
case PIPE_STAT_QUERY_MS_INVOCATIONS:
return VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT;
}
unreachable("invalid statistic");
}
static bool
lower_uvs_index(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
unsigned *nr_vbos = data;
switch (intrin->intrinsic) {
case nir_intrinsic_load_uvs_index_agx: {
gl_varying_slot slot = nir_intrinsic_io_semantics(intrin).location;
unsigned offset = hk_root_descriptor_offset(draw.uvs_index[slot]);
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *val = load_root(b, 1, 8, nir_imm_int(b, offset), 1);
nir_def_rewrite_uses(&intrin->def, nir_u2u16(b, val));
return true;
}
case nir_intrinsic_load_shader_part_tests_zs_agx:
return lower_sysval_to_root_table(b, intrin, draw.no_epilog_discard);
case nir_intrinsic_load_api_sample_mask_agx:
return lower_sysval_to_root_table(b, intrin, draw.api_sample_mask);
case nir_intrinsic_load_sample_positions_agx:
return lower_sysval_to_root_table(b, intrin, draw.ppp_multisamplectl);
case nir_intrinsic_load_depth_never_agx:
return lower_sysval_to_root_table(b, intrin, draw.force_never_in_shader);
case nir_intrinsic_load_geometry_param_buffer_poly:
return lower_sysval_to_root_table(b, intrin, draw.geometry_params);
case nir_intrinsic_load_vs_output_buffer_poly:
return lower_sysval_to_root_table(b, intrin, draw.vertex_output_buffer);
case nir_intrinsic_load_vs_outputs_poly:
return lower_sysval_to_root_table(b, intrin, draw.vertex_outputs);
case nir_intrinsic_load_tess_param_buffer_poly:
return lower_sysval_to_root_table(b, intrin, draw.tess_params);
case nir_intrinsic_load_rasterization_stream:
return lower_sysval_to_root_table(b, intrin, draw.rasterization_stream);
case nir_intrinsic_load_is_first_fan_agx: {
unsigned offset = hk_root_descriptor_offset(draw.provoking);
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *val = load_root(b, 1, 16, nir_imm_int(b, offset), 2);
nir_def_rewrite_uses(&intrin->def, nir_ieq_imm(b, val, 1));
return true;
}
case nir_intrinsic_load_provoking_last: {
unsigned offset = hk_root_descriptor_offset(draw.provoking);
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *val = load_root(b, 1, 16, nir_imm_int(b, offset), 2);
nir_def_rewrite_uses(&intrin->def, nir_b2b32(b, nir_ieq_imm(b, val, 2)));
return true;
}
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_first_vertex:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_input_assembly_buffer_poly: {
b->cursor = nir_instr_remove(&intrin->instr);
unsigned base = AGX_ABI_VUNI_FIRST_VERTEX(*nr_vbos);
unsigned size = 32;
if (intrin->intrinsic == nir_intrinsic_load_base_instance) {
base = AGX_ABI_VUNI_BASE_INSTANCE(*nr_vbos);
} else if (intrin->intrinsic == nir_intrinsic_load_draw_id) {
base = AGX_ABI_VUNI_DRAW_ID(*nr_vbos);
size = 16;
} else if (intrin->intrinsic ==
nir_intrinsic_load_input_assembly_buffer_poly) {
base = AGX_ABI_VUNI_INPUT_ASSEMBLY(*nr_vbos);
size = 64;
}
nir_def *val = nir_load_preamble(b, 1, size, .base = base);
nir_def_rewrite_uses(&intrin->def,
nir_u2uN(b, val, intrin->def.bit_size));
return true;
}
case nir_intrinsic_load_stat_query_address_agx: {
b->cursor = nir_instr_remove(&intrin->instr);
unsigned off1 = hk_root_descriptor_offset(draw.pipeline_stats);
unsigned off2 = hk_root_descriptor_offset(draw.pipeline_stats_flags);
nir_def *base = load_root(b, 1, 64, nir_imm_int(b, off1), 8);
nir_def *flags = load_root(b, 1, 16, nir_imm_int(b, off2), 2);
unsigned query = nir_intrinsic_base(intrin);
VkQueryPipelineStatisticFlagBits bit = translate_pipeline_stat_bit(query);
/* Prefix sum to find the compacted offset */
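/* E.g. with flags = IA_VERTICES | GS_PRIMITIVES | CS_INVOCATIONS and
 * bit = CS_INVOCATIONS (0x400), popcount(flags & 0x3ff) = 2, so the compute
 * invocation counter occupies the third qword of the compacted array.
 */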
nir_def *idx = nir_bit_count(b, nir_iand_imm(b, flags, bit - 1));
nir_def *addr = nir_iadd(
b, base, nir_imul_imm(b, nir_u2u64(b, idx), sizeof(uint64_t)));
/* The above returns garbage if the query isn't actually enabled, so handle
 * that case.
 *
 * TODO: Optimize the case where we *know* the query is present?
 */
nir_def *present = nir_ine_imm(b, nir_iand_imm(b, flags, bit), 0);
/* Sometimes we insert a GS internally; it should not contribute to GS
 * statistics. This is not strictly required by Vulkan, but vkd3d-proton
 * tests it and we should avoid the surprising behaviour.
 */
if (query == PIPE_STAT_QUERY_GS_INVOCATIONS ||
query == PIPE_STAT_QUERY_GS_PRIMITIVES) {
unsigned api_gs_offset = hk_root_descriptor_offset(draw.api_gs);
nir_def *api_gs =
load_root(b, 1, 16, nir_imm_int(b, api_gs_offset), 4);
present = nir_iand(b, present, nir_ine_imm(b, api_gs, 0));
}
addr = nir_bcsel(b, present, addr,
nir_imm_int64(b, AGX_SCRATCH_PAGE_ADDRESS));
nir_def_rewrite_uses(&intrin->def, addr);
return true;
}
default:
return false;
}
}
bool
hk_lower_uvs_index(nir_shader *s, unsigned nr_vbos)
{
return nir_shader_intrinsics_pass(s, lower_uvs_index,
nir_metadata_control_flow, &nr_vbos);
}
static bool
try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
switch (intrin->intrinsic) {
case nir_intrinsic_load_constant:
return lower_load_constant(b, intrin, ctx);
case nir_intrinsic_load_vulkan_descriptor:
return try_lower_load_vulkan_descriptor(b, intrin, ctx);
case nir_intrinsic_load_workgroup_size:
unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
case nir_intrinsic_load_base_workgroup_id:
return lower_sysval_to_root_table(b, intrin, cs.base_group);
case nir_intrinsic_load_push_constant:
return lower_load_push_constant(b, intrin, ctx);
case nir_intrinsic_load_view_index:
return lower_sysval_to_root_table(b, intrin, draw.view_index);
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_image_deref_store_block_agx:
return lower_image_intrin(b, intrin, ctx);
case nir_intrinsic_load_num_workgroups: {
b->cursor = nir_instr_remove(&intrin->instr);
unsigned offset = hk_root_descriptor_offset(cs.group_count_addr);
nir_def *ptr = load_root(b, 1, 64, nir_imm_int(b, offset), 4);
nir_def *val = load_speculatable(b, 3, 32, ptr, 4);
nir_def_rewrite_uses(&intrin->def, val);
return true;
}
default:
return false;
}
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
const struct lower_descriptors_ctx *ctx)
{
b->cursor = nir_before_instr(&tex->instr);
nir_def *texture = nir_steal_tex_src(tex, nir_tex_src_texture_deref);
nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_deref);
if (!texture) {
assert(!sampler);
return false;
}
nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
const uint32_t plane =
plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
const uint64_t plane_offset_B =
plane * sizeof(struct hk_sampled_image_descriptor);
/* The LOD bias is passed in the descriptor set rather than embedded in the
 * sampler descriptor: there's no spot for it in the hardware descriptor,
 * and keeping it out saves precious sampler heap entries.
 */
if (tex->op == nir_texop_lod_bias) {
unsigned offs =
offsetof(struct hk_sampled_image_descriptor, lod_bias_fp16);
nir_def *bias = load_resource_deref_desc(
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
plane_offset_B + offs, ctx);
nir_def_replace(&tex->def, bias);
return true;
}
if (tex->op == nir_texop_image_min_lod_agx) {
assert(tex->dest_type == nir_type_float16 ||
tex->dest_type == nir_type_uint16);
unsigned offs =
tex->dest_type == nir_type_float16
? offsetof(struct hk_sampled_image_descriptor, min_lod_fp16)
: offsetof(struct hk_sampled_image_descriptor, min_lod_uint16);
nir_def *min = load_resource_deref_desc(
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(texture)),
plane_offset_B + offs, ctx);
nir_def_replace(&tex->def, min);
return true;
}
if (tex->op == nir_texop_has_custom_border_color_agx) {
unsigned offs = offsetof(struct hk_sampled_image_descriptor,
clamp_0_sampler_index_or_negative);
nir_def *res = load_resource_deref_desc(
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
plane_offset_B + offs, ctx);
nir_def_replace(&tex->def, nir_ige_imm(b, res, 0));
return true;
}
if (tex->op == nir_texop_custom_border_color_agx) {
unsigned offs = offsetof(struct hk_sampled_image_descriptor, border);
nir_def *border = load_resource_deref_desc(
b, 4, 32, nir_src_as_deref(nir_src_for_ssa(sampler)),
plane_offset_B + offs, ctx);
nir_alu_type T = nir_alu_type_get_base_type(tex->dest_type);
border = nir_convert_to_bit_size(b, border, T, tex->def.bit_size);
nir_def_replace(&tex->def, border);
return true;
}
{
unsigned offs =
offsetof(struct hk_sampled_image_descriptor, image_offset);
nir_def *offset = load_resource_deref_desc(
b, 1, 32, nir_src_as_deref(nir_src_for_ssa(texture)),
plane_offset_B + offs, ctx);
nir_def *handle = nir_load_texture_handle_agx(b, offset);
nir_tex_instr_add_src(tex, nir_tex_src_texture_handle, handle);
}
if (sampler != NULL) {
unsigned offs =
offsetof(struct hk_sampled_image_descriptor, sampler_index);
if (tex->backend_flags & AGX_TEXTURE_FLAG_CLAMP_TO_0) {
offs = offsetof(struct hk_sampled_image_descriptor,
clamp_0_sampler_index_or_negative);
}
nir_def *index = load_resource_deref_desc(
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
plane_offset_B + offs, ctx);
nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, index);
}
return true;
}
static bool
try_lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *_data)
{
const struct lower_descriptors_ctx *ctx = _data;
switch (instr->type) {
case nir_instr_type_tex:
return lower_tex(b, nir_instr_as_tex(instr), ctx);
case nir_instr_type_intrinsic:
return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
default:
return false;
}
}
static bool
lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
uint32_t set = nir_intrinsic_desc_set(intrin);
uint32_t binding = nir_intrinsic_binding(intrin);
nir_def *index = intrin->src[0].ssa;
const struct hk_descriptor_set_binding_layout *binding_layout =
get_binding_layout(set, binding, ctx);
nir_def *binding_addr;
uint8_t binding_stride;
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
binding_stride = binding_layout->stride;
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
const uint32_t root_desc_addr_offset =
hk_root_descriptor_offset(root_desc_addr);
nir_def *root_desc_addr =
load_root(b, 1, 64, nir_imm_int(b, root_desc_addr_offset), 8);
nir_def *dynamic_buffer_start =
nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
binding_layout->dynamic_buffer_index);
nir_def *dynamic_binding_offset =
nir_iadd_imm(b,
nir_imul_imm(b, dynamic_buffer_start,
sizeof(struct hk_buffer_address)),
hk_root_descriptor_offset(dynamic_buffers));
binding_addr =
nir_iadd(b, root_desc_addr, nir_u2u64(b, dynamic_binding_offset));
binding_stride = sizeof(struct hk_buffer_address);
break;
}
default:
unreachable("Not an SSBO descriptor");
}
/* Tuck the stride in the top 8 bits of the binding address */
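/* E.g. stride 16 ORs 16ull << 56 into the address; the reindex lowering
 * below recovers it as (high 32 bits) >> 24, leaving the low 56 address
 * bits intact. The vec4 built here mirrors 64bit_bounded_global with the
 * running offset in .w.
 */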
binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
const uint32_t binding_size = binding_layout->array_size * binding_stride;
nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
nir_def *addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
nir_unpack_64_2x32_split_y(b, binding_addr),
nir_imm_int(b, binding_size), offset_in_binding);
nir_def_rewrite_uses(&intrin->def, addr);
return true;
}
static bool
lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *addr = intrin->src[0].ssa;
nir_def *index = intrin->src[1].ssa;
nir_def *addr_high32 = nir_channel(b, addr, 1);
nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
nir_def *offset = nir_imul(b, index, stride);
addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format, nir_var_mem_ssbo,
offset);
nir_def_rewrite_uses(&intrin->def, addr);
return true;
}
static bool
lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
const struct lower_descriptors_ctx *ctx)
{
const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
return false;
b->cursor = nir_instr_remove(&intrin->instr);
nir_def *addr = intrin->src[0].ssa;
nir_def *desc;
switch (ctx->ssbo_addr_format) {
case nir_address_format_64bit_global_32bit_offset: {
nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
nir_def *offset = nir_channel(b, addr, 3);
/* Mask off the binding stride */
base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
.align_mul = 16, .align_offset = 0,
.access = ACCESS_CAN_SPECULATE);
break;
}
case nir_address_format_64bit_bounded_global: {
nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
nir_def *size = nir_channel(b, addr, 2);
nir_def *offset = nir_channel(b, addr, 3);
/* Mask off the binding stride */
base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
desc = nir_load_global_constant_bounded(
b, 4, 32, base, offset, size, .align_mul = 16, .align_offset = 0,
.access = ACCESS_CAN_SPECULATE);
break;
}
default:
unreachable("Unknown address mode");
}
nir_def_rewrite_uses(&intrin->def, desc);
return true;
}
static bool
lower_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intr, void *_data)
{
const struct lower_descriptors_ctx *ctx = _data;
switch (intr->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
return lower_ssbo_resource_index(b, intr, ctx);
case nir_intrinsic_vulkan_resource_reindex:
return lower_ssbo_resource_reindex(b, intr, ctx);
case nir_intrinsic_load_vulkan_descriptor:
return lower_load_ssbo_descriptor(b, intr, ctx);
default:
return false;
}
}
bool
hk_nir_lower_descriptors(nir_shader *nir,
const struct vk_pipeline_robustness_state *rs,
uint32_t set_layout_count,
struct vk_descriptor_set_layout *const *set_layouts)
{
struct lower_descriptors_ctx ctx = {
.clamp_desc_array_bounds =
rs->storage_buffers !=
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
rs->uniform_buffers !=
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
.ssbo_addr_format = hk_buffer_addr_format(rs->storage_buffers),
.ubo_addr_format = hk_buffer_addr_format(rs->uniform_buffers),
};
assert(set_layout_count <= HK_MAX_SETS);
for (uint32_t s = 0; s < set_layout_count; s++) {
if (set_layouts[s] != NULL)
ctx.set_layouts[s] = vk_to_hk_descriptor_set_layout(set_layouts[s]);
}
/* First lower everything but complex SSBOs, then lower complex SSBOs.
 *
 * TODO: See if we can unify this; not sure if the fast path matters on
 * Apple. This is inherited from NVK.
 */
bool pass_lower_descriptors = nir_shader_instructions_pass(
nir, try_lower_descriptors_instr, nir_metadata_control_flow, &ctx);
bool pass_lower_ssbo = nir_shader_intrinsics_pass(
nir, lower_ssbo_descriptor, nir_metadata_control_flow, &ctx);
return pass_lower_descriptors || pass_lower_ssbo;
}