/*
 * Copyright © 2020 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"

/* Vulkan's robustBufferAccess feature is only concerned with buffers that are
 * bound through descriptor sets, so shared memory is not included, but it may
 * be useful to enable this for debugging.
 */
const bool robust_shared_enabled = false;
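
/* Clamps the offset source of a UBO/SSBO access so it always stays in
 * bounds: queries the buffer size with buffer_size_op and, if the last byte
 * that would be accessed falls outside the buffer, rewrites the offset to 0.
 */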
static void
rewrite_offset(nir_builder *b,
               nir_intrinsic_instr *instr,
               uint32_t type_sz,
               uint32_t buffer_idx,
               uint32_t offset_src,
               nir_intrinsic_op buffer_size_op)
{
        b->cursor = nir_before_instr(&instr->instr);

        /* Get size of the buffer */
        nir_intrinsic_instr *size =
                nir_intrinsic_instr_create(b->shader, buffer_size_op);
        size->src[0] = nir_src_for_ssa(nir_imm_int(b, buffer_idx));
        nir_ssa_dest_init(&size->instr, &size->dest, 1, 32, NULL);
        nir_builder_instr_insert(b, &size->instr);

        /* Compute the maximum offset being accessed and if it is
         * out of bounds rewrite it to 0 to ensure the access is
         * within bounds.
         */
        const uint32_t access_size = instr->num_components * type_sz;
        nir_ssa_def *max_access_offset =
                nir_iadd(b, instr->src[offset_src].ssa,
                         nir_imm_int(b, access_size - 1));
        nir_ssa_def *offset =
                nir_bcsel(b, nir_uge(b, max_access_offset, &size->dest.ssa),
                          nir_imm_int(b, 0),
                          instr->src[offset_src].ssa);

        /* Rewrite offset */
        nir_instr_rewrite_src(&instr->instr, &instr->src[offset_src],
                              nir_src_for_ssa(offset));
}
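
/* UBO and SSBO loads: the access size is the destination bit size times the
 * number of components, the buffer index comes from src[0] and the offset
 * from src[1]. In the Vulkan environment load_ubo indices are biased by one
 * with respect to what get_ubo_size expects, so the index is adjusted here.
 */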
static void
lower_buffer_load(struct v3d_compile *c,
                  nir_builder *b,
                  nir_intrinsic_instr *instr)
{
        uint32_t type_sz = nir_dest_bit_size(instr->dest) / 8;
        uint32_t index = nir_src_comp_as_uint(instr->src[0], 0);

        nir_intrinsic_op op;
        if (instr->intrinsic == nir_intrinsic_load_ubo) {
                op = nir_intrinsic_get_ubo_size;
                if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
                        index--;
        } else {
                op = nir_intrinsic_get_ssbo_size;
        }

        rewrite_offset(b, instr, type_sz, index, 1, op);
}
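
/* SSBO stores: the access size comes from the value being stored (src[0]),
 * the buffer index from src[1] and the offset from src[2].
 */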
static void
lower_buffer_store(struct v3d_compile *c,
                   nir_builder *b,
                   nir_intrinsic_instr *instr)
{
        uint32_t type_sz = nir_src_bit_size(instr->src[0]) / 8;
        uint32_t index = nir_src_comp_as_uint(instr->src[1], 0);
        rewrite_offset(b, instr, type_sz, index, 2, nir_intrinsic_get_ssbo_size);
}
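
/* SSBO atomics always operate on a single 32-bit word, so the access size is
 * hard-coded to 4 bytes.
 */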
static void
lower_buffer_atomic(struct v3d_compile *c,
                    nir_builder *b,
                    nir_intrinsic_instr *instr)
{
        uint32_t index = nir_src_comp_as_uint(instr->src[0], 0);
        rewrite_offset(b, instr, 4, index, 1, nir_intrinsic_get_ssbo_size);
}
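
/* Shared memory accesses are clamped like buffer accesses, but since the
 * total shared memory size is known at compile time (shader_info::shared_size)
 * we compare against an immediate instead of emitting a size intrinsic.
 */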
static void
lower_buffer_shared(struct v3d_compile *c,
                    nir_builder *b,
                    nir_intrinsic_instr *instr)
{
        uint32_t type_sz, offset_src;
        if (instr->intrinsic == nir_intrinsic_load_shared) {
                offset_src = 0;
                type_sz = nir_dest_bit_size(instr->dest) / 8;
        } else if (instr->intrinsic == nir_intrinsic_store_shared) {
                offset_src = 1;
                type_sz = nir_src_bit_size(instr->src[0]) / 8;
        } else {
                /* atomic */
                offset_src = 0;
                type_sz = 4;
        }

        b->cursor = nir_before_instr(&instr->instr);

        const uint32_t access_size = instr->num_components * type_sz;
        nir_ssa_def *max_access_offset =
                nir_iadd(b, instr->src[offset_src].ssa,
                         nir_imm_int(b, access_size - 1));
        nir_ssa_def *offset =
                nir_bcsel(b, nir_uge(b, max_access_offset,
                                     nir_imm_int(b, c->s->info.shared_size)),
                          nir_imm_int(b, 0),
                          instr->src[offset_src].ssa);

        nir_instr_rewrite_src(&instr->instr, &instr->src[offset_src],
                              nir_src_for_ssa(offset));
}
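
/* nir_shader_instructions_pass callback: clamps the offset of UBO, SSBO and
 * (optionally) shared memory accesses when the corresponding robustness key
 * is enabled. Returns true if the instruction was rewritten.
 */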
static bool
lower_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
{
        struct v3d_compile *c = _state;

        if (instr->type != nir_instr_type_intrinsic)
                return false;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_ubo:
                if (c->key->robust_uniform_access) {
                        lower_buffer_load(c, b, intr);
                        return true;
                }
                return false;
        case nir_intrinsic_load_ssbo:
                if (c->key->robust_storage_access) {
                        lower_buffer_load(c, b, intr);
                        return true;
                }
                return false;
        case nir_intrinsic_store_ssbo:
                if (c->key->robust_storage_access) {
                        lower_buffer_store(c, b, intr);
                        return true;
                }
                return false;
        case nir_intrinsic_ssbo_atomic_add:
        case nir_intrinsic_ssbo_atomic_imin:
        case nir_intrinsic_ssbo_atomic_umin:
        case nir_intrinsic_ssbo_atomic_imax:
        case nir_intrinsic_ssbo_atomic_umax:
        case nir_intrinsic_ssbo_atomic_and:
        case nir_intrinsic_ssbo_atomic_or:
        case nir_intrinsic_ssbo_atomic_xor:
        case nir_intrinsic_ssbo_atomic_exchange:
        case nir_intrinsic_ssbo_atomic_comp_swap:
                if (c->key->robust_storage_access) {
                        lower_buffer_atomic(c, b, intr);
                        return true;
                }
                return false;
        case nir_intrinsic_store_shared:
        case nir_intrinsic_load_shared:
        case nir_intrinsic_shared_atomic_add:
        case nir_intrinsic_shared_atomic_imin:
        case nir_intrinsic_shared_atomic_umin:
        case nir_intrinsic_shared_atomic_imax:
        case nir_intrinsic_shared_atomic_umax:
        case nir_intrinsic_shared_atomic_and:
        case nir_intrinsic_shared_atomic_or:
        case nir_intrinsic_shared_atomic_xor:
        case nir_intrinsic_shared_atomic_exchange:
        case nir_intrinsic_shared_atomic_comp_swap:
                if (robust_shared_enabled) {
                        lower_buffer_shared(c, b, intr);
                        return true;
                }
                return false;
        default:
                return false;
        }
}
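
/* Makes an image access robust by comparing each coordinate against the
 * image_size result. Loads and atomics are wrapped in an if/else so that an
 * out-of-bounds access returns zero without executing the original
 * instruction; out-of-bounds stores are simply dropped.
 */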
static void
lower_image(struct v3d_compile *c,
            nir_builder *b,
            nir_intrinsic_instr *instr)
{
        b->cursor = nir_before_instr(&instr->instr);

        uint32_t num_coords = nir_image_intrinsic_coord_components(instr);
        bool is_array = nir_intrinsic_image_array(instr);
        uint32_t image_idx = nir_src_as_uint(instr->src[0]);
        nir_ssa_def *coord = instr->src[1].ssa;

        /* Get image size */
        nir_intrinsic_instr *size_inst =
                nir_intrinsic_instr_create(c->s, nir_intrinsic_image_size);
        size_inst->src[0] = nir_src_for_ssa(nir_imm_int(b, image_idx));
        size_inst->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
        nir_intrinsic_set_image_array(size_inst, is_array);
        size_inst->num_components = num_coords;
        nir_ssa_dest_init(&size_inst->instr, &size_inst->dest,
                          num_coords, 32, NULL);
        nir_ssa_def *size = &size_inst->dest.ssa;
        nir_builder_instr_insert(b, &size_inst->instr);

        /* Emit condition for out-of-bounds access */
        nir_ssa_def *x = nir_channel(b, coord, 0);
        nir_ssa_def *w = nir_channel(b, size, 0);
        nir_ssa_def *oob_cond = nir_uge(b, x, w);

        if (num_coords > 1) {
                nir_ssa_def *y = nir_channel(b, coord, 1);
                nir_ssa_def *h = nir_channel(b, size, 1);
                oob_cond = nir_ior(b, oob_cond, nir_uge(b, y, h));
        }

        if (num_coords > 2) {
                nir_ssa_def *z = nir_channel(b, coord, 2);
                nir_ssa_def *d = nir_channel(b, size, 2);
                if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_CUBE)
                        d = nir_imul(b, nir_imm_int(b, 6), d);
                oob_cond = nir_ior(b, oob_cond, nir_uge(b, z, d));
        }

        if (instr->intrinsic != nir_intrinsic_image_store) {
                /* For out of bounds loads/atomics we want to return a zero
                 * result. Loads may happen on integer or float images, but
                 * because a zero vector has the same bit representation for
                 * both we don't need to know the image format to return a
                 * valid vector.
                 *
                 * Notice we can't use bcsel for this since we need to prevent
                 * execution of the original instruction in case of OOB access.
                 */
                nir_ssa_def *res1, *res2;
                nir_push_if(b, oob_cond);
                if (instr->intrinsic == nir_intrinsic_image_load)
                        res1 = nir_imm_vec4(b, 0, 0, 0, 0);
                else
                        res1 = nir_imm_int(b, 0);
                nir_push_else(b, NULL);
                nir_instr *orig =
                        nir_instr_clone(b->shader, &instr->instr);
                nir_builder_instr_insert(b, orig);
                res2 = &nir_instr_as_intrinsic(orig)->dest.ssa;
                nir_pop_if(b, NULL);

                nir_ssa_def *res = nir_if_phi(b, res1, res2);
                nir_ssa_def_rewrite_uses(&instr->dest.ssa, res);
        } else {
                /* Drop OOB stores */
                assert(instr->intrinsic == nir_intrinsic_image_store);
                nir_push_if(b, nir_inot(b, oob_cond));
                nir_instr *orig =
                        nir_instr_clone(b->shader, &instr->instr);
                nir_builder_instr_insert(b, orig);
                nir_pop_if(b, NULL);
        }

        /* Drop original instruction */
        nir_instr_remove(&instr->instr);
}
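
/* nir_shader_instructions_pass callback: applies lower_image() to every
 * image load, store and atomic. Returns true if the instruction was lowered.
 */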
static bool
lower_image_instr(nir_builder *b, nir_instr *instr, void *_state)
{
        struct v3d_compile *c = _state;

        if (instr->type != nir_instr_type_intrinsic)
                return false;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_image_load:
        case nir_intrinsic_image_store:
        case nir_intrinsic_image_atomic_add:
        case nir_intrinsic_image_atomic_imin:
        case nir_intrinsic_image_atomic_umin:
        case nir_intrinsic_image_atomic_imax:
        case nir_intrinsic_image_atomic_umax:
        case nir_intrinsic_image_atomic_and:
        case nir_intrinsic_image_atomic_or:
        case nir_intrinsic_image_atomic_xor:
        case nir_intrinsic_image_atomic_exchange:
        case nir_intrinsic_image_atomic_comp_swap:
                lower_image(c, b, intr);
                return true;
        default:
                return false;
        }
}
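
/* Public entry points: run the per-instruction lowerings over the whole
 * shader, reporting whether any instruction was rewritten.
 */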
bool
v3d_nir_lower_robust_buffer_access(nir_shader *s, struct v3d_compile *c)
{
        return nir_shader_instructions_pass(s, lower_buffer_instr,
                                            nir_metadata_block_index |
                                            nir_metadata_dominance, c);
}

bool
v3d_nir_lower_robust_image_access(nir_shader *s, struct v3d_compile *c)
{
        return nir_shader_instructions_pass(s, lower_image_instr,
                                            nir_metadata_block_index |
                                            nir_metadata_dominance, c);
}