/* blob: df770bd90e6cccdb2e32fa006a8ef93e26df0733 [file] [log] [blame] */
/*
* Copyright (C) 2018-2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __AGX_PUBLIC_H_
#define __AGX_PUBLIC_H_
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "asahi/lib/agx_pack.h"
/* Kinds of data that can be pushed into the uniform file. */
enum agx_push_type {
   /* The *_BASES entries are arrays of 64-bit pointers to base addresses;
    * the *_SIZES entries are arrays of 16-bit sizes, available for optional
    * bounds checking.
    */
   AGX_PUSH_UBO_BASES,
   AGX_PUSH_UBO_SIZES,
   AGX_PUSH_VBO_SIZES,
   AGX_PUSH_SSBO_BASES,
   AGX_PUSH_SSBO_SIZES,

   /* A single 64-bit VBO base pointer */
   AGX_PUSH_VBO_BASE,

   /* The attached constant memory */
   AGX_PUSH_CONSTANTS,

   /* The contents of a UBO */
   AGX_PUSH_UBO_DATA,

   /* The RGBA blend constant, as FP32 */
   AGX_PUSH_BLEND_CONST,

   /* NOTE(review): undocumented; presumably a base address for textures --
    * TODO confirm.
    */
   AGX_PUSH_TEXTURE_BASE,

   /* Number of push types. Must remain the final enumerator. */
   AGX_PUSH_NUM_TYPES
};
/* struct agx_push stores its type in an 8-bit bitfield, so the number of push
 * types must stay below 2^8. */
static_assert(AGX_PUSH_NUM_TYPES < (1 << 8), "type overflow");
/* Describes one range of the uniform file and the data pushed into it. */
struct agx_push {
   /* What gets pushed */
   enum agx_push_type type : 8;

   /* Destination of the push, counted in 16-bit units. There are 512 = 2^9
    * such units in the uniform file, hence the 9-bit field.
    */
   unsigned base : 9;

   /* Size of the push, in 16-bit units */
   unsigned length : 9;

   /* When set, a pointer to the specified data is pushed in place of the data
    * itself. Access through the pointer is slower, but it permits indirect
    * access, which the uniform file itself does not support.
    */
   bool indirect : 1;

   /* Extra operands. NOTE(review): which member is meaningful presumably
    * depends on `type` -- TODO confirm against the users of this struct.
    */
   union {
      struct {
         uint16_t ubo;
         uint16_t offset;
      } ubo_data;

      uint32_t vbo;
   };
};
/* Arbitrary limits. AGX_MAX_PUSH_RANGES is the capacity of the push array in
 * struct agx_shader_info. */
#define AGX_MAX_PUSH_RANGES (16)
#define AGX_MAX_VARYINGS (32)
/* Layout of the indices output by a vertex shader and the mapping from
 * varying slots onto them.
 */
struct agx_varyings_vs {
   /* Boundary between the two precisions: indices below this value are
    * treated as FP32, and FP16 varyings start at this index. Slots may need
    * to be remapped to guarantee this split.
    */
   unsigned base_index_fp16;

   /* Total count of indices output by the vertex shader. Never smaller than
    * base_index_fp16.
    */
   unsigned nr_index;

   /* Base index of each slot. For a written slot, this is the index that
    * receives the slot's first component; the remaining components occupy the
    * indices that follow. A base below base_index_fp16 means a 32-bit slot
    * (4 indices for the 4 components); otherwise the slot is 16-bit (2
    * indices for the 4 components). The base is always less than nr_index.
    *
    * Slots that are not written hold ~0.
    */
   unsigned slots[VARYING_SLOT_MAX];
};
/* Conservative bound on the number of coefficient register bindings; this is
 * the capacity of the bindings array in struct agx_varyings_fs. */
#define AGX_MAX_CF_BINDINGS (VARYING_SLOT_MAX)
/* Coefficient register usage and bindings of a fragment shader. */
struct agx_varyings_fs {
   /* How many coefficient registers are used */
   unsigned nr_cf;

   /* How many entries of bindings[] are in use */
   unsigned nr_bindings;

   /* Set if gl_FragCoord.z is read */
   bool reads_z;

   /* The coefficient register bindings themselves */
   struct {
      /* First coefficient register of this binding */
      unsigned cf_base;

      /* Varying slot bound to those registers */
      gl_varying_slot slot;

      /* Index of the first bound component.
       *
       * When slot == VARYING_SLOT_POS, only 2 (Z) and 3 (W) are allowed.
       */
      unsigned offset : 2;

      /* How many components are bound */
      unsigned count : 3;

      /* Smooth shading if true, flat shading if false */
      bool smooth : 1;

      /* Whether interpolation is perspective correct */
      bool perspective : 1;
   } bindings[AGX_MAX_CF_BINDINGS];
};
/* Varying information in either its vertex shader (vs) or fragment shader
 * (fs) form. NOTE(review): the active member is presumably chosen by the
 * stage of the shader being described -- TODO confirm.
 */
union agx_varyings {
   struct agx_varyings_vs vs;
   struct agx_varyings_fs fs;
};
/* Metadata describing a shader, written through the `out` parameter of
 * agx_compile_shader_nir.
 */
struct agx_shader_info {
   /* NOTE(review): push_count is undocumented here; confirm how it differs
    * from push_ranges.
    */
   unsigned push_count;

   /* NOTE(review): presumably the number of valid entries in push[] -- TODO
    * confirm.
    */
   unsigned push_ranges;
   struct agx_push push[AGX_MAX_PUSH_RANGES];

   union agx_varyings varyings;

   /* Whether a preamble exists. When it does, it lives at preamble_offset and
    * runs before the main shader, which lives at main_offset.
    */
   bool has_preamble;
   unsigned preamble_offset;
   unsigned main_offset;

   /* Set if the shader reads the tilebuffer */
   bool reads_tib;

   /* Set if the shader writes point size */
   bool writes_psiz;

   /* Set if the shader controls the sample mask */
   bool writes_sample_mask;

   /* Set if colour output is omitted */
   bool no_colour_output;

   /* Count of 16-bit registers used by the main shader */
   unsigned nr_gprs;

   /* Count of 16-bit registers used by the preamble */
   unsigned nr_preamble_gprs;
};
/* Array capacities used by the shader keys: tib_formats holds AGX_MAX_RTS
 * entries, attributes holds AGX_MAX_ATTRIBS entries and vbuf_strides holds
 * AGX_MAX_VBUFS entries. */
#define AGX_MAX_RTS (8)
#define AGX_MAX_ATTRIBS (16)
#define AGX_MAX_VBUFS (16)
/* Data formats. The numeric values are assigned explicitly and must not be
 * renumbered; 9 and 11 are not assigned here.
 */
enum agx_format {
   AGX_FORMAT_I8       = 0,
   AGX_FORMAT_I16      = 1,
   AGX_FORMAT_I32      = 2,
   AGX_FORMAT_F16      = 3,
   AGX_FORMAT_U8NORM   = 4,
   AGX_FORMAT_S8NORM   = 5,
   AGX_FORMAT_U16NORM  = 6,
   AGX_FORMAT_S16NORM  = 7,
   AGX_FORMAT_RGB10A2  = 8,
   AGX_FORMAT_SRGBA8   = 10,
   AGX_FORMAT_RG11B10F = 12,
   AGX_FORMAT_RGB9E5   = 13,

   /* One past the largest format value. Must remain the final enumerator. */
   AGX_NUM_FORMATS,
};
/* Gives the base-2 logarithm n of the minimum alignment (2^n) of an
 * agx_format. Equivalently, n is the number of low address bits that are
 * required to be zero. The hardware left-shifts the offset argument of
 * device_load by this amount.
 */
static inline unsigned
agx_format_shift(enum agx_format format)
{
   switch (format) {
   /* Minimum alignment 2^2 */
   case AGX_FORMAT_RGB9E5:
   case AGX_FORMAT_RG11B10F:
   case AGX_FORMAT_RGB10A2:
   case AGX_FORMAT_I32:
      return 2;

   /* Minimum alignment 2^1 */
   case AGX_FORMAT_S16NORM:
   case AGX_FORMAT_U16NORM:
   case AGX_FORMAT_F16:
   case AGX_FORMAT_I16:
      return 1;

   /* Minimum alignment 2^0, i.e. no low bits are required to be zero */
   case AGX_FORMAT_SRGBA8:
   case AGX_FORMAT_S8NORM:
   case AGX_FORMAT_U8NORM:
   case AGX_FORMAT_I8:
      return 0;

   default:
      unreachable("invalid format");
   }
}
/* Description of one vertex attribute, as stored in struct
 * agx_vs_shader_key. The bitfields after `divisor` add up to 32 bits
 * (5 + 16 + 2 + 4 + 5).
 */
struct agx_attribute {
   /* NOTE(review): presumably the instancing divisor -- TODO confirm */
   uint32_t divisor;

   /* NOTE(review): presumably an index into vbuf_strides of the vertex shader
    * key -- TODO confirm.
    */
   unsigned buf : 5;

   /* NOTE(review): presumably the offset of the attribute within its buffer
    * element; units not stated here -- TODO confirm.
    */
   unsigned src_offset : 16;

   /* Component count biased by one, per the field name; 2 bits encode 1-4 */
   unsigned nr_comps_minus_1 : 2;

   /* 4 bits hold every enum agx_format value (largest is 13) */
   enum agx_format format : 4;

   /* Fills the remaining bits of the 32-bit total */
   unsigned padding : 5;
};
/* Vertex shader half of the shader key (see struct agx_shader_key). */
struct agx_vs_shader_key {
   /* NOTE(review): presumably the number of valid vbuf_strides entries --
    * TODO confirm.
    */
   unsigned num_vbufs;

   /* One stride per vertex buffer, up to AGX_MAX_VBUFS */
   unsigned vbuf_strides[AGX_MAX_VBUFS];

   /* One description per vertex attribute, up to AGX_MAX_ATTRIBS */
   struct agx_attribute attributes[AGX_MAX_ATTRIBS];
};
/* Fragment shader half of the shader key (see struct agx_shader_key). */
struct agx_fs_shader_key {
   /* NOTE(review): presumably the tilebuffer format of each render target --
    * TODO confirm.
    */
   enum agx_format tib_formats[AGX_MAX_RTS];

   /* Tilebuffer accesses normally have to be guarded by fencing instructions,
    * so that results stay correct despite out-of-order hardware
    * optimizations. Specially dispatched clear shaders are exempt from this
    * and may leave out the wait instructions.
    *
    * Set this for special clear shaders, and for nothing else.
    *
    * Incompatible with sample mask writes (discards included) and with
    * tilebuffer loads (blending included).
    */
   bool ignore_tib_dependencies;
};
/* Shader key passed to agx_compile_shader_nir. It carries either the vertex
 * shader (vs) or the fragment shader (fs) variant. NOTE(review): the variant
 * in use is presumably implied by the stage of the shader -- TODO confirm.
 */
struct agx_shader_key {
   union {
      struct agx_vs_shader_key vs;
      struct agx_fs_shader_key fs;
   };
};
/* Compiler entry point operating on a NIR shader. Only the declaration is
 * visible in this header, so the contract below is an assumption to confirm
 * against the definition:
 *
 *   nir    - the shader to compile
 *   key    - the shader key (vs or fs variant)
 *   debug  - presumably receives debug messages
 *   binary - presumably receives the compiled code
 *   out    - presumably filled with metadata about the compiled shader
 */
void
agx_compile_shader_nir(nir_shader *nir,
struct agx_shader_key *key,
struct util_debug_callback *debug,
struct util_dynarray *binary,
struct agx_shader_info *out);
/* The nir_shader_compiler_options used for AGX. The meaning of each field is
 * defined by NIR (see compiler/nir/nir.h); by their names, the lower_* flags
 * request that NIR lower the corresponding operation.
 */
static const nir_shader_compiler_options agx_nir_options = {
   .lower_fdiv = true,
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fmod = true,
   .lower_ifind_msb = true,
   .lower_find_lsb = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_scmp = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fdph = true,
   .lower_ffract = true,
   .lower_pack_split = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_cs_local_index_to_id = true,
   .has_cs_global_id = true,
   .vectorize_io = true,
   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .has_fsub = true,
   .has_isub = true,
   .max_unroll_iterations = 32,
   .lower_uniforms_to_ubo = true,
   .force_indirect_unrolling_sampler = true,

   /* Applies to shader inputs, shader outputs and function temporaries */
   .force_indirect_unrolling =
      (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),

   /* Every bit of the 64-bit integer lowering mask is set, except those for
    * iadd64 and imul_2x32_64.
    */
   .lower_int64_options =
      (nir_lower_int64_options) ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),

   .lower_doubles_options = nir_lower_dmod,
};
#endif