/*
* Copyright © 2011 Intel Corporation
* Copyright © 2022 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* This lowering pass generates NIR that manually packs varyings into
* vec4 slots, for the benefit of back-ends that don't support packed varyings
* natively.
*
* For example, the following shader:
*
* out mat3x2 foo; // location=4, location_frac=0
* out vec3 bar[2]; // location=5, location_frac=2
*
* main()
* {
* ...
* }
*
* Is rewritten to:
*
* mat3x2 foo;
* vec3 bar[2];
* out vec4 packed4; // location=4, location_frac=0
* out vec4 packed5; // location=5, location_frac=0
* out vec4 packed6; // location=6, location_frac=0
*
* main()
* {
* ...
* packed4.xy = foo[0];
* packed4.zw = foo[1];
* packed5.xy = foo[2];
* packed5.zw = bar[0].xy;
* packed6.x = bar[0].z;
* packed6.yzw = bar[1];
* }
*
* This lowering pass properly handles "double parking" of a varying vector
* across two varying slots. For example, in the code above, two of the
* components of bar[0] are stored in packed5, and the remaining component is
* stored in packed6.
*
* Note that in theory, the extra instructions may cause some loss of
* performance. However, hopefully in most cases the performance loss will
* either be absorbed by a later optimization pass, or it will be offset by
* memory bandwidth savings (because fewer varyings are used).
*
* This lowering pass also packs flat floats, ints, and uints together, by
* using ivec4 as the base type of flat "varyings", and using appropriate
* casts to convert floats and uints into ints.
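*
* For example (a hypothetical shader, for illustration only):
*
* flat out float f; // location=4, location_frac=0
* flat out int i; // location=4, location_frac=1
*
* is packed into a single "flat out ivec4 packed4"; the float is moved into
* packed4.x with its bit pattern unchanged, and the int is stored directly
* into packed4.y.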
*
* This lowering pass also handles varyings whose type is a struct or an array
* of struct. Structs are packed in order and with no gaps, so there may be a
* performance penalty due to structure elements being double-parked.
*
* Lowering of geometry shader inputs is slightly more complex, since geometry
* inputs are always arrays, so we need to lower arrays to arrays. For
* example, the following input:
*
* in struct Foo {
* float f;
* vec3 v;
* vec2 a[2];
* } arr[3]; // location=4, location_frac=0
*
* Would get lowered like this if it occurred in a fragment shader:
*
* struct Foo {
* float f;
* vec3 v;
* vec2 a[2];
* } arr[3];
* in vec4 packed4; // location=4, location_frac=0
* in vec4 packed5; // location=5, location_frac=0
* in vec4 packed6; // location=6, location_frac=0
* in vec4 packed7; // location=7, location_frac=0
* in vec4 packed8; // location=8, location_frac=0
* in vec4 packed9; // location=9, location_frac=0
*
* main()
* {
* arr[0].f = packed4.x;
* arr[0].v = packed4.yzw;
* arr[0].a[0] = packed5.xy;
* arr[0].a[1] = packed5.zw;
* arr[1].f = packed6.x;
* arr[1].v = packed6.yzw;
* arr[1].a[0] = packed7.xy;
* arr[1].a[1] = packed7.zw;
* arr[2].f = packed8.x;
* arr[2].v = packed8.yzw;
* arr[2].a[0] = packed9.xy;
* arr[2].a[1] = packed9.zw;
* ...
* }
*
* But it would get lowered like this if it occurred in a geometry shader:
*
* struct Foo {
* float f;
* vec3 v;
* vec2 a[2];
* } arr[3];
* in vec4 packed4[3]; // location=4, location_frac=0
* in vec4 packed5[3]; // location=5, location_frac=0
*
* main()
* {
* arr[0].f = packed4[0].x;
* arr[0].v = packed4[0].yzw;
* arr[0].a[0] = packed5[0].xy;
* arr[0].a[1] = packed5[0].zw;
* arr[1].f = packed4[1].x;
* arr[1].v = packed4[1].yzw;
* arr[1].a[0] = packed5[1].xy;
* arr[1].a[1] = packed5[1].zw;
* arr[2].f = packed4[2].x;
* arr[2].v = packed4[2].yzw;
* arr[2].a[0] = packed5[2].xy;
* arr[2].a[1] = packed5[2].zw;
* ...
* }
*/
#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"
/**
* State for the pass that performs varying packing. For each varying declared
* in the shader, the pass determines whether it needs to be packed. If so, it
* demotes it to an ordinary global, creates new packed varyings, and
* generates assignments to convert between the original varying and the
* packed varying.
*/
struct lower_packed_varyings_state
{
const struct gl_constants *consts;
struct gl_shader_program *prog;
/**
* Memory context used for temporary allocations made by this pass.
*/
void *mem_ctx;
/**
* Number of generic varying slots which are used by this shader. This is
* used to allocate temporary intermediate data structures. If any varying
* used by this shader has a location greater than or equal to
* VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
*/
unsigned locations_used;
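/**
* Number of components used in each generic varying slot, indexed by
* (location - VARYING_SLOT_VAR0). Provided by the caller and used to size
* the packed varying created for each slot.
*/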
const uint8_t* components;
/**
* Array of pointers to the packed varyings that have been created for each
* generic varying slot. NULL entries in this array indicate varying slots
* for which a packed varying has not been created yet.
*/
nir_variable **packed_varyings;
nir_shader *shader;
nir_function_impl *impl;
nir_builder b;
/**
* Type of varying which is being lowered in this pass (either
* nir_var_shader_in or nir_var_shader_out).
*/
nir_variable_mode mode;
/**
* If we are currently lowering geometry shader inputs, the number of input
* vertices the geometry shader accepts. Otherwise zero.
*/
unsigned gs_input_vertices;
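/**
* Packing options passed through from gl_nir_lower_packed_varyings(), plus
* whether this shader's interface is exposed to the program resource query
* API (used to decide when packed varyings must be added to the resource
* list).
*/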
bool disable_varying_packing;
bool disable_xfb_packing;
bool xfb_enabled;
bool ifc_exposed_to_query_api;
};
bool
lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var,
bool xfb_enabled, bool disable_xfb_packing,
bool disable_varying_packing)
{
/* Things composed of full vec4s, varyings with explicitly assigned
* locations, and varyings marked as must_be_shader_input (which might be
* used by interpolateAt* functions) shouldn't be lowered. Everything else
* can be.
*/
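/* For example (hypothetical declarations), "out vec2 uv;" or "flat out int
* id;" without an explicit location would be lowered, while "out vec4 color;"
* already fills a full slot and is left alone.
*/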
if (var->data.explicit_location || var->data.must_be_shader_input)
return false;
const struct glsl_type *type = var->type;
if (nir_is_arrayed_io(var, shader->info.stage)) {
assert(glsl_type_is_array(type));
type = glsl_get_array_element(type);
}
/* Some drivers (e.g. panfrost) don't support packing of transform
* feedback varyings.
*/
if (disable_xfb_packing && var->data.is_xfb &&
!(glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
xfb_enabled)
return false;
/* Override disable_varying_packing if the var is only used by transform
* feedback. Also override it if transform feedback is enabled and the
* variable is an array, struct or matrix as the elements of these types
* will always have the same interpolation and therefore are safe to pack.
*/
if (disable_varying_packing && !var->data.is_xfb_only &&
!((glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
xfb_enabled))
return false;
type = glsl_without_array(type);
if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
return false;
return true;
}
/**
* If no packed varying has been created for the given varying location yet,
* create it and add it to the shader. Otherwise, update the existing packed
* varying's name and always_active_io flag to account for the newly packed
* variable.
*
* The newly created varying inherits its interpolation parameters from \c
* unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
* vec4 otherwise.
*/
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
nir_variable *unpacked_var,
const char *name, unsigned location,
unsigned slot, unsigned vertex_index)
{
assert(slot < state->locations_used);
if (state->packed_varyings[slot] == NULL) {
assert(state->components[slot] != 0);
assert(name);
nir_variable *packed_var = rzalloc(state->shader, nir_variable);
packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
packed_var->data.mode = state->mode;
bool is_interpolation_flat =
unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
glsl_contains_integer(unpacked_var->type) ||
glsl_contains_double(unpacked_var->type);
const struct glsl_type *packed_type;
if (is_interpolation_flat)
packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
else
packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);
if (state->gs_input_vertices != 0) {
packed_type =
glsl_array_type(packed_type, state->gs_input_vertices, 0);
}
packed_var->type = packed_type;
packed_var->data.centroid = unpacked_var->data.centroid;
packed_var->data.sample = unpacked_var->data.sample;
packed_var->data.patch = unpacked_var->data.patch;
packed_var->data.interpolation = is_interpolation_flat ?
(unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
packed_var->data.location = location;
packed_var->data.precision = unpacked_var->data.precision;
packed_var->data.always_active_io = unpacked_var->data.always_active_io;
packed_var->data.stream = NIR_STREAM_PACKED;
nir_shader_add_variable(state->shader, packed_var);
state->packed_varyings[slot] = packed_var;
} else {
nir_variable *var = state->packed_varyings[slot];
/* The slot needs to be marked as always active if any variable that got
* packed there was.
*/
var->data.always_active_io |= unpacked_var->data.always_active_io;
/* For geometry shader inputs, only append to the packed variable's name
* while lowering the first vertex, so each unpacked name is added once.
*/
if (state->gs_input_vertices == 0 || vertex_index == 0) {
assert(name);
ralloc_asprintf_append((char **) &var->name, ",%s", name);
}
}
}
/**
* Retrieve the packed varying corresponding to the given varying location.
*
* \param vertex_index: if we are lowering geometry shader inputs, then this
* indicates which vertex we are currently lowering. Otherwise it is ignored.
*/
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
unsigned location, nir_variable *unpacked_var,
const char *name, unsigned vertex_index)
{
unsigned slot = location - VARYING_SLOT_VAR0;
assert(slot < state->locations_used);
create_or_update_packed_varying(state, unpacked_var, name, location, slot,
vertex_index);
nir_deref_instr *deref =
nir_build_deref_var(&state->b, state->packed_varyings[slot]);
if (state->gs_input_vertices != 0) {
/* When lowering GS inputs, the packed variable is an array, so we need
* to dereference it using vertex_index.
*/
nir_load_const_instr *c_idx =
nir_load_const_instr_create(state->b.shader, 1, 32);
c_idx->value[0].u32 = vertex_index;
nir_builder_instr_insert(&state->b, &c_idx->instr);
deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
}
return deref;
}
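/**
* Store(s) produced by bitwise_assign_pack()/bitwise_assign_unpack() for a
* single (un)packing assignment. When a two-component 64-bit value has to be
* split into two 32-bit writes, is_64bit is set and both entries of values[]
* and writemasks[] are used; otherwise only index 0 is used.
*/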
struct packing_store_values {
bool is_64bit;
unsigned writemasks[2];
nir_def *values[2];
nir_deref_instr *deref;
};
/**
* Build the store value(s) needed to assign \c value to \c packed_deref,
* performing appropriate bitcasts if necessary to match up types.
*
* This function is called when packing varyings.
*/
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
nir_deref_instr *packed_deref,
nir_deref_instr *unpacked_deref,
const struct glsl_type *unpacked_type,
nir_def *value,
unsigned writemask)
{
nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);
enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);
struct packing_store_values *store_state =
calloc(1, sizeof(struct packing_store_values));
if (unpacked_base_type != packed_base_type) {
/* Since we only mix types in flat varyings, and we always store flat
* varyings as type ivec4, we need only produce conversions from (uint
* or float) to int.
*/
assert(packed_base_type == GLSL_TYPE_INT);
switch (unpacked_base_type) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_FLOAT:
value = nir_mov(&state->b, value);
break;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
assert(glsl_get_vector_elements(unpacked_type) <= 2);
if (glsl_get_vector_elements(unpacked_type) == 2) {
assert(glsl_get_vector_elements(packed_var->type) == 4);
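/* A two-component 64-bit value fills a whole 32-bit vec4 slot: component
* .x is unpacked into .xy of the packed vec4 and .y into .zw, emitted as
* two separate 32-bit store values below.
*/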
unsigned swiz_x = 0;
unsigned writemask = 0x3;
nir_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
store_state->is_64bit = true;
store_state->deref = packed_deref;
store_state->values[0] = nir_unpack_64_2x32(&state->b, swizzle);
store_state->writemasks[0] = writemask;
unsigned swiz_y = 1;
writemask = 0xc;
swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);
store_state->deref = packed_deref;
store_state->values[1] = nir_unpack_64_2x32(&state->b, swizzle);
store_state->writemasks[1] = writemask;
return store_state;
} else {
value = nir_unpack_64_2x32(&state->b, value);
}
break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
value = nir_unpack_64_2x32(&state->b, value);
break;
default:
assert(!"Unexpected type conversion while lowering varyings");
break;
}
}
store_state->deref = packed_deref;
store_state->values[0] = value;
store_state->writemasks[0] = writemask;
return store_state;
}
/**
* The inverse of bitwise_assign_pack(): build the store value(s) needed to
* assign the loaded packed \c value to \c unpacked_deref, converting back
* from int as necessary. This function is called when unpacking varyings.
*/
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
nir_deref_instr *unpacked_deref,
nir_deref_instr *packed_deref,
const struct glsl_type *unpacked_type,
nir_def *value, unsigned writemask)
{
nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);
const struct glsl_type *packed_type = glsl_without_array(packed_var->type);
enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);
struct packing_store_values *store_state =
calloc(1, sizeof(struct packing_store_values));
if (unpacked_base_type != packed_base_type) {
/* Since we only mix types in flat varyings, and we always store flat
* varyings as type ivec4, we need only produce conversions from int to
* (uint or float).
*/
assert(packed_base_type == GLSL_TYPE_INT);
switch (unpacked_base_type) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_FLOAT:
value = nir_mov(&state->b, value);
break;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
assert(glsl_get_vector_elements(unpacked_type) <= 2);
if (glsl_get_vector_elements(unpacked_type) == 2) {
assert(glsl_get_vector_elements(packed_type) == 4);
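/* The reverse of the pack case: .xy of the packed vec4 are re-packed into
* the destination's first written 64-bit component and .zw into the next,
* again as two separate store values.
*/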
unsigned swiz_xy[2] = {0, 1};
writemask = 1 << (ffs(writemask) - 1);
store_state->is_64bit = true;
store_state->deref = unpacked_deref;
store_state->values[0] =
nir_pack_64_2x32(&state->b,
nir_swizzle(&state->b, value, swiz_xy, 2));
store_state->writemasks[0] = writemask;
unsigned swiz_zw[2] = {2, 3};
writemask = writemask << 1;
store_state->deref = unpacked_deref;
store_state->values[1] =
nir_pack_64_2x32(&state->b,
nir_swizzle(&state->b, value, swiz_zw, 2));
store_state->writemasks[1] = writemask;
return store_state;
} else {
value = nir_pack_64_2x32(&state->b, value);
}
break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
value = nir_pack_64_2x32(&state->b, value);
break;
default:
assert(!"Unexpected type conversion while lowering varyings");
break;
}
}
store_state->deref = unpacked_deref;
store_state->values[0] = value;
store_state->writemasks[0] = writemask;
return store_state;
}
static void
create_store_deref(struct lower_packed_varyings_state *state,
nir_deref_instr *deref, nir_def *value,
unsigned writemask, bool is_64bit)
{
/* If the destination and value have different numbers of components, pack
* the sources into a vector.
*/
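/* For example, a two-component value stored with writemask 0xc into a vec4
* destination becomes vec4(undef, undef, value.x, value.y) before the store.
*/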
const struct glsl_type *type = glsl_without_array(deref->type);
unsigned comps = glsl_get_vector_elements(type);
if (value->num_components != comps) {
nir_def *srcs[4];
unsigned comp = 0;
for (unsigned i = 0; i < comps; i++) {
if (writemask & (1 << i)) {
if (is_64bit && state->mode == nir_var_shader_in)
srcs[i] = value;
else
srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
comp++;
} else {
srcs[i] = nir_undef(&state->b, 1,
glsl_type_is_64bit(type) ? 64 : 32);
}
}
value = nir_vec(&state->b, srcs, comps);
}
nir_store_deref(&state->b, deref, value, writemask);
}
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index);
/**
* Recursively pack or unpack a varying for which we need to iterate over its
* constituent elements.
* This takes care of both arrays and matrices.
*
* \param gs_input_toplevel should be set to true if we are lowering geometry
* shader inputs, and we are currently lowering the whole input variable
* (i.e. we are lowering the array whose index selects the vertex).
*
* \param vertex_index: if we are lowering geometry shader inputs, and the
* level of the array that we are currently lowering is *not* the top level,
* then this indicates which vertex we are currently lowering. Otherwise it
* is ignored.
*/
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
unsigned array_size = glsl_get_length(type);
unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
if (array_size * dmul + fine_location % 4 > 4) {
fine_location = ALIGN_POT(fine_location, dmul);
}
type = glsl_get_array_element(type);
for (unsigned i = 0; i < array_size; i++) {
nir_load_const_instr *c_idx =
nir_load_const_instr_create(state->b.shader, 1, 32);
c_idx->value[0].u32 = i;
nir_builder_instr_insert(&state->b, &c_idx->instr);
nir_deref_instr *unpacked_array_deref =
nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);
if (gs_input_toplevel) {
/* Geometry shader inputs are a special case. Instead of storing
* each element of the array at a different location, all elements
* are at the same location, but with a different vertex index.
*/
(void) lower_varying(state, rhs_swizzle, writemask, type, fine_location,
unpacked_var, unpacked_array_deref, name, false, i);
} else {
char *subscripted_name = name ?
ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
fine_location =
lower_varying(state, rhs_swizzle, writemask, type, fine_location,
unpacked_var, unpacked_array_deref,
subscripted_name, false, vertex_index);
}
}
return fine_location;
}
/**
* Recursively pack or unpack the given varying (or portion of a varying) by
* traversing all of its constituent vectors.
*
* \param fine_location is the location where the first constituent vector
* should be packed--the word "fine" indicates that this location is expressed
* in multiples of a float, rather than multiples of a vec4 as is used
* elsewhere in Mesa.
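* For example, a varying at data.location L with data.location_frac 2 has
* fine_location 4 * L + 2 and starts at component .z of slot L.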
*
* \param gs_input_toplevel should be set to true if we are lowering geometry
* shader inputs, and we are currently lowering the whole input variable
* (i.e. we are lowering the array whose index selects the vertex).
*
* \param vertex_index: if we are lowering geometry shader inputs, and the
* level of the array that we are currently lowering is *not* the top level,
* then this indicates which vertex we are currently lowering. Otherwise it
* is ignored.
*
* \return the location where the next constituent vector (after this one)
* should be packed.
*/
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;
/* When gs_input_toplevel is set, we should be looking at a geometry shader
* input array.
*/
assert(!gs_input_toplevel || glsl_type_is_array(type));
if (glsl_type_is_struct(type)) {
unsigned struct_len = glsl_get_length(type);
for (unsigned i = 0; i < struct_len; i++) {
const char *field_name = glsl_get_struct_elem_name(type, i);
char *deref_name = name ?
ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) :
NULL;
const struct glsl_type *field_type = glsl_get_struct_field(type, i);
nir_deref_instr *unpacked_struct_deref =
nir_build_deref_struct(&state->b, unpacked_var_deref, i);
fine_location = lower_varying(state, rhs_swizzle, writemask, field_type,
fine_location, unpacked_var,
unpacked_struct_deref, deref_name,
false, vertex_index);
}
return fine_location;
} else if (glsl_type_is_array(type)) {
/* Arrays are packed/unpacked by considering each array element in
* sequence.
*/
return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
unpacked_var, unpacked_var_deref, name,
gs_input_toplevel, vertex_index);
} else if (glsl_type_is_matrix(type)) {
/* Matrices are packed/unpacked by considering each column vector in
* sequence.
*/
return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
unpacked_var, unpacked_var_deref, name, false,
vertex_index);
} else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
/* We don't have code to split a 64-bit variable across two
* varying slots; instead we add padding if necessary.
*/
unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
if (aligned_fine_location != fine_location) {
return lower_varying(state, rhs_swizzle, writemask, type,
aligned_fine_location, unpacked_var,
unpacked_var_deref, name, false, vertex_index);
}
/* This vector is going to be "double parked" across two varying slots,
* so handle it as two separate assignments. For doubles, a dvec3/dvec4
* can end up being spread over 3 slots. However, the second split will
* happen later; here we always want to split into 2.
*/
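/* For example, a vec3 whose fine_location % 4 == 2 gets left_components == 2
* (written to .zw of the current slot) and right_components == 1 (written to
* .x of the next slot), matching the bar[0] case in the file header comment.
*/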
unsigned left_components, right_components;
unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
char left_swizzle_name[4] = { 0, 0, 0, 0 };
char right_swizzle_name[4] = { 0, 0, 0, 0 };
left_components = 4 - fine_location % 4;
if (glsl_type_is_64bit(type)) {
left_components /= 2;
assert(left_components > 0);
}
right_components = glsl_get_vector_elements(type) - left_components;
/* If set, use the previously set writemask to offset the following
* swizzles/writemasks. This can happen when splitting a dvec, etc.,
* across slots.
*/
unsigned offset = 0;
if (writemask) {
for (unsigned i = 0; i < left_components; i++) {
/* Keep going until we find the first component of the write */
if (!(writemask & (1 << i))) {
offset++;
} else
break;
}
}
for (unsigned i = 0; i < left_components; i++) {
left_swizzle_values[i] = i + offset;
left_swizzle_name[i] = "xyzw"[i + offset];
}
for (unsigned i = 0; i < right_components; i++) {
right_swizzle_values[i] = i + left_components + offset;
right_swizzle_name[i] = "xyzw"[i + left_components + offset];
}
if (left_components) {
char *left_name = name ?
ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
NULL;
nir_def *left_swizzle = NULL;
unsigned left_writemask = ~0u;
if (state->mode == nir_var_shader_out) {
nir_def *ssa_def = rhs_swizzle ?
rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
left_swizzle =
nir_swizzle(&state->b, ssa_def,
left_swizzle_values, left_components);
} else {
left_writemask = ((1 << left_components) - 1) << offset;
}
const struct glsl_type *swiz_type =
glsl_vector_type(glsl_get_base_type(type), left_components);
fine_location = lower_varying(state, left_swizzle, left_writemask, swiz_type,
fine_location, unpacked_var, unpacked_var_deref,
left_name, false, vertex_index);
} else {
/* Top up the fine location to the next slot */
fine_location++;
}
char *right_name = name ?
ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
NULL;
nir_def *right_swizzle = NULL;
unsigned right_writemask = ~0u;
if (state->mode == nir_var_shader_out) {
nir_def *ssa_def = rhs_swizzle ?
rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
right_swizzle =
nir_swizzle(&state->b, ssa_def,
right_swizzle_values, right_components);
} else {
right_writemask = ((1 << right_components) - 1) << (left_components + offset);
}
const struct glsl_type *swiz_type =
glsl_vector_type(glsl_get_base_type(type), right_components);
return lower_varying(state, right_swizzle, right_writemask, swiz_type,
fine_location, unpacked_var, unpacked_var_deref,
right_name, false, vertex_index);
} else {
/* No special handling is necessary; (un)pack the old varying (now temp)
* from/into the new packed varying.
*/
unsigned components = glsl_get_vector_elements(type) * dmul;
unsigned location = fine_location / 4;
unsigned location_frac = fine_location % 4;
assert(state->components[location - VARYING_SLOT_VAR0] >= components);
nir_deref_instr *packed_deref =
get_packed_varying_deref(state, location, unpacked_var, name,
vertex_index);
nir_variable *packed_var =
state->packed_varyings[location - VARYING_SLOT_VAR0];
if (unpacked_var->data.stream != 0) {
assert(unpacked_var->data.stream < 4);
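/* Each component of the packed varying records its 2-bit stream ID, so
* e.g. a stream-1 varying landing in components .y and .z sets bits 2-3
* and 4-5 of data.stream.
*/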
for (unsigned i = 0; i < components; ++i) {
packed_var->data.stream |=
unpacked_var->data.stream << (2 * (location_frac + i));
}
}
struct packing_store_values *store_value;
if (state->mode == nir_var_shader_out) {
unsigned writemask = ((1 << components) - 1) << location_frac;
nir_def *value = rhs_swizzle ? rhs_swizzle :
nir_load_deref(&state->b, unpacked_var_deref);
store_value =
bitwise_assign_pack(state, packed_deref, unpacked_var_deref, type,
value, writemask);
} else {
unsigned swizzle_values[4] = { 0, 0, 0, 0 };
for (unsigned i = 0; i < components; ++i) {
swizzle_values[i] = i + location_frac;
}
nir_def *ssa_def = nir_load_deref(&state->b, packed_deref);
nir_def *swizzle =
nir_swizzle(&state->b, ssa_def, swizzle_values, components);
store_value = bitwise_assign_unpack(state, unpacked_var_deref,
packed_deref, type, swizzle,
writemask);
}
create_store_deref(state, store_value->deref, store_value->values[0],
store_value->writemasks[0], store_value->is_64bit);
if (store_value->is_64bit) {
create_store_deref(state, store_value->deref, store_value->values[1],
store_value->writemasks[1], store_value->is_64bit);
}
free(store_value);
return fine_location + components;
}
}
/* Recursively pack varying. */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
lower_varying(state, NULL, ~0u, var->type,
var->data.location * 4 + var->data.location_frac,
var, unpacked_var_deref, var->name,
state->gs_input_vertices != 0, 0);
}
static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
if (var->data.mode != state->mode ||
var->data.location < VARYING_SLOT_VAR0 ||
!lower_packed_varying_needs_lowering(state->shader, var,
state->xfb_enabled,
state->disable_xfb_packing,
state->disable_varying_packing))
return;
/* Skip any new packed varyings we just added */
if (strncmp("packed:", var->name, 7) == 0)
return;
/* This lowering pass is only capable of packing floats and ints
* together when their interpolation mode is "flat". Treat integers as
* being flat when the interpolation mode is none.
*/
assert(var->data.interpolation == INTERP_MODE_FLAT ||
var->data.interpolation == INTERP_MODE_NONE ||
!glsl_contains_integer(var->type));
if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
struct set *resource_set = _mesa_pointer_set_create(NULL);
nir_add_packed_var_to_resource_list(state->consts, state->prog,
resource_set, var,
state->shader->info.stage,
GL_PROGRAM_OUTPUT);
_mesa_set_destroy(resource_set, NULL);
}
/* Change the old varying into an ordinary global. */
var->data.mode = nir_var_shader_temp;
nir_foreach_block(block, state->impl) {
if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
/* For shaders other than geometry, outputs need to be lowered before
* each return statement and at the end of main()
*/
if (nir_block_ends_in_return_or_halt(block)) {
state->b.cursor = nir_before_instr(nir_block_last_instr(block));
pack_output_var(state, var);
} else if (block == nir_impl_last_block(state->impl)) {
state->b.cursor = nir_after_block(block);
pack_output_var(state, var);
}
} else {
/* For geometry shaders, outputs need to be lowered before each call
* to EmitVertex()
*/
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_emit_vertex)
continue;
state->b.cursor = nir_before_instr(instr);
pack_output_var(state, var);
}
}
}
}
static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
nir_foreach_shader_out_variable_safe(var, state->shader) {
lower_output_var(state, var);
}
}
static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
/* Shader inputs need to be lowered at the beginning of main(), so set the
* builder cursor to insert packing code at the start of the main function.
*/
state->b.cursor = nir_before_impl(state->impl);
/* Insert new varyings, lower old ones to locals, and add unpacking code at
* the start of the shader.
*/
nir_foreach_shader_in_variable_safe(var, state->shader) {
if (var->data.mode != state->mode ||
var->data.location < VARYING_SLOT_VAR0 ||
!lower_packed_varying_needs_lowering(state->shader, var,
state->xfb_enabled,
state->disable_xfb_packing,
state->disable_varying_packing))
continue;
/* Skip any new packed varyings we just added */
if (strncmp("packed:", var->name, 7) == 0)
continue;
/* This lowering pass is only capable of packing floats and ints
* together when their interpolation mode is "flat". Treat integers as
* being flat when the interpolation mode is none.
*/
assert(var->data.interpolation == INTERP_MODE_FLAT ||
var->data.interpolation == INTERP_MODE_NONE ||
!glsl_contains_integer(var->type));
/* The program interface needs to expose varyings in case of SSO. Add the
* variable to the program resource list before it gets modified and lost.
*/
if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
struct set *resource_set = _mesa_pointer_set_create(NULL);
nir_add_packed_var_to_resource_list(state->consts, state->prog,
resource_set, var,
state->shader->info.stage,
GL_PROGRAM_INPUT);
_mesa_set_destroy(resource_set, NULL);
}
/* Change the old varying into an ordinary global. */
var->data.mode = nir_var_shader_temp;
/* Recursively unpack varying. */
nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
lower_varying(state, NULL, ~0u, var->type,
var->data.location * 4 + var->data.location_frac,
var, unpacked_var_deref, var->name,
state->gs_input_vertices != 0, 0);
}
}
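/**
* Entry point of the pass: packs the lowerable generic varyings of the given
* linked shader (inputs or outputs, depending on \c mode) into vec4/ivec4
* slots, then turns the now-unused varyings into locals and fixes up deref
* modes.
*/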
void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
struct gl_shader_program *prog,
void *mem_ctx, unsigned locations_used,
const uint8_t *components,
nir_variable_mode mode, unsigned gs_input_vertices,
struct gl_linked_shader *linked_shader,
bool disable_varying_packing,
bool disable_xfb_packing, bool xfb_enabled)
{
struct lower_packed_varyings_state state;
nir_shader *shader = linked_shader->Program->nir;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(shader->info.stage != MESA_SHADER_COMPUTE);
/* assert that functions have been inlined before packing is called */
nir_foreach_function(f, shader) {
assert(f->impl == impl);
}
state.b = nir_builder_create(impl);
state.consts = consts;
state.prog = prog;
state.mem_ctx = mem_ctx;
state.shader = shader;
state.impl = impl;
state.locations_used = locations_used;
state.components = components;
state.mode = mode;
state.gs_input_vertices = gs_input_vertices;
state.disable_varying_packing = disable_varying_packing;
state.disable_xfb_packing = disable_xfb_packing;
state.xfb_enabled = xfb_enabled;
state.packed_varyings =
(nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
locations_used);
/* Determine if the shader interface is exposed to the API resource query */
struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
unsigned num_shaders = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i])
linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
}
if (mode == nir_var_shader_in) {
state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
lower_packed_inputs(&state);
} else {
state.ifc_exposed_to_query_api =
linked_shaders[num_shaders - 1] == linked_shader;
lower_packed_outputs(&state);
}
nir_lower_global_vars_to_local(shader);
nir_fixup_deref_modes(shader);
}