src/asahi/compiler/agx_nir_lower_interpolation.c - third_party/mesa - Git at Google

 /*
  * Copyright 2023 Valve Corporation
  * SPDX-License-Identifier: MIT
  */

 #include "compiler/shader_enums.h"
 #include "agx_compile.h"
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_builder_opcodes.h"
 #include "nir_intrinsics.h"
 #include "nir_intrinsics_indices.h"

 /*
  * In AGX, the values of fragment shader inputs are represented as coefficient
  * vectors <A, B, C>, which are dotted with <x, y, 1> to perform interpolation.
  * x and y are relative to the tile. In other words, A and B are the
  * screen-space partial derivatives of the input, and C is the value at the
  * corner of the tile.
  *
  * For some interpolation modes, the dot product happens in the iterator
  * hardware. Other modes are implemented in this file, by lowering to math on
  * the coefficient vectors.
  */

 /*
  * Build a boolean describing whether a coefficient vector may be used for
  * interpolation: true when the low bit of the first coefficient is clear, or
  * where nir_fisnormal() holds for the first two coefficients.
  *
  * XXX: It's not clear what this is for, but it seems necessary.
  */
 static nir_def *
 cf_valid(nir_builder *b, nir_def *cf)
 {
    nir_def *first = nir_channel(b, cf, 0);
    nir_def *low_bit = nir_iand_imm(b, first, 1);
    nir_def *low_bit_clear = nir_ieq_imm(b, low_bit, 0);

    /* XXX: Apple's compiler actually checks that the significand is nonzero
     * and the exponent is 0 or 1. That is probably a typo, as it makes no
     * logical sense. Presumably the intent was to check for denorms, so that
     * is what we do. The tests pass either way.
     */
    nir_def *derivatives = nir_trim_vector(b, cf, 2);
    nir_def *normal = nir_fisnormal(b, derivatives);

    return nir_ior(b, low_bit_clear, normal);
 }

 /*
  * Evaluate the coefficient vector cf = <A, B, C> at the centre of the current
  * pixel displaced by offset, i.e. A * x + B * y + C with tile-relative x, y.
  *
  * If perspective is set, the result is further divided by component 3 of the
  * VARYING_SLOT_POS input (RHW) loaded at the same offset. Wherever cf_valid()
  * is false, the constant coefficient C is returned instead.
  */
 static nir_def *
 interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
                       bool perspective)
 {
    /* Position of the pixel inside its tile: pixel coordinate modulo 32 */
    nir_def *in_tile = nir_umod_imm(b, nir_load_pixel_coord(b), 32);

    /* Switch to float and step to the middle of the pixel */
    nir_def *centre = nir_fadd_imm(b, nir_u2f32(b, in_tile), 0.5);

    /* offset is a float measured from the pixel centre; widen it to 32 bits
     * and add it to obtain the point at which to interpolate.
     */
    nir_def *where = nir_fadd(b, centre, nir_f2f32(b, offset));

    /* B * y + C */
    nir_def *y = nir_channel(b, where, 1);
    nir_def *coeff_b = nir_channel(b, cf, 1);
    nir_def *coeff_c = nir_channel(b, cf, 2);
    nir_def *result = nir_ffma(b, y, coeff_b, coeff_c);

    /* A * x + (B * y + C) */
    nir_def *x = nir_channel(b, where, 0);
    nir_def *coeff_a = nir_channel(b, cf, 0);
    result = nir_ffma(b, x, coeff_a, result);

    /* Divide by RHW. The load emitted here is itself an at_offset
     * interpolated load, so it will be lowered recursively.
     */
    if (perspective) {
       nir_def *bary = nir_load_barycentric_at_offset(
          b, 32, offset, .interp_mode = INTERP_MODE_NOPERSPECTIVE);

       nir_def *rhw = nir_load_interpolated_input(
          b, 1, 32, bary, nir_imm_int(b, 0), .component = 3,
          .io_semantics = {
             .location = VARYING_SLOT_POS,
             .num_slots = 1,
          });

       result = nir_fdiv(b, result, rhw);
    }

    /* Fall back to the constant channel for invalid interpolations */
    nir_def *valid = cf_valid(b, cf);
    nir_def *fallback = nir_channel(b, cf, 2);

    return nir_bcsel(b, valid, result, fallback);
 }

 /*
  * Produce the value of a flat-shaded input from its coefficient vector.
  */
 static nir_def *
 interpolate_flat(nir_builder *b, nir_def *coefficients)
 {
    /* The value is the same everywhere, so the constant (affine) component is
     * normally all we need. Triangle fans with the first provoking vertex use
     * a slightly different CF layout. The reason is unknown, but Apple does
     * the same and the select is required for correctness.
     */
    nir_def *first_fan = nir_load_is_first_fan_agx(b);
    nir_def *fan_value = nir_channel(b, coefficients, 1);
    nir_def *constant = nir_channel(b, coefficients, 2);

    return nir_bcsel(b, first_fan, fan_value, constant);
 }

 /*
  * Return the interpolation mode associated with a load: flat for load_input,
  * otherwise the mode recorded on the intrinsic feeding the load's first
  * source.
  */
 static enum glsl_interp_mode
 interp_mode_for_load(nir_intrinsic_instr *load)
 {
    /* Only non-load_input loads have a first source to query for the mode */
    if (load->intrinsic != nir_intrinsic_load_input) {
       nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);

       return nir_intrinsic_interp_mode(bary);
    }

    return INTERP_MODE_FLAT;
 }

 /*
  * Instruction filter for the lowering pass. Selects every load_input, and
  * every load_interpolated_input whose barycentric source is
  * load_barycentric_at_offset. The second argument is unused.
  */
 static bool
 needs_lower(const nir_instr *instr, UNUSED const void *_)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;

    const nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

    switch (load->intrinsic) {
    case nir_intrinsic_load_input:
       /* Flat shading is always lowered */
       return true;

    case nir_intrinsic_load_interpolated_input: {
       /* Only at_offset barycentrics need to be lowered */
       const nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);

       return bary->intrinsic == nir_intrinsic_load_barycentric_at_offset;
    }

    default:
       return false;
    }
 }

 /*
  * Lower a single channel of an input load.
  *
  * The coefficient vector for component (base component + channel) is loaded
  * first. A load_input then yields either the layer ID (for
  * VARYING_SLOT_LAYER) or the flat-shaded value. Any other load is
  * interpolated at the offset given by its barycentric source and converted
  * to the destination bit size.
  */
 static nir_def *
 interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
 {
    nir_def *cf = nir_load_coefficients_agx(
       b, nir_get_io_offset_src(load)->ssa,
       .component = nir_intrinsic_component(load) + channel,
       .interp_mode = interp_mode_for_load(load),
       .io_semantics = nir_intrinsic_io_semantics(load));

    if (load->intrinsic != nir_intrinsic_load_input) {
       nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);

       /* Every mode other than noperspective gets the perspective divide */
       bool perspective =
          nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE;

       nir_def *value =
          interpolate_at_offset(b, cf, bary->src[0].ssa, perspective);

       return nir_f2fN(b, value, load->def.bit_size);
    }

    /* Flat inputs are only handled at 32 bits */
    assert(load->def.bit_size == 32);

    if (nir_intrinsic_io_semantics(load).location == VARYING_SLOT_LAYER)
       return nir_load_layer_id(b);

    return interpolate_flat(b, cf);
 }

 /*
  * Lowering callback: rebuild the destination of a selected load by lowering
  * every channel on its own and packing the results into a vector. The data
  * argument is not used.
  */
 static nir_def *
 lower(nir_builder *b, nir_instr *instr, void *data)
 {
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    const unsigned count = intr->def.num_components;

    /* Each component is loaded separately */
    nir_def *channels[NIR_MAX_VEC_COMPONENTS] = {NULL};

    for (unsigned c = 0; c < count; ++c)
       channels[c] = interpolate_channel(b, intr, c);

    return nir_vec(b, channels, count);
 }

 /*
  * Entry point of the pass. Runs the needs_lower/lower pair over a fragment
  * shader and returns whatever nir_shader_lower_instructions() reports
  * (presumably whether anything changed; confirm against NIR).
  */
 bool
 agx_nir_lower_interpolation(nir_shader *s)
 {
    assert(s->info.stage == MESA_SHADER_FRAGMENT);

    bool progress = nir_shader_lower_instructions(s, needs_lower, lower, NULL);

    return progress;
 }
	/*
	* Copyright 2023 Valve Corporation
	* SPDX-License-Identifier: MIT
	*/

	#include "compiler/shader_enums.h"
	#include "agx_compile.h"
	#include "nir.h"
	#include "nir_builder.h"
	#include "nir_builder_opcodes.h"
	#include "nir_intrinsics.h"
	#include "nir_intrinsics_indices.h"

	/*
	* In AGX, the values of fragment shader inputs are represented as coefficient
	* vectors <A, B, C>, which are dotted with <x, y, 1> to perform interpolation.
	* x and y are relative to the tile. In other words, A and B are the
	* screen-space partial derivatives of the input, and C is the value at the
	* corner of the tile.
	*
	* For some interpolation modes, the dot product happens in the iterator
	* hardware. Other modes are implemented in this file, by lowering to math on
	* the coefficient vectors.
	*/

	/*
	 * Build a boolean describing whether a coefficient vector may be used for
	 * interpolation: true when the low bit of the first coefficient is clear,
	 * or where nir_fisnormal() holds for the first two coefficients.
	 *
	 * XXX: It's not clear what this is for, but it seems necessary.
	 */
	static nir_def *
	cf_valid(nir_builder *b, nir_def *cf)
	{
	   nir_def *bit = nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, cf, 0), 1), 0);

	   /* XXX: Apple's compiler actually checks that the significand is nonzero and
	    * the exponent is 0 or 1. This is probably a typo -- it doesn't make any
	    * logical sense. Presumably they just meant to check for denorms, so let's
	    * do that. Either way the tests pass.
	    */
	   nir_def *cf01 = nir_trim_vector(b, cf, 2);
	   return nir_ior(b, bit, nir_fisnormal(b, cf01));
	}

	/*
	 * Evaluate the coefficient vector cf = <A, B, C> at the centre of the
	 * current pixel displaced by offset, i.e. A * x + B * y + C with
	 * tile-relative x and y.
	 *
	 * If perspective is set, the result is further divided by component 3 of
	 * the VARYING_SLOT_POS input (RHW) loaded at the same offset. Wherever
	 * cf_valid() is false, the constant coefficient C is returned instead.
	 */
	static nir_def *
	interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
	                      bool perspective)
	{
	   /* Get the coordinate of the pixel within the tile */
	   nir_def *pixel_coords = nir_load_pixel_coord(b);
	   nir_def *tile_offs = nir_umod_imm(b, pixel_coords, 32);

	   /* Convert to float, getting the center of the pixel */
	   nir_def *center = nir_fadd_imm(b, nir_u2f32(b, tile_offs), 0.5);

	   /* Calculate the location to interpolate. offset is defined relative to
	    * the center of the pixel and is a float.
	    */
	   nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));

	   /* Interpolate with the given coefficients */
	   nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
	                              nir_channel(b, cf, 2));

	   interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);

	   /* Divide by RHW. This load will be lowered recursively. */
	   if (perspective) {
	      nir_def *bary = nir_load_barycentric_at_offset(
	         b, 32, offset, .interp_mode = INTERP_MODE_NOPERSPECTIVE);

	      nir_def *rhw = nir_load_interpolated_input(
	         b, 1, 32, bary, nir_imm_int(b, 0), .component = 3,
	         .io_semantics = {
	            .location = VARYING_SLOT_POS,
	            .num_slots = 1,
	         });

	      interp = nir_fdiv(b, interp, rhw);
	   }

	   /* Replace invalid interpolations with the constant channel */
	   return nir_bcsel(b, cf_valid(b, cf), interp, nir_channel(b, cf, 2));
	}

	/*
	 * Produce the value of a flat-shaded input from its coefficient vector.
	 */
	static nir_def *
	interpolate_flat(nir_builder *b, nir_def *coefficients)
	{
	   /* Same value anywhere, so just take the constant (affine) component. For
	    * triangle fans with the first provoking vertex, the CF layout is slightly
	    * different. I am unsure why, but Apple does the same and the bcsel is
	    * required for correctness.
	    */
	   return nir_bcsel(b, nir_load_is_first_fan_agx(b),
	                    nir_channel(b, coefficients, 1),
	                    nir_channel(b, coefficients, 2));
	}

	/*
	 * Return the interpolation mode associated with a load: flat for
	 * load_input, otherwise the mode recorded on the intrinsic feeding the
	 * load's first source.
	 */
	static enum glsl_interp_mode
	interp_mode_for_load(nir_intrinsic_instr *load)
	{
	   /* The first source is only inspected for non-load_input loads */
	   return (load->intrinsic == nir_intrinsic_load_input)
	             ? INTERP_MODE_FLAT
	             : nir_intrinsic_interp_mode(nir_src_as_intrinsic(load->src[0]));
	}

	/*
	 * Instruction filter for the lowering pass. Selects every load_input, and
	 * every load_interpolated_input whose barycentric source is
	 * load_barycentric_at_offset. The second argument is unused.
	 */
	static bool
	needs_lower(const nir_instr *instr, UNUSED const void *_)
	{
	   if (instr->type != nir_instr_type_intrinsic)
	      return false;

	   const nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

	   /* at_offset barycentrics need to be lowered */
	   if (load->intrinsic == nir_intrinsic_load_interpolated_input) {
	      return (nir_src_as_intrinsic(load->src[0])->intrinsic ==
	              nir_intrinsic_load_barycentric_at_offset);
	   }

	   /* Flat shading always lowered */
	   return (load->intrinsic == nir_intrinsic_load_input);
	}

	/*
	 * Lower a single channel of an input load.
	 *
	 * The coefficient vector for component (base component + channel) is
	 * loaded first. A load_input then yields either the layer ID (for
	 * VARYING_SLOT_LAYER) or the flat-shaded value. Any other load is
	 * interpolated at the offset given by its barycentric source and converted
	 * to the destination bit size.
	 */
	static nir_def *
	interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
	{
	   nir_def *coefficients = nir_load_coefficients_agx(
	      b, nir_get_io_offset_src(load)->ssa,
	      .component = nir_intrinsic_component(load) + channel,
	      .interp_mode = interp_mode_for_load(load),
	      .io_semantics = nir_intrinsic_io_semantics(load));

	   if (load->intrinsic == nir_intrinsic_load_input) {
	      /* Flat inputs are only handled at 32 bits */
	      assert(load->def.bit_size == 32);

	      if (nir_intrinsic_io_semantics(load).location == VARYING_SLOT_LAYER)
	         return nir_load_layer_id(b);
	      else
	         return interpolate_flat(b, coefficients);
	   } else {
	      nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);

	      /* Every mode other than noperspective gets the perspective divide */
	      nir_def *interp = interpolate_at_offset(
	         b, coefficients, bary->src[0].ssa,
	         nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE);

	      return nir_f2fN(b, interp, load->def.bit_size);
	   }
	}

	/*
	 * Lowering callback: rebuild the destination of a selected load by
	 * lowering every channel on its own and packing the results into a vector.
	 * The data argument is not used.
	 */
	static nir_def *
	lower(nir_builder *b, nir_instr *instr, void *data)
	{
	   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

	   /* Each component is loaded separately */
	   nir_def *values[NIR_MAX_VEC_COMPONENTS] = {NULL};
	   for (unsigned i = 0; i < intr->def.num_components; ++i) {
	      values[i] = interpolate_channel(b, intr, i);
	   }

	   return nir_vec(b, values, intr->def.num_components);
	}

	/*
	 * Entry point of the pass. Runs the needs_lower/lower pair over a fragment
	 * shader and returns whatever nir_shader_lower_instructions() reports
	 * (presumably whether anything changed; confirm against NIR).
	 */
	bool
	agx_nir_lower_interpolation(nir_shader *s)
	{
	   assert(s->info.stage == MESA_SHADER_FRAGMENT);

	   bool progress = nir_shader_lower_instructions(s, needs_lower, lower, NULL);

	   return progress;
	}