src/compiler/nir/nir_lower_blend.c - third_party/mesa - Git at Google

 /*
  * Copyright (C) 2019-2021 Collabora, Ltd.
  * Copyright (C) 2019 Alyssa Rosenzweig
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 /**
  * @file
  *
  * Implements the fragment pipeline (blending and writeout) in software, to be
  * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
  * shader variant on typical GPUs. This pass is useful if hardware lacks
  * fixed-function blending in part or in full.
  */

 #include "nir_lower_blend.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_format_convert.h"
 #include "util/blend.h"

 /* State shared by the two intrinsic passes run from nir_lower_blend. */
 struct ctx {
    /* Caller-supplied blend configuration; only read in this file. */
    const nir_lower_blend_options *options;
    /* Dual-source colour per render target, filled in by
     * consume_dual_stores. An entry stays NULL when no dual-source store was
     * seen for that render target.
     */
    nir_def *src1[8];
 };

 /* Combine an already-factored source term and destination term according to
  * the blend equation. SUBTRACT emits src - dst, REVERSE_SUBTRACT emits
  * dst - src, and MIN/MAX emit fmin/fmax of the two terms.
  */

 static nir_def *
 nir_blend_func(
    nir_builder *b,
    enum pipe_blend_func func,
    nir_def *src, nir_def *dst)
 {
    if (func == PIPE_BLEND_ADD)
       return nir_fadd(b, src, dst);

    if (func == PIPE_BLEND_SUBTRACT)
       return nir_fsub(b, src, dst);

    if (func == PIPE_BLEND_REVERSE_SUBTRACT)
       return nir_fsub(b, dst, src);

    if (func == PIPE_BLEND_MIN)
       return nir_fmin(b, src, dst);

    if (func == PIPE_BLEND_MAX)
       return nir_fmax(b, src, dst);

    unreachable("Invalid blend function");
 }

 /* Returns true for the blend equations whose operands are first multiplied
  * by a blend factor (ADD, SUBTRACT, REVERSE_SUBTRACT); every other function
  * value yields false.
  */

 static bool
 nir_blend_factored(enum pipe_blend_func func)
 {
    return func == PIPE_BLEND_ADD ||
           func == PIPE_BLEND_SUBTRACT ||
           func == PIPE_BLEND_REVERSE_SUBTRACT;
 }

 /* Build the SRC_ALPHA_SATURATE factor for channel chan:
  * min(src.a, 1 - dst.a) for the colour channels (chan < 3), 1.0 otherwise.
  */
 static nir_def *
 nir_alpha_saturate(
    nir_builder *b,
    nir_def *src, nir_def *dst,
    unsigned chan)
 {
    nir_def *src_alpha = nir_channel(b, src, 3);
    nir_def *dst_alpha = nir_channel(b, dst, 3);
    nir_def *unity = nir_imm_floatN_t(b, 1.0, src->bit_size);
    nir_def *inv_dst_alpha = nir_fsub(b, unity, dst_alpha);

    /* The alpha channel always uses a factor of one */
    if (chan >= 3)
       return unity;

    return nir_fmin(b, src_alpha, inv_dst_alpha);
 }

 /* Returns the scalar value of a single, non-inverted blend factor for channel
  * chan. The factor is not yet multiplied into any colour. Any factor not
  * listed here is asserted to be an inverted one and treated as unreachable.
  */

 static nir_def *
 nir_blend_factor_value(
    nir_builder *b,
    nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
    unsigned chan,
    enum pipe_blendfactor factor_without_invert)
 {
    const unsigned alpha = 3;
    nir_def *vec;
    unsigned comp;

    switch (factor_without_invert) {
    /* Factors that are not a plain channel read */
    case PIPE_BLENDFACTOR_ONE:
       return nir_imm_floatN_t(b, 1.0, src->bit_size);
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
       return nir_alpha_saturate(b, src, dst, chan);

    /* Colour factors read the channel being blended */
    case PIPE_BLENDFACTOR_SRC_COLOR:
       vec = src;
       comp = chan;
       break;
    case PIPE_BLENDFACTOR_SRC1_COLOR:
       vec = src1;
       comp = chan;
       break;
    case PIPE_BLENDFACTOR_DST_COLOR:
       vec = dst;
       comp = chan;
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
       vec = bconst;
       comp = chan;
       break;

    /* Alpha factors always read component 3 */
    case PIPE_BLENDFACTOR_SRC_ALPHA:
       vec = src;
       comp = alpha;
       break;
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
       vec = src1;
       comp = alpha;
       break;
    case PIPE_BLENDFACTOR_DST_ALPHA:
       vec = dst;
       comp = alpha;
       break;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
       vec = bconst;
       comp = alpha;
       break;

    default:
       assert(util_blendfactor_is_inverted(factor_without_invert));
       unreachable("Unexpected inverted factor");
    }

    return nir_channel(b, vec, comp);
 }

 /* Clamp x to [-1.0, +1.0], using immediates of x's bit size as the bounds. */
 static nir_def *
 nir_build_fsat_signed(nir_builder *b, nir_def *x)
 {
    nir_def *lower = nir_imm_floatN_t(b, -1.0, x->bit_size);
    nir_def *upper = nir_imm_floatN_t(b, +1.0, x->bit_size);

    return nir_fclamp(b, x, lower, upper);
 }

 /* Clamp x to the range of the render target format: [0, 1] for unorm,
  * [-1, 1] for snorm. Any other format returns x unchanged.
  */
 static nir_def *
 nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
 {
    if (util_format_is_unorm(format))
       return nir_fsat(b, x);

    if (util_format_is_snorm(format))
       return nir_build_fsat_signed(b, x);

    return x;
 }

 /* Does blending this channel need the destination colour? */
 static bool
 channel_uses_dest(nir_lower_blend_channel chan)
 {
    /* MIN/MAX ignore the factors and always combine with the destination */
    const bool factored = nir_blend_factored(chan.func);
    if (!factored)
       return true;

    /* A nonzero destination factor keeps the destination term alive */
    if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
       return true;

    /* Otherwise the destination matters only if the source factor reads it */
    const enum pipe_blendfactor base =
       util_blendfactor_without_invert(chan.src_factor);

    return base == PIPE_BLENDFACTOR_DST_COLOR ||
           base == PIPE_BLENDFACTOR_DST_ALPHA ||
           base == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
 }

 /* Multiply raw_scalar by the blend factor selected by factor for channel
  * chan. An inverted factor is evaluated as (1 - f) of its non-inverted
  * counterpart. The format parameter is not used by this function.
  */
 static nir_def *
 nir_blend_factor(
    nir_builder *b,
    nir_def *raw_scalar,
    nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
    unsigned chan,
    enum pipe_blendfactor factor,
    enum pipe_format format)
 {
    const enum pipe_blendfactor base = util_blendfactor_without_invert(factor);
    nir_def *scale =
       nir_blend_factor_value(b, src, src1, dst, bconst, chan, base);

    if (util_blendfactor_is_inverted(factor)) {
       nir_def *negated = nir_fneg(b, scale);
       scale = nir_fadd_imm(b, negated, 1.0);
    }

    return nir_fmul(b, raw_scalar, scale);
 }

 /* Apply a colour write mask: component i of the result is taken from src
  * when bit i of mask is set, and from dst otherwise.
  */

 static nir_def *
 nir_color_mask(
    nir_builder *b,
    unsigned mask,
    nir_def *src,
    nir_def *dst)
 {
    nir_def *red = nir_channel(b, (mask & 0x1) ? src : dst, 0);
    nir_def *green = nir_channel(b, (mask & 0x2) ? src : dst, 1);
    nir_def *blue = nir_channel(b, (mask & 0x4) ? src : dst, 2);
    nir_def *alpha = nir_channel(b, (mask & 0x8) ? src : dst, 3);

    return nir_vec4(b, red, green, blue, alpha);
 }

 /* Emit the integer ALU sequence implementing one logic op on src and dst.
  *
  * Bitwise inversion of an operand or result is built as XOR with bitmask
  * rather than a plain NOT. PIPE_LOGICOP_NOOP must not reach this function;
  * it is marked unreachable here.
  */
 static nir_def *
 nir_logicop_func(
    nir_builder *b,
    enum pipe_logicop func,
    nir_def *src, nir_def *dst, nir_def *bitmask)
 {
    switch (func) {
    case PIPE_LOGICOP_CLEAR:
       return nir_imm_ivec4(b, 0, 0, 0, 0);
    case PIPE_LOGICOP_NOR:
       return nir_ixor(b, nir_ior(b, src, dst), bitmask);
    case PIPE_LOGICOP_AND_INVERTED:
       return nir_iand(b, nir_ixor(b, src, bitmask), dst);
    case PIPE_LOGICOP_COPY_INVERTED:
       return nir_ixor(b, src, bitmask);
    case PIPE_LOGICOP_AND_REVERSE:
       return nir_iand(b, src, nir_ixor(b, dst, bitmask));
    case PIPE_LOGICOP_INVERT:
       return nir_ixor(b, dst, bitmask);
    case PIPE_LOGICOP_XOR:
       return nir_ixor(b, src, dst);
    case PIPE_LOGICOP_NAND:
       return nir_ixor(b, nir_iand(b, src, dst), bitmask);
    case PIPE_LOGICOP_AND:
       return nir_iand(b, src, dst);
    case PIPE_LOGICOP_EQUIV:
       return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
    case PIPE_LOGICOP_NOOP:
       unreachable("optimized out");
    case PIPE_LOGICOP_OR_INVERTED:
       return nir_ior(b, nir_ixor(b, src, bitmask), dst);
    case PIPE_LOGICOP_COPY:
       return src;
    case PIPE_LOGICOP_OR_REVERSE:
       return nir_ior(b, src, nir_ixor(b, dst, bitmask));
    case PIPE_LOGICOP_OR:
       return nir_ior(b, src, dst);
    case PIPE_LOGICOP_SET:
       return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
    }

    /* Diagnostic spelling fixed ("logciop" -> "logicop") so it can be grepped */
    unreachable("Invalid logicop function");
 }

 /* Apply logic op func to src and dst for a render target of the given format.
  *
  * Normalized inputs are converted to their fixed-point integer encoding,
  * combined bitwise, and converted back to float. Pure-integer inputs are
  * combined directly. Work is done at 32 bits and the result is converted back
  * to the bit size of src.
  */
 static nir_def *
 nir_blend_logicop(nir_builder *b,
                   enum pipe_format format, enum pipe_logicop func,
                   nir_def *src, nir_def *dst)
 {
    const unsigned orig_bit_size = src->bit_size;
    const struct util_format_description *desc =
       util_format_description(format);

    /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
     *
     *    Logical operation has no effect on a floating-point destination color
     *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
     *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
     *    corresponding to the destination buffer is SRGB (see section 9.2.3).
     *    However, if logical operation is enabled, blending is still disabled.
     */
    if (util_format_is_float(format) || util_format_is_srgb(format))
       return src;

    const bool is_unorm = util_format_is_unorm(format);
    const bool is_snorm = util_format_is_snorm(format);
    const bool resize = (orig_bit_size != 32);

    nir_alu_type conv_type = nir_type_float;
    if (util_format_is_pure_integer(format))
       conv_type = nir_type_uint;

    if (resize) {
       src = nir_convert_to_bit_size(b, src, conv_type, 32);
       dst = nir_convert_to_bit_size(b, dst, conv_type, 32);
    }

    assert(src->num_components <= 4);
    assert(dst->num_components <= 4);

    /* Per-channel widths and the matching all-ones masks used for inversion */
    unsigned bits[4];
    nir_const_value mask[4];
    for (unsigned i = 0; i < 4; ++i) {
       bits[i] = desc->channel[i].size;
       mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);
    }

    if (is_unorm) {
       src = nir_format_float_to_unorm(b, src, bits);
       dst = nir_format_float_to_unorm(b, dst, bits);
    } else if (is_snorm) {
       src = nir_format_float_to_snorm(b, src, bits);
       dst = nir_format_float_to_snorm(b, dst, bits);
    } else {
       assert(util_format_is_pure_integer(format));
    }

    nir_def *bitmask = nir_build_imm(b, 4, 32, mask);
    nir_def *out = nir_logicop_func(b, func, src, dst, bitmask);

    if (is_unorm) {
       out = nir_format_unorm_to_float(b, out, bits);
    } else if (is_snorm) {
       /* Sign extend before converting so the i2f in snorm_to_float works */
       out = nir_format_sign_extend_ivec(b, out, bits);
       out = nir_format_snorm_to_float(b, out, bits);
    } else {
       assert(util_format_is_pure_integer(format));
    }

    if (resize)
       out = nir_convert_to_bit_size(b, out, conv_type, orig_bit_size);

    return out;
 }

 /* True when channel i is within the format's channel count and is not a
  * VOID (padding) channel.
  */
 static bool
 channel_exists(const struct util_format_description *desc, unsigned i)
 {
    if (i >= desc->nr_channels)
       return false;

    return desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
 }

 /* Given a blend state, the source colour, the optional dual-source colour and
  * the destination colour, build and return the blended 4-component colour for
  * render target rt.
  *
  * src1 may be NULL, in which case a zero vector is substituted. Inputs are
  * clamped to the render target format before blending, and destination
  * channels missing from the format read back as 0 (colour) or 1 (alpha).
  */

 static nir_def *
 nir_blend(
    nir_builder *b,
    const nir_lower_blend_options *options,
    unsigned rt,
    nir_def *src, nir_def *src1, nir_def *dst)
 {
    /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
     * blend with in that case, as long as we don't dereference NULL.
     */
    if (!src1)
       src1 = nir_imm_zero(b, 4, src->bit_size);

    /* Grab the blend constant ahead of time */
    nir_def *bconst;
    if (options->scalar_blend_const) {
       bconst = nir_vec4(b,
                         nir_load_blend_const_color_r_float(b),
                         nir_load_blend_const_color_g_float(b),
                         nir_load_blend_const_color_b_float(b),
                         nir_load_blend_const_color_a_float(b));
    } else {
       bconst = nir_load_blend_const_color_rgba(b);
    }

    /* Bring the auxiliary inputs to the source's precision */
    if (src->bit_size == 16) {
       bconst = nir_f2f16(b, bconst);
       src1 = nir_f2f16(b, src1);
    }

    /* Fixed-point framebuffers require their inputs clamped. */
    enum pipe_format format = options->format[rt];

    /* The input colours need to be clamped to the format. Contrary to the
     * OpenGL/Vulkan specs, it really is the inputs that get clamped and not the
     * intermediate blend factors. This matches the CTS and hardware behaviour.
     */
    src = nir_fsat_to_format(b, src, format);
    bconst = nir_fsat_to_format(b, bconst, format);

    /* src1 was defaulted above, so it is never NULL here and needs no guard */
    src1 = nir_fsat_to_format(b, src1, format);

    /* DST_ALPHA reads back 1.0 if there is no alpha channel */
    const struct util_format_description *desc =
       util_format_description(format);

    nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
    nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

    dst = nir_vec4(b,
                   channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
                   channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
                   channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
                   channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);

    /* We blend per channel and recombine later */
    nir_def *channels[4];

    for (unsigned c = 0; c < 4; ++c) {
       /* Channels 0-2 use the RGB equation, channel 3 the alpha equation */
       nir_lower_blend_channel chan =
          (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

       nir_def *psrc = nir_channel(b, src, c);
       nir_def *pdst = nir_channel(b, dst, c);

       /* MIN/MAX combine the raw colours; the factors are skipped */
       if (nir_blend_factored(chan.func)) {
          psrc = nir_blend_factor(
             b, psrc,
             src, src1, dst, bconst, c,
             chan.src_factor, format);

          pdst = nir_blend_factor(
             b, pdst,
             src, src1, dst, bconst, c,
             chan.dst_factor, format);
       }

       channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
    }

    return nir_vec(b, channels, 4);
 }

 /* Map a fragment output location to a render target index relative to
  * FRAG_RESULT_DATA0. Locations below FRAG_RESULT_DATA0 return -1.
  */
 static int
 color_index_for_location(unsigned location)
 {
    assert(location != FRAG_RESULT_COLOR &&
           "gl_FragColor must be lowered before nir_lower_blend");

    if (location >= FRAG_RESULT_DATA0)
       return location - FRAG_RESULT_DATA0;

    return -1;
 }

 /*
  * Does this channel's blend state encode the "replace" mode, dest = source?
  * That is exactly ADD with a source factor of ONE and a destination factor
  * of ZERO. Callers may skip blending for such a channel.
  */
 static bool
 nir_blend_replace_channel(const nir_lower_blend_channel *c)
 {
    if (c->func != PIPE_BLEND_ADD)
       return false;

    if (c->src_factor != PIPE_BLENDFACTOR_ONE)
       return false;

    return c->dst_factor == PIPE_BLENDFACTOR_ZERO;
 }

 /* A render target is in "replace" mode when both its RGB and its alpha blend
  * state are.
  */
 static bool
 nir_blend_replace_rt(const nir_lower_blend_rt *rt)
 {
    if (!nir_blend_replace_channel(&rt->rgb))
       return false;

    return nir_blend_replace_channel(&rt->alpha);
 }

 /* Per-intrinsic callback: lower one colour store_output to blended output.
  *
  * data points to a struct ctx holding the options and the collected
  * dual-source colours. Returns true when the instruction stream was changed
  * (the store was removed, or rewritten and moved to the end of its block),
  * and false when the intrinsic is left alone.
  */
 static bool
 nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
 {
    struct ctx *ctx = data;
    const nir_lower_blend_options *options = ctx->options;
    if (store->intrinsic != nir_intrinsic_store_output)
       return false;

    nir_io_semantics sem = nir_intrinsic_io_semantics(store);
    int rt = color_index_for_location(sem.location);

    /* No blend lowering requested on this RT */
    if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
       return false;

    /* Only process stores once. Pass flags are cleared by consume_dual_stores */
    if (store->instr.pass_flags)
       return false;

    store->instr.pass_flags = 1;

    /* Stores are sunk to the bottom of the block to ensure that the dual
     * source colour is already written.
     */
    b->cursor = nir_after_block(store->instr.block);

    const enum pipe_format format = options->format[rt];
    enum pipe_logicop logicop_func = options->logicop_func;

    /* From the Vulkan spec ("Logical operations"):
     *
     *    Logical operations are not applied to floating-point or sRGB format
     *    color attachments...
     *
     *    If logicOpEnable is VK_TRUE... blending of all attachments is treated
     *    as if it were disabled. Any attachments using color formats for which
     *    logical operations are not supported simply pass through the color
     *    values unmodified.
     *
     * The semantic for unsupported formats is equivalent to a logicop of COPY.
     * It is /not/ equivalent to disabled logicops (which would incorrectly apply
     * blending). To implement this spec text with minimal special casing, we
     * override the logicop func to COPY for unsupported formats.
     */
    if (util_format_is_float(format) || util_format_is_srgb(format)) {
       logicop_func = PIPE_LOGICOP_COPY;
    }

    /* Don't bother copying the destination to the source for disabled RTs.
     * A zero colormask or an enabled NOOP logicop leaves the destination
     * untouched, so the store is simply dropped.
     */
    if (options->rt[rt].colormask == 0 ||
        (options->logicop_enable && logicop_func == PIPE_LOGICOP_NOOP)) {

       nir_instr_remove(&store->instr);
       return true;
    }

    /* Grab the input color.  We always want 4 channels during blend.  Dead
     * code will clean up any channels we don't need.
     */
    nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);

    assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");

    /* Grab the previous fragment color if we need it */
    nir_def *dst;

    if (channel_uses_dest(options->rt[rt].rgb) ||
        channel_uses_dest(options->rt[rt].alpha) ||
        options->logicop_enable ||
        options->rt[rt].colormask != BITFIELD_MASK(4)) {

       /* Record in the shader info that this output is read back */
       b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
       b->shader->info.fs.uses_fbfetch_output = true;
       b->shader->info.fs.uses_sample_shading = true;
       sem.fb_fetch_output = true;

       dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
                             nir_imm_int(b, 0),
                             .dest_type = nir_intrinsic_src_type(store),
                             .io_semantics = sem);
    } else {
       /* Destination is never consumed; an undef keeps the code below uniform */
       dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
    }

    /* Blend the two colors per the passed options. We only call nir_blend if
     * blending is enabled with a blend mode other than replace (independent of
     * the color mask). That avoids unnecessary fsat instructions in the common
     * case where blending is disabled at an API level, but the driver calls
     * nir_blend (possibly for color masking).
     */
    nir_def *blended = src;

    if (options->logicop_enable) {
       blended = nir_blend_logicop(b, format, logicop_func, src, dst);
    } else if (!util_format_is_pure_integer(format) &&
               !nir_blend_replace_rt(&options->rt[rt])) {
       assert(!util_format_is_scaled(format));
       blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
    }

    /* Apply a colormask if necessary */
    if (options->rt[rt].colormask != BITFIELD_MASK(4))
       blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

    /* Shave off any components we don't want to store */
    const unsigned num_components = util_format_get_nr_components(format);
    blended = nir_trim_vector(b, blended, num_components);

    /* Grow or shrink the store destination as needed */
    store->num_components = num_components;
    nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
                                           nir_component_mask(num_components));

    /* Write out the final color instead of the input */
    nir_src_rewrite(&store->src[0], blended);

    /* Sink to bottom: re-insert the store at the cursor set above, after the
     * blend code that was just emitted.
     */
    nir_instr_remove(&store->instr);
    nir_builder_instr_insert(b, &store->instr);
    return true;
 }

 /*
  * When nir_lower_blend is in use, the dual-source store_output exists only to
  * feed blending. This callback records the stored value for each render
  * target in the array passed through data and removes the store, so the
  * backend never sees it. It also resets pass_flags on every store_output it
  * visits.
  */
 static bool
 consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
 {
    if (store->intrinsic != nir_intrinsic_store_output)
       return false;

    /* nir_lower_blend_instr uses pass_flags as its "already handled" marker,
     * so reset it here for every store_output.
     */
    store->instr.pass_flags = 0;

    const nir_io_semantics semantics = nir_intrinsic_io_semantics(store);
    const bool is_dual_source = (semantics.dual_source_blend_index != 0);

    if (!is_dual_source)
       return false;

    int rt = color_index_for_location(semantics.location);
    assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");

    nir_def **collected = data;
    collected[rt] = store->src[0].ssa;

    nir_instr_remove(&store->instr);
    return true;
 }

 /** Lower blending to framebuffer fetch and some math
  *
  * Runs two intrinsic passes over the fragment shader: the first collects and
  * removes dual-source colour stores, the second rewrites the remaining colour
  * stores to write the blended value. Returns true if either pass made
  * progress.
  *
  * This pass requires that shader I/O is lowered to explicit load/store
  * instructions using nir_lower_io.
  */
 bool
 nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
 {
    assert(shader->info.stage == MESA_SHADER_FRAGMENT);

    struct ctx state = { .options = options };

    /* Both passes always run; only the combined result is reported */
    const bool consumed =
       nir_shader_intrinsics_pass(shader, consume_dual_stores,
                                  nir_metadata_control_flow,
                                  state.src1);

    const bool lowered =
       nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
                                  nir_metadata_control_flow,
                                  &state);

    return consumed || lowered;
 }
	/*
	* Copyright (C) 2019-2021 Collabora, Ltd.
	* Copyright (C) 2019 Alyssa Rosenzweig
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	/**
	* @file
	*
	* Implements the fragment pipeline (blending and writeout) in software, to be
	* run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
	* shader variant on typical GPUs. This pass is useful if hardware lacks
	* fixed-function blending in part or in full.
	*/

	#include "nir_lower_blend.h"
	#include "compiler/nir/nir.h"
	#include "compiler/nir/nir_builder.h"
	#include "compiler/nir/nir_format_convert.h"
	#include "util/blend.h"

	/* State shared by the blend-lowering passes. */
	struct ctx {
	/* Caller-supplied blend configuration. */
	const nir_lower_blend_options *options;
	/* One dual-source colour slot per render target (8 slots). */
	nir_def *src1[8];
	};

	/* Combine an already-factored source term and destination term according to
	 * the blend equation. SUBTRACT emits src - dst, REVERSE_SUBTRACT emits
	 * dst - src, and MIN/MAX emit fmin/fmax of the two terms.
	 *
	 * src and dst are NIR SSA definitions and are passed by pointer, as every
	 * builder call below requires.
	 */

	static nir_def *
	nir_blend_func(
	   nir_builder *b,
	   enum pipe_blend_func func,
	   nir_def *src, nir_def *dst)
	{
	   switch (func) {
	   case PIPE_BLEND_ADD:
	      return nir_fadd(b, src, dst);
	   case PIPE_BLEND_SUBTRACT:
	      return nir_fsub(b, src, dst);
	   case PIPE_BLEND_REVERSE_SUBTRACT:
	      return nir_fsub(b, dst, src);
	   case PIPE_BLEND_MIN:
	      return nir_fmin(b, src, dst);
	   case PIPE_BLEND_MAX:
	      return nir_fmax(b, src, dst);
	   }

	   unreachable("Invalid blend function");
	}

	/* Returns true for the blend equations whose operands are first multiplied
	 * by a blend factor (ADD, SUBTRACT, REVERSE_SUBTRACT); every other function
	 * value yields false.
	 */

	static bool
	nir_blend_factored(enum pipe_blend_func func)
	{
	   return func == PIPE_BLEND_ADD ||
	          func == PIPE_BLEND_SUBTRACT ||
	          func == PIPE_BLEND_REVERSE_SUBTRACT;
	}

	/* Build the SRC_ALPHA_SATURATE factor for channel chan:
	 * min(src.a, 1 - dst.a) for the colour channels (chan < 3), 1.0 otherwise.
	 *
	 * src and dst are pointers to NIR SSA definitions; the body dereferences
	 * src for its bit size.
	 */
	static nir_def *
	nir_alpha_saturate(
	   nir_builder *b,
	   nir_def *src, nir_def *dst,
	   unsigned chan)
	{
	   nir_def *Asrc = nir_channel(b, src, 3);
	   nir_def *Adst = nir_channel(b, dst, 3);
	   nir_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
	   nir_def *Adsti = nir_fsub(b, one, Adst);

	   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
	}

	/* Returns the scalar value of a single, non-inverted blend factor for channel
	 * chan. The factor is not yet multiplied into any colour. Colour factors
	 * read component chan of the relevant vector; alpha factors read
	 * component 3. Any factor not listed here is asserted to be an inverted one
	 * and treated as unreachable.
	 */

	static nir_def *
	nir_blend_factor_value(
	   nir_builder *b,
	   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
	   unsigned chan,
	   enum pipe_blendfactor factor_without_invert)
	{
	   switch (factor_without_invert) {
	   case PIPE_BLENDFACTOR_ONE:
	      return nir_imm_floatN_t(b, 1.0, src->bit_size);
	   case PIPE_BLENDFACTOR_SRC_COLOR:
	      return nir_channel(b, src, chan);
	   case PIPE_BLENDFACTOR_SRC1_COLOR:
	      return nir_channel(b, src1, chan);
	   case PIPE_BLENDFACTOR_DST_COLOR:
	      return nir_channel(b, dst, chan);
	   case PIPE_BLENDFACTOR_SRC_ALPHA:
	      return nir_channel(b, src, 3);
	   case PIPE_BLENDFACTOR_SRC1_ALPHA:
	      return nir_channel(b, src1, 3);
	   case PIPE_BLENDFACTOR_DST_ALPHA:
	      return nir_channel(b, dst, 3);
	   case PIPE_BLENDFACTOR_CONST_COLOR:
	      return nir_channel(b, bconst, chan);
	   case PIPE_BLENDFACTOR_CONST_ALPHA:
	      return nir_channel(b, bconst, 3);
	   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
	      return nir_alpha_saturate(b, src, dst, chan);
	   default:
	      assert(util_blendfactor_is_inverted(factor_without_invert));
	      unreachable("Unexpected inverted factor");
	   }
	}

	/* Clamp x to [-1.0, +1.0], using immediates of x's bit size as the bounds.
	 * Both the builder and x are taken by pointer; x is dereferenced for its
	 * bit size.
	 */
	static nir_def *
	nir_build_fsat_signed(nir_builder *b, nir_def *x)
	{
	   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
	                     nir_imm_floatN_t(b, +1.0, x->bit_size));
	}

	/* Clamp x to the range of the render target format: [0, 1] for unorm,
	 * [-1, 1] for snorm. Any other format returns x unchanged. The builder and
	 * x are taken by pointer, matching the nir_def * return type.
	 */
	static nir_def *
	nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
	{
	   if (util_format_is_unorm(format))
	      return nir_fsat(b, x);
	   else if (util_format_is_snorm(format))
	      return nir_build_fsat_signed(b, x);
	   else
	      return x;
	}

	/* Does blending this channel need the destination colour? */
	static bool
	channel_uses_dest(nir_lower_blend_channel chan)
	{
	   /* MIN/MAX ignore the factors and always combine with the destination */
	   const bool factored = nir_blend_factored(chan.func);
	   if (!factored)
	      return true;

	   /* A nonzero destination factor keeps the destination term alive */
	   if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
	      return true;

	   /* Otherwise the destination matters only if the source factor reads it */
	   const enum pipe_blendfactor base =
	      util_blendfactor_without_invert(chan.src_factor);

	   return base == PIPE_BLENDFACTOR_DST_COLOR ||
	          base == PIPE_BLENDFACTOR_DST_ALPHA ||
	          base == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
	}

	/* Multiply raw_scalar by the blend factor selected by factor for channel
	 * chan. An inverted factor is evaluated as (1 - f) of its non-inverted
	 * counterpart. The format parameter is not used by this function.
	 *
	 * src, src1, dst and bconst are pointers to NIR SSA definitions.
	 */
	static nir_def *
	nir_blend_factor(
	   nir_builder *b,
	   nir_def *raw_scalar,
	   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
	   unsigned chan,
	   enum pipe_blendfactor factor,
	   enum pipe_format format)
	{
	   nir_def *f =
	      nir_blend_factor_value(b, src, src1, dst, bconst, chan,
	                             util_blendfactor_without_invert(factor));

	   /* 1 - f, built as (-f) + 1 */
	   if (util_blendfactor_is_inverted(factor))
	      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);

	   return nir_fmul(b, raw_scalar, f);
	}

	/* Apply a colour write mask: component i of the result is taken from src
	 * when bit i of mask is set, and from dst otherwise.
	 */

	static nir_def *
	nir_color_mask(
	   nir_builder *b,
	   unsigned mask,
	   nir_def *src,
	   nir_def *dst)
	{
	   nir_def *red = nir_channel(b, (mask & 0x1) ? src : dst, 0);
	   nir_def *green = nir_channel(b, (mask & 0x2) ? src : dst, 1);
	   nir_def *blue = nir_channel(b, (mask & 0x4) ? src : dst, 2);
	   nir_def *alpha = nir_channel(b, (mask & 0x8) ? src : dst, 3);

	   return nir_vec4(b, red, green, blue, alpha);
	}

	/* Emit the integer ALU sequence implementing one logic op on src and dst.
	 *
	 * Bitwise inversion of an operand or result is built as XOR with bitmask
	 * rather than a plain NOT. PIPE_LOGICOP_NOOP must not reach this function;
	 * it is marked unreachable here. src, dst and bitmask are pointers to NIR
	 * SSA definitions.
	 */
	static nir_def *
	nir_logicop_func(
	   nir_builder *b,
	   enum pipe_logicop func,
	   nir_def *src, nir_def *dst, nir_def *bitmask)
	{
	   switch (func) {
	   case PIPE_LOGICOP_CLEAR:
	      return nir_imm_ivec4(b, 0, 0, 0, 0);
	   case PIPE_LOGICOP_NOR:
	      return nir_ixor(b, nir_ior(b, src, dst), bitmask);
	   case PIPE_LOGICOP_AND_INVERTED:
	      return nir_iand(b, nir_ixor(b, src, bitmask), dst);
	   case PIPE_LOGICOP_COPY_INVERTED:
	      return nir_ixor(b, src, bitmask);
	   case PIPE_LOGICOP_AND_REVERSE:
	      return nir_iand(b, src, nir_ixor(b, dst, bitmask));
	   case PIPE_LOGICOP_INVERT:
	      return nir_ixor(b, dst, bitmask);
	   case PIPE_LOGICOP_XOR:
	      return nir_ixor(b, src, dst);
	   case PIPE_LOGICOP_NAND:
	      return nir_ixor(b, nir_iand(b, src, dst), bitmask);
	   case PIPE_LOGICOP_AND:
	      return nir_iand(b, src, dst);
	   case PIPE_LOGICOP_EQUIV:
	      return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
	   case PIPE_LOGICOP_NOOP:
	      unreachable("optimized out");
	   case PIPE_LOGICOP_OR_INVERTED:
	      return nir_ior(b, nir_ixor(b, src, bitmask), dst);
	   case PIPE_LOGICOP_COPY:
	      return src;
	   case PIPE_LOGICOP_OR_REVERSE:
	      return nir_ior(b, src, nir_ixor(b, dst, bitmask));
	   case PIPE_LOGICOP_OR:
	      return nir_ior(b, src, dst);
	   case PIPE_LOGICOP_SET:
	      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
	   }

	   /* Diagnostic spelling fixed ("logciop" -> "logicop") so it can be grepped */
	   unreachable("Invalid logicop function");
	}

	/* Apply logic op func to src and dst for a render target of the given format.
	 *
	 * Normalized inputs are converted to their fixed-point integer encoding,
	 * combined bitwise, and converted back to float. Pure-integer inputs are
	 * combined directly. Work is done at 32 bits and the result is converted back
	 * to the bit size of src. src and dst are pointers to NIR SSA definitions.
	 */
	static nir_def *
	nir_blend_logicop(nir_builder *b,
	                  enum pipe_format format, enum pipe_logicop func,
	                  nir_def *src, nir_def *dst)
	{
	   unsigned bit_size = src->bit_size;
	   const struct util_format_description *format_desc =
	      util_format_description(format);

	   /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
	    *
	    *    Logical operation has no effect on a floating-point destination color
	    *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
	    *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
	    *    corresponding to the destination buffer is SRGB (see section 9.2.3).
	    *    However, if logical operation is enabled, blending is still disabled.
	    */
	   if (util_format_is_float(format) || util_format_is_srgb(format))
	      return src;

	   nir_alu_type type =
	      util_format_is_pure_integer(format) ? nir_type_uint : nir_type_float;

	   /* Do the bitwise work at 32 bits; converted back at the end */
	   if (bit_size != 32) {
	      src = nir_convert_to_bit_size(b, src, type, 32);
	      dst = nir_convert_to_bit_size(b, dst, type, 32);
	   }

	   assert(src->num_components <= 4);
	   assert(dst->num_components <= 4);

	   unsigned bits[4];
	   for (int i = 0; i < 4; ++i)
	      bits[i] = format_desc->channel[i].size;

	   if (util_format_is_unorm(format)) {
	      src = nir_format_float_to_unorm(b, src, bits);
	      dst = nir_format_float_to_unorm(b, dst, bits);
	   } else if (util_format_is_snorm(format)) {
	      src = nir_format_float_to_snorm(b, src, bits);
	      dst = nir_format_float_to_snorm(b, dst, bits);
	   } else {
	      assert(util_format_is_pure_integer(format));
	   }

	   /* All-ones mask per channel width, used for inversion */
	   nir_const_value mask[4];
	   for (int i = 0; i < 4; ++i)
	      mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);

	   nir_def *out = nir_logicop_func(b, func, src, dst,
	                                   nir_build_imm(b, 4, 32, mask));

	   if (util_format_is_unorm(format)) {
	      out = nir_format_unorm_to_float(b, out, bits);
	   } else if (util_format_is_snorm(format)) {
	      /* Sign extend before converting so the i2f in snorm_to_float works */
	      out = nir_format_sign_extend_ivec(b, out, bits);
	      out = nir_format_snorm_to_float(b, out, bits);
	   } else {
	      assert(util_format_is_pure_integer(format));
	   }

	   if (bit_size != 32)
	      out = nir_convert_to_bit_size(b, out, type, bit_size);

	   return out;
	}

	/* True when channel i is within the format's channel count and is not a
	 * VOID (padding) channel.
	 */
	static bool
	channel_exists(const struct util_format_description *desc, unsigned i)
	{
	   if (i >= desc->nr_channels)
	      return false;

	   return desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
	}

	/* Given a blend state, the source colour, the optional dual-source colour and
	 * the destination colour, build and return the blended 4-component colour for
	 * render target rt.
	 *
	 * src1 may be NULL, in which case a zero vector is substituted. Inputs are
	 * clamped to the render target format before blending, and destination
	 * channels missing from the format read back as 0 (colour) or 1 (alpha).
	 * src, src1 and dst are pointers to NIR SSA definitions.
	 */

	static nir_def *
	nir_blend(
	   nir_builder *b,
	   const nir_lower_blend_options *options,
	   unsigned rt,
	   nir_def *src, nir_def *src1, nir_def *dst)
	{
	   /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
	    * blend with in that case, as long as we don't dereference NULL.
	    */
	   if (!src1)
	      src1 = nir_imm_zero(b, 4, src->bit_size);

	   /* Grab the blend constant ahead of time */
	   nir_def *bconst;
	   if (options->scalar_blend_const) {
	      bconst = nir_vec4(b,
	                        nir_load_blend_const_color_r_float(b),
	                        nir_load_blend_const_color_g_float(b),
	                        nir_load_blend_const_color_b_float(b),
	                        nir_load_blend_const_color_a_float(b));
	   } else {
	      bconst = nir_load_blend_const_color_rgba(b);
	   }

	   /* Bring the auxiliary inputs to the source's precision */
	   if (src->bit_size == 16) {
	      bconst = nir_f2f16(b, bconst);
	      src1 = nir_f2f16(b, src1);
	   }

	   /* Fixed-point framebuffers require their inputs clamped. */
	   enum pipe_format format = options->format[rt];

	   /* The input colours need to be clamped to the format. Contrary to the
	    * OpenGL/Vulkan specs, it really is the inputs that get clamped and not the
	    * intermediate blend factors. This matches the CTS and hardware behaviour.
	    */
	   src = nir_fsat_to_format(b, src, format);
	   bconst = nir_fsat_to_format(b, bconst, format);

	   /* src1 was defaulted above, so it is never NULL here and needs no guard */
	   src1 = nir_fsat_to_format(b, src1, format);

	   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
	   const struct util_format_description *desc =
	      util_format_description(format);

	   nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
	   nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

	   dst = nir_vec4(b,
	                  channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
	                  channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
	                  channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
	                  channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);

	   /* We blend per channel and recombine later */
	   nir_def *channels[4];

	   for (unsigned c = 0; c < 4; ++c) {
	      /* Channels 0-2 use the RGB equation, channel 3 the alpha equation */
	      nir_lower_blend_channel chan =
	         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

	      nir_def *psrc = nir_channel(b, src, c);
	      nir_def *pdst = nir_channel(b, dst, c);

	      /* MIN/MAX combine the raw colours; the factors are skipped */
	      if (nir_blend_factored(chan.func)) {
	         psrc = nir_blend_factor(
	            b, psrc,
	            src, src1, dst, bconst, c,
	            chan.src_factor, format);

	         pdst = nir_blend_factor(
	            b, pdst,
	            src, src1, dst, bconst, c,
	            chan.dst_factor, format);
	      }

	      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
	   }

	   return nir_vec(b, channels, 4);
	}

	/* Translate a fragment output location into a zero-based colour index
	 * relative to FRAG_RESULT_DATA0. Locations below FRAG_RESULT_DATA0 yield -1.
	 * FRAG_RESULT_COLOR is rejected by assertion.
	 */
	static int
	color_index_for_location(unsigned location)
	{
	   assert(location != FRAG_RESULT_COLOR &&
	          "gl_FragColor must be lowered before nir_lower_blend");

	   if (location >= FRAG_RESULT_DATA0)
	      return (int)(location - FRAG_RESULT_DATA0);

	   return -1;
	}

	/*
	 * Check whether a channel's blend state is the "replace" mode, i.e.
	 * dest = source, which is encoded as ADD with a source factor of ONE and a
	 * destination factor of ZERO. Such channels can skip the blend math.
	 */
	static bool
	nir_blend_replace_channel(const nir_lower_blend_channel *c)
	{
	   if (c->func != PIPE_BLEND_ADD)
	      return false;

	   if (c->src_factor != PIPE_BLENDFACTOR_ONE)
	      return false;

	   return c->dst_factor == PIPE_BLENDFACTOR_ZERO;
	}

	/* A render target is in "replace" mode only if both its RGB and its alpha
	 * channel states are.
	 */
	static bool
	nir_blend_replace_rt(const nir_lower_blend_rt *rt)
	{
	   if (!nir_blend_replace_channel(&rt->rgb))
	      return false;

	   return nir_blend_replace_channel(&rt->alpha);
	}

	/* Per-intrinsic callback: lower blending for one colour store_output.
	 *
	 * data points to the pass's struct ctx. Returns true when the store was
	 * rewritten or removed, and false when it is left untouched (not a
	 * store_output, no format set for its render target, or already processed).
	 */
	static bool
	nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
	{
	   struct ctx *ctx = data;
	   const nir_lower_blend_options *options = ctx->options;
	   if (store->intrinsic != nir_intrinsic_store_output)
	      return false;

	   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
	   int rt = color_index_for_location(sem.location);

	   /* No blend lowering requested on this RT */
	   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
	      return false;

	   /* Only process stores once. Pass flags are cleared by consume_dual_stores */
	   if (store->instr.pass_flags)
	      return false;

	   store->instr.pass_flags = 1;

	   /* Stores are sunk to the bottom of the block to ensure that the dual
	    * source colour is already written.
	    */
	   b->cursor = nir_after_block(store->instr.block);

	   const enum pipe_format format = options->format[rt];
	   enum pipe_logicop logicop_func = options->logicop_func;

	   /* From the Vulkan spec ("Logical operations"):
	    *
	    *    Logical operations are not applied to floating-point or sRGB format
	    *    color attachments...
	    *
	    *    If logicOpEnable is VK_TRUE... blending of all attachments is treated
	    *    as if it were disabled. Any attachments using color formats for which
	    *    logical operations are not supported simply pass through the color
	    *    values unmodified.
	    *
	    * The semantic for unsupported formats is equivalent to a logicop of COPY.
	    * It is /not/ equivalent to disabled logicops (which would incorrectly apply
	    * blending). To implement this spec text with minimal special casing, we
	    * override the logicop func to COPY for unsupported formats.
	    */
	   if (util_format_is_float(format) || util_format_is_srgb(format)) {
	      logicop_func = PIPE_LOGICOP_COPY;
	   }

	   /* Don't bother copying the destination to the source for disabled RTs */
	   if (options->rt[rt].colormask == 0 ||
	       (options->logicop_enable && logicop_func == PIPE_LOGICOP_NOOP)) {

	      nir_instr_remove(&store->instr);
	      return true;
	   }

	   /* Grab the input color. We always want 4 channels during blend. Dead
	    * code will clean up any channels we don't need.
	    */
	   nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);

	   assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");

	   /* Grab the previous fragment color if we need it */
	   nir_def *dst;

	   if (channel_uses_dest(options->rt[rt].rgb) ||
	       channel_uses_dest(options->rt[rt].alpha) ||
	       options->logicop_enable ||
	       options->rt[rt].colormask != BITFIELD_MASK(4)) {

	      /* Record in the shader info that this output is read back */
	      b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
	      b->shader->info.fs.uses_fbfetch_output = true;
	      b->shader->info.fs.uses_sample_shading = true;
	      sem.fb_fetch_output = true;

	      dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
	                            nir_imm_int(b, 0),
	                            .dest_type = nir_intrinsic_src_type(store),
	                            .io_semantics = sem);
	   } else {
	      dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
	   }

	   /* Blend the two colors per the passed options. We only call nir_blend if
	    * blending is enabled with a blend mode other than replace (independent of
	    * the color mask). That avoids unnecessary fsat instructions in the common
	    * case where blending is disabled at an API level, but the driver calls
	    * nir_blend (possibly for color masking).
	    */
	   nir_def *blended = src;

	   if (options->logicop_enable) {
	      blended = nir_blend_logicop(b, format, logicop_func, src, dst);
	   } else if (!util_format_is_pure_integer(format) &&
	              !nir_blend_replace_rt(&options->rt[rt])) {
	      assert(!util_format_is_scaled(format));
	      blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
	   }

	   /* Apply a colormask if necessary */
	   if (options->rt[rt].colormask != BITFIELD_MASK(4))
	      blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

	   /* Shave off any components we don't want to store */
	   const unsigned num_components = util_format_get_nr_components(format);
	   blended = nir_trim_vector(b, blended, num_components);

	   /* Grow or shrink the store destination as needed */
	   store->num_components = num_components;
	   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
	                                          nir_component_mask(num_components));

	   /* Write out the final color instead of the input */
	   nir_src_rewrite(&store->src[0], blended);

	   /* Sink to bottom */
	   nir_instr_remove(&store->instr);
	   nir_builder_instr_insert(b, &store->instr);
	   return true;
	}

	/*
	 * Dual-source colours are only for blending, so when nir_lower_blend is used,
	 * the dual source store_output is for us (only). Remove dual stores so the
	 * backend doesn't have to deal with them, collecting the sources for blending.
	 *
	 * data is an array of nir_def pointers indexed by render target; the stored
	 * value of each removed dual-source store is recorded there. Returns true
	 * when a store was removed.
	 */
	static bool
	consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
	{
	   nir_def **outputs = data;
	   if (store->intrinsic != nir_intrinsic_store_output)
	      return false;

	   /* While we're here, clear the pass flags for store_outputs, since we'll set
	    * them later.
	    */
	   store->instr.pass_flags = 0;

	   /* Index 0 is the regular colour output; leave it for the blend lowering */
	   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
	   if (sem.dual_source_blend_index == 0)
	      return false;

	   int rt = color_index_for_location(sem.location);
	   assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");

	   outputs[rt] = store->src[0].ssa;
	   nir_instr_remove(&store->instr);
	   return true;
	}

	/** Lower blending to framebuffer fetch and some math
	 *
	 * This pass requires that shader I/O is lowered to explicit load/store
	 * instructions using nir_lower_io.
	 *
	 * The shader must be a fragment shader. Two intrinsic passes are run: the
	 * first removes dual-source stores and records their values, the second
	 * lowers the remaining colour stores. Returns true if either pass made
	 * progress.
	 */
	bool
	nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
	{
	   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

	   /* src1[] is zero-initialised, so unwritten dual sources stay NULL */
	   struct ctx ctx = { .options = options };
	   bool progress = nir_shader_intrinsics_pass(shader, consume_dual_stores,
	                                              nir_metadata_control_flow,
	                                              ctx.src1);

	   progress |= nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
	                                          nir_metadata_control_flow,
	                                          &ctx);
	   return progress;
	}