/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_shader.c which is:
* Copyright © 2019 Google LLC
*
* Also derived from anv_pipeline.c which is
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "genxml/gen_macros.h"
#include "panvk_private.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_lower_blend.h"
#include "nir_conversion_builder.h"
#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "vk_shader_module.h"
#include "pan_shader.h"
#include "util/pan_lower_framebuffer.h"
#include "vk_util.h"
static void
panvk_init_sysvals(struct panfrost_sysvals *sysvals,
                   gl_shader_stage stage)
{
   memset(sysvals, 0, sizeof(*sysvals));

#define SYSVAL_SLOT(name) \
   (assert(offsetof(struct panvk_sysvals, name) % 16 == 0), \
    offsetof(struct panvk_sysvals, name) / 16)

#define INIT_SYSVAL(name, SYSVAL) \
   sysvals->sysvals[SYSVAL_SLOT(name)] = PAN_SYSVAL_##SYSVAL

   if (gl_shader_stage_is_compute(stage)) {
      INIT_SYSVAL(num_work_groups, NUM_WORK_GROUPS);
      INIT_SYSVAL(local_group_size, LOCAL_GROUP_SIZE);
   } else {
      INIT_SYSVAL(viewport_scale, VIEWPORT_SCALE);
      INIT_SYSVAL(viewport_offset, VIEWPORT_OFFSET);
      INIT_SYSVAL(vertex_instance_offsets, VERTEX_INSTANCE_OFFSETS);
      INIT_SYSVAL(blend_constants, BLEND_CONSTANTS);
   }

   sysvals->sysval_count = SYSVAL_SLOT(dyn_ssbos);

#undef SYSVAL_SLOT
#undef INIT_SYSVAL
}
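
/*
 * nir_shader_instructions_pass() callback: replaces
 * load_blend_const_color_rgba intrinsics with an immediate vec4 built from
 * the constants passed in through 'data'.
 */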
static bool
panvk_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
      return false;

   const nir_const_value *constants = data;

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant);
   nir_instr_remove(instr);
   return true;
}
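
/*
 * Lower blending in the fragment shader for every render target that
 * panvk_per_arch(blend_needs_lowering)() flags. nir_lower_blend() takes over
 * the whole equation for those RTs, so their hardware equation is rewritten
 * to a plain source replace and the internal conversion descriptor is
 * recorded for the compiler.
 */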
static void
panvk_lower_blend(struct panfrost_device *pdev,
                  nir_shader *nir,
                  struct panfrost_compile_inputs *inputs,
                  struct pan_blend_state *blend_state,
                  bool static_blend_constants)
{
   nir_lower_blend_options options = {
      .logicop_enable = blend_state->logicop_enable,
      .logicop_func = blend_state->logicop_func,
   };

   bool lower_blend = false;

   for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
      struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];

      if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
         continue;

      enum pipe_format fmt = rt_state->format;
      options.format[rt] = fmt;
      options.rt[rt].colormask = rt_state->equation.color_mask;

      if (!rt_state->equation.blend_enable) {
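         /* Blending disabled: replace the destination with the source.
          * A ZERO factor with the invert flag set works out to a factor of
          * one, so the result is src * 1 + dst * 0.
          */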
         static const nir_lower_blend_channel replace = {
            .func = BLEND_FUNC_ADD,
            .src_factor = BLEND_FACTOR_ZERO,
            .invert_src_factor = true,
            .dst_factor = BLEND_FACTOR_ZERO,
            .invert_dst_factor = false,
         };

         options.rt[rt].rgb = replace;
         options.rt[rt].alpha = replace;
      } else {
         options.rt[rt].rgb.func = rt_state->equation.rgb_func;
         options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
         options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
         options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
         options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
         options.rt[rt].alpha.func = rt_state->equation.alpha_func;
         options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
         options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
         options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
         options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
      }

      /* Update the equation to force a color replacement */
      rt_state->equation.color_mask = 0xf;
      rt_state->equation.rgb_func = BLEND_FUNC_ADD;
      rt_state->equation.rgb_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_src_factor = true;
      rt_state->equation.rgb_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_dst_factor = false;
      rt_state->equation.alpha_func = BLEND_FUNC_ADD;
      rt_state->equation.alpha_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_src_factor = true;
      rt_state->equation.alpha_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_dst_factor = false;
      lower_blend = true;

      inputs->bifrost.static_rt_conv = true;
      inputs->bifrost.rt_conv[rt] =
         GENX(pan_blend_get_internal_desc)(pdev, fmt, rt, 32, false) >> 32;
   }

   if (lower_blend) {
      NIR_PASS_V(nir, nir_lower_blend, &options);

      if (static_blend_constants) {
         const nir_const_value constants[4] = {
            { .f32 = CLAMP(blend_state->constants[0], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[1], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[2], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[3], 0.0f, 1.0f) },
         };

         NIR_PASS_V(nir, nir_shader_instructions_pass,
                    panvk_inline_blend_constants,
                    nir_metadata_block_index |
                    nir_metadata_dominance,
                    (void *)constants);
      }
   }
}
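
/*
 * nir_shader_instructions_pass() callback: turn load_push_constant
 * intrinsics into UBO loads from the dedicated push-constant UBO
 * (PANVK_PUSH_CONST_UBO_INDEX), since push constants are passed through a
 * regular uniform buffer on panvk.
 */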
static bool
panvk_lower_load_push_constant(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_push_constant)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_ssa_def *ubo_load =
      nir_load_ubo(b, nir_dest_num_components(intr->dest),
                   nir_dest_bit_size(intr->dest),
                   nir_imm_int(b, PANVK_PUSH_CONST_UBO_INDEX),
                   intr->src[0].ssa,
                   .align_mul = nir_dest_bit_size(intr->dest) / 8,
                   .align_offset = 0,
                   .range_base = nir_intrinsic_base(intr),
                   .range = nir_intrinsic_range(intr));

   nir_ssa_def_rewrite_uses(&intr->dest.ssa, ubo_load);
   nir_instr_remove(instr);
   return true;
}
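
/*
 * glsl_type size/alignment callback used by nir_lower_vars_to_explicit_types()
 * on shared memory: std430-like rules where booleans take 4 bytes and a vec3
 * is aligned like a vec4.
 */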
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
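
/*
 * Translate one pipeline shader stage into a panvk_shader: run the SPIR-V
 * through the panvk-specific NIR lowering (descriptors, push constants,
 * sysvals, blending for fragment shaders) and hand the result to the
 * per-arch panfrost compiler.
 */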
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
                              gl_shader_stage stage,
                              const VkPipelineShaderStageCreateInfo *stage_info,
                              const struct panvk_pipeline_layout *layout,
                              unsigned sysval_ubo,
                              struct pan_blend_state *blend_state,
                              bool static_blend_constants,
                              const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct panfrost_device *pdev = &dev->physical_device->pdev;
   struct panvk_shader *shader;

   shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   util_dynarray_init(&shader->binary, NULL);

   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .variable_pointers = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess ?
                          nir_address_format_64bit_bounded_global :
                          nir_address_format_64bit_global_32bit_offset,
   };

   nir_shader *nir;
   VkResult result = vk_shader_module_to_nir(&dev->vk, module, stage,
                                             stage_info->pName,
                                             stage_info->pSpecializationInfo,
                                             &spirv_options,
                                             GENX(pan_shader_get_compiler_options)(),
                                             NULL, &nir);
   if (result != VK_SUCCESS) {
      vk_free2(&dev->vk.alloc, alloc, shader);
      return NULL;
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, true);

   struct panfrost_sysvals fixed_sysvals;
   panvk_init_sysvals(&fixed_sysvals, stage);
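
   /* Sysvals live in a UBO at a fixed, driver-chosen index, and UBO contents
    * are never promoted to push uniforms.
    */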
   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .no_ubo_to_push = true,
      .no_idvs = true, /* TODO */
      .fixed_sysval_ubo = sysval_ubo,
      .fixed_sysval_layout = &fixed_sysvals,
   };

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_shader_in | nir_var_shader_out,
              UINT32_MAX);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
   NIR_PASS_V(nir, nir_opt_trivial_continues);
   /* Do texture lowering here. Yes, it's a duplication of the texture
    * lowering in bifrost_compile. However, we need to lower texture stuff
    * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
    * of the texture lowering generates nir_texop_txs which we handle as part
    * of descriptor lowering.
    *
    * TODO: We really should be doing this in common code, not duplicated in
    * panvk. In order to do that, we need to rework the panfrost compile
    * flow to look more like the Intel flow:
    *
    * 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
    *    to be done really early.
    *
    * 2. bi_preprocess_nir: Does common lowering and runs the optimization
    *    loop. Nothing here should be API-specific.
    *
    * 3. Do additional lowering in panvk
    *
    * 4. bi_postprocess_nir: Does final lowering and runs the optimization
    *    loop again. This can happen as part of the final compile.
    *
    * This would give us a better place to do panvk-specific lowering.
    */
   nir_lower_tex_options lower_tex_options = {
      .lower_txs_lod = true,
      .lower_txp = ~0,
      .lower_tg4_broadcom_swizzle = true,
      .lower_txd = true,
      .lower_invalid_implicit_lod = true,
   };

   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors),
              dev, layout, &shader->has_img_access);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              spirv_options.ssbo_addr_format);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);
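
   /* Stages with workgroup-shared memory: give shared variables an explicit
    * layout (unless SPIR-V already provided one) and lower them to 32-bit
    * offsets.
    */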
   if (gl_shader_stage_uses_workgroup(stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared,
                    shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   NIR_PASS_V(nir, nir_shader_instructions_pass,
              panvk_lower_load_push_constant,
              nir_metadata_block_index |
              nir_metadata_dominance,
              (void *)layout);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   /* We have to run nir_lower_blend() after we've gotten rid of copies (it
    * requires load/store) and before we assign output locations.
    */
   if (stage == MESA_SHADER_FRAGMENT) {
      /* This is required for nir_lower_blend */
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
      panvk_lower_blend(pdev, nir, &inputs, blend_state, static_blend_constants);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);

   /* Needed to turn shader_temp into function_temp since the backend only
    * handles the latter for now.
    */
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);

   /* System values shouldn't have changed */
   assert(memcmp(&shader->info.sysvals, &fixed_sysvals,
                 sizeof(fixed_sysvals)) == 0);

   /* Patch the descriptor count */
   shader->info.ubo_count = PANVK_NUM_BUILTIN_UBOS +
                            layout->num_ubos + layout->num_dyn_ubos;
   shader->info.sampler_count = layout->num_samplers;
   shader->info.texture_count = layout->num_textures;

   if (shader->has_img_access)
      shader->info.attribute_count += layout->num_imgs;

   shader->sysval_ubo = sysval_ubo;
   shader->local_size.x = nir->info.workgroup_size[0];
   shader->local_size.y = nir->info.workgroup_size[1];
   shader->local_size.z = nir->info.workgroup_size[2];

   ralloc_free(nir);

   return shader;
}