| /* |
| * Copyright (c) 2020 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "brw_nir_rt.h" |
| #include "brw_nir_rt_builder.h" |
| |
| static nir_def * |
| nir_build_vec3_mat_mult_col_major(nir_builder *b, nir_def *vec, |
| nir_def *matrix[], bool translation) |
| { |
| nir_def *result_components[3] = { |
| nir_channel(b, matrix[3], 0), |
| nir_channel(b, matrix[3], 1), |
| nir_channel(b, matrix[3], 2), |
| }; |
| for (unsigned i = 0; i < 3; ++i) { |
| for (unsigned j = 0; j < 3; ++j) { |
| nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[j], 1 << i)); |
| result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v; |
| } |
| } |
| return nir_vec(b, result_components, 3); |
| } |
| |
| static nir_def * |
| build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit) |
| { |
| switch (b->shader->info.stage) { |
| case MESA_SHADER_ANY_HIT: |
| /* Any-hit shaders are always compiled into intersection shaders for |
| * procedural geometry. If we got here in an any-hit shader, it's for |
| * triangles. |
| */ |
| return nir_imm_false(b); |
| |
| case MESA_SHADER_INTERSECTION: |
| return nir_imm_true(b); |
| |
| default: |
| return nir_ieq_imm(b, hit->leaf_type, |
| BRW_RT_BVH_NODE_TYPE_PROCEDURAL); |
| } |
| } |
| |
/* Lower ray-tracing system values and RT intrinsics within one function
 * impl.  Shared inputs (RT globals, the SW hotzone, MemRay and MemHit
 * structures) are loaded once at the top of the impl; each matching
 * intrinsic in the body is then either replaced by one of those values
 * (sysval != NULL) or simply removed (stack push/resume bookkeeping).
 *
 * Returns true if any intrinsic was lowered or removed.
 */
static bool
lower_rt_intrinsics_impl(nir_function_impl *impl,
                         const struct brw_base_prog_key *key,
                         const struct intel_device_info *devinfo)
{
   bool progress = false;

   /* Build all the shared loads at the very start of the impl. */
   nir_builder build = nir_builder_at(nir_before_impl(impl));
   nir_builder *b = &build;

   struct brw_nir_rt_globals_defs globals;
   brw_nir_rt_load_globals(b, &globals, devinfo);

   /* The SW hotzone is one 16-byte-aligned vec4 of 32-bit values:
    * component 0 is this thread's SW stack offset and components 1-3 hold
    * the ray launch ID (see the load_ray_launch_id case below).
    */
   nir_def *hotzone_addr = brw_nir_rt_sw_hotzone_addr(b, devinfo);
   nir_def *hotzone = nir_load_global(b, hotzone_addr, 16, 4, 32);

   gl_shader_stage stage = b->shader->info.stage;
   struct brw_nir_rt_mem_ray_defs world_ray_in = {};
   struct brw_nir_rt_mem_ray_defs object_ray_in = {};
   struct brw_nir_rt_mem_hit_defs hit_in = {};
   /* Only load the MemHit/MemRay structures this stage can actually
    * consume; the zero-initialized defaults are never read otherwise.
    */
   switch (stage) {
   case MESA_SHADER_ANY_HIT:
   case MESA_SHADER_CLOSEST_HIT:
   case MESA_SHADER_INTERSECTION:
      brw_nir_rt_load_mem_hit(b, &hit_in,
                              stage == MESA_SHADER_CLOSEST_HIT, devinfo);
      brw_nir_rt_load_mem_ray(b, &object_ray_in,
                              BRW_RT_BVH_LEVEL_OBJECT, devinfo);
      FALLTHROUGH;

   case MESA_SHADER_MISS:
      /* Hit-group stages also fall through to here: they all need the
       * world-level ray.
       */
      brw_nir_rt_load_mem_ray(b, &world_ray_in,
                              BRW_RT_BVH_LEVEL_WORLD, devinfo);
      break;

   default:
      break;
   }

   /* Current scratch (SW stack) base = per-thread base + offset stored in
    * hotzone component 0.  Both may be rewritten by rt_resume below.
    */
   nir_def *thread_stack_base_addr = brw_nir_rt_sw_stack_addr(b, devinfo);
   nir_def *stack_base_offset = nir_channel(b, hotzone, 0);
   nir_def *stack_base_addr =
      nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
   /* Only consumed by the asserts below (ordering sanity checks). */
   ASSERTED bool seen_scratch_base_ptr_load = false;
   ASSERTED bool found_resume = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         /* New code (e.g. per-intrinsic leaf loads) is emitted right after
          * the intrinsic being replaced.
          */
         b->cursor = nir_after_instr(&intrin->instr);

         /* If a case leaves sysval non-NULL, the intrinsic's def is
          * replaced with it at the bottom of the loop.
          */
         nir_def *sysval = NULL;
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_scratch_base_ptr:
            assert(nir_intrinsic_base(intrin) == 1);
            seen_scratch_base_ptr_load = true;
            sysval = stack_base_addr;
            break;

         case nir_intrinsic_btd_stack_push_intel: {
            /* Push: advance the stack offset stored in the hotzone so the
             * child shader gets its own stack region, then drop the
             * intrinsic.
             */
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               nir_def *child_stack_offset =
                  nir_iadd_imm(b, stack_base_offset, stack_size);
               nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
            }
            nir_instr_remove(instr);
            break;
         }

         case nir_intrinsic_rt_resume:
            /* This is the first "interesting" instruction */
            assert(block == nir_start_block(impl));
            assert(!seen_scratch_base_ptr_load);
            found_resume = true;

            /* Pop: undo the push done before the shader call, store the
             * restored offset back to the hotzone, and recompute the stack
             * base address used by subsequent scratch_base_ptr loads.
             */
            int32_t stack_size = nir_intrinsic_stack_size(intrin);
            if (stack_size > 0) {
               stack_base_offset =
                  nir_iadd_imm(b, stack_base_offset, -stack_size);
               nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
               stack_base_addr = nir_iadd(b, thread_stack_base_addr,
                                          nir_u2u64(b, stack_base_offset));
            }
            nir_instr_remove(instr);
            break;

         case nir_intrinsic_load_uniform:
         case nir_intrinsic_load_push_constant:
            /* We don't want to lower this in the launch trampoline.
             *
             * Also if the driver chooses to use an inline push address, we
             * can do all the loading of the push constant in
             * assign_curb_setup() (more efficient as we can do NoMask
             * instructions for address calculations).
             */
            if (stage == MESA_SHADER_COMPUTE || key->uses_inline_push_addr)
               break;

            sysval = brw_nir_load_global_const(b, intrin,
                        nir_load_btd_global_arg_addr_intel(b),
                        BRW_RT_PUSH_CONST_OFFSET);

            break;

         case nir_intrinsic_load_ray_launch_id:
            /* Launch ID lives in hotzone components 1-3 (mask 0xe). */
            sysval = nir_channels(b, hotzone, 0xe);
            break;

         case nir_intrinsic_load_ray_launch_size:
            sysval = globals.launch_size;
            break;

         case nir_intrinsic_load_ray_world_origin:
            sysval = world_ray_in.orig;
            break;

         case nir_intrinsic_load_ray_world_direction:
            sysval = world_ray_in.dir;
            break;

         case nir_intrinsic_load_ray_object_origin:
            /* In closest-hit there is no object-level MemRay to read, so
             * transform the world-space origin through the instance leaf's
             * world_to_object matrix (with translation) instead.
             */
            if (stage == MESA_SHADER_CLOSEST_HIT) {
               struct brw_nir_rt_bvh_instance_leaf_defs leaf;
               brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
                                                 devinfo);

               sysval = nir_build_vec3_mat_mult_col_major(
                  b, world_ray_in.orig, leaf.world_to_object, true);
            } else {
               sysval = object_ray_in.orig;
            }
            break;

         case nir_intrinsic_load_ray_object_direction:
            /* Same as the origin case above, but directions are transformed
             * without the translation column.
             */
            if (stage == MESA_SHADER_CLOSEST_HIT) {
               struct brw_nir_rt_bvh_instance_leaf_defs leaf;
               brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
                                                 devinfo);

               sysval = nir_build_vec3_mat_mult_col_major(
                  b, world_ray_in.dir, leaf.world_to_object, false);
            } else {
               sysval = object_ray_in.dir;
            }
            break;

         case nir_intrinsic_load_ray_t_min:
            /* It shouldn't matter which we pull this from */
            sysval = world_ray_in.t_near;
            break;

         case nir_intrinsic_load_ray_t_max:
            /* Miss shaders have no hit, so TMax comes from the ray; other
             * stages report the current hit distance.
             */
            if (stage == MESA_SHADER_MISS)
               sysval = world_ray_in.t_far;
            else
               sysval = hit_in.t;
            break;

         case nir_intrinsic_load_primitive_id:
            sysval = brw_nir_rt_load_primitive_id_from_hit(b,
                        build_leaf_is_procedural(b, &hit_in),
                        &hit_in);
            break;

         case nir_intrinsic_load_instance_id: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr, devinfo);
            sysval = leaf.instance_index;
            break;
         }

         case nir_intrinsic_load_ray_object_to_world: {
            /* One matrix column per intrinsic, selected by column index. */
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr, devinfo);
            sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_ray_world_to_object: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr, devinfo);
            sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_ray_hit_kind: {
            /* Triangles report front/back-face; procedural hits report the
             * kind the intersection shader stored.
             */
            nir_def *tri_hit_kind =
               nir_bcsel(b, hit_in.front_face,
                            nir_imm_int(b, BRW_RT_HIT_KIND_FRONT_FACE),
                            nir_imm_int(b, BRW_RT_HIT_KIND_BACK_FACE));
            sysval = nir_bcsel(b, build_leaf_is_procedural(b, &hit_in),
                                  hit_in.aabb_hit_kind, tri_hit_kind);
            break;
         }

         case nir_intrinsic_load_ray_flags:
            /* We need to fetch the original ray flags we stored in the
             * leaf pointer, because the actual ray flags we get here
             * will include any flags passed on the pipeline at creation
             * time, and the spec for IncomingRayFlagsKHR says:
             *   Setting pipeline flags on the raytracing pipeline must not
             *   cause any corresponding flags to be set in variables with
             *   this decoration.
             */
            sysval = nir_u2u32(b, world_ray_in.inst_leaf_ptr);
            break;

         case nir_intrinsic_load_cull_mask:
            sysval = nir_u2u32(b, world_ray_in.ray_mask);
            break;

         case nir_intrinsic_load_ray_geometry_index: {
            /* The geometry index is the low 29 bits of the dword at
             * prim_leaf_ptr + 4.
             */
            nir_def *geometry_index_dw =
               nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                               1, 32);
            sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
            break;
         }

         case nir_intrinsic_load_ray_instance_custom_index: {
            struct brw_nir_rt_bvh_instance_leaf_defs leaf;
            brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr, devinfo);
            sysval = leaf.instance_id;
            break;
         }

         case nir_intrinsic_load_shader_record_ptr:
            /* We can't handle this intrinsic in resume shaders because the
             * handle we get there won't be from the original SBT.  The shader
             * call lowering/splitting pass should have ensured that this
             * value was spilled from the initial shader and unspilled in any
             * resume shaders that need it.
             */
            assert(!found_resume);
            sysval = nir_load_btd_local_arg_addr_intel(b);
            break;

         case nir_intrinsic_load_ray_base_mem_addr_intel:
            sysval = globals.base_mem_addr;
            break;

         case nir_intrinsic_load_ray_hw_stack_size_intel:
            /* Stack sizes in the globals are in 64-byte units. */
            sysval = nir_imul_imm(b, globals.hw_stack_size, 64);
            break;

         case nir_intrinsic_load_ray_sw_stack_size_intel:
            sysval = nir_imul_imm(b, globals.sw_stack_size, 64);
            break;

         case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
            sysval = globals.num_dss_rt_stacks;
            break;

         case nir_intrinsic_load_ray_hit_sbt_addr_intel:
            sysval = globals.hit_sbt_addr;
            break;

         case nir_intrinsic_load_ray_hit_sbt_stride_intel:
            sysval = globals.hit_sbt_stride;
            break;

         case nir_intrinsic_load_ray_miss_sbt_addr_intel:
            sysval = globals.miss_sbt_addr;
            break;

         case nir_intrinsic_load_ray_miss_sbt_stride_intel:
            sysval = globals.miss_sbt_stride;
            break;

         case nir_intrinsic_load_callable_sbt_addr_intel:
            sysval = globals.call_sbt_addr;
            break;

         case nir_intrinsic_load_callable_sbt_stride_intel:
            sysval = globals.call_sbt_stride;
            break;

         case nir_intrinsic_load_btd_resume_sbt_addr_intel:
            /* The resume SBT address is patched in at upload time via
             * relocation constants (low/high dword pair).
             */
            sysval = nir_pack_64_2x32_split(b,
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW),
               nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH));
            break;

         case nir_intrinsic_load_leaf_procedural_intel:
            sysval = build_leaf_is_procedural(b, &hit_in);
            break;

         case nir_intrinsic_load_ray_triangle_vertex_positions: {
            struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
            brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
            sysval = pos.positions[nir_intrinsic_column(intrin)];
            break;
         }

         case nir_intrinsic_load_leaf_opaque_intel: {
            if (stage == MESA_SHADER_INTERSECTION) {
               /* In intersection shaders, the opaque bit is passed to us in
                * the front_face bit.
                */
               sysval = hit_in.front_face;
            } else {
               /* Otherwise read the opaque flag (bit 30 of the dword at
                * prim_leaf_ptr + 4) from the primitive leaf.
                */
               nir_def *flags_dw =
                  nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                                  1, 32);
               sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
            }
            break;
         }

         default:
            continue;
         }

         progress = true;

         /* sysval == NULL here means the case already removed the
          * instruction (stack push / resume).
          */
         if (sysval) {
            nir_def_replace(&intrin->def, sysval);
         }
      }
   }

   /* NOTE(review): metadata is invalidated when we changed anything and
    * control-flow metadata is preserved otherwise; the unconditional `true`
    * first argument follows nir_progress()'s calling convention — confirm
    * against the current NIR API if this looks surprising.
    */
   nir_progress(true, impl,
                progress ? nir_metadata_none : (nir_metadata_control_flow));
   return progress;
}
| |
| /** Lower ray-tracing system values and intrinsics |
| * |
| * In most 3D shader stages, intrinsics are a fairly thin wrapper around |
| * hardware functionality and system values represent magic bits that come |
| * into the shader from FF hardware. Ray-tracing, however, looks a bit more |
| * like the OpenGL 1.0 world where the underlying hardware is simple and most |
| * of the API implementation is software. |
| * |
| * In particular, most things that are treated as system values (or built-ins |
| * in SPIR-V) don't get magically dropped into registers for us. Instead, we |
| * have to fetch them from the relevant data structures shared with the |
| * ray-tracing hardware. Most come from either the RT_DISPATCH_GLOBALS or |
| * from one of the MemHit data structures. Some, such as primitive_id require |
| * us to fetch the leaf address from the MemHit struct and then manually read |
| * the data out of the BVH. Instead of trying to emit all this code deep in |
| * the back-end where we can't effectively optimize it, we lower it all to |
| * global memory access in NIR. |
| * |
| * Once this pass is complete, the only real system values left are the two |
| * argument pointer system values for BTD dispatch: btd_local_arg_addr and |
| * btd_global_arg_addr. |
| */ |
| bool |
| brw_nir_lower_rt_intrinsics(nir_shader *nir, |
| const struct brw_base_prog_key *key, |
| const struct intel_device_info *devinfo) |
| { |
| bool progress = false; |
| nir_foreach_function_impl(impl, nir) { |
| progress |= lower_rt_intrinsics_impl(impl, key, devinfo); |
| } |
| return progress; |
| } |