| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 |
| // REQUIRES: amdgpu-registered-target |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1030 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s --check-prefix=CHECK-GFX1030 |
| |
| #define __device__ __attribute__((device)) |
| |
| typedef unsigned int v4ui __attribute__((ext_vector_type(4))); |
| typedef float v4f __attribute__((ext_vector_type(4))); |
| typedef _Float16 v4h __attribute__((ext_vector_type(4))); |
| typedef unsigned long ulong; |
| |
| // CHECK-GFX1030-LABEL: define dso_local void @_Z34test_image_bvh_intersect_ray_h_hipPDv4_jjfDv4_fDv4_DF16_S2_S_( |
| // CHECK-GFX1030-SAME: ptr noundef [[OUT:%.*]], i32 noundef [[NODE:%.*]], float noundef [[TMAX:%.*]], <4 x float> noundef [[ORIGIN:%.*]], <4 x half> noundef [[DIR:%.*]], <4 x half> noundef [[INV_DIR:%.*]], <4 x i32> noundef [[EXT:%.*]]) #[[ATTR0:[0-9]+]] { |
| // CHECK-GFX1030-NEXT: [[ENTRY:.*:]] |
| // CHECK-GFX1030-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[NODE_ADDR:%.*]] = alloca i32, align 4, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[TMAX_ADDR:%.*]] = alloca float, align 4, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[ORIGIN_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[DIR_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[INV_DIR_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[EXT_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[NODE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NODE_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[TMAX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMAX_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[ORIGIN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORIGIN_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[DIR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DIR_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[INV_DIR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[INV_DIR_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[EXT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[EXT_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store i32 [[NODE]], ptr [[NODE_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: store float [[TMAX]], ptr [[TMAX_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: store <4 x float> [[ORIGIN]], ptr [[ORIGIN_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: store <4 x half> [[DIR]], ptr [[DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x half> [[INV_DIR]], ptr [[INV_DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x i32> [[EXT]], ptr [[EXT_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP0:%.*]] = load i32, ptr [[NODE_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: [[TMP1:%.*]] = load float, ptr [[TMAX_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ORIGIN_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr [[DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: [[TMP4:%.*]] = load <4 x half>, ptr [[INV_DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[EXT_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP7:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP4]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v3f16(i32 [[TMP0]], float [[TMP1]], <3 x float> [[TMP6]], <3 x half> [[TMP7]], <3 x half> [[TMP8]], <4 x i32> [[TMP5]]) |
| // CHECK-GFX1030-NEXT: [[TMP10:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP10]], align 16 |
| // CHECK-GFX1030-NEXT: ret void |
| // |
| __device__ void test_image_bvh_intersect_ray_h_hip(v4ui* out, unsigned int node, float tmax, v4f origin, v4h dir, v4h inv_dir, v4ui ext) { |
| *out = __builtin_amdgcn_image_bvh_intersect_ray_h(node, tmax, origin, dir, inv_dir, ext); |
| } |
| |
| |
| // CHECK-GFX1030-LABEL: define dso_local void @_Z35test_image_bvh_intersect_ray_lh_hipPDv4_jmfDv4_fDv4_DF16_S2_S_( |
| // CHECK-GFX1030-SAME: ptr noundef [[OUT:%.*]], i64 noundef [[NODE:%.*]], float noundef [[TMAX:%.*]], <4 x float> noundef [[ORIGIN:%.*]], <4 x half> noundef [[DIR:%.*]], <4 x half> noundef [[INV_DIR:%.*]], <4 x i32> noundef [[EXT:%.*]]) #[[ATTR0]] { |
| // CHECK-GFX1030-NEXT: [[ENTRY:.*:]] |
| // CHECK-GFX1030-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[NODE_ADDR:%.*]] = alloca i64, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[TMAX_ADDR:%.*]] = alloca float, align 4, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[ORIGIN_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[DIR_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[INV_DIR_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[EXT_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) |
| // CHECK-GFX1030-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[NODE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NODE_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[TMAX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMAX_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[ORIGIN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORIGIN_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[DIR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DIR_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[INV_DIR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[INV_DIR_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: [[EXT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[EXT_ADDR]] to ptr |
| // CHECK-GFX1030-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store i64 [[NODE]], ptr [[NODE_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store float [[TMAX]], ptr [[TMAX_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: store <4 x float> [[ORIGIN]], ptr [[ORIGIN_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: store <4 x half> [[DIR]], ptr [[DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x half> [[INV_DIR]], ptr [[INV_DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x i32> [[EXT]], ptr [[EXT_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP0:%.*]] = load i64, ptr [[NODE_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: [[TMP1:%.*]] = load float, ptr [[TMAX_ADDR_ASCAST]], align 4 |
| // CHECK-GFX1030-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ORIGIN_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr [[DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: [[TMP4:%.*]] = load <4 x half>, ptr [[INV_DIR_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[EXT_ADDR_ASCAST]], align 16 |
| // CHECK-GFX1030-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP7:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP4]], <4 x half> [[TMP4]], <3 x i32> <i32 0, i32 1, i32 2> |
| // CHECK-GFX1030-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v3f16(i64 [[TMP0]], float [[TMP1]], <3 x float> [[TMP6]], <3 x half> [[TMP7]], <3 x half> [[TMP8]], <4 x i32> [[TMP5]]) |
| // CHECK-GFX1030-NEXT: [[TMP10:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 |
| // CHECK-GFX1030-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP10]], align 16 |
| // CHECK-GFX1030-NEXT: ret void |
| // |
| __device__ void test_image_bvh_intersect_ray_lh_hip(v4ui* out, ulong node, float tmax, v4f origin, v4h dir, v4h inv_dir, v4ui ext) { |
| *out = __builtin_amdgcn_image_bvh_intersect_ray_lh(node, tmax, origin, dir, inv_dir, ext); |
| } |
| |