| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 < %s | FileCheck --check-prefix=BASE --check-prefix=DEFAULT %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 -amdgpu-promote-alloca-to-vector-vgpr-ratio=2 < %s | FileCheck --check-prefix=BASE %s --check-prefix=RATIO2 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -disable-promote-alloca-to-lds=1 -amdgpu-promote-alloca-to-vector-vgpr-ratio=8 < %s | FileCheck --check-prefix=BASE %s --check-prefix=RATIO8 |
| ; |
| ; All three invocations run only the amdgpu-promote-alloca pass and pass |
| ; -disable-promote-alloca-to-lds=1. They differ in the command-line value of |
| ; -amdgpu-promote-alloca-to-vector-vgpr-ratio, which is left unset for the |
| ; DEFAULT prefix, set to 2 for the RATIO2 prefix and set to 8 for the RATIO8 |
| ; prefix. |
| |
| ; Kernel with a [24 x i32] addrspace(5) alloca, using attribute group #0 |
| ; (no per-function vgpr-ratio attribute). Per the assertions below, the alloca |
| ; is kept when no ratio is given on the command line and when the ratio is 8, |
| ; and is replaced by an extractelement from a constant <24 x i32> vector when |
| ; the ratio is 2. |
| define amdgpu_kernel void @i32_24_elements(ptr %out) #0 { |
| ; DEFAULT-LABEL: define amdgpu_kernel void @i32_24_elements( |
| ; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; DEFAULT-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; DEFAULT-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; DEFAULT-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; DEFAULT-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; DEFAULT-NEXT: [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5) |
| ; DEFAULT-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false) |
| ; DEFAULT-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; DEFAULT-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20 |
| ; DEFAULT-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; DEFAULT-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; DEFAULT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; DEFAULT-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; DEFAULT-NEXT: ret void |
| ; |
| ; RATIO2-LABEL: define amdgpu_kernel void @i32_24_elements( |
| ; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; RATIO2-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO2-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO2-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO2-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO2-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO2-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO2-NEXT: [[ALLOCA:%.*]] = freeze <24 x i32> poison |
| ; RATIO2-NEXT: [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]] |
| ; RATIO2-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; RATIO2-NEXT: ret void |
| ; |
| ; RATIO8-LABEL: define amdgpu_kernel void @i32_24_elements( |
| ; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; RATIO8-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO8-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO8-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO8-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO8-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO8-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO8-NEXT: [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5) |
| ; RATIO8-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false) |
| ; RATIO8-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; RATIO8-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20 |
| ; RATIO8-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; RATIO8-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; RATIO8-NEXT: [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; RATIO8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; RATIO8-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; RATIO8-NEXT: ret void |
| ; |
| ; Build a non-constant index from the workitem ids; it can only be 0, 1 or 2. |
| %x = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %y = tail call i32 @llvm.amdgcn.workitem.id.y() |
| %c1 = icmp uge i32 %x, 3 |
| %c2 = icmp uge i32 %y, 3 |
| %sel1 = select i1 %c1, i32 1, i32 2 |
| %sel2 = select i1 %c2, i32 0, i32 %sel1 |
| ; Zero all 96 bytes (24 x 4) of the array, then set element 0 to 42 and |
| ; element 20 to 43. |
| %alloca = alloca [24 x i32], align 16, addrspace(5) |
| call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 96, i1 false) |
| %gep.0 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 0 |
| %gep.1 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 20 |
| store i32 42, ptr addrspace(5) %gep.0 |
| store i32 43, ptr addrspace(5) %gep.1 |
| ; Load through the dynamic index and write the value to %out. |
| %gep = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2 |
| %load = load i32, ptr addrspace(5) %gep |
| store i32 %load, ptr %out |
| ret void |
| } |
| |
| ; Same [24 x i32] body as @i32_24_elements, but using attribute group #1, which |
| ; additionally sets "amdgpu-promote-alloca-to-vector-vgpr-ratio"="2" on the |
| ; function. Per the assertions below, the alloca becomes a <24 x i32> vector |
| ; both when no ratio is given on the command line and when the ratio is 2, but |
| ; it is still kept when the command line asks for ratio 8 (so the command-line |
| ; value appears to take precedence over the function attribute). |
| define amdgpu_kernel void @i32_24_elements_attrib(ptr %out) #1 { |
| ; DEFAULT-LABEL: define amdgpu_kernel void @i32_24_elements_attrib( |
| ; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; DEFAULT-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; DEFAULT-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; DEFAULT-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; DEFAULT-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; DEFAULT-NEXT: [[ALLOCA:%.*]] = freeze <24 x i32> poison |
| ; DEFAULT-NEXT: [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]] |
| ; DEFAULT-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; DEFAULT-NEXT: ret void |
| ; |
| ; RATIO2-LABEL: define amdgpu_kernel void @i32_24_elements_attrib( |
| ; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; RATIO2-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO2-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO2-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO2-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO2-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO2-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO2-NEXT: [[ALLOCA:%.*]] = freeze <24 x i32> poison |
| ; RATIO2-NEXT: [[TMP1:%.*]] = extractelement <24 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43, i32 0, i32 0, i32 0>, i32 [[SEL2]] |
| ; RATIO2-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; RATIO2-NEXT: ret void |
| ; |
| ; RATIO8-LABEL: define amdgpu_kernel void @i32_24_elements_attrib( |
| ; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; RATIO8-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO8-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO8-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO8-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO8-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO8-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO8-NEXT: [[ALLOCA:%.*]] = alloca [24 x i32], align 16, addrspace(5) |
| ; RATIO8-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 96, i1 false) |
| ; RATIO8-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; RATIO8-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 20 |
| ; RATIO8-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; RATIO8-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; RATIO8-NEXT: [[GEP:%.*]] = getelementptr inbounds [24 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; RATIO8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; RATIO8-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; RATIO8-NEXT: ret void |
| ; |
| ; Build a non-constant index from the workitem ids; it can only be 0, 1 or 2. |
| %x = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %y = tail call i32 @llvm.amdgcn.workitem.id.y() |
| %c1 = icmp uge i32 %x, 3 |
| %c2 = icmp uge i32 %y, 3 |
| %sel1 = select i1 %c1, i32 1, i32 2 |
| %sel2 = select i1 %c2, i32 0, i32 %sel1 |
| ; Zero all 96 bytes (24 x 4) of the array, then set element 0 to 42 and |
| ; element 20 to 43. |
| %alloca = alloca [24 x i32], align 16, addrspace(5) |
| call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 96, i1 false) |
| %gep.0 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 0 |
| %gep.1 = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 20 |
| store i32 42, ptr addrspace(5) %gep.0 |
| store i32 43, ptr addrspace(5) %gep.1 |
| ; Load through the dynamic index and write the value to %out. |
| %gep = getelementptr inbounds [24 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2 |
| %load = load i32, ptr addrspace(5) %gep |
| store i32 %load, ptr %out |
| ret void |
| } |
| |
| ; Kernel with a smaller [16 x i32] addrspace(5) alloca, using attribute group |
| ; #0 (no per-function vgpr-ratio attribute). Per the assertions below, the |
| ; alloca becomes a constant <16 x i32> vector when no ratio is given on the |
| ; command line and when the ratio is 2, and is kept when the ratio is 8. |
| define amdgpu_kernel void @i32_16_elements(ptr %out) #0 { |
| ; DEFAULT-LABEL: define amdgpu_kernel void @i32_16_elements( |
| ; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] { |
| ; DEFAULT-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; DEFAULT-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; DEFAULT-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; DEFAULT-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; DEFAULT-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison |
| ; DEFAULT-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]] |
| ; DEFAULT-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; DEFAULT-NEXT: ret void |
| ; |
| ; RATIO2-LABEL: define amdgpu_kernel void @i32_16_elements( |
| ; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] { |
| ; RATIO2-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO2-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO2-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO2-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO2-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO2-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO2-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison |
| ; RATIO2-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]] |
| ; RATIO2-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; RATIO2-NEXT: ret void |
| ; |
| ; RATIO8-LABEL: define amdgpu_kernel void @i32_16_elements( |
| ; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR0]] { |
| ; RATIO8-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO8-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO8-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO8-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO8-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO8-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO8-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5) |
| ; RATIO8-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false) |
| ; RATIO8-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; RATIO8-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15 |
| ; RATIO8-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; RATIO8-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; RATIO8-NEXT: [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; RATIO8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; RATIO8-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; RATIO8-NEXT: ret void |
| ; |
| ; Build a non-constant index from the workitem ids; it can only be 0, 1 or 2. |
| %x = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %y = tail call i32 @llvm.amdgcn.workitem.id.y() |
| %c1 = icmp uge i32 %x, 3 |
| %c2 = icmp uge i32 %y, 3 |
| %sel1 = select i1 %c1, i32 1, i32 2 |
| %sel2 = select i1 %c2, i32 0, i32 %sel1 |
| ; Zero all 64 bytes (16 x 4) of the array, then set element 0 to 42 and the |
| ; last element (15) to 43. |
| %alloca = alloca [16 x i32], align 16, addrspace(5) |
| call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 64, i1 false) |
| %gep.0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 0 |
| %gep.1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 15 |
| store i32 42, ptr addrspace(5) %gep.0 |
| store i32 43, ptr addrspace(5) %gep.1 |
| ; Load through the dynamic index and write the value to %out. |
| %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2 |
| %load = load i32, ptr addrspace(5) %gep |
| store i32 %load, ptr %out |
| ret void |
| } |
| |
| ; Same [16 x i32] body as @i32_16_elements, but using attribute group #2, which |
| ; additionally sets "amdgpu-promote-alloca-to-vector-vgpr-ratio"="8" on the |
| ; function. Per the assertions below, the alloca is kept both when no ratio is |
| ; given on the command line and when the ratio is 8, but it becomes a |
| ; <16 x i32> vector when the command line asks for ratio 2 (so the command-line |
| ; value appears to take precedence over the function attribute). |
| define amdgpu_kernel void @i32_16_elements_attrib(ptr %out) #2 { |
| ; DEFAULT-LABEL: define amdgpu_kernel void @i32_16_elements_attrib( |
| ; DEFAULT-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; DEFAULT-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; DEFAULT-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; DEFAULT-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; DEFAULT-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; DEFAULT-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5) |
| ; DEFAULT-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false) |
| ; DEFAULT-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; DEFAULT-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15 |
| ; DEFAULT-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; DEFAULT-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; DEFAULT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; DEFAULT-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; DEFAULT-NEXT: ret void |
| ; |
| ; RATIO2-LABEL: define amdgpu_kernel void @i32_16_elements_attrib( |
| ; RATIO2-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; RATIO2-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO2-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO2-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO2-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO2-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO2-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO2-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison |
| ; RATIO2-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> <i32 42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 43>, i32 [[SEL2]] |
| ; RATIO2-NEXT: store i32 [[TMP1]], ptr [[OUT]], align 4 |
| ; RATIO2-NEXT: ret void |
| ; |
| ; RATIO8-LABEL: define amdgpu_kernel void @i32_16_elements_attrib( |
| ; RATIO8-SAME: ptr [[OUT:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; RATIO8-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() |
| ; RATIO8-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() |
| ; RATIO8-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3 |
| ; RATIO8-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3 |
| ; RATIO8-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2 |
| ; RATIO8-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]] |
| ; RATIO8-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 16, addrspace(5) |
| ; RATIO8-NEXT: call void @llvm.memset.p5.i32(ptr addrspace(5) [[ALLOCA]], i8 0, i32 64, i1 false) |
| ; RATIO8-NEXT: [[GEP_0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0 |
| ; RATIO8-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 15 |
| ; RATIO8-NEXT: store i32 42, ptr addrspace(5) [[GEP_0]], align 4 |
| ; RATIO8-NEXT: store i32 43, ptr addrspace(5) [[GEP_1]], align 4 |
| ; RATIO8-NEXT: [[GEP:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[SEL2]] |
| ; RATIO8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[GEP]], align 4 |
| ; RATIO8-NEXT: store i32 [[LOAD]], ptr [[OUT]], align 4 |
| ; RATIO8-NEXT: ret void |
| ; |
| ; Build a non-constant index from the workitem ids; it can only be 0, 1 or 2. |
| %x = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %y = tail call i32 @llvm.amdgcn.workitem.id.y() |
| %c1 = icmp uge i32 %x, 3 |
| %c2 = icmp uge i32 %y, 3 |
| %sel1 = select i1 %c1, i32 1, i32 2 |
| %sel2 = select i1 %c2, i32 0, i32 %sel1 |
| ; Zero all 64 bytes (16 x 4) of the array, then set element 0 to 42 and the |
| ; last element (15) to 43. |
| %alloca = alloca [16 x i32], align 16, addrspace(5) |
| call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 64, i1 false) |
| %gep.0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 0 |
| %gep.1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 15 |
| store i32 42, ptr addrspace(5) %gep.0 |
| store i32 43, ptr addrspace(5) %gep.1 |
| ; Load through the dynamic index and write the value to %out. |
| %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %sel2 |
| %load = load i32, ptr addrspace(5) %gep |
| store i32 %load, ptr %out |
| ret void |
| } |
| |
| ; Intrinsics called by the kernels in this file: the two workitem-id queries |
| ; feed the dynamic array index, and the memset zero-fills each alloca. |
| declare i32 @llvm.amdgcn.workitem.id.x() |
| declare i32 @llvm.amdgcn.workitem.id.y() |
| declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) |
| |
| ; All three groups share nounwind, a max-regs value of 24 and waves-per-eu of |
| ; "4,4". They differ only in the per-function vgpr-ratio attribute: |
| ;   #0 - not set (used by the kernels without the _attrib suffix) |
| ;   #1 - ratio 2 (used by @i32_24_elements_attrib) |
| ;   #2 - ratio 8 (used by @i32_16_elements_attrib) |
| attributes #0 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" } |
| attributes #1 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" "amdgpu-promote-alloca-to-vector-vgpr-ratio"="2" } |
| attributes #2 = { nounwind "amdgpu-promote-alloca-to-vector-max-regs"="24" "amdgpu-waves-per-eu"="4,4" "amdgpu-promote-alloca-to-vector-vgpr-ratio"="8" } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; BASE: {{.*}} |