| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: opt -S < %s -passes=loop-vectorize -force-vector-width=4 | FileCheck %s |
| |
| target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" |
| target triple = "aarch64--linux-gnu" |
| |
| define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { |
| ; CHECK-LABEL: define void @test0 |
| ; CHECK-SAME: (ptr noalias [[M3:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10> |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8 |
| ; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 |
| ; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 |
| ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 |
| ; CHECK-NEXT: [[TMP15:%.*]] = ashr exact i64 [[TMP11]], 32 |
| ; CHECK-NEXT: [[TMP16:%.*]] = ashr exact i64 [[TMP12]], 32 |
| ; CHECK-NEXT: [[TMP17:%.*]] = ashr exact i64 [[TMP13]], 32 |
| ; CHECK-NEXT: [[TMP18:%.*]] = ashr exact i64 [[TMP14]], 32 |
| ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP15]] |
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP16]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP17]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP18]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0 |
| ; CHECK-NEXT: store i16 [[TMP23]], ptr [[TMP19]], align 2 |
| ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1 |
| ; CHECK-NEXT: store i16 [[TMP24]], ptr [[TMP20]], align 2 |
| ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2 |
| ; CHECK-NEXT: store i16 [[TMP25]], ptr [[TMP21]], align 2 |
| ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3 |
| ; CHECK-NEXT: store i16 [[TMP26]], ptr [[TMP22]], align 2 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 |
| ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br i1 true, label [[FOR_INC1286_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] |
| ; CHECK: if.then1165.us: |
| ; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] |
| ; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 |
| ; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 |
| ; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], 10 |
| ; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] |
| ; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 |
| ; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 |
| ; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] |
| ; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 |
| ; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 |
| ; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: for.inc1286.loopexit: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %if.then1165.us |
| |
| if.then1165.us: ; preds = %if.then1165.us, %entry |
| %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] |
| %gep.A = getelementptr inbounds i16, ptr %A, i64 %indvars.iv1783 |
| %l.A = load i16, ptr %gep.A |
| %conv1177.us = zext i16 %l.A to i32 |
| %add1178.us = add nsw i32 %conv1177.us, 10 |
| %conv1179.us = trunc i32 %add1178.us to i16 |
| %gep.B = getelementptr inbounds i64, ptr %B, i64 %indvars.iv1783 |
| %l.B = load i64, ptr %gep.B |
| %idxprom1181.us = ashr exact i64 %l.B, 32 |
| %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us |
| store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 |
| %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 |
| %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 |
| br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us |
| |
| for.inc1286.loopexit: ; preds = %if.then1165.us |
| ret void |
| } |
| |
| define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias %C) { |
| ; CHECK-LABEL: define void @test1 |
| ; CHECK-SAME: (ptr noalias [[M3:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[C]], align 4 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2 |
| ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16> |
| ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 |
| ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 |
| ; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 |
| ; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 |
| ; CHECK-NEXT: [[TMP17:%.*]] = ashr exact i64 [[TMP13]], 32 |
| ; CHECK-NEXT: [[TMP18:%.*]] = ashr exact i64 [[TMP14]], 32 |
| ; CHECK-NEXT: [[TMP19:%.*]] = ashr exact i64 [[TMP15]], 32 |
| ; CHECK-NEXT: [[TMP20:%.*]] = ashr exact i64 [[TMP16]], 32 |
| ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP17]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP18]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP19]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP20]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[TMP8]], i32 0 |
| ; CHECK-NEXT: store i16 [[TMP25]], ptr [[TMP21]], align 2 |
| ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[TMP8]], i32 1 |
| ; CHECK-NEXT: store i16 [[TMP26]], ptr [[TMP22]], align 2 |
| ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2 |
| ; CHECK-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2 |
| ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3 |
| ; CHECK-NEXT: store i16 [[TMP28]], ptr [[TMP24]], align 2 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 |
| ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: br i1 true, label [[FOR_INC1286_LOOPEXIT:%.*]], label [[SCALAR_PH]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] |
| ; CHECK: if.then1165.us: |
| ; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] |
| ; CHECK-NEXT: [[FPTR:%.*]] = load i32, ptr [[C]], align 4 |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] |
| ; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 |
| ; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 |
| ; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], [[FPTR]] |
| ; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] |
| ; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 |
| ; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 |
| ; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] |
| ; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 |
| ; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 |
| ; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: for.inc1286.loopexit: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %if.then1165.us |
| |
| if.then1165.us: ; preds = %if.then1165.us, %entry |
| %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] |
| %fptr = load i32, ptr %C, align 4 |
| %gep.A = getelementptr inbounds i16, ptr %A, i64 %indvars.iv1783 |
| %l.A = load i16, ptr %gep.A |
| %conv1177.us = zext i16 %l.A to i32 |
| %add1178.us = add nsw i32 %conv1177.us, %fptr |
| %conv1179.us = trunc i32 %add1178.us to i16 |
| %gep.B = getelementptr inbounds i64, ptr %B, i64 %indvars.iv1783 |
| %l.B = load i64, ptr %gep.B |
| %idxprom1181.us = ashr exact i64 %l.B, 32 |
| %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us |
| store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 |
| %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 |
| %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 |
| br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us |
| |
| for.inc1286.loopexit: ; preds = %if.then1165.us |
| ret void |
| } |