; llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll - third_party/github.com/llvm/llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes="default<O3>" -S            < %s | FileCheck %s --check-prefix=SSE
 ; RUN: opt -passes="default<O3>" -S -mattr=avx < %s | FileCheck %s --check-prefix=AVX

 ; This test is based on https://github.com/llvm/llvm-project/issues/50778
 ; It's the unoptimized IR passed through -passes=mem2reg to remove obvious noise.
 ; This should show cooperation between instcombine, unrolling, inlining,
 ; and SLP to create the target-optimal vector math+logic ops.

 ; Fix the data layout and triple to x86-64 Linux. The second opt invocation
 ; at the top of the file adds -mattr=avx on top of this target.
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

 ; Scalar kernel of the test: returns (1048575 - %0) << 44.
 ;
 ; Both configurations expect the shift to be pushed through the subtract:
 ; (%0 << 44) is subtracted from -17592186044416, which is 1048575 << 44
 ; interpreted as an i64.
 ;
 ; Only the SSE and AVX prefixes are handed to FileCheck by this file, so
 ; assertions are kept for exactly those two prefixes.
 define noundef i64 @foo(i64 noundef %0) {
 ; SSE-LABEL: @foo(
 ; SSE-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
 ; SSE-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
 ; SSE-NEXT:    ret i64 [[TMP3]]
 ;
 ; AVX-LABEL: @foo(
 ; AVX-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
 ; AVX-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
 ; AVX-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = sub i64 1048575, %0
   %3 = shl i64 %2, 44
   ret i64 %3
 }

 ; Loop driver of the test: for i in [0, 8), loads the i64 at %0[i], passes it
 ; through @foo, and stores the result back to the same slot.
 ;
 ; The input is a rotated-free, un-unrolled loop with a call. The assertions
 ; expect the whole loop to become straight-line vector code:
 ;   - SSE prefix: four <2 x i64> load/shl/sub/store groups at byte offsets
 ;     0, 16, 32 and 48.
 ;   - AVX prefix: two <4 x i64> load/shl/sub/store groups at byte offsets
 ;     0 and 32.
 define void @bar(ptr noundef %0) {
 ; SSE-LABEL: @bar(
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 44, i64 44>
 ; SSE-NEXT:    [[TMP4:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 44, i64 44>
 ; SSE-NEXT:    [[TMP8:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP10]], <i64 44, i64 44>
 ; SSE-NEXT:    [[TMP12:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP11]]
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
 ; SSE-NEXT:    [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 44, i64 44>
 ; SSE-NEXT:    [[TMP16:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP15]]
 ; SSE-NEXT:    store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bar(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP2]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    [[TMP4:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
 ; AVX-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP6]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    [[TMP8:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; AVX-NEXT:    ret void
 ;
   br label %2

 ; Loop header: %.0 is the i32 induction variable, starting at 0; the loop
 ; continues while it is signed-less-than 8.
 2:                                                ; preds = %12, %1
   %.0 = phi i32 [ 0, %1 ], [ %13, %12 ]
   %3 = icmp slt i32 %.0, 8
   br i1 %3, label %5, label %4

 ; Loop exit: forwards to the return block.
 4:                                                ; preds = %2
   br label %14

 ; Loop body: the index is sign-extended twice (%6 and %10) and the element
 ; address is computed twice (%7 and %11); both pairs refer to the same slot.
 5:                                                ; preds = %2
   %6 = sext i32 %.0 to i64
   %7 = getelementptr inbounds i64, ptr %0, i64 %6
   %8 = load i64, ptr %7, align 8
   %9 = call noundef i64 @foo(i64 noundef %8)
   %10 = sext i32 %.0 to i64
   %11 = getelementptr inbounds i64, ptr %0, i64 %10
   store i64 %9, ptr %11, align 8
   br label %12

 ; Loop latch: increments the induction variable and branches to the header.
 12:                                               ; preds = %5
   %13 = add nsw i32 %.0, 1
   br label %2

 14:                                               ; preds = %4
   ret void
 }
	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -passes="default<O3>" -S < %s | FileCheck %s --check-prefix=SSE
	; RUN: opt -passes="default<O3>" -S -mattr=avx < %s | FileCheck %s --check-prefix=AVX

	; This test is based on https://github.com/llvm/llvm-project/issues/50778
	; It's the unoptimized IR passed through -passes=mem2reg to remove obvious noise.
	; This should show cooperation between instcombine, unrolling, inlining,
	; and SLP to create the target-optimal vector math+logic ops.

	; Fix the data layout and triple to x86-64 Linux.
	; NOTE(review): the same datalayout and triple already appear earlier in
	; this file; confirm whether this repetition is intended.
	target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"

	; Scalar kernel of the test: returns (1048575 - %0) << 44.
	;
	; Both configurations expect the shift to be pushed through the subtract:
	; (%0 << 44) is subtracted from -17592186044416, which is 1048575 << 44
	; interpreted as an i64.
	;
	; Only the SSE and AVX prefixes are handed to FileCheck by this file, so
	; assertions are kept for exactly those two prefixes. Value captures use
	; the `%.*` pattern throughout so they do not depend on the length of the
	; value name.
	;
	; NOTE(review): @foo is also defined earlier in this file; a module cannot
	; define the same function twice, so confirm whether this copy should stay.
	define noundef i64 @foo(i64 noundef %0) {
	; SSE-LABEL: @foo(
	; SSE-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
	; SSE-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
	; SSE-NEXT: ret i64 [[TMP3]]
	;
	; AVX-LABEL: @foo(
	; AVX-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
	; AVX-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
	; AVX-NEXT: ret i64 [[TMP3]]
	;
	%2 = sub i64 1048575, %0
	%3 = shl i64 %2, 44
	ret i64 %3
	}

	; Loop driver of the test: for i in [0, 8), loads the i64 at %0[i], passes it
	; through @foo, and stores the result back to the same slot.
	;
	; The assertions expect the whole loop to become straight-line vector code:
	;   - SSE prefix: four <2 x i64> load/shl/sub/store groups at byte offsets
	;     0, 16, 32 and 48.
	;   - AVX prefix: two <4 x i64> load/shl/sub/store groups at byte offsets
	;     0 and 32.
	; Value captures use the `%.*` pattern throughout so they do not depend on
	; the length of the value name.
	;
	; NOTE(review): @bar is also defined earlier in this file; a module cannot
	; define the same function twice, so confirm whether this copy should stay.
	define void @bar(ptr noundef %0) {
	; SSE-LABEL: @bar(
	; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
	; SSE-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 44, i64 44>
	; SSE-NEXT: [[TMP4:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
	; SSE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
	; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
	; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
	; SSE-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 44, i64 44>
	; SSE-NEXT: [[TMP8:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
	; SSE-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
	; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
	; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
	; SSE-NEXT: [[TMP11:%.*]] = shl <2 x i64> [[TMP10]], <i64 44, i64 44>
	; SSE-NEXT: [[TMP12:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP11]]
	; SSE-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
	; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
	; SSE-NEXT: [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
	; SSE-NEXT: [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 44, i64 44>
	; SSE-NEXT: [[TMP16:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP15]]
	; SSE-NEXT: store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
	; SSE-NEXT: ret void
	;
	; AVX-LABEL: @bar(
	; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
	; AVX-NEXT: [[TMP3:%.*]] = shl <4 x i64> [[TMP2]], <i64 44, i64 44, i64 44, i64 44>
	; AVX-NEXT: [[TMP4:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
	; AVX-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
	; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
	; AVX-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
	; AVX-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP6]], <i64 44, i64 44, i64 44, i64 44>
	; AVX-NEXT: [[TMP8:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
	; AVX-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
	; AVX-NEXT: ret void
	;
	br label %2

	; Loop header: %.0 is the i32 induction variable, starting at 0; the loop
	; continues while it is signed-less-than 8.
	2: ; preds = %12, %1
	%.0 = phi i32 [ 0, %1 ], [ %13, %12 ]
	%3 = icmp slt i32 %.0, 8
	br i1 %3, label %5, label %4

	; Loop exit: forwards to the return block.
	4: ; preds = %2
	br label %14

	; Loop body: the index is sign-extended twice (%6 and %10) and the element
	; address is computed twice (%7 and %11); both pairs refer to the same slot.
	5: ; preds = %2
	%6 = sext i32 %.0 to i64
	%7 = getelementptr inbounds i64, ptr %0, i64 %6
	%8 = load i64, ptr %7, align 8
	%9 = call noundef i64 @foo(i64 noundef %8)
	%10 = sext i32 %.0 to i64
	%11 = getelementptr inbounds i64, ptr %0, i64 %10
	store i64 %9, ptr %11, align 8
	br label %12

	; Loop latch: increments the induction variable and branches to the header.
	12: ; preds = %5
	%13 = add nsw i32 %.0, 1
	br label %2

	14: ; preds = %4
	ret void
	}