; llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll - third_party/github.com/llvm/llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

 ; This tests that various ands, sexts, and zexts (and other operations)
 ; operating on vscale or the SVE count instructions can be eliminated
 ; (via demanded bits) due to their known limited range.

 ; On AArch64 vscale can be at most 16 (for a 2048-bit vector).
 ; The counting instructions (sans multiplier) have a value of at most 256
 ; (for a 2048-bit vector of i8s).

 ; vscale is limited to [1, 16] by vscale_range, so masking it with 31 cannot
 ; clear any bit. Only the second mask is expected to survive, narrowed to
 ; #0x1c in the expected assembly.
 define i32 @vscale_and_elimination() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    and w9, w8, #0x1c
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %and_redundant = and i32 %vscale, 31
   ; 4294967292 is 0xFFFFFFFC: the former literal 17179869180 (0x3FFFFFFFC)
   ; reduced to 32 bits so that it actually fits the i32 operand type.
   %and_required = and i32 %vscale, 4294967292
   %result = add i32 %and_redundant, %and_required
   ret i32 %result
 }

 ; The file header notes that a count instruction yields at most 256, so the
 ; mask with 511 cannot clear any bit. Only the second mask is expected to
 ; remain, narrowed to #0x1fc in the expected assembly.
 define i64 @cntb_and_elimination() {
 ; CHECK-LABEL: cntb_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntb x8
 ; CHECK-NEXT:    and x9, x8, #0x1fc
 ; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntb(i32 31)
   ; No-op mask: expected to fold away.
   %noop_mask = and i64 %count, 511
   ; Mask that drops the two low bits: expected to be kept.
   %live_mask = and i64 %count, 17179869180
   %sum = add i64 %noop_mask, %live_mask
   ret i64 %sum
 }

 ; The file header notes that a count instruction yields at most 256, so the
 ; mask with 1023 cannot clear any bit. Only the second mask is expected to
 ; remain, narrowed to #0xfc in the expected assembly.
 define i64 @cnth_and_elimination() {
 ; CHECK-LABEL: cnth_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    and x9, x8, #0xfc
 ; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cnth(i32 31)
   ; No-op mask: expected to fold away.
   %noop_mask = and i64 %count, 1023
   ; Mask that drops the two low bits: expected to be kept.
   %live_mask = and i64 %count, 17179869180
   %sum = add i64 %noop_mask, %live_mask
   ret i64 %sum
 }

 ; The mask with 127 is expected to fold away for the word count, leaving only
 ; the second mask, narrowed to #0x7c in the expected assembly.
 define i64 @cntw_and_elimination() {
 ; CHECK-LABEL: cntw_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
 ; CHECK-NEXT:    and x9, x8, #0x7c
 ; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntw(i32 31)
   ; No-op mask: expected to fold away.
   %noop_mask = and i64 %count, 127
   ; Mask that drops the two low bits: expected to be kept.
   %live_mask = and i64 %count, 17179869180
   %sum = add i64 %noop_mask, %live_mask
   ret i64 %sum
 }

 ; The mask with 63 is expected to fold away for the doubleword count, leaving
 ; only the second mask, narrowed to #0x3c in the expected assembly.
 define i64 @cntd_and_elimination() {
 ; CHECK-LABEL: cntd_and_elimination:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    and x9, x8, #0x3c
 ; CHECK-NEXT:    add x0, x8, x9
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntd(i32 31)
   ; No-op mask: expected to fold away.
   %noop_mask = and i64 %count, 63
   ; Mask that drops the two low bits: expected to be kept.
   %live_mask = and i64 %count, 17179869180
   %sum = add i64 %noop_mask, %live_mask
   ret i64 %sum
 }

 ; Zero-extends the i32 vscale (limited to [1, 16] by vscale_range) to i64.
 ; The expected assembly contains no separate extension instruction.
 define i64 @vscale_trunc_zext() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vs = call i32 @llvm.vscale.i32()
   %wide = zext i32 %vs to i64
   ret i64 %wide
 }

 ; Sign-extends the i32 vscale (limited to [1, 16] by vscale_range) to i64.
 ; The expected assembly contains no separate extension instruction.
 define i64 @vscale_trunc_sext() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    lsr x0, x8, #4
 ; CHECK-NEXT:    ret
   %vs = call i32 @llvm.vscale.i32()
   %wide = sext i32 %vs to i64
   ret i64 %wide
 }

 ; Truncates the byte count to i32 and zero-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntb.
 define i64 @count_bytes_trunc_zext() {
 ; CHECK-LABEL: count_bytes_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntb x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntb(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = zext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the halfword count to i32 and zero-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cnth.
 define i64 @count_halfs_trunc_zext() {
 ; CHECK-LABEL: count_halfs_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cnth(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = zext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the word count to i32 and zero-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntw.
 define i64 @count_words_trunc_zext() {
 ; CHECK-LABEL: count_words_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntw(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = zext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the doubleword count to i32 and zero-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntd.
 define i64 @count_doubles_trunc_zext() {
 ; CHECK-LABEL: count_doubles_trunc_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntd x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntd(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = zext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the byte count to i32 and sign-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntb.
 define i64 @count_bytes_trunc_sext() {
 ; CHECK-LABEL: count_bytes_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntb x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntb(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = sext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the halfword count to i32 and sign-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cnth.
 define i64 @count_halfs_trunc_sext() {
 ; CHECK-LABEL: count_halfs_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cnth(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = sext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the word count to i32 and sign-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntw.
 define i64 @count_words_trunc_sext() {
 ; CHECK-LABEL: count_words_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntw(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = sext i32 %lo32 to i64
   ret i64 %wide
 }

 ; Truncates the doubleword count to i32 and sign-extends it back to i64. The
 ; round trip is expected to fold away, leaving a lone cntd.
 define i64 @count_doubles_trunc_sext() {
 ; CHECK-LABEL: count_doubles_trunc_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntd x0
 ; CHECK-NEXT:    ret
   %count = call i64 @llvm.aarch64.sve.cntd(i32 31)
   %lo32 = trunc i64 %count to i32
   %wide = sext i32 %lo32 to i64
   ret i64 %wide
 }

 ; vscale is in [1, 16], so vscale * 5 is at most 80 and fits in seven bits.
 ; The mask with 127 is therefore a no-op, while the mask with 63 is expected
 ; to remain (#0x3f in the expected assembly).
 define i32 @vscale_with_multiplier() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_with_multiplier:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #5 // =0x5
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    and w9, w8, #0x3f
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vs = call i32 @llvm.vscale.i32()
   %scaled = mul i32 %vs, 5
   ; No-op mask: expected to fold away.
   %noop_mask = and i32 %scaled, 127
   ; Six-bit mask: expected to be kept.
   %live_mask = and i32 %scaled, 63
   %sum = add i32 %noop_mask, %live_mask
   ret i32 %sum
 }

 ; vscale is in [1, 16], so vscale * -5 lies in [-80, -5] and every such value
 ; already has bits 7 and above set. The or with 4294967168 (0xFFFFFF80) is
 ; therefore a no-op, while the and with 4294967232 (0xFFFFFFC0) is expected
 ; to remain (#0xffffffc0 in the expected assembly).
 define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
 ; CHECK-LABEL: vscale_with_negative_multiplier:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov x9, #-5 // =0xfffffffffffffffb
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    and w9, w8, #0xffffffc0
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
   %vscale = call i32 @llvm.vscale.i32()
   %mul = mul i32 %vscale, -5
   %or_redundant = or i32 %mul, 4294967168
   ; Named for the operation it holds: this value is produced by an and.
   %and_required = and i32 %mul, 4294967232
   %result = add i32 %or_redundant, %and_required
   ret i32 %result
 }

 ; vscale is in [1, 16], so vscale * -2 lies in [-32, -2] and every such value
 ; already has bits 5 and above set. The or with 4294967264 (0xFFFFFFE0) is
 ; therefore a no-op, while the or with 4294967280 (0xFFFFFFF0) can still set
 ; bit 4 and is expected to remain (#0xfffffff0 in the expected assembly).
 define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
 ; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    neg x9, x8
 ; CHECK-NEXT:    orr w9, w9, #0xfffffff0
 ; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %vs = call i32 @llvm.vscale.i32()
   %scaled = mul i32 %vs, -2
   ; No-op or: expected to fold away.
   %noop_or = or i32 %scaled, 4294967264
   ; Or that may set bit 4: expected to be kept.
   %live_or = or i32 %scaled, 4294967280
   %sum = add i32 %noop_or, %live_or
   ret i32 %sum
 }

 ; Declarations of the intrinsics called by the test functions in this file:
 ; vscale as an i32, and the four SVE element-count intrinsics, each taking an
 ; i32 pattern operand and returning an i64.
 declare i32 @llvm.vscale.i32()
 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

	; This tests that various ands, sexts, and zexts (and other operations)
	; operating on vscale or the SVE count instructions can be eliminated
	; (via demanded bits) due to their known limited range.

	; On AArch64 vscale can be at most 16 (for a 2048-bit vector).
	; The counting instructions (sans multiplier) have a value of at most 256
	; (for a 2048-bit vector of i8s).

	; vscale is limited to [1, 16] by vscale_range, so masking it with 31 cannot
	; clear any bit. Only the second mask is expected to survive, narrowed to
	; #0x1c in the expected assembly.
	define i32 @vscale_and_elimination() vscale_range(1,16) {
	; CHECK-LABEL: vscale_and_elimination:
	; CHECK: // %bb.0:
	; CHECK-NEXT: rdvl x8, #1
	; CHECK-NEXT: lsr x8, x8, #4
	; CHECK-NEXT: and w9, w8, #0x1c
	; CHECK-NEXT: add w0, w8, w9
	; CHECK-NEXT: ret
	%vscale = call i32 @llvm.vscale.i32()
	%and_redundant = and i32 %vscale, 31
	; 4294967292 is 0xFFFFFFFC: the former literal 17179869180 (0x3FFFFFFFC)
	; reduced to 32 bits so that it actually fits the i32 operand type.
	%and_required = and i32 %vscale, 4294967292
	%result = add i32 %and_redundant, %and_required
	ret i32 %result
	}

	; The file header notes that a count instruction yields at most 256, so the
	; mask with 511 cannot clear any bit. Only the second mask is expected to
	; remain, narrowed to #0x1fc in the expected assembly.
	define i64 @cntb_and_elimination() {
	; CHECK-LABEL: cntb_and_elimination:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntb x8
	; CHECK-NEXT: and x9, x8, #0x1fc
	; CHECK-NEXT: add x0, x8, x9
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntb(i32 31)
	; No-op mask: expected to fold away.
	%noop_mask = and i64 %count, 511
	; Mask that drops the two low bits: expected to be kept.
	%live_mask = and i64 %count, 17179869180
	%sum = add i64 %noop_mask, %live_mask
	ret i64 %sum
	}

	; The file header notes that a count instruction yields at most 256, so the
	; mask with 1023 cannot clear any bit. Only the second mask is expected to
	; remain, narrowed to #0xfc in the expected assembly.
	define i64 @cnth_and_elimination() {
	; CHECK-LABEL: cnth_and_elimination:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cnth x8
	; CHECK-NEXT: and x9, x8, #0xfc
	; CHECK-NEXT: add x0, x8, x9
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cnth(i32 31)
	; No-op mask: expected to fold away.
	%noop_mask = and i64 %count, 1023
	; Mask that drops the two low bits: expected to be kept.
	%live_mask = and i64 %count, 17179869180
	%sum = add i64 %noop_mask, %live_mask
	ret i64 %sum
	}

	; The mask with 127 is expected to fold away for the word count, leaving only
	; the second mask, narrowed to #0x7c in the expected assembly.
	define i64 @cntw_and_elimination() {
	; CHECK-LABEL: cntw_and_elimination:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntw x8
	; CHECK-NEXT: and x9, x8, #0x7c
	; CHECK-NEXT: add x0, x8, x9
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntw(i32 31)
	; No-op mask: expected to fold away.
	%noop_mask = and i64 %count, 127
	; Mask that drops the two low bits: expected to be kept.
	%live_mask = and i64 %count, 17179869180
	%sum = add i64 %noop_mask, %live_mask
	ret i64 %sum
	}

	; The mask with 63 is expected to fold away for the doubleword count, leaving
	; only the second mask, narrowed to #0x3c in the expected assembly.
	define i64 @cntd_and_elimination() {
	; CHECK-LABEL: cntd_and_elimination:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntd x8
	; CHECK-NEXT: and x9, x8, #0x3c
	; CHECK-NEXT: add x0, x8, x9
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntd(i32 31)
	; No-op mask: expected to fold away.
	%noop_mask = and i64 %count, 63
	; Mask that drops the two low bits: expected to be kept.
	%live_mask = and i64 %count, 17179869180
	%sum = add i64 %noop_mask, %live_mask
	ret i64 %sum
	}

	; Zero-extends the i32 vscale (limited to [1, 16] by vscale_range) to i64.
	; The expected assembly contains no separate extension instruction.
	define i64 @vscale_trunc_zext() vscale_range(1,16) {
	; CHECK-LABEL: vscale_trunc_zext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: rdvl x8, #1
	; CHECK-NEXT: lsr x0, x8, #4
	; CHECK-NEXT: ret
	%vs = call i32 @llvm.vscale.i32()
	%wide = zext i32 %vs to i64
	ret i64 %wide
	}

	; Sign-extends the i32 vscale (limited to [1, 16] by vscale_range) to i64.
	; The expected assembly contains no separate extension instruction.
	define i64 @vscale_trunc_sext() vscale_range(1,16) {
	; CHECK-LABEL: vscale_trunc_sext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: rdvl x8, #1
	; CHECK-NEXT: lsr x0, x8, #4
	; CHECK-NEXT: ret
	%vs = call i32 @llvm.vscale.i32()
	%wide = sext i32 %vs to i64
	ret i64 %wide
	}

	; Truncates the byte count to i32 and zero-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntb.
	define i64 @count_bytes_trunc_zext() {
	; CHECK-LABEL: count_bytes_trunc_zext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntb x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntb(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = zext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the halfword count to i32 and zero-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cnth.
	define i64 @count_halfs_trunc_zext() {
	; CHECK-LABEL: count_halfs_trunc_zext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cnth x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cnth(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = zext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the word count to i32 and zero-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntw.
	define i64 @count_words_trunc_zext() {
	; CHECK-LABEL: count_words_trunc_zext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntw x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntw(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = zext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the doubleword count to i32 and zero-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntd.
	define i64 @count_doubles_trunc_zext() {
	; CHECK-LABEL: count_doubles_trunc_zext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntd x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntd(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = zext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the byte count to i32 and sign-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntb.
	define i64 @count_bytes_trunc_sext() {
	; CHECK-LABEL: count_bytes_trunc_sext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntb x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntb(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = sext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the halfword count to i32 and sign-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cnth.
	define i64 @count_halfs_trunc_sext() {
	; CHECK-LABEL: count_halfs_trunc_sext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cnth x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cnth(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = sext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the word count to i32 and sign-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntw.
	define i64 @count_words_trunc_sext() {
	; CHECK-LABEL: count_words_trunc_sext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntw x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntw(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = sext i32 %lo32 to i64
	ret i64 %wide
	}

	; Truncates the doubleword count to i32 and sign-extends it back to i64. The
	; round trip is expected to fold away, leaving a lone cntd.
	define i64 @count_doubles_trunc_sext() {
	; CHECK-LABEL: count_doubles_trunc_sext:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntd x0
	; CHECK-NEXT: ret
	%count = call i64 @llvm.aarch64.sve.cntd(i32 31)
	%lo32 = trunc i64 %count to i32
	%wide = sext i32 %lo32 to i64
	ret i64 %wide
	}

	; vscale is in [1, 16], so vscale * 5 is at most 80 and fits in seven bits.
	; The mask with 127 is therefore a no-op, while the mask with 63 is expected
	; to remain (#0x3f in the expected assembly).
	define i32 @vscale_with_multiplier() vscale_range(1,16) {
	; CHECK-LABEL: vscale_with_multiplier:
	; CHECK: // %bb.0:
	; CHECK-NEXT: rdvl x8, #1
	; CHECK-NEXT: mov w9, #5 // =0x5
	; CHECK-NEXT: lsr x8, x8, #4
	; CHECK-NEXT: mul x8, x8, x9
	; CHECK-NEXT: and w9, w8, #0x3f
	; CHECK-NEXT: add w0, w8, w9
	; CHECK-NEXT: ret
	%vs = call i32 @llvm.vscale.i32()
	%scaled = mul i32 %vs, 5
	; No-op mask: expected to fold away.
	%noop_mask = and i32 %scaled, 127
	; Six-bit mask: expected to be kept.
	%live_mask = and i32 %scaled, 63
	%sum = add i32 %noop_mask, %live_mask
	ret i32 %sum
	}

	; vscale is in [1, 16], so vscale * -5 lies in [-80, -5] and every such value
	; already has bits 7 and above set. The or with 4294967168 (0xFFFFFF80) is
	; therefore a no-op, while the and with 4294967232 (0xFFFFFFC0) is expected
	; to remain (#0xffffffc0 in the expected assembly).
	define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
	; CHECK-LABEL: vscale_with_negative_multiplier:
	; CHECK: // %bb.0:
	; CHECK-NEXT: rdvl x8, #1
	; CHECK-NEXT: mov x9, #-5 // =0xfffffffffffffffb
	; CHECK-NEXT: lsr x8, x8, #4
	; CHECK-NEXT: mul x8, x8, x9
	; CHECK-NEXT: and w9, w8, #0xffffffc0
	; CHECK-NEXT: add w0, w8, w9
	; CHECK-NEXT: ret
	%vscale = call i32 @llvm.vscale.i32()
	%mul = mul i32 %vscale, -5
	%or_redundant = or i32 %mul, 4294967168
	; Named for the operation it holds: this value is produced by an and.
	%and_required = and i32 %mul, 4294967232
	%result = add i32 %or_redundant, %and_required
	ret i32 %result
	}

	; vscale is in [1, 16], so vscale * -2 lies in [-32, -2] and every such value
	; already has bits 5 and above set. The or with 4294967264 (0xFFFFFFE0) is
	; therefore a no-op, while the or with 4294967280 (0xFFFFFFF0) can still set
	; bit 4 and is expected to remain (#0xfffffff0 in the expected assembly).
	define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
	; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
	; CHECK: // %bb.0:
	; CHECK-NEXT: cntd x8
	; CHECK-NEXT: neg x9, x8
	; CHECK-NEXT: orr w9, w9, #0xfffffff0
	; CHECK-NEXT: sub w0, w9, w8
	; CHECK-NEXT: ret
	%vs = call i32 @llvm.vscale.i32()
	%scaled = mul i32 %vs, -2
	; No-op or: expected to fold away.
	%noop_or = or i32 %scaled, 4294967264
	; Or that may set bit 4: expected to be kept.
	%live_or = or i32 %scaled, 4294967280
	%sum = add i32 %noop_or, %live_or
	ret i32 %sum
	}

	; Declarations of the intrinsics called by the test functions in this file:
	; vscale as an i32, and the four SVE element-count intrinsics, each taking an
	; i32 pattern operand and returning an i64.
	declare i32 @llvm.vscale.i32()
	declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
	declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
	declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
	declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)