| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s |
| |
| ; This tests that various ands, sexts, and zexts (and other operations) |
| ; operating on vscale or the SVE count instructions can be eliminated |
| ; (via demanded bits) due to their known limited range. |
| |
| ; On AArch64 vscale can be at most 16 (for a 2048-bit vector). |
| ; The counting instructions (sans multiplier) have a value of at most 256 |
| ; (for a 2048-bit vector of i8s). |
| |
| ; vscale_range(1,16) bounds vscale to [1, 16], i.e. to the low five bits. |
| ; Masking with 31 is therefore a no-op and should vanish, while the mask that |
| ; clears bits 0-1 has to stay (narrowed to 0x1c in the expected assembly). |
| define i32 @vscale_and_elimination() vscale_range(1,16) { |
| ; CHECK-LABEL: vscale_and_elimination: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: rdvl x8, #1 |
| ; CHECK-NEXT: lsr x8, x8, #4 |
| ; CHECK-NEXT: and w9, w8, #0x1c |
| ; CHECK-NEXT: add w0, w8, w9 |
| ; CHECK-NEXT: ret |
| %vs = call i32 @llvm.vscale.i32() |
| %mask_noop = and i32 %vs, 31 |
| ; 4294967292 is 0xFFFFFFFC: every bit except the low two, expressed so that |
| ; the literal actually fits in i32 instead of relying on parser truncation. |
| %mask_kept = and i32 %vs, 4294967292 |
| %sum = add i32 %mask_noop, %mask_kept |
| ret i32 %sum |
| } |
| |
| ; The byte count is at most 256, so it never uses more than the low nine bits. |
| ; The mask with 511 is redundant; the mask that clears bits 0-1 is not and is |
| ; expected to shrink to 0x1fc. |
| define i64 @cntb_and_elimination() { |
| ; CHECK-LABEL: cntb_and_elimination: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntb x8 |
| ; CHECK-NEXT: and x9, x8, #0x1fc |
| ; CHECK-NEXT: add x0, x8, x9 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntb(i32 31) |
| %mask_noop = and i64 %count, 511 |
| ; 17179869180 is 0x3FFFFFFFC: a wide mask with only bits 0-1 cleared. |
| %mask_kept = and i64 %count, 17179869180 |
| %sum = add i64 %mask_noop, %mask_kept |
| ret i64 %sum |
| } |
| |
| ; The halfword count is at most 128 (2048 bits / 16-bit elements), so only the |
| ; low eight bits can ever be set. The redundant mask uses the tightest value |
| ; that still covers that range (255), matching the 511/127/63 masks used by |
| ; the byte/word/doubleword variants. The mask that clears bits 0-1 must stay |
| ; and is expected to shrink to 0xfc. |
| define i64 @cnth_and_elimination() { |
| ; CHECK-LABEL: cnth_and_elimination: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cnth x8 |
| ; CHECK-NEXT: and x9, x8, #0xfc |
| ; CHECK-NEXT: add x0, x8, x9 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cnth(i32 31) |
| %mask_noop = and i64 %count, 255 |
| ; 17179869180 is 0x3FFFFFFFC: a wide mask with only bits 0-1 cleared. |
| %mask_kept = and i64 %count, 17179869180 |
| %sum = add i64 %mask_noop, %mask_kept |
| ret i64 %sum |
| } |
| |
| ; The word count is at most 64 (2048 bits / 32-bit elements), i.e. seven bits. |
| ; The mask with 127 is redundant; the mask that clears bits 0-1 must stay and |
| ; is expected to shrink to 0x7c. |
| define i64 @cntw_and_elimination() { |
| ; CHECK-LABEL: cntw_and_elimination: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntw x8 |
| ; CHECK-NEXT: and x9, x8, #0x7c |
| ; CHECK-NEXT: add x0, x8, x9 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntw(i32 31) |
| %mask_noop = and i64 %count, 127 |
| ; 17179869180 is 0x3FFFFFFFC: a wide mask with only bits 0-1 cleared. |
| %mask_kept = and i64 %count, 17179869180 |
| %sum = add i64 %mask_noop, %mask_kept |
| ret i64 %sum |
| } |
| |
| ; The doubleword count is at most 32 (2048 bits / 64-bit elements), i.e. six |
| ; bits. The mask with 63 is redundant; the mask that clears bits 0-1 must stay |
| ; and is expected to shrink to 0x3c. |
| define i64 @cntd_and_elimination() { |
| ; CHECK-LABEL: cntd_and_elimination: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntd x8 |
| ; CHECK-NEXT: and x9, x8, #0x3c |
| ; CHECK-NEXT: add x0, x8, x9 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntd(i32 31) |
| %mask_noop = and i64 %count, 63 |
| ; 17179869180 is 0x3FFFFFFFC: a wide mask with only bits 0-1 cleared. |
| %mask_kept = and i64 %count, 17179869180 |
| %sum = add i64 %mask_noop, %mask_kept |
| ret i64 %sum |
| } |
| |
| ; Zero-extending a 32-bit vscale (bounded to [1, 16]) to 64 bits should need |
| ; no extra instruction beyond computing vscale itself. |
| define i64 @vscale_trunc_zext() vscale_range(1,16) { |
| ; CHECK-LABEL: vscale_trunc_zext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: rdvl x8, #1 |
| ; CHECK-NEXT: lsr x0, x8, #4 |
| ; CHECK-NEXT: ret |
| %vs32 = call i32 @llvm.vscale.i32() |
| %vs64 = zext i32 %vs32 to i64 |
| ret i64 %vs64 |
| } |
| |
| ; Sign-extending a 32-bit vscale (bounded to [1, 16], so never negative) to |
| ; 64 bits should need no extra instruction beyond computing vscale itself. |
| define i64 @vscale_trunc_sext() vscale_range(1,16) { |
| ; CHECK-LABEL: vscale_trunc_sext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: rdvl x8, #1 |
| ; CHECK-NEXT: lsr x0, x8, #4 |
| ; CHECK-NEXT: ret |
| %vs32 = call i32 @llvm.vscale.i32() |
| %vs64 = sext i32 %vs32 to i64 |
| ret i64 %vs64 |
| } |
| |
| ; The byte count (at most 256) survives a round trip through i32 unchanged, |
| ; so the trunc + zext pair should fold away and leave a bare cntb. |
| define i64 @count_bytes_trunc_zext() { |
| ; CHECK-LABEL: count_bytes_trunc_zext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntb x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntb(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = zext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The halfword count survives a round trip through i32 unchanged, so the |
| ; trunc + zext pair should fold away and leave a bare cnth. |
| define i64 @count_halfs_trunc_zext() { |
| ; CHECK-LABEL: count_halfs_trunc_zext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cnth x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cnth(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = zext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The word count survives a round trip through i32 unchanged, so the |
| ; trunc + zext pair should fold away and leave a bare cntw. |
| define i64 @count_words_trunc_zext() { |
| ; CHECK-LABEL: count_words_trunc_zext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntw x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntw(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = zext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The doubleword count survives a round trip through i32 unchanged, so the |
| ; trunc + zext pair should fold away and leave a bare cntd. |
| define i64 @count_doubles_trunc_zext() { |
| ; CHECK-LABEL: count_doubles_trunc_zext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntd x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntd(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = zext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The byte count (at most 256) is small and non-negative, so truncating to |
| ; i32 and sign-extending back should fold away and leave a bare cntb. |
| define i64 @count_bytes_trunc_sext() { |
| ; CHECK-LABEL: count_bytes_trunc_sext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntb x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntb(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = sext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The halfword count is small and non-negative, so truncating to i32 and |
| ; sign-extending back should fold away and leave a bare cnth. |
| define i64 @count_halfs_trunc_sext() { |
| ; CHECK-LABEL: count_halfs_trunc_sext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cnth x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cnth(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = sext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The word count is small and non-negative, so truncating to i32 and |
| ; sign-extending back should fold away and leave a bare cntw. |
| define i64 @count_words_trunc_sext() { |
| ; CHECK-LABEL: count_words_trunc_sext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntw x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntw(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = sext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; The doubleword count is small and non-negative, so truncating to i32 and |
| ; sign-extending back should fold away and leave a bare cntd. |
| define i64 @count_doubles_trunc_sext() { |
| ; CHECK-LABEL: count_doubles_trunc_sext: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntd x0 |
| ; CHECK-NEXT: ret |
| %count = call i64 @llvm.aarch64.sve.cntd(i32 31) |
| %narrow = trunc i64 %count to i32 |
| %wide = sext i32 %narrow to i64 |
| ret i64 %wide |
| } |
| |
| ; With vscale in [1, 16], 5 * vscale is at most 80 and fits in seven bits. |
| ; The mask with 127 is therefore redundant, whereas the mask with 63 drops |
| ; bit 6 (set for 80, for example) and has to be kept. |
| define i32 @vscale_with_multiplier() vscale_range(1,16) { |
| ; CHECK-LABEL: vscale_with_multiplier: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: rdvl x8, #1 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: lsr x8, x8, #4 |
| ; CHECK-NEXT: mul x8, x8, x9 |
| ; CHECK-NEXT: and w9, w8, #0x3f |
| ; CHECK-NEXT: add w0, w8, w9 |
| ; CHECK-NEXT: ret |
| %vs = call i32 @llvm.vscale.i32() |
| %scaled = mul i32 %vs, 5 |
| %mask_noop = and i32 %scaled, 127 |
| %mask_kept = and i32 %scaled, 63 |
| %sum = add i32 %mask_noop, %mask_kept |
| ret i32 %sum |
| } |
| |
| ; With vscale in [1, 16], -5 * vscale lies in [-80, -5], so bits 7 and above |
| ; are always set. Or-ing in 0xFFFFFF80 (4294967168) is therefore a no-op. |
| ; The second operation is an 'and' with 0xFFFFFFC0 (4294967232): it clears |
| ; low bits that may be set and so has to be kept. |
| ; NOTE(review): the sibling pow2 test uses 'or' for both operations; confirm |
| ; whether an 'or' was intended here before regenerating the assertions. |
| define i32 @vscale_with_negative_multiplier() vscale_range(1,16) { |
| ; CHECK-LABEL: vscale_with_negative_multiplier: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: rdvl x8, #1 |
| ; CHECK-NEXT: mov x9, #-5 // =0xfffffffffffffffb |
| ; CHECK-NEXT: lsr x8, x8, #4 |
| ; CHECK-NEXT: mul x8, x8, x9 |
| ; CHECK-NEXT: and w9, w8, #0xffffffc0 |
| ; CHECK-NEXT: add w0, w8, w9 |
| ; CHECK-NEXT: ret |
| %vs = call i32 @llvm.vscale.i32() |
| %scaled = mul i32 %vs, -5 |
| %or_noop = or i32 %scaled, 4294967168 |
| %and_kept = and i32 %scaled, 4294967232 |
| %sum = add i32 %or_noop, %and_kept |
| ret i32 %sum |
| } |
| |
| ; With vscale in [1, 16], -2 * vscale lies in [-32, -2], so bits 5 and above |
| ; are always set. Or-ing in 0xFFFFFFE0 (4294967264) is therefore a no-op, |
| ; while or-ing in 0xFFFFFFF0 (4294967280) can set bit 4 (clear for -32, for |
| ; example) and has to be kept. |
| define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) { |
| ; CHECK-LABEL: pow2_vscale_with_negative_multiplier: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cntd x8 |
| ; CHECK-NEXT: neg x9, x8 |
| ; CHECK-NEXT: orr w9, w9, #0xfffffff0 |
| ; CHECK-NEXT: sub w0, w9, w8 |
| ; CHECK-NEXT: ret |
| %vs = call i32 @llvm.vscale.i32() |
| %scaled = mul i32 %vs, -2 |
| %or_noop = or i32 %scaled, 4294967264 |
| %or_kept = or i32 %scaled, 4294967280 |
| %sum = add i32 %or_noop, %or_kept |
| ret i32 %sum |
| } |
| |
| ; SVE element-count intrinsics used above; each takes an i32 pattern operand |
| ; (every call in this file passes 31). |
| declare i64 @llvm.aarch64.sve.cntb(i32 %pattern) |
| declare i64 @llvm.aarch64.sve.cnth(i32 %pattern) |
| declare i64 @llvm.aarch64.sve.cntw(i32 %pattern) |
| declare i64 @llvm.aarch64.sve.cntd(i32 %pattern) |
| ; Generic vscale intrinsic, used here at 32-bit width. |
| declare i32 @llvm.vscale.i32() |