| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK,CHECK0 |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK,CHECK64 |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024 |
| |
| define i32 @basic(i32 noundef %num) { |
| ; CHECK-LABEL: basic: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| ; Trivial body: no FPR use and no stack objects, so no hazard padding at any hazard size. |
| entry: |
| ret i32 0 |
| } |
| |
| ; Non-streaming functions don't need hazards |
| define i32 @csr_d8_notsc(i32 noundef %num) { |
| ; CHECK-LABEL: csr_d8_notsc: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset b8, -16 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; Clobbering d8 forces a callee-save spill, but the function is not streaming-compatible, |
| ; so the same (non-padded) layout is expected for all three hazard sizes. |
| entry: |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| ret i32 0 |
| } |
| |
| ; Very simple - doesn't require hazards |
| define i32 @basic_sc(i32 noundef %num) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: basic_sc: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| ; Streaming-compatible, but touches no FPRs and no stack, so no hazard padding. |
| entry: |
| ret i32 0 |
| } |
| |
| ; No fpr accesses/csrs - doesn't require hazards |
| define i32 @nocsr_alloci64(i64 %d) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: nocsr_alloci64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: str x8, [sp, #8] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; GPR-only stack object (i64): no FPR accesses, so the same layout for all hazard sizes. |
| entry: |
| %a = alloca i64 |
| store i64 %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; No fpr accesses/csrs - doesn't require hazards |
| define i32 @csr_x20(i32 noundef %num) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: csr_x20: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x20, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: ldr x20, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; GPR-only callee save (x20): no FPRs involved, so no hazard padding is expected. |
| entry: |
| tail call void asm sideeffect "", "~{x20}"() #1 |
| ret i32 0 |
| } |
| |
| ; CSR of d8. Make sure there is a gap between the FPR and GPR save areas |
| define i32 @csr_d8(i32 noundef %num) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: .cfi_offset b8, -16 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #144 |
| ; CHECK64-NEXT: str d8, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 144 |
| ; CHECK64-NEXT: .cfi_offset b8, -80 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #144 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2064 |
| ; CHECK1024-NEXT: .cfi_offset w29, -8 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1040 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| ; With a non-zero hazard size, the d8 save should be separated from the GPR |
| ; area of the frame by at least the hazard padding. |
| entry: |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| ret i32 0 |
| } |
| |
| ; Stack fpr objects. |
| define i32 @nocsr_allocd(double %d) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: nocsr_allocd: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: sub sp, sp, #16 |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: str d0, [sp, #8] |
| ; CHECK0-NEXT: add sp, sp, #16 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: nocsr_allocd: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 80 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: str d0, [sp, #72] |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: nocsr_allocd: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2080 |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: str d0, [sp, #1032] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| ; The double stack slot is an FPR access, so it should sit across the hazard |
| ; padding from the GPR parts of the frame. |
| entry: |
| %a = alloca double |
| store double %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; CSR of d8 and d9: both FPR saves are kept together, separated from the GPR |
| ; save area by the hazard padding when a hazard size is set. |
| define i32 @csr_d8d9(i32 noundef %num) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8d9: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: stp d9, d8, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: .cfi_offset b8, -8 |
| ; CHECK0-NEXT: .cfi_offset b9, -16 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldp d9, d8, [sp], #16 // 16-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8d9: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #144 |
| ; CHECK64-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 144 |
| ; CHECK64-NEXT: .cfi_offset b8, -72 |
| ; CHECK64-NEXT: .cfi_offset b9, -80 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #144 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8d9: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1056 |
| ; CHECK1024-NEXT: stp d9, d8, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2080 |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1048 |
| ; CHECK1024-NEXT: .cfi_offset b9, -1056 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldp d9, d8, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1040] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1056 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{d8},~{d9}"() #1 |
| ret i32 0 |
| } |
| |
| ; FPR CSR (d8) plus an FPR stack object (double): both belong on the FPR side |
| ; of the hazard padding. |
| define i32 @csr_d8_allocd(double %d) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8_allocd: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: stp d8, d0, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: .cfi_offset b8, -16 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8_allocd: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #160 |
| ; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 160 |
| ; CHECK64-NEXT: .cfi_offset b8, -80 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #160 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8_allocd: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2080 |
| ; CHECK1024-NEXT: .cfi_offset w29, -8 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1040 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str d0, [sp, #1032] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca double |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store double %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; FPR CSR (d8) together with a GPR stack object (i64): the i64 slot stays on |
| ; the GPR side of the hazard padding, away from the d8 save. |
| define i32 @csr_d8_alloci64(i64 %d) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8_alloci64: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: .cfi_offset b8, -16 |
| ; CHECK0-NEXT: mov x8, x0 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: str x8, [sp, #8] |
| ; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8_alloci64: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #160 |
| ; CHECK64-NEXT: str d8, [sp, #80] // 8-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 160 |
| ; CHECK64-NEXT: .cfi_offset b8, -80 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: mov x8, x0 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload |
| ; CHECK64-NEXT: str x8, [sp, #8] |
| ; CHECK64-NEXT: add sp, sp, #160 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8_alloci64: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2080 |
| ; CHECK1024-NEXT: .cfi_offset w29, -8 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1040 |
| ; CHECK1024-NEXT: mov x8, x0 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str x8, [sp, #8] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca i64 |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store i64 %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; Check the frame pointer is in the right place |
| define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" { |
| ; CHECK0-LABEL: csr_d8_allocd_framepointer: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK0-NEXT: add x29, sp, #16 |
| ; CHECK0-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK0-NEXT: .cfi_offset w30, -8 |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_offset b8, -32 |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: str d0, [sp, #8] |
| ; CHECK0-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8_allocd_framepointer: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #176 |
| ; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill |
| ; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill |
| ; CHECK64-NEXT: add x29, sp, #152 |
| ; CHECK64-NEXT: .cfi_def_cfa w29, 24 |
| ; CHECK64-NEXT: .cfi_offset w30, -16 |
| ; CHECK64-NEXT: .cfi_offset w29, -24 |
| ; CHECK64-NEXT: .cfi_offset b8, -96 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #176 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8_allocd_framepointer: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1056 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: add x29, sp, #1032 |
| ; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_def_cfa w29, 24 |
| ; CHECK1024-NEXT: .cfi_offset w30, -16 |
| ; CHECK1024-NEXT: .cfi_offset w29, -24 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1056 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str d0, [sp, #1032] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1056 |
| ; CHECK1024-NEXT: ret |
| ; x29/x30 are saved with the GPR area, across the hazard padding from the d8 |
| ; save and the double stack slot. |
| entry: |
| %a = alloca double |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store double %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; SVE stack objects should be placed with the other FPR registers |
| define i32 @csr_d8_allocnxv4i32(i64 %d) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8_allocnxv4i32: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: str x29, [sp, #8] // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -8 |
| ; CHECK0-NEXT: .cfi_offset b8, -16 |
| ; CHECK0-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK0-NEXT: ptrue p0.s |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: st1w { z0.s }, p0, [sp] |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8_allocnxv4i32: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: str d8, [sp, #-80]! // 8-byte Folded Spill |
| ; CHECK64-NEXT: str x29, [sp, #72] // 8-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -8 |
| ; CHECK64-NEXT: .cfi_offset b8, -80 |
| ; CHECK64-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK64-NEXT: ptrue p0.s |
| ; CHECK64-NEXT: add x8, sp, #64 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: ldr x29, [sp, #72] // 8-byte Folded Reload |
| ; CHECK64-NEXT: ldr d8, [sp], #80 // 8-byte Folded Reload |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8_allocnxv4i32: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -8 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1040 |
| ; CHECK1024-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK1024-NEXT: ptrue p0.s |
| ; CHECK1024-NEXT: add x8, sp, #1024 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| ; The scalable vector object is allocated alongside the d8 save, on the FPR |
| ; side of the hazard padding. |
| entry: |
| %a = alloca <vscale x 4 x i32> |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store <vscale x 4 x i32> zeroinitializer, ptr %a |
| ret i32 0 |
| } |
| |
| ; Many GPR CSRs (x18-x25) and FPR CSRs (d8-d15) plus one GPR (i64) and one |
| ; FPR (double) stack object: each register class should be grouped on its own |
| ; side of the hazard padding. |
| define i32 @csr_x18_25_d8_15_allocdi64(i64 %d, double %e) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: sub sp, sp, #144 |
| ; CHECK0-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str x25, [sp, #80] // 8-byte Folded Spill |
| ; CHECK0-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 144 |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w20, -16 |
| ; CHECK0-NEXT: .cfi_offset w21, -24 |
| ; CHECK0-NEXT: .cfi_offset w22, -32 |
| ; CHECK0-NEXT: .cfi_offset w23, -40 |
| ; CHECK0-NEXT: .cfi_offset w24, -48 |
| ; CHECK0-NEXT: .cfi_offset w25, -64 |
| ; CHECK0-NEXT: .cfi_offset b8, -72 |
| ; CHECK0-NEXT: .cfi_offset b9, -80 |
| ; CHECK0-NEXT: .cfi_offset b10, -88 |
| ; CHECK0-NEXT: .cfi_offset b11, -96 |
| ; CHECK0-NEXT: .cfi_offset b12, -104 |
| ; CHECK0-NEXT: .cfi_offset b13, -112 |
| ; CHECK0-NEXT: .cfi_offset b14, -120 |
| ; CHECK0-NEXT: .cfi_offset b15, -128 |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: mov x8, x0 |
| ; CHECK0-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr x25, [sp, #80] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload |
| ; CHECK0-NEXT: str x8, [sp, #88] |
| ; CHECK0-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK0-NEXT: str d0, [sp, #8] |
| ; CHECK0-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload |
| ; CHECK0-NEXT: add sp, sp, #144 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #288 |
| ; CHECK64-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x29, x25, [sp, #224] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x24, x23, [sp, #240] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x22, x21, [sp, #256] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x20, x19, [sp, #272] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 288 |
| ; CHECK64-NEXT: .cfi_offset w19, -8 |
| ; CHECK64-NEXT: .cfi_offset w20, -16 |
| ; CHECK64-NEXT: .cfi_offset w21, -24 |
| ; CHECK64-NEXT: .cfi_offset w22, -32 |
| ; CHECK64-NEXT: .cfi_offset w23, -40 |
| ; CHECK64-NEXT: .cfi_offset w24, -48 |
| ; CHECK64-NEXT: .cfi_offset w25, -56 |
| ; CHECK64-NEXT: .cfi_offset w29, -64 |
| ; CHECK64-NEXT: .cfi_offset b8, -136 |
| ; CHECK64-NEXT: .cfi_offset b9, -144 |
| ; CHECK64-NEXT: .cfi_offset b10, -152 |
| ; CHECK64-NEXT: .cfi_offset b11, -160 |
| ; CHECK64-NEXT: .cfi_offset b12, -168 |
| ; CHECK64-NEXT: .cfi_offset b13, -176 |
| ; CHECK64-NEXT: .cfi_offset b14, -184 |
| ; CHECK64-NEXT: .cfi_offset b15, -192 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: mov x8, x0 |
| ; CHECK64-NEXT: ldp x20, x19, [sp, #272] // 16-byte Folded Reload |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldp x22, x21, [sp, #256] // 16-byte Folded Reload |
| ; CHECK64-NEXT: str x8, [sp, #8] |
| ; CHECK64-NEXT: ldp x24, x23, [sp, #240] // 16-byte Folded Reload |
| ; CHECK64-NEXT: str d0, [sp, #88] |
| ; CHECK64-NEXT: ldp x29, x25, [sp, #224] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #288 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1152 |
| ; CHECK1024-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x25, [sp, #1096] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x24, [sp, #1104] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x23, [sp, #1112] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x22, [sp, #1120] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x21, [sp, #1128] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x20, [sp, #1136] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1144] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1056 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2208 |
| ; CHECK1024-NEXT: .cfi_offset w19, -8 |
| ; CHECK1024-NEXT: .cfi_offset w20, -16 |
| ; CHECK1024-NEXT: .cfi_offset w21, -24 |
| ; CHECK1024-NEXT: .cfi_offset w22, -32 |
| ; CHECK1024-NEXT: .cfi_offset w23, -40 |
| ; CHECK1024-NEXT: .cfi_offset w24, -48 |
| ; CHECK1024-NEXT: .cfi_offset w25, -56 |
| ; CHECK1024-NEXT: .cfi_offset w29, -64 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1096 |
| ; CHECK1024-NEXT: .cfi_offset b9, -1104 |
| ; CHECK1024-NEXT: .cfi_offset b10, -1112 |
| ; CHECK1024-NEXT: .cfi_offset b11, -1120 |
| ; CHECK1024-NEXT: .cfi_offset b12, -1128 |
| ; CHECK1024-NEXT: .cfi_offset b13, -1136 |
| ; CHECK1024-NEXT: .cfi_offset b14, -1144 |
| ; CHECK1024-NEXT: .cfi_offset b15, -1152 |
| ; CHECK1024-NEXT: mov x8, x0 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str x8, [sp, #8] |
| ; CHECK1024-NEXT: str d0, [sp, #1048] |
| ; CHECK1024-NEXT: add sp, sp, #1056 |
| ; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x19, [sp, #1144] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x20, [sp, #1136] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x21, [sp, #1128] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x22, [sp, #1120] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x23, [sp, #1112] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x24, [sp, #1104] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x25, [sp, #1096] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1152 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca i64 |
| %b = alloca double |
| tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"() |
| tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() |
| store i64 %d, ptr %a |
| store double %e, ptr %b |
| ret i32 0 |
| } |
| |
| define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarch64_pstate_sm_body" "target-features"="+sme" { |
| ; CHECK0-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: sub sp, sp, #176 |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 176 |
| ; CHECK0-NEXT: rdsvl x9, #1 |
| ; CHECK0-NEXT: stp d15, d14, [sp, #48] // 16-byte Folded Spill |
| ; CHECK0-NEXT: lsr x9, x9, #3 |
| ; CHECK0-NEXT: stp d13, d12, [sp, #64] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d11, d10, [sp, #80] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str x9, [sp, #32] // 8-byte Folded Spill |
| ; CHECK0-NEXT: cntd x9 |
| ; CHECK0-NEXT: str x9, [sp, #40] // 8-byte Folded Spill |
| ; CHECK0-NEXT: stp d9, d8, [sp, #96] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str x25, [sp, #112] // 8-byte Folded Spill |
| ; CHECK0-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w20, -16 |
| ; CHECK0-NEXT: .cfi_offset w21, -24 |
| ; CHECK0-NEXT: .cfi_offset w22, -32 |
| ; CHECK0-NEXT: .cfi_offset w23, -40 |
| ; CHECK0-NEXT: .cfi_offset w24, -48 |
| ; CHECK0-NEXT: .cfi_offset w25, -64 |
| ; CHECK0-NEXT: .cfi_offset b8, -72 |
| ; CHECK0-NEXT: .cfi_offset b9, -80 |
| ; CHECK0-NEXT: .cfi_offset b10, -88 |
| ; CHECK0-NEXT: .cfi_offset b11, -96 |
| ; CHECK0-NEXT: .cfi_offset b12, -104 |
| ; CHECK0-NEXT: .cfi_offset b13, -112 |
| ; CHECK0-NEXT: .cfi_offset b14, -120 |
| ; CHECK0-NEXT: .cfi_offset b15, -128 |
| ; CHECK0-NEXT: .cfi_offset vg, -136 |
| ; CHECK0-NEXT: str d0, [sp, #8] // 8-byte Folded Spill |
| ; CHECK0-NEXT: smstart sm |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload |
| ; CHECK0-NEXT: str x0, [sp, #24] |
| ; CHECK0-NEXT: str d0, [sp, #16] |
| ; CHECK0-NEXT: smstop sm |
| ; CHECK0-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr x25, [sp, #112] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d9, d8, [sp, #96] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d11, d10, [sp, #80] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d13, d12, [sp, #64] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d15, d14, [sp, #48] // 16-byte Folded Reload |
| ; CHECK0-NEXT: add sp, sp, #176 |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK0-NEXT: .cfi_restore w19 |
| ; CHECK0-NEXT: .cfi_restore w20 |
| ; CHECK0-NEXT: .cfi_restore w21 |
| ; CHECK0-NEXT: .cfi_restore w22 |
| ; CHECK0-NEXT: .cfi_restore w23 |
| ; CHECK0-NEXT: .cfi_restore w24 |
| ; CHECK0-NEXT: .cfi_restore w25 |
| ; CHECK0-NEXT: .cfi_restore b8 |
| ; CHECK0-NEXT: .cfi_restore b9 |
| ; CHECK0-NEXT: .cfi_restore b10 |
| ; CHECK0-NEXT: .cfi_restore b11 |
| ; CHECK0-NEXT: .cfi_restore b12 |
| ; CHECK0-NEXT: .cfi_restore b13 |
| ; CHECK0-NEXT: .cfi_restore b14 |
| ; CHECK0-NEXT: .cfi_restore b15 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #304 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 304 |
| ; CHECK64-NEXT: rdsvl x9, #1 |
| ; CHECK64-NEXT: stp d15, d14, [sp, #112] // 16-byte Folded Spill |
| ; CHECK64-NEXT: lsr x9, x9, #3 |
| ; CHECK64-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str x9, [sp, #96] // 8-byte Folded Spill |
| ; CHECK64-NEXT: cntd x9 |
| ; CHECK64-NEXT: str x9, [sp, #104] // 8-byte Folded Spill |
| ; CHECK64-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x29, x25, [sp, #240] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_offset w19, -8 |
| ; CHECK64-NEXT: .cfi_offset w20, -16 |
| ; CHECK64-NEXT: .cfi_offset w21, -24 |
| ; CHECK64-NEXT: .cfi_offset w22, -32 |
| ; CHECK64-NEXT: .cfi_offset w23, -40 |
| ; CHECK64-NEXT: .cfi_offset w24, -48 |
| ; CHECK64-NEXT: .cfi_offset w25, -56 |
| ; CHECK64-NEXT: .cfi_offset w29, -64 |
| ; CHECK64-NEXT: .cfi_offset b8, -136 |
| ; CHECK64-NEXT: .cfi_offset b9, -144 |
| ; CHECK64-NEXT: .cfi_offset b10, -152 |
| ; CHECK64-NEXT: .cfi_offset b11, -160 |
| ; CHECK64-NEXT: .cfi_offset b12, -168 |
| ; CHECK64-NEXT: .cfi_offset b13, -176 |
| ; CHECK64-NEXT: .cfi_offset b14, -184 |
| ; CHECK64-NEXT: .cfi_offset b15, -192 |
| ; CHECK64-NEXT: .cfi_offset vg, -200 |
| ; CHECK64-NEXT: str d0, [sp, #80] // 8-byte Folded Spill |
| ; CHECK64-NEXT: smstart sm |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: ldr d0, [sp, #80] // 8-byte Folded Reload |
| ; CHECK64-NEXT: str x0, [sp, #8] |
| ; CHECK64-NEXT: str d0, [sp, #88] |
| ; CHECK64-NEXT: smstop sm |
| ; CHECK64-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x29, x25, [sp, #240] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #304 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK64-NEXT: .cfi_restore w19 |
| ; CHECK64-NEXT: .cfi_restore w20 |
| ; CHECK64-NEXT: .cfi_restore w21 |
| ; CHECK64-NEXT: .cfi_restore w22 |
| ; CHECK64-NEXT: .cfi_restore w23 |
| ; CHECK64-NEXT: .cfi_restore w24 |
| ; CHECK64-NEXT: .cfi_restore w25 |
| ; CHECK64-NEXT: .cfi_restore w29 |
| ; CHECK64-NEXT: .cfi_restore b8 |
| ; CHECK64-NEXT: .cfi_restore b9 |
| ; CHECK64-NEXT: .cfi_restore b10 |
| ; CHECK64-NEXT: .cfi_restore b11 |
| ; CHECK64-NEXT: .cfi_restore b12 |
| ; CHECK64-NEXT: .cfi_restore b13 |
| ; CHECK64-NEXT: .cfi_restore b14 |
| ; CHECK64-NEXT: .cfi_restore b15 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: rdsvl x9, #1 |
| ; CHECK1024-NEXT: lsr x9, x9, #3 |
| ; CHECK1024-NEXT: sub sp, sp, #1168 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1168 |
| ; CHECK1024-NEXT: str x9, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: cntd x9 |
| ; CHECK1024-NEXT: str x9, [sp, #8] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1104] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x25, [sp, #1112] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x24, [sp, #1120] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x23, [sp, #1128] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x22, [sp, #1136] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x21, [sp, #1144] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x20, [sp, #1152] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1160] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_offset w19, -8 |
| ; CHECK1024-NEXT: .cfi_offset w20, -16 |
| ; CHECK1024-NEXT: .cfi_offset w21, -24 |
| ; CHECK1024-NEXT: .cfi_offset w22, -32 |
| ; CHECK1024-NEXT: .cfi_offset w23, -40 |
| ; CHECK1024-NEXT: .cfi_offset w24, -48 |
| ; CHECK1024-NEXT: .cfi_offset w25, -56 |
| ; CHECK1024-NEXT: .cfi_offset w29, -64 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1096 |
| ; CHECK1024-NEXT: .cfi_offset b9, -1104 |
| ; CHECK1024-NEXT: .cfi_offset b10, -1112 |
| ; CHECK1024-NEXT: .cfi_offset b11, -1120 |
| ; CHECK1024-NEXT: .cfi_offset b12, -1128 |
| ; CHECK1024-NEXT: .cfi_offset b13, -1136 |
| ; CHECK1024-NEXT: .cfi_offset b14, -1144 |
| ; CHECK1024-NEXT: .cfi_offset b15, -1152 |
| ; CHECK1024-NEXT: .cfi_offset vg, -1160 |
| ; CHECK1024-NEXT: sub sp, sp, #1056 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2224 |
| ; CHECK1024-NEXT: str d0, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: smstart sm |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: ldr d0, [sp, #1040] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: str x0, [sp, #8] |
| ; CHECK1024-NEXT: str d0, [sp, #1048] |
| ; CHECK1024-NEXT: smstop sm |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: add sp, sp, #1056 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1168 |
| ; CHECK1024-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x19, [sp, #1160] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x20, [sp, #1152] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x21, [sp, #1144] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x22, [sp, #1136] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x23, [sp, #1128] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x24, [sp, #1120] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x25, [sp, #1112] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1104] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1168 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK1024-NEXT: .cfi_restore w19 |
| ; CHECK1024-NEXT: .cfi_restore w20 |
| ; CHECK1024-NEXT: .cfi_restore w21 |
| ; CHECK1024-NEXT: .cfi_restore w22 |
| ; CHECK1024-NEXT: .cfi_restore w23 |
| ; CHECK1024-NEXT: .cfi_restore w24 |
| ; CHECK1024-NEXT: .cfi_restore w25 |
| ; CHECK1024-NEXT: .cfi_restore w29 |
| ; CHECK1024-NEXT: .cfi_restore b8 |
| ; CHECK1024-NEXT: .cfi_restore b9 |
| ; CHECK1024-NEXT: .cfi_restore b10 |
| ; CHECK1024-NEXT: .cfi_restore b11 |
| ; CHECK1024-NEXT: .cfi_restore b12 |
| ; CHECK1024-NEXT: .cfi_restore b13 |
| ; CHECK1024-NEXT: .cfi_restore b14 |
| ; CHECK1024-NEXT: .cfi_restore b15 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca i64 |
| %b = alloca double |
| tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"() |
| tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() |
| store i64 %d, ptr %a |
| store double %e, ptr %b |
| ret i32 0 |
| } |
| |
| ; We don't currently handle fpr stack arguments very well (they are hopefully relatively rare). |
| ; With no callee-saves to spill there is nothing to pad: all three hazard sizes share one set |
| ; of checks and the incoming FP stack argument is loaded straight from [sp]. |
| define float @nocsr_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: nocsr_stackargs: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr s0, [sp] |
| ; CHECK-NEXT: ret |
| entry: |
| ret float %i |
| } |
| |
| ; Only the GPR x20 is spilled, so no hazard padding is inserted: all three hazard sizes share |
| ; one set of checks and the FP stack argument is read at [sp, #16], above the 16-byte spill. |
| define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: csr_x20_stackargs: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x20, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: ldr s0, [sp, #16] |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: ldr x20, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{x20}"() #1 |
| ret float %i |
| } |
| |
| ; Spilling the FPR d8 with a non-zero hazard size grows the frame with padding around the |
| ; d8 spill slot (frame of 144 bytes for CHECK64, 2064 in total for CHECK1024), so the stack |
| ; argument moves from [sp, #16] (CHECK0) up to [sp, #144] / [sp, #2064]. |
| define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: csr_d8_stackargs: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK0-NEXT: .cfi_offset b8, -16 |
| ; CHECK0-NEXT: ldr s0, [sp, #16] |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: csr_d8_stackargs: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #144 |
| ; CHECK64-NEXT: str d8, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 144 |
| ; CHECK64-NEXT: .cfi_offset b8, -80 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: ldr s0, [sp, #144] |
| ; CHECK64-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #144 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: csr_d8_stackargs: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2064 |
| ; CHECK1024-NEXT: .cfi_offset w29, -8 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1040 |
| ; CHECK1024-NEXT: ldr s0, [sp, #2064] |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| ret float %i |
| } |
| |
| ; SVE calling conventions |
| ; An SVE vector argument alone forces no spills, so there is nothing to pad for any hazard size. |
| define i32 @svecc_basic(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: svecc_basic: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| entry: |
| ret i32 0 |
| } |
| |
| ; As in the non-SVE case, a GPR-only callee-save (x20) needs no hazard padding, so all three |
| ; hazard sizes share a single set of checks. |
| define i32 @svecc_csr_x20(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: svecc_csr_x20: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x20, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: ldr x20, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{x20}"() #1 |
| ret i32 0 |
| } |
| |
| ; In SVE functions d8 is saved as the full z8 register (addvl + str z8). With a non-zero |
| ; hazard size, padding is inserted on both sides of the ZPR spill area: above it, between the |
| ; x29 slot and z8 (sub sp, sp, #80 / #1040), and below it, before the locals (sub sp, sp, #64 / #1024). |
| define i32 @svecc_csr_d8(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_d8: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_d8: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 8 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -16 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_d8: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 8 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| ret i32 0 |
| } |
| |
| ; Two FPR callee-saves (d8, d9) are spilled as z8/z9 (addvl sp, sp, #-2). The hazard padding |
| ; is applied once around the whole ZPR spill area, not per register: the sub sp amounts match |
| ; the single-register svecc_csr_d8 case above. |
| define i32 @svecc_csr_d8d9(i32 noundef %num, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_d8d9: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-2 |
| ; CHECK0-NEXT: str z9, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: ldr z9, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #2 |
| ; CHECK0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_d8d9: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-2 |
| ; CHECK64-NEXT: str z9, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -16 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 80 - 16 * VG |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: ldr z9, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #2 |
| ; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_d8d9: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-2 |
| ; CHECK1024-NEXT: str z9, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1040 - 16 * VG |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr z9, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #2 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{d8},~{d9}"() #1 |
| ret i32 0 |
| } |
| |
| ; Like svecc_csr_d8, but with a double-typed local. The store of d0 lands at the top of the |
| ; local area ([sp, #72] for CHECK64, [sp, #1032] for CHECK1024), and with hazard padding the |
| ; local area grows from 64/1024 to 80/1040 bytes to make room for it. |
| define i32 @svecc_csr_d8_allocd(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_d8_allocd: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: addvl x8, sp, #1 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: str d0, [x8, #8] |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_d8_allocd: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -16 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: str d0, [sp, #72] |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_d8_allocd: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str d0, [sp, #1032] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca double |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store double %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; Like svecc_csr_d8_allocd but with an i64 local: for every hazard size the GPR value is |
| ; stored low in the local area ([sp, #8] for CHECK64/CHECK1024), while the z8 spill sits on |
| ; the far side of the hazard padding. |
| define i32 @svecc_csr_d8_alloci64(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_d8_alloci64: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: mov x8, x0 |
| ; CHECK0-NEXT: addvl x9, sp, #1 |
| ; CHECK0-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: str x8, [x9, #8] |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_d8_alloci64: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x01, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 160 + 8 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -16 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG |
| ; CHECK64-NEXT: mov x8, x0 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: str x8, [sp, #8] |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_d8_alloci64: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xa0, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2080 + 8 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG |
| ; CHECK1024-NEXT: mov x8, x0 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str x8, [sp, #8] |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca i64 |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store i64 %d, ptr %a |
| ret i32 0 |
| } |
| |
| ; A scalable (<vscale x 4 x i32>) local adds an extra addvl sp, sp, #-1 below the CSR area. |
| ; With hazard padding, the st1w addresses the scalable local through a base of sp plus the |
| ; padding (add x8, sp, #64 / add x8, sp, #1024) rather than sp directly. |
| define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_d8_allocnxv4i32: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG |
| ; CHECK0-NEXT: .cfi_offset w29, -16 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG |
| ; CHECK0-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK0-NEXT: ptrue p0.s |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: st1w { z0.s }, p0, [sp] |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_d8_allocnxv4i32: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG |
| ; CHECK64-NEXT: .cfi_offset w29, -16 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xb0, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 80 - 8 * VG |
| ; CHECK64-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK64-NEXT: ptrue p0.s |
| ; CHECK64-NEXT: add x8, sp, #64 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_d8_allocnxv4i32: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2064 + 16 * VG |
| ; CHECK1024-NEXT: .cfi_offset w29, -16 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xf0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1040 - 8 * VG |
| ; CHECK1024-NEXT: mov z0.s, #0 // =0x0 |
| ; CHECK1024-NEXT: ptrue p0.s |
| ; CHECK1024-NEXT: add x8, sp, #1024 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca <vscale x 4 x i32> |
| tail call void asm sideeffect "", "~{d8}"() #1 |
| store <vscale x 4 x i32> zeroinitializer, ptr %a |
| ret i32 0 |
| } |
| |
| define i32 @svecc_csr_x18_25_d8_15_allocdi64(i64 %d, double %e, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_csr_x18_25_d8_15_allocdi64: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: stp x29, x25, [sp, #-64]! // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-8 |
| ; CHECK0-NEXT: str z15, [sp] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z14, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z13, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z12, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z11, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z10, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z9, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z8, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: sub sp, sp, #16 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 64 * VG |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w20, -16 |
| ; CHECK0-NEXT: .cfi_offset w21, -24 |
| ; CHECK0-NEXT: .cfi_offset w22, -32 |
| ; CHECK0-NEXT: .cfi_offset w23, -40 |
| ; CHECK0-NEXT: .cfi_offset w24, -48 |
| ; CHECK0-NEXT: .cfi_offset w25, -56 |
| ; CHECK0-NEXT: .cfi_offset w29, -64 |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG |
| ; CHECK0-NEXT: mov x8, x0 |
| ; CHECK0-NEXT: mov w0, wzr |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: str x8, [sp, #8] |
| ; CHECK0-NEXT: str d0, [sp], #16 |
| ; CHECK0-NEXT: ldr z15, [sp] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #8 |
| ; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp x29, x25, [sp], #64 // 16-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_csr_x18_25_d8_15_allocdi64: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #128 |
| ; CHECK64-NEXT: stp x29, x25, [sp, #64] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-8 |
| ; CHECK64-NEXT: str z15, [sp] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z14, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z13, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z12, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z11, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z10, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z9, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z8, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #96 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 64 * VG |
| ; CHECK64-NEXT: .cfi_offset w19, -8 |
| ; CHECK64-NEXT: .cfi_offset w20, -16 |
| ; CHECK64-NEXT: .cfi_offset w21, -24 |
| ; CHECK64-NEXT: .cfi_offset w22, -32 |
| ; CHECK64-NEXT: .cfi_offset w23, -40 |
| ; CHECK64-NEXT: .cfi_offset w24, -48 |
| ; CHECK64-NEXT: .cfi_offset w25, -56 |
| ; CHECK64-NEXT: .cfi_offset w29, -64 |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG |
| ; CHECK64-NEXT: mov x8, x0 |
| ; CHECK64-NEXT: mov w0, wzr |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: str x8, [sp, #8] |
| ; CHECK64-NEXT: str d0, [sp, #88] |
| ; CHECK64-NEXT: add sp, sp, #96 |
| ; CHECK64-NEXT: ldr z15, [sp] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #8 |
| ; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x29, x25, [sp, #64] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #128 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_csr_x18_25_d8_15_allocdi64: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1088 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x25, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x24, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x23, [sp, #1048] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x22, [sp, #1056] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x21, [sp, #1064] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-8 |
| ; CHECK1024-NEXT: str z15, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z14, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z13, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z12, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z11, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z10, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z9, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z8, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1056 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 64 * VG |
| ; CHECK1024-NEXT: .cfi_offset w19, -8 |
| ; CHECK1024-NEXT: .cfi_offset w20, -16 |
| ; CHECK1024-NEXT: .cfi_offset w21, -24 |
| ; CHECK1024-NEXT: .cfi_offset w22, -32 |
| ; CHECK1024-NEXT: .cfi_offset w23, -40 |
| ; CHECK1024-NEXT: .cfi_offset w24, -48 |
| ; CHECK1024-NEXT: .cfi_offset w25, -56 |
| ; CHECK1024-NEXT: .cfi_offset w29, -64 |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG |
| ; CHECK1024-NEXT: mov x8, x0 |
| ; CHECK1024-NEXT: mov w0, wzr |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: str x8, [sp, #8] |
| ; CHECK1024-NEXT: str d0, [sp, #1048] |
| ; CHECK1024-NEXT: add sp, sp, #1056 |
| ; CHECK1024-NEXT: ldr z15, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z14, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z13, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z12, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z11, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z10, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z9, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z8, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #8 |
| ; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x21, [sp, #1064] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x22, [sp, #1056] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x23, [sp, #1048] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x24, [sp, #1040] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x25, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1088 |
| ; CHECK1024-NEXT: ret |
| entry: |
| %a = alloca i64 |
| %b = alloca double |
| tail call void asm sideeffect "", "~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25}"() |
| tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() |
| store i64 %d, ptr %a |
| store double %e, ptr %b |
| ret i32 0 |
| } |
| |
| |
| ; Callee side of the SVE calling convention for an aggregate of two scalable |
| ; predicate vectors: %arg1 arrives in p0-p1, %arg2 in p2-p3, and returning |
| ; %arg2 moves p2/p3 into p0/p1. No stack is used, so no hazard padding is |
| ; needed and all three -aarch64-stack-hazard-size configurations share the |
| ; same CHECK lines. |
| ; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py; |
| ; regenerate them rather than editing by hand. |
| define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: sve_signature_pred_2xv4i1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov p1.b, p3.b |
| ; CHECK-NEXT: mov p0.b, p2.b |
| ; CHECK-NEXT: ret |
| ret [2 x <vscale x 4 x i1>] %arg2 |
| } |
| |
| ; Caller side of the same signature: the two aggregate arguments are swapped |
| ; before the call, which forces the incoming p0/p1 values through p4/p5. |
| ; p4 and p5 are callee-saved predicate registers, so they are spilled and |
| ; reloaded around the call. With a non-zero hazard size, extra sub/add sp |
| ; adjustments (#64 / #1024, plus padding around the GPR spill area) separate |
| ; the predicate spill slots from the x29/x30 GPR spill slots. |
| ; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py; |
| ; regenerate them rather than editing by hand. |
| define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1_caller([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: sve_signature_pred_2xv4i1_caller: |
| ; CHECK0: // %bb.0: |
| ; CHECK0-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK0-NEXT: addvl sp, sp, #-1 |
| ; CHECK0-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: mov p5.b, p0.b |
| ; CHECK0-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: mov p4.b, p1.b |
| ; CHECK0-NEXT: mov p0.b, p2.b |
| ; CHECK0-NEXT: mov p1.b, p3.b |
| ; CHECK0-NEXT: mov p2.b, p5.b |
| ; CHECK0-NEXT: mov p3.b, p4.b |
| ; CHECK0-NEXT: bl sve_signature_pred_2xv4i1 |
| ; CHECK0-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #1 |
| ; CHECK0-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: sve_signature_pred_2xv4i1_caller: |
| ; CHECK64: // %bb.0: |
| ; CHECK64-NEXT: sub sp, sp, #80 |
| ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK64-NEXT: addvl sp, sp, #-1 |
| ; CHECK64-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: mov p4.b, p1.b |
| ; CHECK64-NEXT: mov p5.b, p0.b |
| ; CHECK64-NEXT: mov p0.b, p2.b |
| ; CHECK64-NEXT: mov p1.b, p3.b |
| ; CHECK64-NEXT: mov p2.b, p5.b |
| ; CHECK64-NEXT: mov p3.b, p4.b |
| ; CHECK64-NEXT: bl sve_signature_pred_2xv4i1 |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #1 |
| ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #80 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: sve_signature_pred_2xv4i1_caller: |
| ; CHECK1024: // %bb.0: |
| ; CHECK1024-NEXT: sub sp, sp, #1040 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: addvl sp, sp, #-1 |
| ; CHECK1024-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: mov p4.b, p1.b |
| ; CHECK1024-NEXT: mov p5.b, p0.b |
| ; CHECK1024-NEXT: mov p0.b, p2.b |
| ; CHECK1024-NEXT: mov p1.b, p3.b |
| ; CHECK1024-NEXT: mov p2.b, p5.b |
| ; CHECK1024-NEXT: mov p3.b, p4.b |
| ; CHECK1024-NEXT: bl sve_signature_pred_2xv4i1 |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #1 |
| ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1040 |
| ; CHECK1024-NEXT: ret |
| %res = call [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg2, [2 x <vscale x 4 x i1>] %arg1) |
| ret [2 x <vscale x 4 x i1>] %res |
| } |
| |
| ; fp128 compares in a streaming-compatible function lower to compiler-rt |
| ; libcalls (__lttf2 for olt, __getf2 for oge). Because the callee may not be |
| ; streaming-compatible, the compiler queries __arm_sme_state and conditionally |
| ; brackets each libcall with smstop/smstart sm, spilling the live q0-q3 |
| ; argument registers across that sequence. With a non-zero hazard size the |
| ; d8-d15/q spill slots are placed away from the GPR spill slots (e.g. for |
| ; CHECK1024 the d-register spills sit at sp+#0..#48 while the GPR spills sit |
| ; at sp+#1088 and above), which is what this test is checking. |
| ; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py; |
| ; regenerate them rather than editing by hand. |
| define i32 @f128_libcall(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: f128_libcall: |
| ; CHECK0: // %bb.0: |
| ; CHECK0-NEXT: sub sp, sp, #176 |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 176 |
| ; CHECK0-NEXT: cntd x9 |
| ; CHECK0-NEXT: stp d15, d14, [sp, #64] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d13, d12, [sp, #80] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d11, d10, [sp, #96] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp d9, d8, [sp, #112] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x30, x9, [sp, #128] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w20, -16 |
| ; CHECK0-NEXT: .cfi_offset w21, -24 |
| ; CHECK0-NEXT: .cfi_offset w22, -32 |
| ; CHECK0-NEXT: .cfi_offset w30, -48 |
| ; CHECK0-NEXT: .cfi_offset b8, -56 |
| ; CHECK0-NEXT: .cfi_offset b9, -64 |
| ; CHECK0-NEXT: .cfi_offset b10, -72 |
| ; CHECK0-NEXT: .cfi_offset b11, -80 |
| ; CHECK0-NEXT: .cfi_offset b12, -88 |
| ; CHECK0-NEXT: .cfi_offset b13, -96 |
| ; CHECK0-NEXT: .cfi_offset b14, -104 |
| ; CHECK0-NEXT: .cfi_offset b15, -112 |
| ; CHECK0-NEXT: mov w19, w1 |
| ; CHECK0-NEXT: mov w20, w0 |
| ; CHECK0-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill |
| ; CHECK0-NEXT: stp q2, q3, [sp, #32] // 32-byte Folded Spill |
| ; CHECK0-NEXT: bl __arm_sme_state |
| ; CHECK0-NEXT: and x21, x0, #0x1 |
| ; CHECK0-NEXT: .cfi_offset vg, -40 |
| ; CHECK0-NEXT: tbz w21, #0, .LBB27_2 |
| ; CHECK0-NEXT: // %bb.1: |
| ; CHECK0-NEXT: smstop sm |
| ; CHECK0-NEXT: .LBB27_2: |
| ; CHECK0-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload |
| ; CHECK0-NEXT: bl __lttf2 |
| ; CHECK0-NEXT: tbz w21, #0, .LBB27_4 |
| ; CHECK0-NEXT: // %bb.3: |
| ; CHECK0-NEXT: smstart sm |
| ; CHECK0-NEXT: .LBB27_4: |
| ; CHECK0-NEXT: cmp w0, #0 |
| ; CHECK0-NEXT: .cfi_restore vg |
| ; CHECK0-NEXT: cset w21, lt |
| ; CHECK0-NEXT: bl __arm_sme_state |
| ; CHECK0-NEXT: and x22, x0, #0x1 |
| ; CHECK0-NEXT: .cfi_offset vg, -40 |
| ; CHECK0-NEXT: tbz w22, #0, .LBB27_6 |
| ; CHECK0-NEXT: // %bb.5: |
| ; CHECK0-NEXT: smstop sm |
| ; CHECK0-NEXT: .LBB27_6: |
| ; CHECK0-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload |
| ; CHECK0-NEXT: bl __getf2 |
| ; CHECK0-NEXT: tbz w22, #0, .LBB27_8 |
| ; CHECK0-NEXT: // %bb.7: |
| ; CHECK0-NEXT: smstart sm |
| ; CHECK0-NEXT: .LBB27_8: |
| ; CHECK0-NEXT: cmp w0, #0 |
| ; CHECK0-NEXT: cset w8, ge |
| ; CHECK0-NEXT: tst w8, w21 |
| ; CHECK0-NEXT: csel w0, w20, w19, ne |
| ; CHECK0-NEXT: .cfi_restore vg |
| ; CHECK0-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d9, d8, [sp, #112] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d11, d10, [sp, #96] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d13, d12, [sp, #80] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldp d15, d14, [sp, #64] // 16-byte Folded Reload |
| ; CHECK0-NEXT: add sp, sp, #176 |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK0-NEXT: .cfi_restore w19 |
| ; CHECK0-NEXT: .cfi_restore w20 |
| ; CHECK0-NEXT: .cfi_restore w21 |
| ; CHECK0-NEXT: .cfi_restore w22 |
| ; CHECK0-NEXT: .cfi_restore w30 |
| ; CHECK0-NEXT: .cfi_restore b8 |
| ; CHECK0-NEXT: .cfi_restore b9 |
| ; CHECK0-NEXT: .cfi_restore b10 |
| ; CHECK0-NEXT: .cfi_restore b11 |
| ; CHECK0-NEXT: .cfi_restore b12 |
| ; CHECK0-NEXT: .cfi_restore b13 |
| ; CHECK0-NEXT: .cfi_restore b14 |
| ; CHECK0-NEXT: .cfi_restore b15 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: f128_libcall: |
| ; CHECK64: // %bb.0: |
| ; CHECK64-NEXT: sub sp, sp, #320 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 320 |
| ; CHECK64-NEXT: cntd x9 |
| ; CHECK64-NEXT: stp d15, d14, [sp, #128] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d13, d12, [sp, #144] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d11, d10, [sp, #160] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp d9, d8, [sp, #176] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x9, x22, [sp, #272] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x21, x20, [sp, #288] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str x19, [sp, #304] // 8-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_offset w19, -16 |
| ; CHECK64-NEXT: .cfi_offset w20, -24 |
| ; CHECK64-NEXT: .cfi_offset w21, -32 |
| ; CHECK64-NEXT: .cfi_offset w22, -40 |
| ; CHECK64-NEXT: .cfi_offset w30, -56 |
| ; CHECK64-NEXT: .cfi_offset w29, -64 |
| ; CHECK64-NEXT: .cfi_offset b8, -136 |
| ; CHECK64-NEXT: .cfi_offset b9, -144 |
| ; CHECK64-NEXT: .cfi_offset b10, -152 |
| ; CHECK64-NEXT: .cfi_offset b11, -160 |
| ; CHECK64-NEXT: .cfi_offset b12, -168 |
| ; CHECK64-NEXT: .cfi_offset b13, -176 |
| ; CHECK64-NEXT: .cfi_offset b14, -184 |
| ; CHECK64-NEXT: .cfi_offset b15, -192 |
| ; CHECK64-NEXT: mov w19, w1 |
| ; CHECK64-NEXT: mov w20, w0 |
| ; CHECK64-NEXT: stp q0, q1, [sp, #64] // 32-byte Folded Spill |
| ; CHECK64-NEXT: stp q2, q3, [sp, #96] // 32-byte Folded Spill |
| ; CHECK64-NEXT: bl __arm_sme_state |
| ; CHECK64-NEXT: and x21, x0, #0x1 |
| ; CHECK64-NEXT: .cfi_offset vg, -48 |
| ; CHECK64-NEXT: tbz w21, #0, .LBB27_2 |
| ; CHECK64-NEXT: // %bb.1: |
| ; CHECK64-NEXT: smstop sm |
| ; CHECK64-NEXT: .LBB27_2: |
| ; CHECK64-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload |
| ; CHECK64-NEXT: bl __lttf2 |
| ; CHECK64-NEXT: tbz w21, #0, .LBB27_4 |
| ; CHECK64-NEXT: // %bb.3: |
| ; CHECK64-NEXT: smstart sm |
| ; CHECK64-NEXT: .LBB27_4: |
| ; CHECK64-NEXT: cmp w0, #0 |
| ; CHECK64-NEXT: .cfi_restore vg |
| ; CHECK64-NEXT: cset w21, lt |
| ; CHECK64-NEXT: bl __arm_sme_state |
| ; CHECK64-NEXT: and x22, x0, #0x1 |
| ; CHECK64-NEXT: .cfi_offset vg, -48 |
| ; CHECK64-NEXT: tbz w22, #0, .LBB27_6 |
| ; CHECK64-NEXT: // %bb.5: |
| ; CHECK64-NEXT: smstop sm |
| ; CHECK64-NEXT: .LBB27_6: |
| ; CHECK64-NEXT: ldp q0, q1, [sp, #96] // 32-byte Folded Reload |
| ; CHECK64-NEXT: bl __getf2 |
| ; CHECK64-NEXT: tbz w22, #0, .LBB27_8 |
| ; CHECK64-NEXT: // %bb.7: |
| ; CHECK64-NEXT: smstart sm |
| ; CHECK64-NEXT: .LBB27_8: |
| ; CHECK64-NEXT: cmp w0, #0 |
| ; CHECK64-NEXT: cset w8, ge |
| ; CHECK64-NEXT: tst w8, w21 |
| ; CHECK64-NEXT: csel w0, w20, w19, ne |
| ; CHECK64-NEXT: .cfi_restore vg |
| ; CHECK64-NEXT: ldp x20, x19, [sp, #296] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x22, x21, [sp, #280] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d9, d8, [sp, #176] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d11, d10, [sp, #160] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d13, d12, [sp, #144] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldp d15, d14, [sp, #128] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #320 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK64-NEXT: .cfi_restore w19 |
| ; CHECK64-NEXT: .cfi_restore w20 |
| ; CHECK64-NEXT: .cfi_restore w21 |
| ; CHECK64-NEXT: .cfi_restore w22 |
| ; CHECK64-NEXT: .cfi_restore w30 |
| ; CHECK64-NEXT: .cfi_restore w29 |
| ; CHECK64-NEXT: .cfi_restore b8 |
| ; CHECK64-NEXT: .cfi_restore b9 |
| ; CHECK64-NEXT: .cfi_restore b10 |
| ; CHECK64-NEXT: .cfi_restore b11 |
| ; CHECK64-NEXT: .cfi_restore b12 |
| ; CHECK64-NEXT: .cfi_restore b13 |
| ; CHECK64-NEXT: .cfi_restore b14 |
| ; CHECK64-NEXT: .cfi_restore b15 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: f128_libcall: |
| ; CHECK1024: // %bb.0: |
| ; CHECK1024-NEXT: sub sp, sp, #1152 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1152 |
| ; CHECK1024-NEXT: cntd x9 |
| ; CHECK1024-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x22, [sp, #1112] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x21, [sp, #1120] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x20, [sp, #1128] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1136] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_offset w19, -16 |
| ; CHECK1024-NEXT: .cfi_offset w20, -24 |
| ; CHECK1024-NEXT: .cfi_offset w21, -32 |
| ; CHECK1024-NEXT: .cfi_offset w22, -40 |
| ; CHECK1024-NEXT: .cfi_offset w30, -56 |
| ; CHECK1024-NEXT: .cfi_offset w29, -64 |
| ; CHECK1024-NEXT: .cfi_offset b8, -1096 |
| ; CHECK1024-NEXT: .cfi_offset b9, -1104 |
| ; CHECK1024-NEXT: .cfi_offset b10, -1112 |
| ; CHECK1024-NEXT: .cfi_offset b11, -1120 |
| ; CHECK1024-NEXT: .cfi_offset b12, -1128 |
| ; CHECK1024-NEXT: .cfi_offset b13, -1136 |
| ; CHECK1024-NEXT: .cfi_offset b14, -1144 |
| ; CHECK1024-NEXT: .cfi_offset b15, -1152 |
| ; CHECK1024-NEXT: sub sp, sp, #1088 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 2240 |
| ; CHECK1024-NEXT: mov w19, w1 |
| ; CHECK1024-NEXT: mov w20, w0 |
| ; CHECK1024-NEXT: str q3, [sp, #1072] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str q2, [sp, #1056] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str q1, [sp, #1040] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str q0, [sp, #1024] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: bl __arm_sme_state |
| ; CHECK1024-NEXT: and x21, x0, #0x1 |
| ; CHECK1024-NEXT: .cfi_offset vg, -48 |
| ; CHECK1024-NEXT: tbz w21, #0, .LBB27_2 |
| ; CHECK1024-NEXT: // %bb.1: |
| ; CHECK1024-NEXT: smstop sm |
| ; CHECK1024-NEXT: .LBB27_2: |
| ; CHECK1024-NEXT: ldr q0, [sp, #1024] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr q1, [sp, #1040] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: bl __lttf2 |
| ; CHECK1024-NEXT: tbz w21, #0, .LBB27_4 |
| ; CHECK1024-NEXT: // %bb.3: |
| ; CHECK1024-NEXT: smstart sm |
| ; CHECK1024-NEXT: .LBB27_4: |
| ; CHECK1024-NEXT: cmp w0, #0 |
| ; CHECK1024-NEXT: .cfi_restore vg |
| ; CHECK1024-NEXT: cset w21, lt |
| ; CHECK1024-NEXT: bl __arm_sme_state |
| ; CHECK1024-NEXT: and x22, x0, #0x1 |
| ; CHECK1024-NEXT: .cfi_offset vg, -48 |
| ; CHECK1024-NEXT: tbz w22, #0, .LBB27_6 |
| ; CHECK1024-NEXT: // %bb.5: |
| ; CHECK1024-NEXT: smstop sm |
| ; CHECK1024-NEXT: .LBB27_6: |
| ; CHECK1024-NEXT: ldr q0, [sp, #1056] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr q1, [sp, #1072] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: bl __getf2 |
| ; CHECK1024-NEXT: tbz w22, #0, .LBB27_8 |
| ; CHECK1024-NEXT: // %bb.7: |
| ; CHECK1024-NEXT: smstart sm |
| ; CHECK1024-NEXT: .LBB27_8: |
| ; CHECK1024-NEXT: cmp w0, #0 |
| ; CHECK1024-NEXT: cset w8, ge |
| ; CHECK1024-NEXT: tst w8, w21 |
| ; CHECK1024-NEXT: csel w0, w20, w19, ne |
| ; CHECK1024-NEXT: .cfi_restore vg |
| ; CHECK1024-NEXT: add sp, sp, #1088 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1152 |
| ; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x19, [sp, #1136] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x20, [sp, #1128] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x21, [sp, #1120] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x22, [sp, #1112] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1152 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK1024-NEXT: .cfi_restore w19 |
| ; CHECK1024-NEXT: .cfi_restore w20 |
| ; CHECK1024-NEXT: .cfi_restore w21 |
| ; CHECK1024-NEXT: .cfi_restore w22 |
| ; CHECK1024-NEXT: .cfi_restore w30 |
| ; CHECK1024-NEXT: .cfi_restore w29 |
| ; CHECK1024-NEXT: .cfi_restore b8 |
| ; CHECK1024-NEXT: .cfi_restore b9 |
| ; CHECK1024-NEXT: .cfi_restore b10 |
| ; CHECK1024-NEXT: .cfi_restore b11 |
| ; CHECK1024-NEXT: .cfi_restore b12 |
| ; CHECK1024-NEXT: .cfi_restore b13 |
| ; CHECK1024-NEXT: .cfi_restore b14 |
| ; CHECK1024-NEXT: .cfi_restore b15 |
| ; CHECK1024-NEXT: ret |
| %c0 = fcmp olt fp128 %v0, %v1 |
| %c1 = fcmp oge fp128 %v2, %v3 |
| %cr = and i1 %c1, %c0 |
| %sel = select i1 %cr, i32 %a, i32 %b |
| ret i32 %sel |
| } |
| |
| define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_call: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 48 |
| ; CHECK0-NEXT: cntd x9 |
| ; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x27, x19, [sp, #32] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w27, -16 |
| ; CHECK0-NEXT: .cfi_offset w28, -24 |
| ; CHECK0-NEXT: .cfi_offset w30, -40 |
| ; CHECK0-NEXT: .cfi_offset w29, -48 |
| ; CHECK0-NEXT: addvl sp, sp, #-18 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG |
| ; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG |
| ; CHECK0-NEXT: mov x8, x0 |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: bl __arm_sme_state |
| ; CHECK0-NEXT: and x19, x0, #0x1 |
| ; CHECK0-NEXT: .cfi_offset vg, -32 |
| ; CHECK0-NEXT: tbz w19, #0, .LBB28_2 |
| ; CHECK0-NEXT: // %bb.1: // %entry |
| ; CHECK0-NEXT: smstop sm |
| ; CHECK0-NEXT: .LBB28_2: // %entry |
| ; CHECK0-NEXT: mov x0, x8 |
| ; CHECK0-NEXT: mov w1, #45 // =0x2d |
| ; CHECK0-NEXT: mov w2, #37 // =0x25 |
| ; CHECK0-NEXT: bl memset |
| ; CHECK0-NEXT: tbz w19, #0, .LBB28_4 |
| ; CHECK0-NEXT: // %bb.3: // %entry |
| ; CHECK0-NEXT: smstart sm |
| ; CHECK0-NEXT: .LBB28_4: // %entry |
| ; CHECK0-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK0-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK0-NEXT: .cfi_restore vg |
| ; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #18 |
| ; CHECK0-NEXT: .cfi_def_cfa wsp, 48 |
| ; CHECK0-NEXT: .cfi_restore z8 |
| ; CHECK0-NEXT: .cfi_restore z9 |
| ; CHECK0-NEXT: .cfi_restore z10 |
| ; CHECK0-NEXT: .cfi_restore z11 |
| ; CHECK0-NEXT: .cfi_restore z12 |
| ; CHECK0-NEXT: .cfi_restore z13 |
| ; CHECK0-NEXT: .cfi_restore z14 |
| ; CHECK0-NEXT: .cfi_restore z15 |
| ; CHECK0-NEXT: ldp x27, x19, [sp, #32] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK0-NEXT: .cfi_restore w19 |
| ; CHECK0-NEXT: .cfi_restore w27 |
| ; CHECK0-NEXT: .cfi_restore w28 |
| ; CHECK0-NEXT: .cfi_restore w30 |
| ; CHECK0-NEXT: .cfi_restore w29 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_call: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK64-NEXT: cntd x9 |
| ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x27, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_offset w19, -8 |
| ; CHECK64-NEXT: .cfi_offset w27, -16 |
| ; CHECK64-NEXT: .cfi_offset w28, -24 |
| ; CHECK64-NEXT: .cfi_offset w30, -40 |
| ; CHECK64-NEXT: .cfi_offset w29, -48 |
| ; CHECK64-NEXT: addvl sp, sp, #-18 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG |
| ; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG |
| ; CHECK64-NEXT: sub sp, sp, #64 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 176 + 144 * VG |
| ; CHECK64-NEXT: mov x8, x0 |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: bl __arm_sme_state |
| ; CHECK64-NEXT: and x19, x0, #0x1 |
| ; CHECK64-NEXT: .cfi_offset vg, -32 |
| ; CHECK64-NEXT: tbz w19, #0, .LBB28_2 |
| ; CHECK64-NEXT: // %bb.1: // %entry |
| ; CHECK64-NEXT: smstop sm |
| ; CHECK64-NEXT: .LBB28_2: // %entry |
| ; CHECK64-NEXT: mov x0, x8 |
| ; CHECK64-NEXT: mov w1, #45 // =0x2d |
| ; CHECK64-NEXT: mov w2, #37 // =0x25 |
| ; CHECK64-NEXT: bl memset |
| ; CHECK64-NEXT: tbz w19, #0, .LBB28_4 |
| ; CHECK64-NEXT: // %bb.3: // %entry |
| ; CHECK64-NEXT: smstart sm |
| ; CHECK64-NEXT: .LBB28_4: // %entry |
| ; CHECK64-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK64-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK64-NEXT: .cfi_restore vg |
| ; CHECK64-NEXT: add sp, sp, #64 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG |
| ; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #18 |
| ; CHECK64-NEXT: .cfi_def_cfa wsp, 112 |
| ; CHECK64-NEXT: .cfi_restore z8 |
| ; CHECK64-NEXT: .cfi_restore z9 |
| ; CHECK64-NEXT: .cfi_restore z10 |
| ; CHECK64-NEXT: .cfi_restore z11 |
| ; CHECK64-NEXT: .cfi_restore z12 |
| ; CHECK64-NEXT: .cfi_restore z13 |
| ; CHECK64-NEXT: .cfi_restore z14 |
| ; CHECK64-NEXT: .cfi_restore z15 |
| ; CHECK64-NEXT: ldp x27, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload |
| ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK64-NEXT: .cfi_restore w19 |
| ; CHECK64-NEXT: .cfi_restore w27 |
| ; CHECK64-NEXT: .cfi_restore w28 |
| ; CHECK64-NEXT: .cfi_restore w30 |
| ; CHECK64-NEXT: .cfi_restore w29 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_call: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1072 |
| ; CHECK1024-NEXT: cntd x9 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1064] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_offset w19, -8 |
| ; CHECK1024-NEXT: .cfi_offset w27, -16 |
| ; CHECK1024-NEXT: .cfi_offset w28, -24 |
| ; CHECK1024-NEXT: .cfi_offset w30, -40 |
| ; CHECK1024-NEXT: .cfi_offset w29, -48 |
| ; CHECK1024-NEXT: addvl sp, sp, #-18 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG |
| ; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG |
| ; CHECK1024-NEXT: sub sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2096 + 144 * VG |
| ; CHECK1024-NEXT: mov x8, x0 |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: bl __arm_sme_state |
| ; CHECK1024-NEXT: and x19, x0, #0x1 |
| ; CHECK1024-NEXT: .cfi_offset vg, -32 |
| ; CHECK1024-NEXT: tbz w19, #0, .LBB28_2 |
| ; CHECK1024-NEXT: // %bb.1: // %entry |
| ; CHECK1024-NEXT: smstop sm |
| ; CHECK1024-NEXT: .LBB28_2: // %entry |
| ; CHECK1024-NEXT: mov x0, x8 |
| ; CHECK1024-NEXT: mov w1, #45 // =0x2d |
| ; CHECK1024-NEXT: mov w2, #37 // =0x25 |
| ; CHECK1024-NEXT: bl memset |
| ; CHECK1024-NEXT: tbz w19, #0, .LBB28_4 |
| ; CHECK1024-NEXT: // %bb.3: // %entry |
| ; CHECK1024-NEXT: smstart sm |
| ; CHECK1024-NEXT: .LBB28_4: // %entry |
| ; CHECK1024-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK1024-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK1024-NEXT: .cfi_restore vg |
| ; CHECK1024-NEXT: add sp, sp, #1024 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG |
| ; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #18 |
| ; CHECK1024-NEXT: .cfi_def_cfa wsp, 1072 |
| ; CHECK1024-NEXT: .cfi_restore z8 |
| ; CHECK1024-NEXT: .cfi_restore z9 |
| ; CHECK1024-NEXT: .cfi_restore z10 |
| ; CHECK1024-NEXT: .cfi_restore z11 |
| ; CHECK1024-NEXT: .cfi_restore z12 |
| ; CHECK1024-NEXT: .cfi_restore z13 |
| ; CHECK1024-NEXT: .cfi_restore z14 |
| ; CHECK1024-NEXT: .cfi_restore z15 |
| ; CHECK1024-NEXT: ldr x19, [sp, #1064] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK1024-NEXT: .cfi_restore w19 |
| ; CHECK1024-NEXT: .cfi_restore w27 |
| ; CHECK1024-NEXT: .cfi_restore w28 |
| ; CHECK1024-NEXT: .cfi_restore w30 |
| ; CHECK1024-NEXT: .cfi_restore w29 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 |
| %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37) |
| ret i32 -396142473 |
| } |
| |
| define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" { |
| ; CHECK0-LABEL: svecc_alloca_call: |
| ; CHECK0: // %bb.0: // %entry |
| ; CHECK0-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 48 |
| ; CHECK0-NEXT: cntd x9 |
| ; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill |
| ; CHECK0-NEXT: stp x27, x19, [sp, #32] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_offset w19, -8 |
| ; CHECK0-NEXT: .cfi_offset w27, -16 |
| ; CHECK0-NEXT: .cfi_offset w28, -24 |
| ; CHECK0-NEXT: .cfi_offset w30, -40 |
| ; CHECK0-NEXT: .cfi_offset w29, -48 |
| ; CHECK0-NEXT: addvl sp, sp, #-18 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG |
| ; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG |
| ; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG |
| ; CHECK0-NEXT: sub sp, sp, #48 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 96 + 144 * VG |
| ; CHECK0-NEXT: //APP |
| ; CHECK0-NEXT: //NO_APP |
| ; CHECK0-NEXT: bl __arm_sme_state |
| ; CHECK0-NEXT: and x19, x0, #0x1 |
| ; CHECK0-NEXT: .cfi_offset vg, -32 |
| ; CHECK0-NEXT: tbz w19, #0, .LBB29_2 |
| ; CHECK0-NEXT: // %bb.1: // %entry |
| ; CHECK0-NEXT: smstop sm |
| ; CHECK0-NEXT: .LBB29_2: // %entry |
| ; CHECK0-NEXT: mov x0, sp |
| ; CHECK0-NEXT: mov w1, #45 // =0x2d |
| ; CHECK0-NEXT: mov w2, #37 // =0x25 |
| ; CHECK0-NEXT: bl memset |
| ; CHECK0-NEXT: tbz w19, #0, .LBB29_4 |
| ; CHECK0-NEXT: // %bb.3: // %entry |
| ; CHECK0-NEXT: smstart sm |
| ; CHECK0-NEXT: .LBB29_4: // %entry |
| ; CHECK0-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK0-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK0-NEXT: .cfi_restore vg |
| ; CHECK0-NEXT: add sp, sp, #48 |
| ; CHECK0-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG |
| ; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK0-NEXT: addvl sp, sp, #18 |
| ; CHECK0-NEXT: .cfi_def_cfa wsp, 48 |
| ; CHECK0-NEXT: .cfi_restore z8 |
| ; CHECK0-NEXT: .cfi_restore z9 |
| ; CHECK0-NEXT: .cfi_restore z10 |
| ; CHECK0-NEXT: .cfi_restore z11 |
| ; CHECK0-NEXT: .cfi_restore z12 |
| ; CHECK0-NEXT: .cfi_restore z13 |
| ; CHECK0-NEXT: .cfi_restore z14 |
| ; CHECK0-NEXT: .cfi_restore z15 |
| ; CHECK0-NEXT: ldp x27, x19, [sp, #32] // 16-byte Folded Reload |
| ; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload |
| ; CHECK0-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload |
| ; CHECK0-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK0-NEXT: .cfi_restore w19 |
| ; CHECK0-NEXT: .cfi_restore w27 |
| ; CHECK0-NEXT: .cfi_restore w28 |
| ; CHECK0-NEXT: .cfi_restore w30 |
| ; CHECK0-NEXT: .cfi_restore w29 |
| ; CHECK0-NEXT: ret |
| ; |
| ; CHECK64-LABEL: svecc_alloca_call: |
| ; CHECK64: // %bb.0: // %entry |
| ; CHECK64-NEXT: sub sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK64-NEXT: cntd x9 |
| ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill |
| ; CHECK64-NEXT: stp x27, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_offset w19, -8 |
| ; CHECK64-NEXT: .cfi_offset w27, -16 |
| ; CHECK64-NEXT: .cfi_offset w28, -24 |
| ; CHECK64-NEXT: .cfi_offset w30, -40 |
| ; CHECK64-NEXT: .cfi_offset w29, -48 |
| ; CHECK64-NEXT: addvl sp, sp, #-18 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG |
| ; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 112 - 8 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 112 - 16 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 112 - 24 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 112 - 32 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 112 - 40 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 112 - 48 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 112 - 56 * VG |
| ; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x90, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 112 - 64 * VG |
| ; CHECK64-NEXT: sub sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x01, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 224 + 144 * VG |
| ; CHECK64-NEXT: //APP |
| ; CHECK64-NEXT: //NO_APP |
| ; CHECK64-NEXT: bl __arm_sme_state |
| ; CHECK64-NEXT: and x19, x0, #0x1 |
| ; CHECK64-NEXT: .cfi_offset vg, -32 |
| ; CHECK64-NEXT: tbz w19, #0, .LBB29_2 |
| ; CHECK64-NEXT: // %bb.1: // %entry |
| ; CHECK64-NEXT: smstop sm |
| ; CHECK64-NEXT: .LBB29_2: // %entry |
| ; CHECK64-NEXT: mov x0, sp |
| ; CHECK64-NEXT: mov w1, #45 // =0x2d |
| ; CHECK64-NEXT: mov w2, #37 // =0x25 |
| ; CHECK64-NEXT: bl memset |
| ; CHECK64-NEXT: tbz w19, #0, .LBB29_4 |
| ; CHECK64-NEXT: // %bb.3: // %entry |
| ; CHECK64-NEXT: smstart sm |
| ; CHECK64-NEXT: .LBB29_4: // %entry |
| ; CHECK64-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK64-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK64-NEXT: .cfi_restore vg |
| ; CHECK64-NEXT: add sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xf0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 112 + 144 * VG |
| ; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK64-NEXT: addvl sp, sp, #18 |
| ; CHECK64-NEXT: .cfi_def_cfa wsp, 112 |
| ; CHECK64-NEXT: .cfi_restore z8 |
| ; CHECK64-NEXT: .cfi_restore z9 |
| ; CHECK64-NEXT: .cfi_restore z10 |
| ; CHECK64-NEXT: .cfi_restore z11 |
| ; CHECK64-NEXT: .cfi_restore z12 |
| ; CHECK64-NEXT: .cfi_restore z13 |
| ; CHECK64-NEXT: .cfi_restore z14 |
| ; CHECK64-NEXT: .cfi_restore z15 |
| ; CHECK64-NEXT: ldp x27, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload |
| ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK64-NEXT: add sp, sp, #112 |
| ; CHECK64-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK64-NEXT: .cfi_restore w19 |
| ; CHECK64-NEXT: .cfi_restore w27 |
| ; CHECK64-NEXT: .cfi_restore w28 |
| ; CHECK64-NEXT: .cfi_restore w30 |
| ; CHECK64-NEXT: .cfi_restore w29 |
| ; CHECK64-NEXT: ret |
| ; |
| ; CHECK1024-LABEL: svecc_alloca_call: |
| ; CHECK1024: // %bb.0: // %entry |
| ; CHECK1024-NEXT: sub sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 1072 |
| ; CHECK1024-NEXT: cntd x9 |
| ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: str x19, [sp, #1064] // 8-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_offset w19, -8 |
| ; CHECK1024-NEXT: .cfi_offset w27, -16 |
| ; CHECK1024-NEXT: .cfi_offset w28, -24 |
| ; CHECK1024-NEXT: .cfi_offset w30, -40 |
| ; CHECK1024-NEXT: .cfi_offset w29, -48 |
| ; CHECK1024-NEXT: addvl sp, sp, #-18 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG |
| ; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1072 - 8 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1072 - 16 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1072 - 24 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1072 - 32 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1072 - 40 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1072 - 48 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1072 - 56 * VG |
| ; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xd0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1072 - 64 * VG |
| ; CHECK1024-NEXT: sub sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xe0, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 2144 + 144 * VG |
| ; CHECK1024-NEXT: //APP |
| ; CHECK1024-NEXT: //NO_APP |
| ; CHECK1024-NEXT: bl __arm_sme_state |
| ; CHECK1024-NEXT: and x19, x0, #0x1 |
| ; CHECK1024-NEXT: .cfi_offset vg, -32 |
| ; CHECK1024-NEXT: tbz w19, #0, .LBB29_2 |
| ; CHECK1024-NEXT: // %bb.1: // %entry |
| ; CHECK1024-NEXT: smstop sm |
| ; CHECK1024-NEXT: .LBB29_2: // %entry |
| ; CHECK1024-NEXT: mov x0, sp |
| ; CHECK1024-NEXT: mov w1, #45 // =0x2d |
| ; CHECK1024-NEXT: mov w2, #37 // =0x25 |
| ; CHECK1024-NEXT: bl memset |
| ; CHECK1024-NEXT: tbz w19, #0, .LBB29_4 |
| ; CHECK1024-NEXT: // %bb.3: // %entry |
| ; CHECK1024-NEXT: smstart sm |
| ; CHECK1024-NEXT: .LBB29_4: // %entry |
| ; CHECK1024-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK1024-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK1024-NEXT: .cfi_restore vg |
| ; CHECK1024-NEXT: add sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xb0, 0x08, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 1072 + 144 * VG |
| ; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK1024-NEXT: addvl sp, sp, #18 |
| ; CHECK1024-NEXT: .cfi_def_cfa wsp, 1072 |
| ; CHECK1024-NEXT: .cfi_restore z8 |
| ; CHECK1024-NEXT: .cfi_restore z9 |
| ; CHECK1024-NEXT: .cfi_restore z10 |
| ; CHECK1024-NEXT: .cfi_restore z11 |
| ; CHECK1024-NEXT: .cfi_restore z12 |
| ; CHECK1024-NEXT: .cfi_restore z13 |
| ; CHECK1024-NEXT: .cfi_restore z14 |
| ; CHECK1024-NEXT: .cfi_restore z15 |
| ; CHECK1024-NEXT: ldr x19, [sp, #1064] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK1024-NEXT: add sp, sp, #1072 |
| ; CHECK1024-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK1024-NEXT: .cfi_restore w19 |
| ; CHECK1024-NEXT: .cfi_restore w27 |
| ; CHECK1024-NEXT: .cfi_restore w28 |
| ; CHECK1024-NEXT: .cfi_restore w30 |
| ; CHECK1024-NEXT: .cfi_restore w29 |
| ; CHECK1024-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 |
| %0 = alloca [37 x i8], align 16 |
| %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37) |
| ret i32 -396142473 |
| } |
| declare ptr @memset(ptr, i32, i32) |
| |
; Spill of the FP callee-save d8 across a call. With hazard padding enabled
; (the CHECK64 and CHECK1024 runs) the d8 spill slot must be separated from
; the GPR saves (x29/x30) by the hazard-sized gap instead of being packed
; next to them, so a GPR stack access can never land in the same padded
; region as an FPR save. The second identical call is emitted as a tail call
; (a plain branch to calld).
define void @call_with_doubles() "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: call_with_doubles:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
; CHECK0-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK0-NEXT: .cfi_def_cfa_offset 16
; CHECK0-NEXT: .cfi_offset w30, -8
; CHECK0-NEXT: .cfi_offset b8, -16
; CHECK0-NEXT: mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK0-NEXT: fmov d8, x8
; CHECK0-NEXT: fmov d0, d8
; CHECK0-NEXT: bl calld
; CHECK0-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK0-NEXT: fmov d0, d8
; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK0-NEXT: b calld
;
; CHECK64-LABEL: call_with_doubles:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #144
; CHECK64-NEXT: str d8, [sp, #64] // 8-byte Folded Spill
; CHECK64-NEXT: str x30, [sp, #136] // 8-byte Folded Spill
; CHECK64-NEXT: .cfi_def_cfa_offset 144
; CHECK64-NEXT: .cfi_offset w30, -8
; CHECK64-NEXT: .cfi_offset b8, -80
; CHECK64-NEXT: mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK64-NEXT: fmov d8, x8
; CHECK64-NEXT: fmov d0, d8
; CHECK64-NEXT: bl calld
; CHECK64-NEXT: fmov d0, d8
; CHECK64-NEXT: ldr x30, [sp, #136] // 8-byte Folded Reload
; CHECK64-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #144
; CHECK64-NEXT: b calld
;
; CHECK1024-LABEL: call_with_doubles:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1024
; CHECK1024-NEXT: .cfi_def_cfa_offset 2080
; CHECK1024-NEXT: .cfi_offset w30, -16
; CHECK1024-NEXT: .cfi_offset w29, -24
; CHECK1024-NEXT: .cfi_offset b8, -1056
; CHECK1024-NEXT: mov x8, #9221120237041090560 // =0x7ff8000000000000
; CHECK1024-NEXT: fmov d8, x8
; CHECK1024-NEXT: fmov d0, d8
; CHECK1024-NEXT: bl calld
; CHECK1024-NEXT: fmov d0, d8
; CHECK1024-NEXT: add sp, sp, #1024
; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1056
; CHECK1024-NEXT: b calld
entry:
; Two identical calls with a quiet-NaN double argument: the first forces d8
; to be live across a call (so it is spilled), the second becomes a tail call.
%call = tail call i32 @calld(double 0x7FF8000000000000)
%call.1 = tail call i32 @calld(double 0x7FF8000000000000)
ret void
}
| declare i32 @calld(double) "aarch64_pstate_sm_compatible" |
| |
; Check that stack objects are ordered fpr > hazard > gpr
; Local-object layout: the FPR-accessed slots (the double, half and <4 x i32>
; stores) are placed above the hazard padding and the GPR-accessed i32 slots
; below it. In the CHECK0 run (hazard size 0) the two classes of objects may
; be freely interleaved; in the CHECK64/CHECK1024 runs the FPR stores use the
; high offsets and the wzr stores the low ones.
define void @ordering_test(double %d, half %h, <4 x i32> %v) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: ordering_test:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: sub sp, sp, #48
; CHECK0-NEXT: .cfi_def_cfa_offset 48
; CHECK0-NEXT: str wzr, [sp, #32]
; CHECK0-NEXT: str d0, [sp, #24]
; CHECK0-NEXT: str wzr, [sp, #44]
; CHECK0-NEXT: str h1, [sp, #22]
; CHECK0-NEXT: str wzr, [sp, #16]
; CHECK0-NEXT: str q2, [sp], #48
; CHECK0-NEXT: ret
;
; CHECK64-LABEL: ordering_test:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #128
; CHECK64-NEXT: .cfi_def_cfa_offset 128
; CHECK64-NEXT: stp wzr, wzr, [sp, #12]
; CHECK64-NEXT: str d0, [sp, #120]
; CHECK64-NEXT: str wzr, [sp, #28]
; CHECK64-NEXT: str h1, [sp, #118]
; CHECK64-NEXT: str q2, [sp, #96]
; CHECK64-NEXT: add sp, sp, #128
; CHECK64-NEXT: ret
;
; CHECK1024-LABEL: ordering_test:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1088
; CHECK1024-NEXT: .cfi_def_cfa_offset 2128
; CHECK1024-NEXT: .cfi_offset w29, -16
; CHECK1024-NEXT: stp wzr, wzr, [sp, #12]
; CHECK1024-NEXT: str d0, [sp, #1080]
; CHECK1024-NEXT: str wzr, [sp, #28]
; CHECK1024-NEXT: str h1, [sp, #1078]
; CHECK1024-NEXT: str q2, [sp, #1056]
; CHECK1024-NEXT: add sp, sp, #1088
; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ret
entry:
; Mix of GPR-stored (i32/i64) and FPR-stored (f64/f16/v4i32) locals, with the
; stores deliberately interleaved so the layout is decided by object type,
; not by store order.
%i32 = alloca i32
%i64 = alloca i64
%f64 = alloca double
%f16 = alloca half
%i32b = alloca i32
%v4i32 = alloca <4 x i32>
store i32 0, ptr %i64
store double %d, ptr %f64
store i32 0, ptr %i32
store half %h, ptr %f16
store i32 0, ptr %i32b
store <4 x i32> %v, ptr %v4i32
ret void
}
| |
| |
; Same fpr/hazard/gpr ordering property as ordering_test, but with two large
; arrays indexed dynamically: the i32 array (stored via w2, a GPR) and the
; float array (stored via s0, an FPR) must end up on opposite sides of the
; hazard padding in the CHECK64/CHECK1024 runs.
define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_pstate_sm_compatible" {
; CHECK0-LABEL: ordering_test_array:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: sub sp, sp, #272
; CHECK0-NEXT: str x29, [sp, #256] // 8-byte Folded Spill
; CHECK0-NEXT: .cfi_def_cfa_offset 272
; CHECK0-NEXT: .cfi_offset w29, -16
; CHECK0-NEXT: add x8, sp, #128
; CHECK0-NEXT: str w2, [x8, x0, lsl #2]
; CHECK0-NEXT: mov x8, sp
; CHECK0-NEXT: str s0, [x8, x1, lsl #2]
; CHECK0-NEXT: add sp, sp, #272
; CHECK0-NEXT: ret
;
; CHECK64-LABEL: ordering_test_array:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #400
; CHECK64-NEXT: str x29, [sp, #384] // 8-byte Folded Spill
; CHECK64-NEXT: .cfi_def_cfa_offset 400
; CHECK64-NEXT: .cfi_offset w29, -16
; CHECK64-NEXT: mov x8, sp
; CHECK64-NEXT: str w2, [x8, x0, lsl #2]
; CHECK64-NEXT: add x8, sp, #192
; CHECK64-NEXT: str s0, [x8, x1, lsl #2]
; CHECK64-NEXT: add sp, sp, #400
; CHECK64-NEXT: ret
;
; CHECK1024-LABEL: ordering_test_array:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1280
; CHECK1024-NEXT: .cfi_def_cfa_offset 2320
; CHECK1024-NEXT: .cfi_offset w29, -16
; CHECK1024-NEXT: mov x8, sp
; CHECK1024-NEXT: str w2, [x8, x0, lsl #2]
; CHECK1024-NEXT: add x8, sp, #1152
; CHECK1024-NEXT: str s0, [x8, x1, lsl #2]
; CHECK1024-NEXT: add sp, sp, #1280
; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ret
entry:
; One GPR-accessed and one FPR-accessed 128-byte array, each written through
; a runtime index so neither store can be folded to a fixed sp offset.
%i32 = alloca [32 x i32]
%f32 = alloca [32 x float]
%g = getelementptr i32, ptr %i32, i64 %o
store i32 %x, ptr %g
%h = getelementptr float, ptr %f32, i64 %p
store float %f, ptr %h
ret void
}
| |
; The VA register currently ends up in VLA space. Let's hope that doesn't come up very often.
; Streaming-mode SME function with shared ZA state calling a private-ZA
; callee: the prologue sets up a TPIDR2 lazy-save buffer (size rdsvl*rdsvl,
; allocated like a VLA below the frame), streaming mode is stopped around the
; call to @other, and ZA is conditionally restored via __arm_tpidr2_restore
; when the callee did not preserve it. The assertions pin the exact buffer
; addressing for each hazard size; in the CHECK1024 run the TPIDR2 block is
; addressed via x29-relative offsets composed from two subtractions because
; the displacement exceeds the immediate range.
define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
; CHECK0-LABEL: vastate:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
; CHECK0-NEXT: .cfi_def_cfa_offset 112
; CHECK0-NEXT: cntd x9
; CHECK0-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK0-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK0-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK0-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
; CHECK0-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK0-NEXT: add x29, sp, #64
; CHECK0-NEXT: .cfi_def_cfa w29, 48
; CHECK0-NEXT: .cfi_offset w19, -8
; CHECK0-NEXT: .cfi_offset w20, -16
; CHECK0-NEXT: .cfi_offset w30, -40
; CHECK0-NEXT: .cfi_offset w29, -48
; CHECK0-NEXT: .cfi_offset b8, -56
; CHECK0-NEXT: .cfi_offset b9, -64
; CHECK0-NEXT: .cfi_offset b10, -72
; CHECK0-NEXT: .cfi_offset b11, -80
; CHECK0-NEXT: .cfi_offset b12, -88
; CHECK0-NEXT: .cfi_offset b13, -96
; CHECK0-NEXT: .cfi_offset b14, -104
; CHECK0-NEXT: .cfi_offset b15, -112
; CHECK0-NEXT: sub sp, sp, #16
; CHECK0-NEXT: rdsvl x8, #1
; CHECK0-NEXT: mov x9, sp
; CHECK0-NEXT: mov w20, w0
; CHECK0-NEXT: msub x9, x8, x8, x9
; CHECK0-NEXT: mov sp, x9
; CHECK0-NEXT: stur x9, [x29, #-80]
; CHECK0-NEXT: sub x9, x29, #80
; CHECK0-NEXT: sturh wzr, [x29, #-70]
; CHECK0-NEXT: stur wzr, [x29, #-68]
; CHECK0-NEXT: sturh w8, [x29, #-72]
; CHECK0-NEXT: msr TPIDR2_EL0, x9
; CHECK0-NEXT: .cfi_offset vg, -32
; CHECK0-NEXT: smstop sm
; CHECK0-NEXT: bl other
; CHECK0-NEXT: smstart sm
; CHECK0-NEXT: .cfi_restore vg
; CHECK0-NEXT: smstart za
; CHECK0-NEXT: mrs x8, TPIDR2_EL0
; CHECK0-NEXT: sub x0, x29, #80
; CHECK0-NEXT: cbnz x8, .LBB33_2
; CHECK0-NEXT: // %bb.1: // %entry
; CHECK0-NEXT: bl __arm_tpidr2_restore
; CHECK0-NEXT: .LBB33_2: // %entry
; CHECK0-NEXT: mov w0, w20
; CHECK0-NEXT: msr TPIDR2_EL0, xzr
; CHECK0-NEXT: sub sp, x29, #64
; CHECK0-NEXT: .cfi_def_cfa wsp, 112
; CHECK0-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK0-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK0-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK0-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK0-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
; CHECK0-NEXT: .cfi_def_cfa_offset 0
; CHECK0-NEXT: .cfi_restore w19
; CHECK0-NEXT: .cfi_restore w20
; CHECK0-NEXT: .cfi_restore w30
; CHECK0-NEXT: .cfi_restore w29
; CHECK0-NEXT: .cfi_restore b8
; CHECK0-NEXT: .cfi_restore b9
; CHECK0-NEXT: .cfi_restore b10
; CHECK0-NEXT: .cfi_restore b11
; CHECK0-NEXT: .cfi_restore b12
; CHECK0-NEXT: .cfi_restore b13
; CHECK0-NEXT: .cfi_restore b14
; CHECK0-NEXT: .cfi_restore b15
; CHECK0-NEXT: ret
;
; CHECK64-LABEL: vastate:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: stp d15, d14, [sp, #-176]! // 16-byte Folded Spill
; CHECK64-NEXT: .cfi_def_cfa_offset 176
; CHECK64-NEXT: cntd x9
; CHECK64-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK64-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK64-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
; CHECK64-NEXT: add x29, sp, #128
; CHECK64-NEXT: .cfi_def_cfa w29, 48
; CHECK64-NEXT: .cfi_offset w19, -16
; CHECK64-NEXT: .cfi_offset w20, -24
; CHECK64-NEXT: .cfi_offset w30, -40
; CHECK64-NEXT: .cfi_offset w29, -48
; CHECK64-NEXT: .cfi_offset b8, -120
; CHECK64-NEXT: .cfi_offset b9, -128
; CHECK64-NEXT: .cfi_offset b10, -136
; CHECK64-NEXT: .cfi_offset b11, -144
; CHECK64-NEXT: .cfi_offset b12, -152
; CHECK64-NEXT: .cfi_offset b13, -160
; CHECK64-NEXT: .cfi_offset b14, -168
; CHECK64-NEXT: .cfi_offset b15, -176
; CHECK64-NEXT: sub sp, sp, #80
; CHECK64-NEXT: rdsvl x8, #1
; CHECK64-NEXT: mov x9, sp
; CHECK64-NEXT: mov w20, w0
; CHECK64-NEXT: msub x9, x8, x8, x9
; CHECK64-NEXT: mov sp, x9
; CHECK64-NEXT: stur x9, [x29, #-208]
; CHECK64-NEXT: sub x9, x29, #208
; CHECK64-NEXT: sturh wzr, [x29, #-198]
; CHECK64-NEXT: stur wzr, [x29, #-196]
; CHECK64-NEXT: sturh w8, [x29, #-200]
; CHECK64-NEXT: msr TPIDR2_EL0, x9
; CHECK64-NEXT: .cfi_offset vg, -32
; CHECK64-NEXT: smstop sm
; CHECK64-NEXT: bl other
; CHECK64-NEXT: smstart sm
; CHECK64-NEXT: .cfi_restore vg
; CHECK64-NEXT: smstart za
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
; CHECK64-NEXT: sub x0, x29, #208
; CHECK64-NEXT: cbnz x8, .LBB33_2
; CHECK64-NEXT: // %bb.1: // %entry
; CHECK64-NEXT: bl __arm_tpidr2_restore
; CHECK64-NEXT: .LBB33_2: // %entry
; CHECK64-NEXT: mov w0, w20
; CHECK64-NEXT: msr TPIDR2_EL0, xzr
; CHECK64-NEXT: sub sp, x29, #128
; CHECK64-NEXT: .cfi_def_cfa wsp, 176
; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
; CHECK64-NEXT: ldp x29, x30, [sp, #128] // 16-byte Folded Reload
; CHECK64-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK64-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK64-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK64-NEXT: ldr d15, [sp], #176 // 8-byte Folded Reload
; CHECK64-NEXT: .cfi_def_cfa_offset 0
; CHECK64-NEXT: .cfi_restore w19
; CHECK64-NEXT: .cfi_restore w20
; CHECK64-NEXT: .cfi_restore w30
; CHECK64-NEXT: .cfi_restore w29
; CHECK64-NEXT: .cfi_restore b8
; CHECK64-NEXT: .cfi_restore b9
; CHECK64-NEXT: .cfi_restore b10
; CHECK64-NEXT: .cfi_restore b11
; CHECK64-NEXT: .cfi_restore b12
; CHECK64-NEXT: .cfi_restore b13
; CHECK64-NEXT: .cfi_restore b14
; CHECK64-NEXT: .cfi_restore b15
; CHECK64-NEXT: ret
;
; CHECK1024-LABEL: vastate:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1136
; CHECK1024-NEXT: .cfi_def_cfa_offset 1136
; CHECK1024-NEXT: cntd x9
; CHECK1024-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK1024-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK1024-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK1024-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK1024-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK1024-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
; CHECK1024-NEXT: add x29, sp, #1088
; CHECK1024-NEXT: .cfi_def_cfa w29, 48
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w20, -16
; CHECK1024-NEXT: .cfi_offset w28, -24
; CHECK1024-NEXT: .cfi_offset w30, -40
; CHECK1024-NEXT: .cfi_offset w29, -48
; CHECK1024-NEXT: .cfi_offset b8, -1080
; CHECK1024-NEXT: .cfi_offset b9, -1088
; CHECK1024-NEXT: .cfi_offset b10, -1096
; CHECK1024-NEXT: .cfi_offset b11, -1104
; CHECK1024-NEXT: .cfi_offset b12, -1112
; CHECK1024-NEXT: .cfi_offset b13, -1120
; CHECK1024-NEXT: .cfi_offset b14, -1128
; CHECK1024-NEXT: .cfi_offset b15, -1136
; CHECK1024-NEXT: sub sp, sp, #1040
; CHECK1024-NEXT: rdsvl x8, #1
; CHECK1024-NEXT: mov x9, sp
; CHECK1024-NEXT: mov w20, w0
; CHECK1024-NEXT: msub x9, x8, x8, x9
; CHECK1024-NEXT: mov sp, x9
; CHECK1024-NEXT: sub x10, x29, #1872
; CHECK1024-NEXT: stur x9, [x10, #-256]
; CHECK1024-NEXT: sub x9, x29, #1862
; CHECK1024-NEXT: sub x10, x29, #1860
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
; CHECK1024-NEXT: sub x9, x29, #2128
; CHECK1024-NEXT: stur wzr, [x10, #-256]
; CHECK1024-NEXT: sub x10, x29, #1864
; CHECK1024-NEXT: sturh w8, [x10, #-256]
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
; CHECK1024-NEXT: .cfi_offset vg, -32
; CHECK1024-NEXT: smstop sm
; CHECK1024-NEXT: bl other
; CHECK1024-NEXT: smstart sm
; CHECK1024-NEXT: .cfi_restore vg
; CHECK1024-NEXT: smstart za
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
; CHECK1024-NEXT: sub x0, x29, #2128
; CHECK1024-NEXT: cbnz x8, .LBB33_2
; CHECK1024-NEXT: // %bb.1: // %entry
; CHECK1024-NEXT: bl __arm_tpidr2_restore
; CHECK1024-NEXT: .LBB33_2: // %entry
; CHECK1024-NEXT: mov w0, w20
; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
; CHECK1024-NEXT: sub sp, x29, #1088
; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
; CHECK1024-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK1024-NEXT: ldr x20, [sp, #1120] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK1024-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK1024-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1136
; CHECK1024-NEXT: .cfi_def_cfa_offset 0
; CHECK1024-NEXT: .cfi_restore w19
; CHECK1024-NEXT: .cfi_restore w20
; CHECK1024-NEXT: .cfi_restore w28
; CHECK1024-NEXT: .cfi_restore w30
; CHECK1024-NEXT: .cfi_restore w29
; CHECK1024-NEXT: .cfi_restore b8
; CHECK1024-NEXT: .cfi_restore b9
; CHECK1024-NEXT: .cfi_restore b10
; CHECK1024-NEXT: .cfi_restore b11
; CHECK1024-NEXT: .cfi_restore b12
; CHECK1024-NEXT: .cfi_restore b13
; CHECK1024-NEXT: .cfi_restore b14
; CHECK1024-NEXT: .cfi_restore b15
; CHECK1024-NEXT: ret
entry:
; A single call is enough: the lazy-save buffer setup/restore all comes from
; the aarch64_inout_za + aarch64_pstate_sm_enabled attribute combination.
tail call void @other()
ret i32 %x
}
| declare void @other() |
| |
| declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible" |
| |
; SVE stack object combined with a dynamically-sized alloca under
; "frame-pointer"="all": the VLA forces a base pointer (x19 = sp after the
; static allocation), and the scalable store is addressed from x29 minus the
; hazard-sized gap (CHECK64: x29-64, CHECK1024: x29-1024) so the SVE object
; stays on the FPR side of the padding.
define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK0-LABEL: sve_stack_object_and_vla:
; CHECK0: // %bb.0: // %entry
; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
; CHECK0-NEXT: mov x29, sp
; CHECK0-NEXT: addvl sp, sp, #-1
; CHECK0-NEXT: mov x19, sp
; CHECK0-NEXT: .cfi_def_cfa w29, 32
; CHECK0-NEXT: .cfi_offset w19, -8
; CHECK0-NEXT: .cfi_offset w28, -16
; CHECK0-NEXT: .cfi_offset w30, -24
; CHECK0-NEXT: .cfi_offset w29, -32
; CHECK0-NEXT: lsl x9, x0, #2
; CHECK0-NEXT: mov x8, sp
; CHECK0-NEXT: add x9, x9, #15
; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK0-NEXT: sub x0, x8, x9
; CHECK0-NEXT: mov sp, x0
; CHECK0-NEXT: mov z0.s, #0 // =0x0
; CHECK0-NEXT: ptrue p0.s
; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
; CHECK0-NEXT: bl bar
; CHECK0-NEXT: mov w0, wzr
; CHECK0-NEXT: mov sp, x29
; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK0-NEXT: ret
;
; CHECK64-LABEL: sve_stack_object_and_vla:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #96
; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK64-NEXT: add x29, sp, #64
; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
; CHECK64-NEXT: sub sp, sp, #64
; CHECK64-NEXT: addvl sp, sp, #-1
; CHECK64-NEXT: mov x19, sp
; CHECK64-NEXT: .cfi_def_cfa w29, 32
; CHECK64-NEXT: .cfi_offset w19, -8
; CHECK64-NEXT: .cfi_offset w28, -16
; CHECK64-NEXT: .cfi_offset w30, -24
; CHECK64-NEXT: .cfi_offset w29, -32
; CHECK64-NEXT: lsl x9, x0, #2
; CHECK64-NEXT: mov x8, sp
; CHECK64-NEXT: add x9, x9, #15
; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK64-NEXT: sub x0, x8, x9
; CHECK64-NEXT: mov sp, x0
; CHECK64-NEXT: mov z0.s, #0 // =0x0
; CHECK64-NEXT: ptrue p0.s
; CHECK64-NEXT: sub x8, x29, #64
; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
; CHECK64-NEXT: bl bar
; CHECK64-NEXT: mov w0, wzr
; CHECK64-NEXT: sub sp, x29, #64
; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #96
; CHECK64-NEXT: ret
;
; CHECK1024-LABEL: sve_stack_object_and_vla:
; CHECK1024: // %bb.0: // %entry
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK1024-NEXT: add x29, sp, #1024
; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1024
; CHECK1024-NEXT: addvl sp, sp, #-1
; CHECK1024-NEXT: mov x19, sp
; CHECK1024-NEXT: .cfi_def_cfa w29, 32
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w28, -16
; CHECK1024-NEXT: .cfi_offset w30, -24
; CHECK1024-NEXT: .cfi_offset w29, -32
; CHECK1024-NEXT: lsl x9, x0, #2
; CHECK1024-NEXT: mov x8, sp
; CHECK1024-NEXT: add x9, x9, #15
; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK1024-NEXT: sub x0, x8, x9
; CHECK1024-NEXT: mov sp, x0
; CHECK1024-NEXT: mov z0.s, #0 // =0x0
; CHECK1024-NEXT: ptrue p0.s
; CHECK1024-NEXT: sub x8, x29, #1024
; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
; CHECK1024-NEXT: bl bar
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: sub sp, x29, #1024
; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK1024-NEXT: add sp, sp, #1056
; CHECK1024-NEXT: ret
entry:
; One scalable (vscale x 4 x i32) local plus an i32 VLA of runtime size %sz;
; the VLA pointer escapes via @bar, so it must get a real address.
%a = alloca <vscale x 4 x i32>
%b = alloca i32, i64 %sz, align 4
store <vscale x 4 x i32> zeroinitializer, ptr %a
call void @bar(ptr noundef nonnull %b)
ret i32 0
}