| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s |
| ; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s |
| |
| ; Tests for prolog sequences for stack probing, when using a 4KiB stack guard. |
| |
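| ; The stack probing parameters in function attributes (#0, probe size 4096) |
| ; take precedence over the ones in the module flags (probe size 9000); only |
| ; static_9232, which does not use #0, relies on the module flags. |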
| |
| ; Small (64-byte) stack frame, no probing required: SP is moved with no probe. |
| define void @static_64(ptr %out) #0 { |
| ; CHECK-LABEL: static_64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 64, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; At 256 bytes we start to always create a frame pointer. No frame smaller than |
| ; this needs a probe, so we can use the saving of at least one CSR as a probe |
| ; at the top of our frame (here the x29 spill to [sp, #256]). |
| define void @static_256(ptr %out) #0 { |
| ; CHECK-LABEL: static_256: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #272 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 272 |
| ; CHECK-NEXT: str x29, [sp, #256] // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #272 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 256, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 1024 bytes is the largest frame which doesn't need probing: no `str xzr` appears. |
| define void @static_1024(ptr %out) #0 { |
| ; CHECK-LABEL: static_1024: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 1024, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 1024+16 bytes is the smallest frame which needs probing: one `str xzr, [sp]`. |
| define void @static_1040(ptr %out) #0 { |
| ; CHECK-LABEL: static_1040: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 1040, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 4k bytes is the largest frame we can probe in one go: one SP move, one probe. |
| define void @static_4096(ptr %out) #0 { |
| ; CHECK-LABEL: static_4096: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 4096, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 4k+16 bytes, still needs just one probe: the post-indexed store moves SP by 16. |
| define void @static_4112(ptr %out) #0 { |
| ; CHECK-LABEL: static_4112: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp], #-16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4128 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 4112, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 4k+1024 bytes, the largest frame needing just one probe: the 1024-byte tail has none. |
| define void @static_5120(ptr %out) #0 { |
| ; CHECK-LABEL: static_5120: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 5136 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 5120, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 4k+1024+16 bytes, the smallest frame needing two probes: the 1040-byte tail gets one. |
| define void @static_5136(ptr %out) #0 { |
| ; CHECK-LABEL: static_5136: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 5152 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 5136, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 2*4k+1024 bytes, the largest frame needing two probes: the 1024-byte tail has none. |
| define void @static_9216(ptr %out) #0 { |
| ; CHECK-LABEL: static_9216: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 8208 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 9232 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 9216, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*4k-16 bytes, the largest frame probed without a loop: five unrolled probes. |
| define void @static_20464(ptr %out) #0 { |
| ; CHECK-LABEL: static_20464: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4112 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 8208 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 12304 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16400 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #4080 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 20480 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #4, lsl #12 // =16384 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4096 |
| ; CHECK-NEXT: add sp, sp, #4080 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 20464, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*4k bytes, the smallest frame probed with a loop; x9 holds the final SP value. |
| define void @static_20480(ptr %out) #0 { |
| ; CHECK-LABEL: static_20480: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 20496 |
| ; CHECK-NEXT: .LBB10_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB10_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 20480, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*4k + 1024 bytes, large enough to use a loop, but not a multiple of 4KiB, |
| ; so has a remainder (1024 bytes allocated after the loop), but no extra probe. |
| define void @static_21504(ptr %out) #0 { |
| ; CHECK-LABEL: static_21504: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 20496 |
| ; CHECK-NEXT: .LBB11_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB11_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 21520 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 21504, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*4k+1040 bytes, large enough to use a loop, has a remainder (1040 bytes |
| ; allocated after the loop) and an extra probe for that remainder. |
| define void @static_21520(ptr %out) #0 { |
| ; CHECK-LABEL: static_21520: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 20496 |
| ; CHECK-NEXT: .LBB12_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB12_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 21536 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 21520, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A small allocation, but with a very large alignment requirement. This is met |
| ; by moving SP far enough that a sufficiently-aligned block exists somewhere in |
| ; the frame, so the whole of that larger SP move is probed (x9 = aligned target). |
| define void @static_16_align_8192(ptr %out) #0 { |
| ; CHECK-LABEL: static_16_align_8192: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: sub x9, x9, #4080 |
| ; CHECK-NEXT: and x9, x9, #0xffffffffffffe000 |
| ; CHECK-NEXT: .LBB13_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.le .LBB13_3 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: // in Loop: Header=BB13_1 Depth=1 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: b .LBB13_1 |
| ; CHECK-NEXT: .LBB13_3: // %entry |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: ldr xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 16, align 8192 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A small allocation with a very large alignment requirement, but the realigned |
| ; SP move is still small enough not to need a loop: a single probe is emitted. |
| define void @static_16_align_2048(ptr %out) #0 { |
| ; CHECK-LABEL: static_16_align_2048: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #2032 |
| ; CHECK-NEXT: and sp, x9, #0xfffffffffffff800 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 16, align 2048 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A large(-ish) allocation with a very large alignment requirement, but the |
| ; realigned SP move is still small enough not to need a loop: one probe only. |
| define void @static_2032_align_2048(ptr %out) #0 { |
| ; CHECK-LABEL: static_2032_align_2048: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #2032 |
| ; CHECK-NEXT: and sp, x9, #0xfffffffffffff800 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 2032, align 2048 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; Test stack probing is enabled by module flags: no #0 here, probe size is 9000. |
| define void @static_9232(ptr %out) uwtable(async) { |
| ; CHECK-LABEL: static_9232: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #2, lsl #12 // =8192 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 8208 |
| ; CHECK-NEXT: sub sp, sp, #800 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 9008 |
| ; CHECK-NEXT: str xzr, [sp], #-240 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 9248 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 9232, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; Test for a tight upper bound on the amount of stack adjustment due to |
| ; stack realignment (1008 bytes, align 32). No probes should appear. |
| define void @static_1008(ptr %out) #0 { |
| ; CHECK-LABEL: static_1008: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #1008 |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| %v = alloca i8, i64 1008, align 32 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; #0 is used by every test function except static_9232: it requests inline |
| ; stack probing with a 4096-byte probe size. |
| attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" } |
| |
| ; Module-level probing settings (inline probing, probe size 9000). static_9232 |
| ; carries no #0 attributes and so relies on these. |
| ; NOTE(review): merge behaviours 4 and 8 should be Override and Min per the |
| ; LLVM LangRef module-flags table - confirm. |
| !llvm.module.flags = !{!0, !1} |
| |
| !0 = !{i32 4, !"probe-stack", !"inline-asm"} |
| !1 = !{i32 8, !"stack-probe-size", i32 9000} |