; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOAVX
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16,avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX

; Codegen of i128 without cx16 is tested in atomic-nocx16.ll

@var = global i128 0

; Due to the scheduling right after isel for cmpxchg, and given that the
; machine scheduler and the copy coalescer do not interfere with physical
; register live ranges, we end up with a useless copy.
define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rcx, %rbx
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

@cmpxchg16b_global = external dso_local global { i128, i128 }, align 16

;; Make sure we retain the offset of the global variable.
define i128 @load_global_with_offset() nounwind {
; CHECK-NOAVX-LABEL: load_global_with_offset:
; CHECK-NOAVX:       ## %bb.0: ## %entry
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b _cmpxchg16b_global+16(%rip)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: load_global_with_offset:
; CHECK-AVX:       ## %bb.0: ## %entry
; CHECK-AVX-NEXT:    vmovdqa _cmpxchg16b_global+16(%rip), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
entry:
  %0 = load atomic i128, ptr getelementptr inbounds ({i128, i128}, ptr @cmpxchg16b_global, i64 0, i32 1) acquire, align 16
  ret i128 %0
}

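; x86 has no native 128-bit atomic RMW instruction, so atomicrmw is expanded
; into a cmpxchg16b loop. For nand, each 64-bit half is and'ed with the
; corresponding half of the operand and then inverted before the exchange
; attempt.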
define void @fetch_and_nand(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB2_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    andq %r8, %rcx
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    notq %rbx
; CHECK-NEXT:    notq %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB2_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw nand ptr %p, i128 %bits release
  store i128 %val, ptr @var, align 16
  ret void
}

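; The or expansion operates on each 64-bit half independently, since no bits
; carry between the halves.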
define void @fetch_and_or(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB3_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    orq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    orq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB3_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw or ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

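; The add expansion needs a carry chain across the halves: addq for the low
; half, adcq for the high half.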
define void @fetch_and_add(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB4_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    addq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    adcq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB4_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw add ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

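; The sub expansion mirrors add with a borrow chain: subq then sbbq.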
define void @fetch_and_sub(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB5_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    subq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    sbbq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB5_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw sub ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

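; The min/max/umin/umax expansions perform a full 128-bit comparison with
; cmpq/sbbq (the subtract-with-borrow of the high halves sets the flags for
; the whole width) and select the value to store with cmov; only the
; condition code differs between the four variants below.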
define void @fetch_and_min(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB6_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovgeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovgeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB6_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw min ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_max(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB7_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovlq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovlq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB7_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw max ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_umin(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB8_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovaeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovaeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB8_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umin ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_umax(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB9_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovbq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovbq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB9_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umax ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

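; Without AVX, a 128-bit atomic load is implemented as a cmpxchg16b with all
; operands zeroed, i.e. as a potential store, which is why %rbx must be
; saved. With AVX, an aligned 16-byte vmovdqa is treated as atomic, so a
; plain vector load suffices.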
define i128 @atomic_load_seq_cst(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_seq_cst:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_load_seq_cst:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
  %r = load atomic i128, ptr %p seq_cst, align 16
  ret i128 %r
}

define i128 @atomic_load_relaxed(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_relaxed:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_load_relaxed:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
  %r = load atomic i128, ptr %p monotonic, align 16
  ret i128 %r
}

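; Without AVX, a 128-bit atomic store loops on cmpxchg16b until the exchange
; succeeds. With AVX, a single vmovdqa suffices; for seq_cst it is followed
; by 'lock orl $0' on the stack, which serves as a cheaper full fence than
; mfence.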
define void @atomic_store_seq_cst(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_seq_cst:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB12_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB12_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_seq_cst:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; CHECK-AVX-NEXT:    retq
  store atomic i128 %in, ptr %p seq_cst, align 16
  ret void
}

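; Release and weaker stores need no trailing fence under the x86-TSO memory
; model, so the AVX paths below are just the vmovdqa.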
define void @atomic_store_release(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_release:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB13_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB13_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_release:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    retq
  store atomic i128 %in, ptr %p release, align 16
  ret void
}

define void @atomic_store_relaxed(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_relaxed:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB14_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB14_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_relaxed:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    retq
  store atomic i128 %in, ptr %p unordered, align 16
  ret void
}