| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD | 
 | ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD | 
 | ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-NO-BMI2,X86-NO-SHLD,X86-NO-BMI2-NO-SHLD | 
 | ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-NO-BMI2,X86-SHLD,X86-NO-BMI2-HAVE-SHLD | 
 | ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-BMI2,X86-NO-SHLD,X86-HAVE-BMI2-NO-SHLD | 
 | ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-BMI2,X86-SHLD,X86-HAVE-BMI2-HAVE-SHLD | 
 |  | 
 ; lshr_4bytes - loads an i32 value from %src.ptr and an i32 shift amount from
 ; %bitOff.ptr, performs a logical right shift (lshr), and stores the i32
 ; result to %dst. Every memory access is marked align 1.
 ; Expected code is a single shift on every configuration checked below: shrl
 ; with the count in %cl when BMI2 is off, or shrxl with the source folded in
 ; as a memory operand when BMI2 is on. Only the low byte of the shift amount
 ; is read (movzbl).
 ; NOTE(review): reading one byte is presumably legal because an lshr by an
 ; amount >= the bit width is poison in LLVM IR - confirm against the LangRef.
 | define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: lshr_4bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    shrl %cl, %eax | 
 | ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: lshr_4bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    shrxl %eax, (%rdi), %eax | 
 | ; X64-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-LABEL: lshr_4bytes: | 
 | ; X86-NO-BMI2:       # %bb.0: | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NEXT:    movl (%edx), %edx | 
 | ; X86-NO-BMI2-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-NEXT:    retl | 
 | ; | 
 | ; X86-BMI2-LABEL: lshr_4bytes: | 
 | ; X86-BMI2:       # %bb.0: | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-BMI2-NEXT:    movzbl (%edx), %edx | 
 | ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx | 
 | ; X86-BMI2-NEXT:    movl %ecx, (%eax) | 
 | ; X86-BMI2-NEXT:    retl | 
 |   %src = load i32, ptr %src.ptr, align 1 | 
 |   %bitOff = load i32, ptr %bitOff.ptr, align 1 | 
 |   %res = lshr i32 %src, %bitOff | 
 |   store i32 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 ; shl_4bytes - loads an i32 value from %src.ptr and an i32 shift amount from
 ; %bitOff.ptr, performs a left shift (shl), and stores the i32 result to
 ; %dst. Every memory access is marked align 1.
 ; Expected code is a single shift on every configuration checked below: shll
 ; with the count in %cl when BMI2 is off, or shlxl with the source folded in
 ; as a memory operand when BMI2 is on. Only the low byte of the shift amount
 ; is read (movzbl).
 | define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: shl_4bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    shll %cl, %eax | 
 | ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: shl_4bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    shlxl %eax, (%rdi), %eax | 
 | ; X64-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-LABEL: shl_4bytes: | 
 | ; X86-NO-BMI2:       # %bb.0: | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NEXT:    movl (%edx), %edx | 
 | ; X86-NO-BMI2-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-NEXT:    retl | 
 | ; | 
 | ; X86-BMI2-LABEL: shl_4bytes: | 
 | ; X86-BMI2:       # %bb.0: | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-BMI2-NEXT:    movzbl (%edx), %edx | 
 | ; X86-BMI2-NEXT:    shlxl %edx, (%ecx), %ecx | 
 | ; X86-BMI2-NEXT:    movl %ecx, (%eax) | 
 | ; X86-BMI2-NEXT:    retl | 
 |   %src = load i32, ptr %src.ptr, align 1 | 
 |   %bitOff = load i32, ptr %bitOff.ptr, align 1 | 
 |   %res = shl i32 %src, %bitOff | 
 |   store i32 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 ; ashr_4bytes - loads an i32 value from %src.ptr and an i32 shift amount from
 ; %bitOff.ptr, performs an arithmetic right shift (ashr), and stores the i32
 ; result to %dst. Every memory access is marked align 1.
 ; Expected code is a single shift on every configuration checked below: sarl
 ; with the count in %cl when BMI2 is off, or sarxl with the source folded in
 ; as a memory operand when BMI2 is on. Only the low byte of the shift amount
 ; is read (movzbl).
 | define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: ashr_4bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    sarl %cl, %eax | 
 | ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: ashr_4bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    sarxl %eax, (%rdi), %eax | 
 | ; X64-BMI2-NEXT:    movl %eax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-LABEL: ashr_4bytes: | 
 | ; X86-NO-BMI2:       # %bb.0: | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NEXT:    movl (%edx), %edx | 
 | ; X86-NO-BMI2-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-NEXT:    sarl %cl, %edx | 
 | ; X86-NO-BMI2-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-NEXT:    retl | 
 | ; | 
 | ; X86-BMI2-LABEL: ashr_4bytes: | 
 | ; X86-BMI2:       # %bb.0: | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-BMI2-NEXT:    movzbl (%edx), %edx | 
 | ; X86-BMI2-NEXT:    sarxl %edx, (%ecx), %ecx | 
 | ; X86-BMI2-NEXT:    movl %ecx, (%eax) | 
 | ; X86-BMI2-NEXT:    retl | 
 |   %src = load i32, ptr %src.ptr, align 1 | 
 |   %bitOff = load i32, ptr %bitOff.ptr, align 1 | 
 |   %res = ashr i32 %src, %bitOff | 
 |   store i32 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 |  | 
 ; lshr_8bytes - loads an i64 value from %src.ptr and an i64 shift amount from
 ; %bitOff.ptr, performs a logical right shift (lshr), and stores the i64
 ; result to %dst. Every memory access is marked align 1.
 ; On the 64-bit configurations the expected code is one shrq / shrxq.
 ; On the i686 configurations the shift is expanded over two 32-bit halves,
 ; with a separate expected sequence per BMI2 / slow-shld combination:
 ;   - low half: a funnel shift of high:low. With slow-shld disabled this is
 ;     a shrdl; with slow-shld enabled it is emulated as (low >> n) or'ed with
 ;     (2 * high) shifted left by the inverted count (notb).
 ;   - high half: a plain 32-bit logical shift right of the high word.
 ;   - "testb $32" on the count then selects, via the cmov pair, between
 ;     those results and the count >= 32 case, where the low word becomes the
 ;     shifted high word and the high word becomes zero (the xorl register).
 | define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: lshr_8bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    shrq %cl, %rax | 
 | ; X64-NO-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: lshr_8bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    shrxq %rax, (%rdi), %rax | 
 | ; X64-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: lshr_8bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovnel %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovel %esi, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, (%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: lshr_8bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esi), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%esi), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovnel %edi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: lshr_8bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%edx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, (%edx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ecx, %esi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovnel %edx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %edx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_8bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esi), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%esi), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %esi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorl %edi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovnel %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i64, ptr %src.ptr, align 1 | 
 |   %bitOff = load i64, ptr %bitOff.ptr, align 1 | 
 |   %res = lshr i64 %src, %bitOff | 
 |   store i64 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 ; shl_8bytes - loads an i64 value from %src.ptr and an i64 shift amount from
 ; %bitOff.ptr, performs a left shift (shl), and stores the i64 result to
 ; %dst. Every memory access is marked align 1.
 ; On the 64-bit configurations the expected code is one shlq / shlxq.
 ; On the i686 configurations the shift is expanded over two 32-bit halves,
 ; with a separate expected sequence per BMI2 / slow-shld combination:
 ;   - high half: a funnel shift of high:low. With slow-shld disabled this is
 ;     a shldl; with slow-shld enabled it is emulated as (high << n) or'ed
 ;     with (low >> 1) shifted right by the inverted count (notb).
 ;   - low half: a plain 32-bit shift left of the low word.
 ;   - "testb $32" on the count then selects, via the cmov pair, between
 ;     those results and the count >= 32 case, where the high word becomes the
 ;     shifted low word and the low word becomes zero (the xorl register).
 | define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: shl_8bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    shlq %cl, %rax | 
 | ; X64-NO-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: shl_8bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    shlxq %rax, (%rdi), %rax | 
 | ; X64-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: shl_8bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovnel %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovel %esi, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, (%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 4(%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: shl_8bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovnel %edi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: shl_8bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, 4(%edx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorl %edx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovnel %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %edi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: shl_8bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxl %ecx, %esi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorl %edi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovnel %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i64, ptr %src.ptr, align 1 | 
 |   %bitOff = load i64, ptr %bitOff.ptr, align 1 | 
 |   %res = shl i64 %src, %bitOff | 
 |   store i64 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 ; ashr_8bytes - loads an i64 value from %src.ptr and an i64 shift amount from
 ; %bitOff.ptr, performs an arithmetic right shift (ashr), and stores the i64
 ; result to %dst. Every memory access is marked align 1.
 ; On the 64-bit configurations the expected code is one sarq / sarxq.
 ; On the i686 configurations the shift is expanded over two 32-bit halves,
 ; with a separate expected sequence per BMI2 / slow-shld combination:
 ;   - low half: a funnel shift of high:low. With slow-shld disabled this is
 ;     a shrdl; with slow-shld enabled it is emulated as (low >> n) or'ed with
 ;     (2 * high) shifted left by the inverted count (notb).
 ;   - high half: a 32-bit arithmetic shift right (sarl / sarxl) of the high
 ;     word.
 ;   - "testb $32" on the count then selects, via the cmov pair, between
 ;     those results and the count >= 32 case, where the low word becomes the
 ;     shifted high word and the high word becomes the sign fill produced by
 ;     "sarl $31".
 | define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-LABEL: ashr_8bytes: | 
 | ; X64-NO-BMI2:       # %bb.0: | 
 | ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-NEXT:    sarq %cl, %rax | 
 | ; X64-NO-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-NEXT:    retq | 
 | ; | 
 | ; X64-BMI2-LABEL: ashr_8bytes: | 
 | ; X64-BMI2:       # %bb.0: | 
 | ; X64-BMI2-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-BMI2-NEXT:    sarxq %rax, (%rdi), %rax | 
 | ; X64-BMI2-NEXT:    movq %rax, (%rdx) | 
 | ; X64-BMI2-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: ashr_8bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl $31, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    testb $32, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovnel %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    cmovel %ebx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 4(%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, (%edx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: ashr_8bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esi), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%esi), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl %cl, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl $31, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovnel %edi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    cmovel %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: ashr_8bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%esi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%edx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, (%esi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    notb %bl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%ecx,%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarxl %edx, %ecx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    testb $32, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovnel %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    cmovel %esi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_8bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esi), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%esi), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxl %ecx, %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarl $31, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $32, %cl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovnel %edi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovel %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i64, ptr %src.ptr, align 1 | 
 |   %bitOff = load i64, ptr %bitOff.ptr, align 1 | 
 |   %res = ashr i64 %src, %bitOff | 
 |   store i64 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 |  | 
 | define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rdi,%rdi), %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r8, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %al | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmovneq %rdi, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmoveq %rdi, %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrq %cl, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorl %edi, %edi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmovneq %rsi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmoveq %rsi, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, (%rdi), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notb %dil | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%rax,%rax), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rdi, %r8, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rsi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmovneq %rax, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmoveq %rax, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rsi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxq %rcx, %rdi, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorl %edi, %edi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovneq %rsi, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmoveq %rsi, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%eax), %dh | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, (%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $12, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%esp,%eax), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebp,%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 12(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, (%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 4(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %dl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%esp,%ebx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp,%ebx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%esp,%ebx), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%esp,%ebx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%ecx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, (%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $12, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%esp,%esi), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%esp,%esi), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ebx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %edx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, (%esp,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%esp,%esi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %esi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, (%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 4(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %dl, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%esp,%ebp), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp,%ebp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%esp,%ebp), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%esp,%ebp), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %ebp, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i128, ptr %src.ptr, align 1 | 
 |   %bitOff = load i128, ptr %bitOff.ptr, align 1 | 
 |   %res = lshr i128 %src, %bitOff | 
 |   store i128 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | ; Left shift of a 128-bit value by a variable bit count. | 
 | ; The IR loads an i128 from %src.ptr and an i128 shift amount from | 
 | ; %bitOff.ptr (both align 1), applies `shl`, and stores the i128 result to | 
 | ; %dst (align 1). | 
 | ; In every expected sequence below only the low byte of the shift amount is | 
 | ; read from memory. The x86-64 expectations pick the two result halves with a | 
 | ; `testb $64` followed by a cmovne/cmove pair; the i686 expectations zero 16 | 
 | ; bytes of stack, store the four source words beside them, and reload four | 
 | ; 32-bit words at a negated byte offset derived from the shift amount. | 
 | define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rdi, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %al | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmovneq %r8, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmoveq %r8, %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq %cl, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorl %eax, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmovneq %rsi, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmoveq %rsi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rcx, 8(%rdi), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notb %dil | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rcx, %rax, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rdi, %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rsi, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmovneq %r8, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmoveq %r8, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorl %esi, %esi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovneq %rax, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmoveq %rax, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $60, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%eax), %dh | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $12, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    negb %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movsbl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%esp,%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%esp,%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%esp,%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%esp,%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 8(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 12(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 4(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $60, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $32, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, (%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    negb %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movsbl %dl, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%edi), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%edi), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%edi), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%edi), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $32, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%ecx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, (%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $3, %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $12, %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    negb %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movsbl %al, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%esp,%edx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%esp,%edx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %ecx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, 28(%esp,%edx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%esp,%edx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ecx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, (%ecx) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 8(%ecx) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 12(%ecx) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 4(%ecx) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $32, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, (%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    negb %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movsbl %dl, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%edi), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%edi), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%edi), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%edi), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxl %ecx, %ebx, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $32, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i128, ptr %src.ptr, align 1 | 
 |   %bitOff = load i128, ptr %bitOff.ptr, align 1 | 
 |   %res = shl i128 %src, %bitOff | 
 |   store i128 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    notb %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rdi,%rdi), %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r8, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %al | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmovneq %r8, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    cmoveq %r8, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq %cl, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmovneq %rsi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    cmoveq %rsi, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, (%rdi), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notb %dil | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%rax,%rax), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rdi, %r8, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rsi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarxq %rcx, %rax, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarq $63, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmovneq %rsi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    cmoveq %rsi, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxq %rcx, %rdi, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    testb $64, %cl | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmovneq %rsi, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    cmoveq %rsi, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ecx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%eax), %dh | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, (%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $12, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%esp,%ebx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%esp,%ebx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebp,%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%esp,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%esp,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebp, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dh, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 8(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, (%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 4(%ebp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl $31, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %dl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%esp,%ebx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp,%ebx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%esp,%ebx), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%esp,%ebx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl %cl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%ecx), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%ecx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, (%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $12, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%esp,%esi), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%esp,%esi), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ebx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %edx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, (%esp,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%esp,%esi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarxl %eax, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %esi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, (%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 4(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $44, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarl $31, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $3, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andb $12, %dl | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %dl, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%esp,%ebp), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp,%ebp), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%esp,%ebp), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%esp,%ebp), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxl %ecx, %ebp, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $44, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i128, ptr %src.ptr, align 1 | 
 |   %bitOff = load i128, ptr %bitOff.ptr, align 1 | 
 |   %res = ashr i128 %src, %bitOff | 
 |   store i128 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 |  | 
 | define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrb $6, %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl %cl, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%r8,8), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%r8,8), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%r8,8), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rbx,%rbx), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r10, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -40(%rsp,%r8,8), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r8,%r8), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rbx, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrb $6, %al | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%rax,8), %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax,8), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%rax,8), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rsi, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%rax,8), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r8, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrq %cl, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrb $6, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%rsi,8), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%rsi,8), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rcx, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, -72(%rsp,%rsi,8), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rdi, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%rsi,8), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rsi, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    # kill: def $al killed $al killed $rax def $rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r8, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rcx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rcx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r9, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rsi, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rsi, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r10, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $6, %al | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%rax,8), %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax,8), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%rax,8), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rsi, %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%rax,8), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r8, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%ecx), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%ebp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%ebp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl %al, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%esp,%edi,4), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%esp,%edi,4), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%esp,%edi,4), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%esp,%esi,4), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 48(%esp,%esi,4), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%esp,%esi,4), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%esp,%esi,4), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%esp,%eax,4), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 28(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, 24(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 16(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 20(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 8(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, (%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 4(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%ebp), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%ebp), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%ebp), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%ebp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%ebp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%ebp,4), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%esp,%ebp,4), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%esp,%ebp,4), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%ebp,4), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 28(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 16(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 20(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 4(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $5, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%esp,%esi,4), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, 32(%esp,%esi,4), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ecx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%esp,%esi,4), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%esp,%esi,4), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 60(%esp,%esi,4), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %esi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 28(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 24(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 16(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 8(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, (%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 4(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%ecx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%ecx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%ecx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%ecx), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%ecx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%ebp,4), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%esp,%ebp,4), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%esp,%ebp,4), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%ebp,4), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%esp,%ebp,4), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 24(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, %edi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 28(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 16(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 20(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 4(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i256, ptr %src.ptr, align 1 | 
 |   %bitOff = load i256, ptr %bitOff.ptr, align 1 | 
 |   %res = lshr i256 %src, %bitOff | 
 |   store i256 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andb $24, %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    negb %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movsbq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -32(%rsp,%r10), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -24(%rsp,%r10), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -8(%rsp,%r10), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -16(%rsp,%r10), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r10, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrb $3, %al | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andb $24, %al | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    negb %al | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movsbq %al, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -24(%rsp,%rax), %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -16(%rsp,%rax), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rsi, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -40(%rsp,%rax), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -32(%rsp,%rax), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r8, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andb $24, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    negb %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movsbq %cl, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -40(%rsp,%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -32(%rsp,%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rcx, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, -16(%rsp,%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -24(%rsp,%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rdi, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %r8, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    # kill: def $al killed $al killed $rax def $rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rsi, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rdi, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r9, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rcx, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r10, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rsi, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $3, %al | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andb $24, %al | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    negb %al | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movsbq %al, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -24(%rsp,%rax), %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -16(%rsp,%rax), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rsi, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -40(%rsp,%rax), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -32(%rsp,%rax), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r8, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxq %rcx, %r8, %rcx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rcx, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%ecx), %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%ebp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%ebp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $3, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $28, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    negb %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movsbl %al, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 64(%esp,%ebx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%ebx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%ebx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%ebx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 92(%esp,%ebx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%ebx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 24(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 28(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 20(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%ebp), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%ebp), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%ebp), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%ebp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%ebp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $3, %al | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andb $28, %al | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    negb %al | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movsbl %al, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %eax, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 28(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 16(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, 20(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $3, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $28, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    negb %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movsbl %cl, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 64(%esp,%esi), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%esi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%esi), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%esi), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebp, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%ebp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%ebp), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, %ebx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, 92(%esp,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%esi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 24(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, 28(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 16(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%ecx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%ecx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%ecx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%ecx), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%ecx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%ecx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $3, %al | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andb $28, %al | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    negb %al | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movsbl %al, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 28(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 16(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, 20(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxl %ecx, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i256, ptr %src.ptr, align 1 | 
 |   %bitOff = load i256, ptr %bitOff.ptr, align 1 | 
 |   %res = shl i256 %src, %bitOff | 
 |   store i256 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrb $6, %cl | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movzbl %cl, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%r8,8), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%r8,8), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%r8,8), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rbx,%rbx), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r10, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -40(%rsp,%r8,8), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r8,%r8), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rbx, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrb $6, %al | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%rax,8), %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax,8), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%rax,8), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rsi, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%rax,8), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r8, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq %cl, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrb $6, %cl | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%rsi,8), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%rsi,8), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rcx, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, -72(%rsp,%rsi,8), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rax, %rdi, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%rsi,8), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarxq %rax, %rsi, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    # kill: def $al killed $al killed $rax def $rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %al | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r8, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rcx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rcx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r9, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rsi, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rsi, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r10, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%rsi), %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $6, %al | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%rax,8), %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax,8), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%rax,8), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rsi, %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%rax,8), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r8, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%edx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%edx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%edx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%edx), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%edx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%edx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%edx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%edx), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl $31, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl %al, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%esp,%ebp,4), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%esp,%ebp,4), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %cl, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andb $31, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%esp,%ebp,4), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%esp,%ebp,4), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 48(%esp,%esi,4), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%esp,%ebx,4), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%esp,%ebx,4), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%esp,%eax,4), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 28(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, 24(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 16(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 20(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 8(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, (%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 4(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%edx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%edx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%edx), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%edx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%edx), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%edx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%edx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%edx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl $31, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%ebp,4), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%esp,%ebp,4), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%esp,%ebp,4), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%ebp,4), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%esp,%ebp,4), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl %cl, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 28(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 16(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 20(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, (%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 4(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl (%eax), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrb $5, %cl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movzbl %cl, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%esp,%esi,4), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %dl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, 32(%esp,%esi,4), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%esp,%esi,4), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ecx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%esp,%esi,4), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%ebx,%ebx), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%esp,%esi,4), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %eax, %ebx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 60(%esp,%esi,4), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarxl %eax, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %esi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 28(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 24(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 16(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 8(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, (%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, 4(%esi) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $108, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl (%ecx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarl $31, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrb $5, %al | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movzbl %al, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%esp,%ebp,4), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%esp,%ebp,4), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%esp,%ebp,4), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%esp,%ebp,4), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%esp,%ebp,4), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%esp,%ebp,4), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 24(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxl %ecx, %edi, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 28(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 16(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 20(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, (%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, 4(%ebp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $92, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i256, ptr %src.ptr, align 1 | 
 |   %bitOff = load i256, ptr %bitOff.ptr, align 1 | 
 |   %res = ashr i256 %src, %bitOff | 
 |   store i256 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 |  | 
 | define void @lshr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: lshr_64bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl (%rsi), %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %r8d, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $63, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $56, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -128(%rsp,%r8), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%r8), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %edi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    notl %edi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%r8), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r14,%r14), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rsi, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r9, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%r8), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%r8), %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r12, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r11, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r14, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%r8), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -80(%rsp,%r8), %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rbp,%rbp), %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r13, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r14, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r15, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -72(%rsp,%r8), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rdi,%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rbp, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, 56(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, 48(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, 32(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r12, 40(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_64bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -128(%rsp,%rax), %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -120(%rsp,%rax), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -96(%rsp,%rax), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -104(%rsp,%rax), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r10, %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -80(%rsp,%rax), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -88(%rsp,%rax), %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r14, %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r14, %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r9, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrq %cl, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, 48(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 56(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, 32(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r15, 40(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_64bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, -128(%rsp,%rax), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%rax), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r9, %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r11, %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %r12d | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notl %r12d | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%rax), %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rbx, %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -80(%rsp,%rax), %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rax, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r9, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r9, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r10, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %r10, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r14, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r11, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r11, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbp, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, 56(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 48(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, 32(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, 40(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_64bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -128(%rsp,%rax), %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -120(%rsp,%rax), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -96(%rsp,%rax), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -104(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r10, %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -80(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -88(%rsp,%rax), %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r14, %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r14, %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r9, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, 48(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 56(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r10, 32(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r15, 40(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: lshr_64bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%edi), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%edi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 48(%edi), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%edi), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%edi), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $31, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $60, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    notl %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 64(%esp,%edi), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %cl, (%esp) # 1-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%edi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%edi), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%edi), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 92(%esp,%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 96(%esp,%edi), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 100(%esp,%edi), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 104(%esp,%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 108(%esp,%edi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 112(%esp,%edi), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ecx,%ecx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 116(%esp,%edi), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 120(%esp,%edi), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ecx,%ecx), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%esp), %ch # 1-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %al, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 124(%esp,%edi), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edi,%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 60(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, 56(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 48(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 52(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 40(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: lshr_64bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $60, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 80(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 88(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 84(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 96(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 92(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 104(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 100(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 108(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 56(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 52(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 40(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: lshr_64bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 60(%eax), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $31, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $60, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    notl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %bl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, 64(%esp,%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%ecx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%ecx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 96(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 92(%esp,%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 104(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 100(%esp,%ecx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 112(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 108(%esp,%ecx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 120(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 116(%esp,%ecx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 124(%esp,%ecx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ebx, %eax, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, 56(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, 52(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 40(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_64bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $188, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $60, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 80(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 88(%esp,%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 84(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 96(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 92(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 104(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 100(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 108(%esp,%eax), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 56(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 52(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, 40(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 44(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 32(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 36(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 28(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 16(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 20(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $188, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i512, ptr %src.ptr, align 1 | 
 |   %bitOff = load i512, ptr %bitOff.ptr, align 1 | 
 |   %res = lshr i512 %src, %bitOff | 
 |   store i512 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | define void @shl_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: shl_64bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl (%rsi), %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $63, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $56, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    negl %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movslq %esi, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%rbx), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%rbx), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r10, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -40(%rsp,%rbx), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%rbx), %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r15, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r14, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r15, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -24(%rsp,%rbx), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -32(%rsp,%rbx), %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r13, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r12, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r13, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -8(%rsp,%rbx), %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -16(%rsp,%rbx), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r12, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rbx, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, 48(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r13, 56(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, 32(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r15, 40(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_64bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %esi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %esi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $56, %esi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    negl %esi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movslq %esi, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%r9), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -40(%rsp,%r9), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%r9), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%r9), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rdi, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -32(%rsp,%r9), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -24(%rsp,%r9), %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r11, %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r10, %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -16(%rsp,%r9), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -8(%rsp,%r9), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r10, %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rbx, %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r8, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, 48(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, 56(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, 32(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r14, 40(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_64bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl (%rsi), %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $63, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $56, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    negl %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movslq %esi, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -64(%rsp,%rsi), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -56(%rsp,%rsi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rcx, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -40(%rsp,%rsi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rdi, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -48(%rsp,%rsi), %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %r14, %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -24(%rsp,%rsi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %r8, %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %r10, %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %r13d | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %r13b | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %r10, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r9, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -32(%rsp,%rsi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %r9, %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %r14, %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r11, %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, -8(%rsp,%rsi), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -16(%rsp,%rsi), %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rax, %rsi, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %rcx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %r9, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbp, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %rsi, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r11, %rsi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrq %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %r13, %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rax, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r12, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, 48(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rsi, 56(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 32(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, 40(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r14, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_64bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %esi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %esi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $56, %esi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    negl %esi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movslq %esi, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -48(%rsp,%r8), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -40(%rsp,%r8), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rax, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -64(%rsp,%r8), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -56(%rsp,%r8), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rdi, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -32(%rsp,%r8), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -24(%rsp,%r8), %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r11, %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r9, %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -16(%rsp,%r8), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -8(%rsp,%r8), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r9, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %rbx, %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shldq %cl, %r10, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxq %rcx, %r10, %rcx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, 48(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, 56(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, 32(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r14, 40(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rcx, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: shl_64bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%eax), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 48(%eax), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%eax), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%eax), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ebp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $60, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl %ecx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%eax), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $31, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %ch | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %bl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%eax), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    negl %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 176(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%edi), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%edi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %dl, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %ebx, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, (%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 56(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 60(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 48(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 52(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 40(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 44(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 32(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 36(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 24(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 28(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 16(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 20(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 8(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 12(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, 4(%ecx) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: shl_64bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%ecx), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%ecx), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%ecx), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%ecx), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%ecx), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%ecx), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%ecx), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $60, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    leal {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl %ebp, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    negl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 160(%esp,%ebp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 56(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 60(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %eax, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 48(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 52(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 40(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 44(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 32(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 36(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 24(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 28(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 16(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 20(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 8(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, 12(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 4(%ebp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: shl_64bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 32(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%ebp), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%ebp), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%ebp), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%ebp), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%ebp), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 60(%ebp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%ebp), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%esp), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $31, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $60, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal {{[0-9]+}}(%esp), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl %ebp, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%edi), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %bl | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ecx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%edi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%edi), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%edi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, (%esp), %eax # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 32(%edi), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%edi), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %ecx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%edi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%edi), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ecx, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    negl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, 188(%esp,%ecx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%edi), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edx, %edi, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %ebx, %edi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 56(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, 60(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, 48(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 52(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 40(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: shl_64bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%ebx), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%ebx), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%ebx), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%ebx), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%ebx), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%ebx), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%ebx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm0, %xmm0 | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $60, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    leal {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl %ebx, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebp, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %esi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    negl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 176(%esp,%ebx), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 56(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 60(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shlxl %ecx, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shldl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 48(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 52(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, 40(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i512, ptr %src.ptr, align 1 | 
 |   %bitOff = load i512, ptr %bitOff.ptr, align 1 | 
 |   %res = shl i512 %src, %bitOff | 
 |   store i512 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | define void @ashr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { | 
 | ; X64-NO-BMI2-NO-SHLD-LABEL: ashr_64bytes: | 
 | ; X64-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    pushq %rax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %rcx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl (%rsi), %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %r8d, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $63, %eax | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    andl $56, %r8d | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -128(%rsp,%r8), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%r8), %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rsi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %edi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    notl %edi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%r8), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r14,%r14), %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rsi, %r10 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r9, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r11, %r9 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%r8), %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%r8), %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r12, %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r11, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r14, %r11 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%r8), %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -80(%rsp,%r8), %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rbp,%rbp), %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r13, %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq %r14, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %r15, %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq -72(%rsp,%r8), %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    leaq (%rdi,%rdi), %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    shlq %cl, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    orq %rbp, %r8 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    sarq %cl, %rdi | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rdi, 56(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r8, 48(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r14, 32(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r12, 40(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r11, 16(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r9, (%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %r10, 8(%rdx) | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    popq %rbp | 
 | ; X64-NO-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_64bytes: | 
 | ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -128(%rsp,%rax), %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -120(%rsp,%rax), %r9 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r9, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %r8 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -96(%rsp,%rax), %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -104(%rsp,%rax), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r10, %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %rdi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -80(%rsp,%rax), %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -88(%rsp,%rax), %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r14, %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r14, %r10 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %r11 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r9, %rsi | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    sarq %cl, %rax | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r11, 48(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rax, 56(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r10, 32(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r15, 40(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %r8, 8(%rdx) | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    popq %r15 | 
 | ; X64-NO-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_64bytes: | 
 | ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pushq %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, -128(%rsp,%rax), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %esi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%rax), %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r9, %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r11, %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %r12d | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notl %r12d | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rdi, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %rdi | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%rax), %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %sil | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r8, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %r8 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rbx, %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -80(%rsp,%rax), %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    sarxq %rcx, %rax, %rcx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r9, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r9, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r10, %r9 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %r10, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r14, %r10 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r11, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r11, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbp, %r11 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %rax, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rax | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rcx, 56(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rax, 48(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r11, 32(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, 40(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, 16(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx) | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq $8, %rsp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r12 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r13 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r15 | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbp | 
 | ; X64-HAVE-BMI2-NO-SHLD-NEXT:    retq | 
 | ; | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_64bytes: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pushq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq (%rdi), %rcx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 8(%rdi), %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 16(%rdi), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 24(%rdi), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 32(%rdi), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 40(%rdi), %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 48(%rdi), %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq 56(%rdi), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%rsi), %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarq $63, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $63, %ecx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $56, %eax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -128(%rsp,%rax), %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -120(%rsp,%rax), %r9 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r9, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rdi, %r8 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -96(%rsp,%rax), %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -104(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r10, %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %rdi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -80(%rsp,%rax), %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -88(%rsp,%rax), %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r14, %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r11, %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r14, %r10 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq -72(%rsp,%rax), %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %rax, %r11 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdq %cl, %r9, %rsi | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxq %rcx, %rax, %rax | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r11, 48(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rax, 56(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r10, 32(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r15, 40(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rdi, 16(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rbx, 24(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, (%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %r8, 8(%rdx) | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %rbx | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %r14 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    popq %r15 | 
 | ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    retq | 
 | ; | 
 | ; X86-NO-BMI2-NO-SHLD-LABEL: ashr_64bytes: | 
 | ; X86-NO-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 40(%eax), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 44(%eax), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 48(%eax), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 52(%eax), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 56(%eax), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 60(%eax), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%ebp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl $31, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $31, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl $3, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    andl $60, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    notl %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%ebp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 64(%esp,%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    xorb $31, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %cl, (%esp) # 1-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 92(%esp,%ebp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 96(%esp,%ebp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edx, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 100(%esp,%ebp), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 104(%esp,%ebp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%edx,%edx), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 108(%esp,%ebp), %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 112(%esp,%ebp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ecx,%ecx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %ebx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 116(%esp,%ebp), %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 120(%esp,%ebp), %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ecx,%ecx), %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb (%esp), %ch # 1-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %edx, %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %al, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shrl %cl, %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl 124(%esp,%ebp), %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    leal (%ebp,%ebp), %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movb %ch, %cl | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    shll %cl, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    orl %eax, %edx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    sarl %cl, %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebp, 60(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edx, 56(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %esi, 48(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %edi, 52(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ebx, 40(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-NO-BMI2-HAVE-SHLD-LABEL: ashr_64bytes: | 
 | ; X86-NO-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    subl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    andl $60, %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 80(%esp,%eax), %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 88(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 84(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%esp) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 96(%esp,%eax), %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 92(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 104(%esp,%eax), %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 100(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl 108(%esp,%eax), %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %esi, 56(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    sarl %cl, %edx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebx, 52(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 40(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ebp, (%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    addl $188, %esp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-NO-BMI2-HAVE-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-NO-SHLD-LABEL: ashr_64bytes: | 
 | ; X86-HAVE-BMI2-NO-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    subl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 40(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 44(%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 48(%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 60(%eax), %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl (%eax), %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $31, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    andl $60, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 68(%esp,%ebx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 72(%esp,%ebx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    notl %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%esi,%esi), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    xorb $31, %al | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, 64(%esp,%ebx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 80(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %eax, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 76(%esp,%ebx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 88(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 84(%esp,%ebx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 96(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 92(%esp,%ebx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 104(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 100(%esp,%ebx), %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %ebp, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %ebp, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 112(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %edi, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 108(%esp,%ebx), %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %esi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %esi, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %ebp, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 120(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    leal (%eax,%eax), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %esi, %edi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 116(%esp,%ebx), %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %edi, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %edi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shrxl %edx, %eax, %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl 124(%esp,%ebx), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    sarxl %edx, %eax, %edx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl %eax, %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    shlxl %ecx, %eax, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    orl %esi, %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebx, 56(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ebp, 52(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 40(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 44(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 32(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 36(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 24(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 28(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 16(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 20(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 8(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 12(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, (%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    addl $204, %esp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-NO-SHLD-NEXT:    retl | 
 | ; | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_64bytes: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD:       # %bb.0: | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    pushl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    subl $188, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 4(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 8(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 12(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 16(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 20(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 24(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 28(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 32(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 36(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 40(%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 44(%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%eax), %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%eax), %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%eax), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarl $31, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $31, %ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrl $3, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    andl $60, %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 56(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 52(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 64(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 60(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 72(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 68(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 80(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 76(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 88(%esp,%eax), %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 84(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebp, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%esp) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 96(%esp,%eax), %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 92(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edi, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 104(%esp,%eax), %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 100(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %edx, %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %ebx, %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 48(%esp,%eax), %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl 108(%esp,%eax), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %eax, %edx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 56(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edi, 48(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %esi, 52(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebp, 40(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 44(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl (%esp), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 32(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 36(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 24(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 28(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 16(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 20(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 8(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 12(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    sarxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    # kill: def $cl killed $cl killed $ecx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    shrdl %cl, %esi, %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ebx, (%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %ecx, 4(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    movl %edx, 60(%eax) | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    addl $188, %esp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %esi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %edi | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebx | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    popl %ebp | 
 | ; X86-HAVE-BMI2-HAVE-SHLD-NEXT:    retl | 
 |   %src = load i512, ptr %src.ptr, align 1 | 
 |   %bitOff = load i512, ptr %bitOff.ptr, align 1 | 
 |   %res = ashr i512 %src, %bitOff | 
 |   store i512 %res, ptr %dst, align 1 | 
 |   ret void | 
 | } | 
 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: | 
 | ; ALL: {{.*}} | 
 | ; X64: {{.*}} | 
 | ; X64-NO-SHLD: {{.*}} | 
 | ; X64-SHLD: {{.*}} | 
 | ; X86: {{.*}} | 
 | ; X86-NO-SHLD: {{.*}} | 
 | ; X86-SHLD: {{.*}} |