|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | 
|  | ; RUN: llc -mtriple=i386-linux-gnu < %s | FileCheck -check-prefixes=CHECK,NOSSE %s | 
|  | ; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse < %s | FileCheck -check-prefixes=CHECK,SSE %s | 
|  |  | 
|  | ; Make sure that using bfloat without SSE2 does not trigger an assertion. Issue 92899 | 
|  |  | 
|  | ; Returns the scalar bfloat argument unchanged. The expected output is shared | 
|  | ; by both llc runs: one x87 flds load from the stack followed by retl. | 
|  | define bfloat @return_arg_bf16(bfloat %x) #0 { | 
|  | ; CHECK-LABEL: return_arg_bf16: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; CHECK-NEXT:    retl | 
|  | ret bfloat %x | 
|  | } | 
|  |  | 
|  | ; Returns the <2 x bfloat> argument unchanged. The expected output is shared | 
|  | ; by both llc runs: two x87 flds loads from the stack followed by retl. | 
|  | define <2 x bfloat> @return_arg_v2bf16(<2 x bfloat> %x) #0 { | 
|  | ; CHECK-LABEL: return_arg_v2bf16: | 
|  | ; CHECK:       # %bb.0: | 
|  | ; CHECK-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; CHECK-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; CHECK-NEXT:    retl | 
|  | ret <2 x bfloat> %x | 
|  | } | 
|  |  | 
|  | ; Returns the <3 x bfloat> argument unchanged. The expected output calls | 
|  | ; __truncsfbf2 once per element (three calls) and writes the results through a | 
|  | ; pointer loaded from the stack, which is also returned in eax (retl $4). | 
|  | ; The two llc runs differ only in how each float is copied into the call slot: | 
|  | ; flds/fstps without sse, movss with -mattr=+sse. | 
|  | define <3 x bfloat> @return_arg_v3bf16(<3 x bfloat> %x) #0 { | 
|  | ; NOSSE-LABEL: return_arg_v3bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    pushl %eax | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %esi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    # kill: def $ax killed $ax def $eax | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movzwl %si, %edi | 
|  | ; NOSSE-NEXT:    orl %eax, %edi | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, 4(%esi) | 
|  | ; NOSSE-NEXT:    movl %edi, (%esi) | 
|  | ; NOSSE-NEXT:    movl %esi, %eax | 
|  | ; NOSSE-NEXT:    addl $4, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    retl $4 | 
|  | ; | 
|  | ; SSE-LABEL: return_arg_v3bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    pushl %eax | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %esi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    # kill: def $ax killed $ax def $eax | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movzwl %si, %edi | 
|  | ; SSE-NEXT:    orl %eax, %edi | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, 4(%esi) | 
|  | ; SSE-NEXT:    movl %edi, (%esi) | 
|  | ; SSE-NEXT:    movl %esi, %eax | 
|  | ; SSE-NEXT:    addl $4, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    retl $4 | 
|  | ret <3 x bfloat> %x | 
|  | } | 
|  |  | 
|  | ; Returns the <4 x bfloat> argument unchanged. The expected output calls | 
|  | ; __truncsfbf2 four times and stores the 16-bit results at byte offsets | 
|  | ; 0, 2, 4 and 6 from a pointer loaded from the stack into ebp; that pointer is | 
|  | ; returned in eax (retl $4). The two llc runs differ only in the float copy | 
|  | ; before each call: flds/fstps without sse, movss with -mattr=+sse. | 
|  | define <4 x bfloat> @return_arg_v4bf16(<4 x bfloat> %x) #0 { | 
|  | ; NOSSE-LABEL: return_arg_v4bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebp | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $12, %esp | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %esi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %edi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %ebx | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, 6(%ebp) | 
|  | ; NOSSE-NEXT:    movw %bx, 4(%ebp) | 
|  | ; NOSSE-NEXT:    movw %di, 2(%ebp) | 
|  | ; NOSSE-NEXT:    movw %si, (%ebp) | 
|  | ; NOSSE-NEXT:    movl %ebp, %eax | 
|  | ; NOSSE-NEXT:    addl $12, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    popl %ebp | 
|  | ; NOSSE-NEXT:    retl $4 | 
|  | ; | 
|  | ; SSE-LABEL: return_arg_v4bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebp | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $12, %esp | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %esi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %edi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %ebx | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, 6(%ebp) | 
|  | ; SSE-NEXT:    movw %bx, 4(%ebp) | 
|  | ; SSE-NEXT:    movw %di, 2(%ebp) | 
|  | ; SSE-NEXT:    movw %si, (%ebp) | 
|  | ; SSE-NEXT:    movl %ebp, %eax | 
|  | ; SSE-NEXT:    addl $12, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    popl %ebp | 
|  | ; SSE-NEXT:    retl $4 | 
|  | ret <4 x bfloat> %x | 
|  | } | 
|  |  | 
|  | ; Returns the <8 x bfloat> argument unchanged. The expected output calls | 
|  | ; __truncsfbf2 eight times; the first four 16-bit results are spilled to the | 
|  | ; stack and reloaded later, the next three are kept in esi/edi/ebx, and the | 
|  | ; last stays in ax. All eight are stored at byte offsets 0..14 from a pointer | 
|  | ; loaded from the stack into ebp, which is returned in eax (retl $4). | 
|  | ; The two llc runs differ only in the float copy before each call: | 
|  | ; flds/fstps without sse, movss with -mattr=+sse. | 
|  | define <8 x bfloat> @return_arg_v8bf16(<8 x bfloat> %x) #0 { | 
|  | ; NOSSE-LABEL: return_arg_v8bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebp | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $12, %esp | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %esi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %edi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %ebx | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, 14(%ebp) | 
|  | ; NOSSE-NEXT:    movw %bx, 12(%ebp) | 
|  | ; NOSSE-NEXT:    movw %di, 10(%ebp) | 
|  | ; NOSSE-NEXT:    movw %si, 8(%ebp) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 6(%ebp) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 4(%ebp) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 2(%ebp) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, (%ebp) | 
|  | ; NOSSE-NEXT:    movl %ebp, %eax | 
|  | ; NOSSE-NEXT:    addl $12, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    popl %ebp | 
|  | ; NOSSE-NEXT:    retl $4 | 
|  | ; | 
|  | ; SSE-LABEL: return_arg_v8bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebp | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $12, %esp | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %esi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %edi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %ebx | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, 14(%ebp) | 
|  | ; SSE-NEXT:    movw %bx, 12(%ebp) | 
|  | ; SSE-NEXT:    movw %di, 10(%ebp) | 
|  | ; SSE-NEXT:    movw %si, 8(%ebp) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 6(%ebp) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 4(%ebp) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 2(%ebp) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, (%ebp) | 
|  | ; SSE-NEXT:    movl %ebp, %eax | 
|  | ; SSE-NEXT:    addl $12, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    popl %ebp | 
|  | ; SSE-NEXT:    retl $4 | 
|  | ret <8 x bfloat> %x | 
|  | } | 
|  |  | 
|  | ; Returns the <16 x bfloat> argument unchanged. The expected output calls | 
|  | ; __truncsfbf2 sixteen times; the first twelve 16-bit results are spilled to | 
|  | ; the stack and reloaded later, the next three are kept in esi/ebx/ebp, and the | 
|  | ; last stays in ax. All sixteen are stored at byte offsets 0..30 from a pointer | 
|  | ; loaded from the stack into edi, which is returned in eax (retl $4). | 
|  | ; The two llc runs differ only in the float copy before each call: | 
|  | ; flds/fstps without sse, movss with -mattr=+sse. | 
|  | define <16 x bfloat> @return_arg_v16bf16(<16 x bfloat> %x) #0 { | 
|  | ; NOSSE-LABEL: return_arg_v16bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebp | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $28, %esp | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %esi | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %ebx | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %ebp | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, 30(%edi) | 
|  | ; NOSSE-NEXT:    movw %bp, 28(%edi) | 
|  | ; NOSSE-NEXT:    movw %bx, 26(%edi) | 
|  | ; NOSSE-NEXT:    movw %si, 24(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 22(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 20(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 18(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 16(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 14(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 12(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 10(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 8(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 6(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 4(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, 2(%edi) | 
|  | ; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; NOSSE-NEXT:    movw %ax, (%edi) | 
|  | ; NOSSE-NEXT:    movl %edi, %eax | 
|  | ; NOSSE-NEXT:    addl $28, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    popl %ebp | 
|  | ; NOSSE-NEXT:    retl $4 | 
|  | ; | 
|  | ; SSE-LABEL: return_arg_v16bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebp | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $28, %esp | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %esi | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %ebx | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %ebp | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, 30(%edi) | 
|  | ; SSE-NEXT:    movw %bp, 28(%edi) | 
|  | ; SSE-NEXT:    movw %bx, 26(%edi) | 
|  | ; SSE-NEXT:    movw %si, 24(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 22(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 20(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 18(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 16(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 14(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 12(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 10(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 8(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 6(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 4(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, 2(%edi) | 
|  | ; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload | 
|  | ; SSE-NEXT:    movw %ax, (%edi) | 
|  | ; SSE-NEXT:    movl %edi, %eax | 
|  | ; SSE-NEXT:    addl $28, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    popl %ebp | 
|  | ; SSE-NEXT:    retl $4 | 
|  | ret <16 x bfloat> %x | 
|  | } | 
|  |  | 
|  | ; External functions with no body in this file. Each one takes a single bfloat | 
|  | ; scalar or vector and returns a value of that same type; the call_ret_* | 
|  | ; functions call them. | 
|  | declare bfloat @returns_bf16(bfloat) | 
|  | declare <2 x bfloat> @returns_v2bf16(<2 x bfloat>) | 
|  | declare <3 x bfloat> @returns_v3bf16(<3 x bfloat>) | 
|  | declare <4 x bfloat> @returns_v4bf16(<4 x bfloat>) | 
|  | declare <8 x bfloat> @returns_v8bf16(<8 x bfloat>) | 
|  | declare <16 x bfloat> @returns_v16bf16(<16 x bfloat>) | 
|  |  | 
|  | ; Loads a bfloat from %ptr, passes it to returns_bf16, and stores the returned | 
|  | ; bfloat back to %ptr. In the expected output the 16-bit value is | 
|  | ; zero-extended, shifted left by 16, and reloaded as a single-precision float | 
|  | ; for the outgoing stack argument. After the call the x87 result is stored with | 
|  | ; fstps and passed to __truncsfbf2, whose 16-bit result is written to memory. | 
|  | ; The two llc runs differ only in the copy of the outgoing argument: | 
|  | ; flds/fstps without sse, movss with -mattr=+sse. | 
|  | define void @call_ret_bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $8, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; NOSSE-NEXT:    movzwl (%esi), %eax | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll returns_bf16@PLT | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, (%esi) | 
|  | ; NOSSE-NEXT:    addl $8, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $8, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; SSE-NEXT:    movzwl (%esi), %eax | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll returns_bf16@PLT | 
|  | ; SSE-NEXT:    fstps (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, (%esi) | 
|  | ; SSE-NEXT:    addl $8, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load bfloat, ptr %ptr | 
|  | %bf16 = call bfloat @returns_bf16(bfloat %val) | 
|  | store bfloat %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Loads a <2 x bfloat> from %ptr, passes it to returns_v2bf16, and stores the | 
|  | ; returned vector back to %ptr. In the expected output both elements are | 
|  | ; widened (shift left by 16) and passed as floats on the stack. After the call | 
|  | ; the two results are taken from the x87 stack (fxch, then two stores) and each | 
|  | ; is passed to __truncsfbf2. The llc runs differ in the argument copy | 
|  | ; (flds/fstps vs movss), in the spill of the second result (4-byte fstps/flds | 
|  | ; without sse, 10-byte fstpt/fldt with -mattr=+sse), and in the stack | 
|  | ; adjustment (20 vs 36 bytes). | 
|  | define void @call_ret_v2bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_v2bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $20, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; NOSSE-NEXT:    movzwl 2(%edi), %eax | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl (%edi), %eax | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll returns_v2bf16@PLT | 
|  | ; NOSSE-NEXT:    fxch %st(1) | 
|  | ; NOSSE-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movl %eax, %esi | 
|  | ; NOSSE-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload | 
|  | ; NOSSE-NEXT:    fstps (%esp) | 
|  | ; NOSSE-NEXT:    calll __truncsfbf2 | 
|  | ; NOSSE-NEXT:    movw %ax, 2(%edi) | 
|  | ; NOSSE-NEXT:    movw %si, (%edi) | 
|  | ; NOSSE-NEXT:    addl $20, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_v2bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $36, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; SSE-NEXT:    movzwl 2(%edi), %eax | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl (%edi), %eax | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, (%esp) | 
|  | ; SSE-NEXT:    calll returns_v2bf16@PLT | 
|  | ; SSE-NEXT:    fxch %st(1) | 
|  | ; SSE-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill | 
|  | ; SSE-NEXT:    fstps (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movl %eax, %esi | 
|  | ; SSE-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload | 
|  | ; SSE-NEXT:    fstps (%esp) | 
|  | ; SSE-NEXT:    calll __truncsfbf2 | 
|  | ; SSE-NEXT:    movw %ax, 2(%edi) | 
|  | ; SSE-NEXT:    movw %si, (%edi) | 
|  | ; SSE-NEXT:    addl $36, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load <2 x bfloat>, ptr %ptr | 
|  | %bf16 = call <2 x bfloat> @returns_v2bf16(<2 x bfloat> %val) | 
|  | store <2 x bfloat> %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Loads a <3 x bfloat> from %ptr, passes it to returns_v3bf16, and stores the | 
|  | ; returned vector back to %ptr. In the expected output the address of a stack | 
|  | ; slot (leal) is written to the first outgoing stack slot, followed by the three | 
|  | ; elements widened to floats. After the call the stack pointer is re-adjusted | 
|  | ; (subl $4) and the result is read back from stack memory as one 32-bit and one | 
|  | ; 16-bit value; there is no __truncsfbf2 call in this function. | 
|  | ; The two llc runs differ only in the copy of each outgoing float: | 
|  | ; flds/fstps without sse, movss with -mattr=+sse. | 
|  | define void @call_ret_v3bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_v3bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $40, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; NOSSE-NEXT:    movl (%esi), %eax | 
|  | ; NOSSE-NEXT:    movl 4(%esi), %ecx | 
|  | ; NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %edx | 
|  | ; NOSSE-NEXT:    movl %edx, (%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ecx | 
|  | ; NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl %eax, %ecx | 
|  | ; NOSSE-NEXT:    andl $-65536, %ecx # imm = 0xFFFF0000 | 
|  | ; NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    calll returns_v3bf16@PLT | 
|  | ; NOSSE-NEXT:    subl $4, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; NOSSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; NOSSE-NEXT:    movl %eax, (%esi) | 
|  | ; NOSSE-NEXT:    addl $40, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_v3bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $40, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; SSE-NEXT:    movl (%esi), %eax | 
|  | ; SSE-NEXT:    movl 4(%esi), %ecx | 
|  | ; SSE-NEXT:    leal {{[0-9]+}}(%esp), %edx | 
|  | ; SSE-NEXT:    movl %edx, (%esp) | 
|  | ; SSE-NEXT:    shll $16, %ecx | 
|  | ; SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl %eax, %ecx | 
|  | ; SSE-NEXT:    andl $-65536, %ecx # imm = 0xFFFF0000 | 
|  | ; SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    calll returns_v3bf16@PLT | 
|  | ; SSE-NEXT:    subl $4, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; SSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; SSE-NEXT:    movl %eax, (%esi) | 
|  | ; SSE-NEXT:    addl $40, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load <3 x bfloat>, ptr %ptr | 
|  | %bf16 = call <3 x bfloat> @returns_v3bf16(<3 x bfloat> %val) | 
|  | store <3 x bfloat> %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Round-trips a <4 x bfloat> through a call: loads the vector from %ptr, passes | 
|  | ; it to @returns_v4bf16, and stores the returned vector back to %ptr. | 
|  | ; In the expected output each loaded element is shifted left by 16 bits and | 
|  | ; stored to the stack ahead of the call; the NOSSE run copies it with | 
|  | ; flds/fstps where the SSE run uses movss. | 
|  | define void @call_ret_v4bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_v4bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $48, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; NOSSE-NEXT:    movzwl 2(%esi), %ecx | 
|  | ; NOSSE-NEXT:    movl (%esi), %eax | 
|  | ; NOSSE-NEXT:    movl 4(%esi), %edx | 
|  | ; NOSSE-NEXT:    movzwl 6(%esi), %edi | 
|  | ; NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %ebx | 
|  | ; NOSSE-NEXT:    movl %ebx, (%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %edi | 
|  | ; NOSSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %edx | 
|  | ; NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ecx | 
|  | ; NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    calll returns_v4bf16@PLT | 
|  | ; NOSSE-NEXT:    subl $4, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edx | 
|  | ; NOSSE-NEXT:    movw %dx, 6(%esi) | 
|  | ; NOSSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; NOSSE-NEXT:    movl %eax, (%esi) | 
|  | ; NOSSE-NEXT:    addl $48, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_v4bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $48, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; SSE-NEXT:    movzwl 2(%esi), %ecx | 
|  | ; SSE-NEXT:    movl (%esi), %eax | 
|  | ; SSE-NEXT:    movl 4(%esi), %edx | 
|  | ; SSE-NEXT:    movzwl 6(%esi), %edi | 
|  | ; SSE-NEXT:    leal {{[0-9]+}}(%esp), %ebx | 
|  | ; SSE-NEXT:    movl %ebx, (%esp) | 
|  | ; SSE-NEXT:    shll $16, %edi | 
|  | ; SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %edx | 
|  | ; SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %ecx | 
|  | ; SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    calll returns_v4bf16@PLT | 
|  | ; SSE-NEXT:    subl $4, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edx | 
|  | ; SSE-NEXT:    movw %dx, 6(%esi) | 
|  | ; SSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; SSE-NEXT:    movl %eax, (%esi) | 
|  | ; SSE-NEXT:    addl $48, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load <4 x bfloat>, ptr %ptr | 
|  | %bf16 = call <4 x bfloat> @returns_v4bf16(<4 x bfloat> %val) | 
|  | store <4 x bfloat> %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Round-trips an <8 x bfloat> through a call: loads the vector from %ptr, passes | 
|  | ; it to @returns_v8bf16, and stores the returned vector back to %ptr. | 
|  | ; In the expected output three of the loaded values are spilled and reloaded | 
|  | ; ("4-byte Spill" / "4-byte Reload") before being shifted left by 16 bits and | 
|  | ; stored to the stack ahead of the call; the NOSSE run copies each value with | 
|  | ; flds/fstps where the SSE run uses movss. | 
|  | define void @call_ret_v8bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_v8bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebp | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    subl $108, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; NOSSE-NEXT:    movzwl 2(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl (%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 4(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 6(%esi), %edi | 
|  | ; NOSSE-NEXT:    movl 8(%esi), %ebx | 
|  | ; NOSSE-NEXT:    movzwl 10(%esi), %ebp | 
|  | ; NOSSE-NEXT:    movl 12(%esi), %ecx | 
|  | ; NOSSE-NEXT:    movzwl 14(%esi), %eax | 
|  | ; NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %edx | 
|  | ; NOSSE-NEXT:    movl %edx, (%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ecx | 
|  | ; NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ebp | 
|  | ; NOSSE-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ebx | 
|  | ; NOSSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %edi | 
|  | ; NOSSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    calll returns_v8bf16@PLT | 
|  | ; NOSSE-NEXT:    subl $4, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edx | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edi | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ebx | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ebp | 
|  | ; NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movw %ax, 14(%esi) | 
|  | ; NOSSE-NEXT:    movw %bp, 12(%esi) | 
|  | ; NOSSE-NEXT:    movw %bx, 10(%esi) | 
|  | ; NOSSE-NEXT:    movw %di, 8(%esi) | 
|  | ; NOSSE-NEXT:    movw %dx, 6(%esi) | 
|  | ; NOSSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    movl %eax, (%esi) | 
|  | ; NOSSE-NEXT:    addl $108, %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    popl %ebp | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_v8bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebp | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    subl $108, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
|  | ; SSE-NEXT:    movzwl 2(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl (%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 4(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 6(%esi), %edi | 
|  | ; SSE-NEXT:    movl 8(%esi), %ebx | 
|  | ; SSE-NEXT:    movzwl 10(%esi), %ebp | 
|  | ; SSE-NEXT:    movl 12(%esi), %ecx | 
|  | ; SSE-NEXT:    movzwl 14(%esi), %eax | 
|  | ; SSE-NEXT:    leal {{[0-9]+}}(%esp), %edx | 
|  | ; SSE-NEXT:    movl %edx, (%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %ecx | 
|  | ; SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %ebp | 
|  | ; SSE-NEXT:    movl %ebp, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %ebx | 
|  | ; SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %edi | 
|  | ; SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    calll returns_v8bf16@PLT | 
|  | ; SSE-NEXT:    subl $4, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edx | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %edi | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ebx | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %ebp | 
|  | ; SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movw %ax, 14(%esi) | 
|  | ; SSE-NEXT:    movw %bp, 12(%esi) | 
|  | ; SSE-NEXT:    movw %bx, 10(%esi) | 
|  | ; SSE-NEXT:    movw %di, 8(%esi) | 
|  | ; SSE-NEXT:    movw %dx, 6(%esi) | 
|  | ; SSE-NEXT:    movw %cx, 4(%esi) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    movl %eax, (%esi) | 
|  | ; SSE-NEXT:    addl $108, %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    popl %ebp | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load <8 x bfloat>, ptr %ptr | 
|  | %bf16 = call <8 x bfloat> @returns_v8bf16(<8 x bfloat> %val) | 
|  | store <8 x bfloat> %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Round-trips a <16 x bfloat> through a call: loads the vector from %ptr, passes | 
|  | ; it to @returns_v16bf16, and stores the returned vector back to %ptr. | 
|  | ; Unlike the smaller vector cases, the expected prologue here sets up %ebp as a | 
|  | ; frame pointer and aligns %esp with `andl $-32, %esp`. Most loaded values are | 
|  | ; spilled and reloaded before being shifted left by 16 bits and stored to the | 
|  | ; stack ahead of the call; the NOSSE run copies each value with flds/fstps | 
|  | ; where the SSE run uses movss. | 
|  | define void @call_ret_v16bf16(ptr %ptr) #0 { | 
|  | ; NOSSE-LABEL: call_ret_v16bf16: | 
|  | ; NOSSE:       # %bb.0: | 
|  | ; NOSSE-NEXT:    pushl %ebp | 
|  | ; NOSSE-NEXT:    movl %esp, %ebp | 
|  | ; NOSSE-NEXT:    pushl %ebx | 
|  | ; NOSSE-NEXT:    pushl %edi | 
|  | ; NOSSE-NEXT:    pushl %esi | 
|  | ; NOSSE-NEXT:    andl $-32, %esp | 
|  | ; NOSSE-NEXT:    subl $256, %esp # imm = 0x100 | 
|  | ; NOSSE-NEXT:    movl 8(%ebp), %esi | 
|  | ; NOSSE-NEXT:    movzwl 2(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl (%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 4(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 6(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 8(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 10(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 12(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 14(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 16(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 18(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 20(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movzwl 22(%esi), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl 24(%esi), %edi | 
|  | ; NOSSE-NEXT:    movzwl 26(%esi), %edx | 
|  | ; NOSSE-NEXT:    movl 28(%esi), %ecx | 
|  | ; NOSSE-NEXT:    movzwl 30(%esi), %eax | 
|  | ; NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %ebx | 
|  | ; NOSSE-NEXT:    movl %ebx, (%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %ecx | 
|  | ; NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %edx | 
|  | ; NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    shll $16, %edi | 
|  | ; NOSSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    shll $16, %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    flds {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    fstps {{[0-9]+}}(%esp) | 
|  | ; NOSSE-NEXT:    calll returns_v16bf16@PLT | 
|  | ; NOSSE-NEXT:    subl $4, %esp | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | ; NOSSE-NEXT:    movl %edx, 28(%esi) | 
|  | ; NOSSE-NEXT:    movl %eax, 24(%esi) | 
|  | ; NOSSE-NEXT:    movl %ecx, 20(%esi) | 
|  | ; NOSSE-NEXT:    movl %ebx, 16(%esi) | 
|  | ; NOSSE-NEXT:    movl %edi, 12(%esi) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    movl %eax, 8(%esi) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    movl %eax, 4(%esi) | 
|  | ; NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; NOSSE-NEXT:    movl %eax, (%esi) | 
|  | ; NOSSE-NEXT:    leal -12(%ebp), %esp | 
|  | ; NOSSE-NEXT:    popl %esi | 
|  | ; NOSSE-NEXT:    popl %edi | 
|  | ; NOSSE-NEXT:    popl %ebx | 
|  | ; NOSSE-NEXT:    popl %ebp | 
|  | ; NOSSE-NEXT:    retl | 
|  | ; | 
|  | ; SSE-LABEL: call_ret_v16bf16: | 
|  | ; SSE:       # %bb.0: | 
|  | ; SSE-NEXT:    pushl %ebp | 
|  | ; SSE-NEXT:    movl %esp, %ebp | 
|  | ; SSE-NEXT:    pushl %ebx | 
|  | ; SSE-NEXT:    pushl %edi | 
|  | ; SSE-NEXT:    pushl %esi | 
|  | ; SSE-NEXT:    andl $-32, %esp | 
|  | ; SSE-NEXT:    subl $256, %esp # imm = 0x100 | 
|  | ; SSE-NEXT:    movl 8(%ebp), %esi | 
|  | ; SSE-NEXT:    movzwl 2(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl (%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 4(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 6(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 8(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 10(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 12(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 14(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 16(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 18(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 20(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movzwl 22(%esi), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl 24(%esi), %edi | 
|  | ; SSE-NEXT:    movzwl 26(%esi), %edx | 
|  | ; SSE-NEXT:    movl 28(%esi), %ecx | 
|  | ; SSE-NEXT:    movzwl 30(%esi), %eax | 
|  | ; SSE-NEXT:    leal {{[0-9]+}}(%esp), %ebx | 
|  | ; SSE-NEXT:    movl %ebx, (%esp) | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %ecx | 
|  | ; SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %edx | 
|  | ; SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    shll $16, %edi | 
|  | ; SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    shll $16, %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero | 
|  | ; SSE-NEXT:    movss %xmm0, {{[0-9]+}}(%esp) | 
|  | ; SSE-NEXT:    calll returns_v16bf16@PLT | 
|  | ; SSE-NEXT:    subl $4, %esp | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax | 
|  | ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | ; SSE-NEXT:    movl %edx, 28(%esi) | 
|  | ; SSE-NEXT:    movl %eax, 24(%esi) | 
|  | ; SSE-NEXT:    movl %ecx, 20(%esi) | 
|  | ; SSE-NEXT:    movl %ebx, 16(%esi) | 
|  | ; SSE-NEXT:    movl %edi, 12(%esi) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    movl %eax, 8(%esi) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    movl %eax, 4(%esi) | 
|  | ; SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload | 
|  | ; SSE-NEXT:    movl %eax, (%esi) | 
|  | ; SSE-NEXT:    leal -12(%ebp), %esp | 
|  | ; SSE-NEXT:    popl %esi | 
|  | ; SSE-NEXT:    popl %edi | 
|  | ; SSE-NEXT:    popl %ebx | 
|  | ; SSE-NEXT:    popl %ebp | 
|  | ; SSE-NEXT:    retl | 
|  | %val = load <16 x bfloat>, ptr %ptr | 
|  | %bf16 = call <16 x bfloat> @returns_v16bf16(<16 x bfloat> %val) | 
|  | store <16 x bfloat> %bf16, ptr %ptr | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Attribute group #0 (nounwind), referenced via `#0` by the function definitions above. | 
|  | attributes #0 = { nounwind } |