/* blob: 69ecc804a0ddd58ba43c1420724ea95c400131ca [file] [log] [blame] */
#if defined(__i386__)
.text
.align 6,0x90
/*
 * int GFp_poly1305_init_asm(void *ctx, const u8 key[16], void *func[2])
 * cdecl, i386 Mach-O.  After the four pushes the arguments sit at
 * 20(%esp)=ctx, 24(%esp)=key, 28(%esp)=func.
 * Zeroes the 5-word accumulator h[0..4] plus the base-2^26 flag word at
 * ctx+20, and (when key != NULL) stores the clamped key r into ctx+24..36
 * and writes the blocks/emit function pointers into func[0]/func[1].
 * Returns 1 in %eax when a key was installed (falls through L000nokey
 * with %eax untouched otherwise).
 */
.globl _GFp_poly1305_init_asm
.private_extern _GFp_poly1305_init_asm
.align 4
_GFp_poly1305_init_asm:
L_GFp_poly1305_init_asm_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ebp
/* h = 0, and clear the "base 2^26" flag at ctx+20 */
xorl %eax,%eax
movl %eax,(%edi)
movl %eax,4(%edi)
movl %eax,8(%edi)
movl %eax,12(%edi)
movl %eax,16(%edi)
movl %eax,20(%edi)
cmpl $0,%esi
je L000nokey
/* classic call/pop trick to obtain the PIC base in %ebx */
call L001pic_point
L001pic_point:
popl %ebx
leal _GFp_poly1305_blocks-L001pic_point(%ebx),%eax
leal _GFp_poly1305_emit-L001pic_point(%ebx),%edx
/* 83886080 = (1<<26)|(1<<24): SSE2 + FXSR bits of GFp_ia32cap_P */
movl L_GFp_ia32cap_P$non_lazy_ptr-L001pic_point(%ebx),%edi
movl (%edi),%ecx
andl $83886080,%ecx
cmpl $83886080,%ecx
/* NOTE(review): the flags from the cmpl above appear unused — the SSE2
 * entry points are installed unconditionally.  Presumably SSE2 is a
 * baseline requirement for this build; confirm against the generator. */
leal __poly1305_blocks_sse2-L001pic_point(%ebx),%eax
leal __poly1305_emit_sse2-L001pic_point(%ebx),%edx
/* reload ctx: %edi was repurposed for the capability-vector pointer */
movl 20(%esp),%edi
movl %eax,(%ebp)
movl %edx,4(%ebp)
/* clamp r per Poly1305: 268435455 = 0x0fffffff, 268435452 = 0x0ffffffc */
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
andl $268435455,%eax
andl $268435452,%ebx
andl $268435452,%ecx
andl $268435452,%edx
movl %eax,24(%edi)
movl %ebx,28(%edi)
movl %ecx,32(%edi)
movl %edx,36(%edi)
movl $1,%eax
L000nokey:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * void GFp_poly1305_blocks(void *ctx, const u8 *inp, size_t len, u32 padbit)
 * Scalar (non-SSE2) bulk processing: h = (h + block) * r mod 2^130-5 for
 * each 16-byte block.  After the pushes: 20(%esp)=ctx, 24(%esp)=inp,
 * 28(%esp)=len, 32(%esp)=padbit.  len is rounded down to a multiple of 16.
 * __poly1305_blocks_sse2 tail-jumps to Lenter_blocks with the same frame.
 */
.globl _GFp_poly1305_blocks
.private_extern _GFp_poly1305_blocks
.align 4
_GFp_poly1305_blocks:
L_GFp_poly1305_blocks_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
Lenter_blocks:
andl $-15,%ecx
jz L002nodata
/* 64-byte scratch frame; caller args move to 84..96(%esp):
 * 84=ctx, 88=inp, 92=len (overwritten below with end-of-input), 96=padbit */
subl $64,%esp
movl 24(%edi),%eax
movl 28(%edi),%ebx
leal (%esi,%ecx,1),%ebp
movl 32(%edi),%ecx
movl 36(%edi),%edx
movl %ebp,92(%esp)
movl %esi,%ebp
/* cache r0..r3 at 36..48(%esp) and s1..s3 = r1..r3 + (r1..r3>>2)
 * (i.e. r*5/4, valid because the low 2 bits of r1..r3 are clamped to 0)
 * at 52..60(%esp) for the mod 2^130-5 folding */
movl %eax,36(%esp)
movl %ebx,%eax
shrl $2,%eax
movl %ebx,40(%esp)
addl %ebx,%eax
movl %ecx,%ebx
shrl $2,%ebx
movl %ecx,44(%esp)
addl %ecx,%ebx
movl %edx,%ecx
shrl $2,%ecx
movl %edx,48(%esp)
addl %edx,%ecx
movl %eax,52(%esp)
movl %ebx,56(%esp)
movl %ecx,60(%esp)
/* load h0..h4 into eax,ebx,ecx,esi,edi; %ebp walks the input */
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%esi
movl 16(%edi),%edi
jmp L003loop
.align 5,0x90
L003loop:
/* h += next 16-byte block, plus padbit into the 130th-bit limb */
addl (%ebp),%eax
adcl 4(%ebp),%ebx
adcl 8(%ebp),%ecx
adcl 12(%ebp),%esi
leal 16(%ebp),%ebp
adcl 96(%esp),%edi
/* spill h0,h3,h4 so every limb is addressable by mull */
movl %eax,(%esp)
movl %esi,12(%esp)
/* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1  -> 20(%esp) */
mull 36(%esp)
movl %edi,16(%esp)
movl %eax,%edi
movl %ebx,%eax
movl %edx,%esi
mull 60(%esp)
addl %eax,%edi
movl %ecx,%eax
adcl %edx,%esi
mull 56(%esp)
addl %eax,%edi
movl 12(%esp),%eax
adcl %edx,%esi
mull 52(%esp)
addl %eax,%edi
movl (%esp),%eax
adcl %edx,%esi
/* d1 = h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1  -> 24(%esp) */
mull 40(%esp)
movl %edi,20(%esp)
xorl %edi,%edi
addl %eax,%esi
movl %ebx,%eax
adcl %edx,%edi
mull 36(%esp)
addl %eax,%esi
movl %ecx,%eax
adcl %edx,%edi
mull 60(%esp)
addl %eax,%esi
movl 12(%esp),%eax
adcl %edx,%edi
mull 56(%esp)
addl %eax,%esi
movl 16(%esp),%eax
adcl %edx,%edi
/* h4 is at most a few bits, so a 32-bit imull suffices */
imull 52(%esp),%eax
addl %eax,%esi
movl (%esp),%eax
adcl $0,%edi
/* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2  -> 28(%esp) */
mull 44(%esp)
movl %esi,24(%esp)
xorl %esi,%esi
addl %eax,%edi
movl %ebx,%eax
adcl %edx,%esi
mull 40(%esp)
addl %eax,%edi
movl %ecx,%eax
adcl %edx,%esi
mull 36(%esp)
addl %eax,%edi
movl 12(%esp),%eax
adcl %edx,%esi
mull 60(%esp)
addl %eax,%edi
movl 16(%esp),%eax
adcl %edx,%esi
imull 56(%esp),%eax
addl %eax,%edi
movl (%esp),%eax
adcl $0,%esi
/* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3; d4 = h4*r0 + carry */
mull 48(%esp)
movl %edi,28(%esp)
xorl %edi,%edi
addl %eax,%esi
movl %ebx,%eax
adcl %edx,%edi
mull 44(%esp)
addl %eax,%esi
movl %ecx,%eax
adcl %edx,%edi
mull 40(%esp)
addl %eax,%esi
movl 12(%esp),%eax
adcl %edx,%edi
mull 36(%esp)
addl %eax,%esi
movl 16(%esp),%ecx
adcl %edx,%edi
movl %ecx,%edx
imull 60(%esp),%ecx
addl %ecx,%esi
movl 20(%esp),%eax
adcl $0,%edi
imull 36(%esp),%edx
addl %edi,%edx
/* partial reduction mod 2^130-5: fold bits >=130 back as *5
 * (leal (%edx,%edx,4) computes edx*5) */
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl %edx,%edi
shrl $2,%edx
andl $3,%edi
leal (%edx,%edx,4),%edx
addl %edx,%eax
adcl $0,%ebx
adcl $0,%ecx
adcl $0,%esi
adcl $0,%edi
/* loop until %ebp reaches the precomputed end-of-input pointer */
cmpl 92(%esp),%ebp
jne L003loop
/* store h back into ctx and unwind */
movl 84(%esp),%edx
addl $64,%esp
movl %eax,(%edx)
movl %ebx,4(%edx)
movl %ecx,8(%edx)
movl %esi,12(%edx)
movl %edi,16(%edx)
L002nodata:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * void GFp_poly1305_emit(const void *ctx, u8 mac[16], const u32 nonce[4])
 * Final reduction and tag output.  After the pushes: 20(%esp)=ctx,
 * 24(%esp)=mac, 28(%esp)=nonce.  Computes h mod 2^130-5 in constant time
 * by forming h+5, deriving an all-ones/all-zeros mask from the resulting
 * carry into bit 130, selecting between h and h+5 with that mask, then
 * adding the nonce.  __poly1305_emit_sse2 jumps to Lenter_emit when the
 * accumulator is still in base 2^32.
 */
.globl _GFp_poly1305_emit
.private_extern _GFp_poly1305_emit
.align 4
_GFp_poly1305_emit:
L_GFp_poly1305_emit_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebp
Lenter_emit:
movl 24(%esp),%edi
movl (%ebp),%eax
movl 4(%ebp),%ebx
movl 8(%ebp),%ecx
movl 12(%ebp),%edx
movl 16(%ebp),%esi
/* t = h + 5; if t >= 2^130 then h mod p == t mod 2^130 */
addl $5,%eax
adcl $0,%ebx
adcl $0,%ecx
adcl $0,%edx
adcl $0,%esi
/* mask = 0 - (t >> 130): all-ones when the reduced value is t */
shrl $2,%esi
negl %esi
/* mac = t & mask (stored provisionally) */
andl %esi,%eax
andl %esi,%ebx
andl %esi,%ecx
andl %esi,%edx
movl %eax,(%edi)
movl %ebx,4(%edi)
movl %ecx,8(%edi)
movl %edx,12(%edi)
/* merge in h & ~mask, then add the 128-bit nonce */
notl %esi
movl (%ebp),%eax
movl 4(%ebp),%ebx
movl 8(%ebp),%ecx
movl 12(%ebp),%edx
movl 28(%esp),%ebp
andl %esi,%eax
andl %esi,%ebx
andl %esi,%ecx
andl %esi,%edx
orl (%edi),%eax
orl 4(%edi),%ebx
orl 8(%edi),%ecx
orl 12(%edi),%edx
addl (%ebp),%eax
adcl 4(%ebp),%ebx
adcl 8(%ebp),%ecx
adcl 12(%ebp),%edx
movl %eax,(%edi)
movl %ebx,4(%edi)
movl %ecx,8(%edi)
movl %edx,12(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * __poly1305_init_sse2 — internal helper, custom calling convention:
 *   %edi = ctx (clamped r at ctx+24), %ebx = PIC base of Lconst_sse2.
 * Splits r into five 26-bit limbs, squares it twice (the L004square loop
 * runs with %ecx=2, pairing r^2 with r and then r^4 with r^3... the first
 * pass yields r^2, the second squares the r^2:r pair), and stores the
 * resulting power-of-r table plus the *5 multiples at ctx+48..+176 for
 * the two-way vectorized main loop.  Restores %esp from %ebp and biases
 * %edi back by -48 before returning.
 */
.align 5,0x90
.private_extern __poly1305_init_sse2
.align 4
__poly1305_init_sse2:
movdqu 24(%edi),%xmm4
leal 48(%edi),%edi
movl %esp,%ebp
/* 224-byte aligned scratch: limbs at 0..64, *5 multiples at 80..128,
 * duplicated (pshufd $68) copies at 144..208 (%edx) */
subl $224,%esp
andl $-16,%esp
/* 64(%ebx) = Lconst_sse2+64 = 0x03ffffff mask (2^26-1) */
movq 64(%ebx),%xmm7
/* radix conversion: r (128 bits) -> 5 x 26-bit limbs in xmm0..xmm4 */
movdqa %xmm4,%xmm0
movdqa %xmm4,%xmm1
movdqa %xmm4,%xmm2
pand %xmm7,%xmm0
psrlq $26,%xmm1
psrldq $6,%xmm2
pand %xmm7,%xmm1
movdqa %xmm2,%xmm3
psrlq $4,%xmm2
psrlq $30,%xmm3
pand %xmm7,%xmm2
pand %xmm7,%xmm3
psrldq $13,%xmm4
leal 144(%esp),%edx
movl $2,%ecx
L004square:
/* stash the current limbs and precompute limb*5 (pslld $2 + paddd) */
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movdqa %xmm4,64(%esp)
movdqa %xmm1,%xmm6
movdqa %xmm2,%xmm5
pslld $2,%xmm6
pslld $2,%xmm5
paddd %xmm1,%xmm6
paddd %xmm2,%xmm5
movdqa %xmm6,80(%esp)
movdqa %xmm5,96(%esp)
movdqa %xmm3,%xmm6
movdqa %xmm4,%xmm5
pslld $2,%xmm6
pslld $2,%xmm5
paddd %xmm3,%xmm6
paddd %xmm4,%xmm5
movdqa %xmm6,112(%esp)
movdqa %xmm5,128(%esp)
/* broadcast limbs into both 64-bit lanes for pmuludq */
pshufd $68,%xmm0,%xmm6
movdqa %xmm1,%xmm5
pshufd $68,%xmm1,%xmm1
pshufd $68,%xmm2,%xmm2
pshufd $68,%xmm3,%xmm3
pshufd $68,%xmm4,%xmm4
movdqa %xmm6,(%edx)
movdqa %xmm1,16(%edx)
movdqa %xmm2,32(%edx)
movdqa %xmm3,48(%edx)
movdqa %xmm4,64(%edx)
/* schoolbook 5x5 limb product with *5 folding of the high limbs;
 * accumulators d0..d4 live in xmm0..xmm4 */
pmuludq %xmm0,%xmm4
pmuludq %xmm0,%xmm3
pmuludq %xmm0,%xmm2
pmuludq %xmm0,%xmm1
pmuludq %xmm6,%xmm0
movdqa %xmm5,%xmm6
pmuludq 48(%edx),%xmm5
movdqa %xmm6,%xmm7
pmuludq 32(%edx),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa 80(%esp),%xmm6
pmuludq (%edx),%xmm5
paddq %xmm7,%xmm2
pmuludq 64(%edx),%xmm6
movdqa 32(%esp),%xmm7
paddq %xmm5,%xmm1
movdqa %xmm7,%xmm5
pmuludq 32(%edx),%xmm7
paddq %xmm6,%xmm0
movdqa %xmm5,%xmm6
pmuludq 16(%edx),%xmm5
paddq %xmm7,%xmm4
movdqa 96(%esp),%xmm7
pmuludq (%edx),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 64(%edx),%xmm7
paddq %xmm6,%xmm2
pmuludq 48(%edx),%xmm5
movdqa 48(%esp),%xmm6
paddq %xmm7,%xmm1
movdqa %xmm6,%xmm7
pmuludq 16(%edx),%xmm6
paddq %xmm5,%xmm0
movdqa 112(%esp),%xmm5
pmuludq (%edx),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 64(%edx),%xmm5
paddq %xmm7,%xmm3
movdqa %xmm6,%xmm7
pmuludq 48(%edx),%xmm6
paddq %xmm5,%xmm2
pmuludq 32(%edx),%xmm7
movdqa 64(%esp),%xmm5
paddq %xmm6,%xmm1
movdqa 128(%esp),%xmm6
pmuludq (%edx),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq 64(%edx),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 32(%edx),%xmm5
paddq %xmm7,%xmm0
pmuludq 48(%edx),%xmm6
movdqa 64(%ebx),%xmm7
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
/* lazy carry propagation: d3->d4, d0->d1, d4*4+d4 folded into d0, etc. */
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
psrlq $26,%xmm6
movdqa %xmm5,%xmm4
paddq %xmm1,%xmm6
psrlq $26,%xmm5
pand %xmm7,%xmm4
movdqa %xmm6,%xmm1
psrlq $26,%xmm6
paddd %xmm5,%xmm0
psllq $2,%xmm5
paddq %xmm2,%xmm6
paddq %xmm0,%xmm5
pand %xmm7,%xmm1
movdqa %xmm6,%xmm2
psrlq $26,%xmm6
pand %xmm7,%xmm2
paddd %xmm3,%xmm6
movdqa %xmm5,%xmm0
psrlq $26,%xmm5
movdqa %xmm6,%xmm3
psrlq $26,%xmm6
pand %xmm7,%xmm0
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
decl %ecx
jz L005square_break
/* pair the new power with the previous one and square again */
punpcklqdq (%esp),%xmm0
punpcklqdq 16(%esp),%xmm1
punpcklqdq 32(%esp),%xmm2
punpcklqdq 48(%esp),%xmm3
punpcklqdq 64(%esp),%xmm4
jmp L004square
L005square_break:
/* interleave with the previous pass's limbs and lay the table out as
 * [r^2,r^4 | r,r^3]-style lanes (pshufd $141 reorders the dwords) */
psllq $32,%xmm0
psllq $32,%xmm1
psllq $32,%xmm2
psllq $32,%xmm3
psllq $32,%xmm4
por (%esp),%xmm0
por 16(%esp),%xmm1
por 32(%esp),%xmm2
por 48(%esp),%xmm3
por 64(%esp),%xmm4
pshufd $141,%xmm0,%xmm0
pshufd $141,%xmm1,%xmm1
pshufd $141,%xmm2,%xmm2
pshufd $141,%xmm3,%xmm3
pshufd $141,%xmm4,%xmm4
/* persist the power table and the *5 multiples into ctx */
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
movdqu %xmm4,64(%edi)
movdqa %xmm1,%xmm6
movdqa %xmm2,%xmm5
pslld $2,%xmm6
pslld $2,%xmm5
paddd %xmm1,%xmm6
paddd %xmm2,%xmm5
movdqu %xmm6,80(%edi)
movdqu %xmm5,96(%edi)
movdqa %xmm3,%xmm6
movdqa %xmm4,%xmm5
pslld $2,%xmm6
pslld $2,%xmm5
paddd %xmm3,%xmm6
paddd %xmm4,%xmm5
movdqu %xmm6,112(%edi)
movdqu %xmm5,128(%edi)
movl %ebp,%esp
leal -48(%edi),%edi
ret
/*
 * void __poly1305_blocks_sse2(void *ctx, const u8 *inp, size_t len, u32 padbit)
 * SSE2 bulk path, same cdecl frame as GFp_poly1305_blocks.  Processes two
 * 16-byte blocks per iteration using the base-2^26 representation and the
 * power-of-r table built by __poly1305_init_sse2.  Dispatch: for short
 * inputs (<64 bytes) while still in base 2^32 it falls back to the scalar
 * loop (Lenter_blocks); on first SSE2 use it converts h to base 2^26 and
 * sets the flag at ctx+20.
 */
.align 5,0x90
.private_extern __poly1305_blocks_sse2
.align 4
__poly1305_blocks_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
/* %eax = is_base2_26 flag (ctx+20) */
movl 20(%edi),%eax
andl $-16,%ecx
jz L006nodata
cmpl $64,%ecx
jae L007enter_sse2
/* short input and still base 2^32: reuse the scalar loop */
testl %eax,%eax
jz Lenter_blocks
.align 4,0x90
L007enter_sse2:
call L008pic_point
L008pic_point:
popl %ebx
leal Lconst_sse2-L008pic_point(%ebx),%ebx
testl %eax,%eax
jnz L009base2_26
/* first SSE2 call: precompute powers of r, then convert the base-2^32
 * accumulator (loaded with overlapping 32-bit reads at byte offsets
 * 0,3,6,9,13) into five 26-bit limbs in xmm0..xmm4 */
call __poly1305_init_sse2
movl (%edi),%eax
movl 3(%edi),%ecx
movl 6(%edi),%edx
movl 9(%edi),%esi
movl 13(%edi),%ebp
movl $1,20(%edi)
shrl $2,%ecx
andl $67108863,%eax
shrl $4,%edx
andl $67108863,%ecx
shrl $6,%esi
andl $67108863,%edx
movd %eax,%xmm0
movd %ecx,%xmm1
movd %edx,%xmm2
movd %esi,%xmm3
movd %ebp,%xmm4
movl 24(%esp),%esi
movl 28(%esp),%ecx
jmp L010base2_32
.align 4,0x90
L009base2_26:
/* accumulator already in base 2^26: load the five limbs directly */
movd (%edi),%xmm0
movd 4(%edi),%xmm1
movd 8(%edi),%xmm2
movd 12(%edi),%xmm3
movd 16(%edi),%xmm4
movdqa 64(%ebx),%xmm7
L010base2_32:
movl 32(%esp),%eax
movl %esp,%ebp
/* 528-byte aligned frame; %edx will point at the duplicated power table */
subl $528,%esp
andl $-16,%esp
leal 48(%edi),%edi
/* padbit shifted into bit 24 of the top limb's lane */
shll $24,%eax
testl $31,%ecx
jz L011even
/* odd number of 2-block units: absorb one single block with r first */
movdqu (%esi),%xmm6
leal 16(%esi),%esi
/* split the block into 26-bit limbs and add into h */
movdqa %xmm6,%xmm5
pand %xmm7,%xmm6
paddd %xmm6,%xmm0
movdqa %xmm5,%xmm6
psrlq $26,%xmm5
psrldq $6,%xmm6
pand %xmm7,%xmm5
paddd %xmm5,%xmm1
movdqa %xmm6,%xmm5
psrlq $4,%xmm6
pand %xmm7,%xmm6
paddd %xmm6,%xmm2
movdqa %xmm5,%xmm6
psrlq $30,%xmm5
pand %xmm7,%xmm5
psrldq $7,%xmm6
paddd %xmm5,%xmm3
movd %eax,%xmm5
paddd %xmm6,%xmm4
/* multiply h by r (single-lane pmuludq cascade over ctx's r limbs) */
movd 12(%edi),%xmm6
paddd %xmm5,%xmm4
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movdqa %xmm4,64(%esp)
pmuludq %xmm6,%xmm0
pmuludq %xmm6,%xmm1
pmuludq %xmm6,%xmm2
movd 28(%edi),%xmm5
pmuludq %xmm6,%xmm3
pmuludq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 48(%esp),%xmm5
movdqa %xmm6,%xmm7
pmuludq 32(%esp),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%esp),%xmm7
paddq %xmm6,%xmm3
movd 92(%edi),%xmm6
pmuludq (%esp),%xmm5
paddq %xmm7,%xmm2
pmuludq 64(%esp),%xmm6
movd 44(%edi),%xmm7
paddq %xmm5,%xmm1
movdqa %xmm7,%xmm5
pmuludq 32(%esp),%xmm7
paddq %xmm6,%xmm0
movdqa %xmm5,%xmm6
pmuludq 16(%esp),%xmm5
paddq %xmm7,%xmm4
movd 108(%edi),%xmm7
pmuludq (%esp),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 64(%esp),%xmm7
paddq %xmm6,%xmm2
pmuludq 48(%esp),%xmm5
movd 60(%edi),%xmm6
paddq %xmm7,%xmm1
movdqa %xmm6,%xmm7
pmuludq 16(%esp),%xmm6
paddq %xmm5,%xmm0
movd 124(%edi),%xmm5
pmuludq (%esp),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 64(%esp),%xmm5
paddq %xmm7,%xmm3
movdqa %xmm6,%xmm7
pmuludq 48(%esp),%xmm6
paddq %xmm5,%xmm2
pmuludq 32(%esp),%xmm7
movd 76(%edi),%xmm5
paddq %xmm6,%xmm1
movd 140(%edi),%xmm6
pmuludq (%esp),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq 64(%esp),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%esp),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 32(%esp),%xmm5
paddq %xmm7,%xmm0
pmuludq 48(%esp),%xmm6
movdqa 64(%ebx),%xmm7
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
/* lazy carry chain, identical shape to the one in init_sse2 */
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
psrlq $26,%xmm6
movdqa %xmm5,%xmm4
paddq %xmm1,%xmm6
psrlq $26,%xmm5
pand %xmm7,%xmm4
movdqa %xmm6,%xmm1
psrlq $26,%xmm6
paddd %xmm5,%xmm0
psllq $2,%xmm5
paddq %xmm2,%xmm6
paddq %xmm0,%xmm5
pand %xmm7,%xmm1
movdqa %xmm6,%xmm2
psrlq $26,%xmm6
pand %xmm7,%xmm2
paddd %xmm3,%xmm6
movdqa %xmm5,%xmm0
psrlq $26,%xmm5
movdqa %xmm6,%xmm3
psrlq $26,%xmm6
pand %xmm7,%xmm0
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
subl $16,%ecx
jz L012done
L011even:
/* copy the power table from ctx into the frame, splitting even/odd
 * lanes: pshufd $68 duplicates the low qword (stored at (%edx)+),
 * pshufd $238 the high qword (stored at -144(%edx)-) */
leal 384(%esp),%edx
leal -32(%esi),%eax
subl $64,%ecx
movdqu (%edi),%xmm5
pshufd $68,%xmm5,%xmm6
cmovbl %eax,%esi
pshufd $238,%xmm5,%xmm5
movdqa %xmm6,(%edx)
leal 160(%esp),%eax
movdqu 16(%edi),%xmm6
movdqa %xmm5,-144(%edx)
pshufd $68,%xmm6,%xmm5
pshufd $238,%xmm6,%xmm6
movdqa %xmm5,16(%edx)
movdqu 32(%edi),%xmm5
movdqa %xmm6,-128(%edx)
pshufd $68,%xmm5,%xmm6
pshufd $238,%xmm5,%xmm5
movdqa %xmm6,32(%edx)
movdqu 48(%edi),%xmm6
movdqa %xmm5,-112(%edx)
pshufd $68,%xmm6,%xmm5
pshufd $238,%xmm6,%xmm6
movdqa %xmm5,48(%edx)
movdqu 64(%edi),%xmm5
movdqa %xmm6,-96(%edx)
pshufd $68,%xmm5,%xmm6
pshufd $238,%xmm5,%xmm5
movdqa %xmm6,64(%edx)
movdqu 80(%edi),%xmm6
movdqa %xmm5,-80(%edx)
pshufd $68,%xmm6,%xmm5
pshufd $238,%xmm6,%xmm6
movdqa %xmm5,80(%edx)
movdqu 96(%edi),%xmm5
movdqa %xmm6,-64(%edx)
pshufd $68,%xmm5,%xmm6
pshufd $238,%xmm5,%xmm5
movdqa %xmm6,96(%edx)
movdqu 112(%edi),%xmm6
movdqa %xmm5,-48(%edx)
pshufd $68,%xmm6,%xmm5
pshufd $238,%xmm6,%xmm6
movdqa %xmm5,112(%edx)
movdqu 128(%edi),%xmm5
movdqa %xmm6,-32(%edx)
pshufd $68,%xmm5,%xmm6
pshufd $238,%xmm5,%xmm5
movdqa %xmm6,128(%edx)
movdqa %xmm5,-16(%edx)
/* load the next two blocks and deinterleave into 26-bit limb vectors
 * xmm5,xmm6,xmm2,xmm3,xmm4; (%ebx) = 2^24 padbit constant ORed into
 * the top limb */
movdqu 32(%esi),%xmm5
movdqu 48(%esi),%xmm6
leal 32(%esi),%esi
movdqa %xmm2,112(%esp)
movdqa %xmm3,128(%esp)
movdqa %xmm4,144(%esp)
movdqa %xmm5,%xmm2
movdqa %xmm6,%xmm3
psrldq $6,%xmm2
psrldq $6,%xmm3
movdqa %xmm5,%xmm4
punpcklqdq %xmm3,%xmm2
punpckhqdq %xmm6,%xmm4
punpcklqdq %xmm6,%xmm5
movdqa %xmm2,%xmm3
psrlq $4,%xmm2
psrlq $30,%xmm3
movdqa %xmm5,%xmm6
psrlq $40,%xmm4
psrlq $26,%xmm6
pand %xmm7,%xmm5
pand %xmm7,%xmm6
pand %xmm7,%xmm2
pand %xmm7,%xmm3
por (%ebx),%xmm4
movdqa %xmm0,80(%esp)
movdqa %xmm1,96(%esp)
jbe L013skip_loop
jmp L014loop
.align 5,0x90
L014loop:
/* main 2-way loop: multiply the fresh block pair by r^2 (odd-lane table
 * at negative offsets from %edx) ... */
movdqa -144(%edx),%xmm7
movdqa %xmm6,16(%eax)
movdqa %xmm2,32(%eax)
movdqa %xmm3,48(%eax)
movdqa %xmm4,64(%eax)
movdqa %xmm5,%xmm1
pmuludq %xmm7,%xmm5
movdqa %xmm6,%xmm0
pmuludq %xmm7,%xmm6
pmuludq %xmm7,%xmm2
pmuludq %xmm7,%xmm3
pmuludq %xmm7,%xmm4
pmuludq -16(%edx),%xmm0
movdqa %xmm1,%xmm7
pmuludq -128(%edx),%xmm1
paddq %xmm5,%xmm0
movdqa %xmm7,%xmm5
pmuludq -112(%edx),%xmm7
paddq %xmm6,%xmm1
movdqa %xmm5,%xmm6
pmuludq -96(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa 16(%eax),%xmm7
pmuludq -80(%edx),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq -128(%edx),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq -112(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa 32(%eax),%xmm7
pmuludq -96(%edx),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq -32(%edx),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq -16(%edx),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq -128(%edx),%xmm6
paddq %xmm5,%xmm1
movdqa 48(%eax),%xmm5
pmuludq -112(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq -48(%edx),%xmm5
paddq %xmm7,%xmm4
movdqa %xmm6,%xmm7
pmuludq -32(%edx),%xmm6
paddq %xmm5,%xmm0
movdqa %xmm7,%xmm5
pmuludq -16(%edx),%xmm7
paddq %xmm6,%xmm1
movdqa 64(%eax),%xmm6
pmuludq -128(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa %xmm6,%xmm7
pmuludq -16(%edx),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq -64(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq -48(%edx),%xmm5
paddq %xmm7,%xmm0
movdqa 64(%ebx),%xmm7
pmuludq -32(%edx),%xmm6
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
/* ... fetch the following block pair, add the carried accumulator
 * (80..144(%esp)), and multiply by r^4 (even-lane table at (%edx)+) */
movdqu -32(%esi),%xmm5
movdqu -16(%esi),%xmm6
leal 32(%esi),%esi
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movdqa %xmm4,64(%esp)
movdqa %xmm5,%xmm2
movdqa %xmm6,%xmm3
psrldq $6,%xmm2
psrldq $6,%xmm3
movdqa %xmm5,%xmm4
punpcklqdq %xmm3,%xmm2
punpckhqdq %xmm6,%xmm4
punpcklqdq %xmm6,%xmm5
movdqa %xmm2,%xmm3
psrlq $4,%xmm2
psrlq $30,%xmm3
movdqa %xmm5,%xmm6
psrlq $40,%xmm4
psrlq $26,%xmm6
pand %xmm7,%xmm5
pand %xmm7,%xmm6
pand %xmm7,%xmm2
pand %xmm7,%xmm3
por (%ebx),%xmm4
leal -32(%esi),%eax
subl $64,%ecx
paddd 80(%esp),%xmm5
paddd 96(%esp),%xmm6
paddd 112(%esp),%xmm2
paddd 128(%esp),%xmm3
paddd 144(%esp),%xmm4
/* if fewer than 64 bytes remain, rewind %esi so the tail re-reads the
 * last pair (cmovbl) */
cmovbl %eax,%esi
leal 160(%esp),%eax
movdqa (%edx),%xmm7
movdqa %xmm1,16(%esp)
movdqa %xmm6,16(%eax)
movdqa %xmm2,32(%eax)
movdqa %xmm3,48(%eax)
movdqa %xmm4,64(%eax)
movdqa %xmm5,%xmm1
pmuludq %xmm7,%xmm5
paddq %xmm0,%xmm5
movdqa %xmm6,%xmm0
pmuludq %xmm7,%xmm6
pmuludq %xmm7,%xmm2
pmuludq %xmm7,%xmm3
pmuludq %xmm7,%xmm4
paddq 16(%esp),%xmm6
paddq 32(%esp),%xmm2
paddq 48(%esp),%xmm3
paddq 64(%esp),%xmm4
pmuludq 128(%edx),%xmm0
movdqa %xmm1,%xmm7
pmuludq 16(%edx),%xmm1
paddq %xmm5,%xmm0
movdqa %xmm7,%xmm5
pmuludq 32(%edx),%xmm7
paddq %xmm6,%xmm1
movdqa %xmm5,%xmm6
pmuludq 48(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa 16(%eax),%xmm7
pmuludq 64(%edx),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 16(%edx),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 32(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa 32(%eax),%xmm7
pmuludq 48(%edx),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 112(%edx),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 128(%edx),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq 16(%edx),%xmm6
paddq %xmm5,%xmm1
movdqa 48(%eax),%xmm5
pmuludq 32(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 96(%edx),%xmm5
paddq %xmm7,%xmm4
movdqa %xmm6,%xmm7
pmuludq 112(%edx),%xmm6
paddq %xmm5,%xmm0
movdqa %xmm7,%xmm5
pmuludq 128(%edx),%xmm7
paddq %xmm6,%xmm1
movdqa 64(%eax),%xmm6
pmuludq 16(%edx),%xmm5
paddq %xmm7,%xmm2
movdqa %xmm6,%xmm7
pmuludq 128(%edx),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 80(%edx),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 96(%edx),%xmm5
paddq %xmm7,%xmm0
movdqa 64(%ebx),%xmm7
pmuludq 112(%edx),%xmm6
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
/* lazy carry chain on the combined accumulator */
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
psrlq $26,%xmm6
movdqa %xmm5,%xmm4
paddq %xmm1,%xmm6
psrlq $26,%xmm5
pand %xmm7,%xmm4
movdqa %xmm6,%xmm1
psrlq $26,%xmm6
paddd %xmm5,%xmm0
psllq $2,%xmm5
paddq %xmm2,%xmm6
paddq %xmm0,%xmm5
pand %xmm7,%xmm1
movdqa %xmm6,%xmm2
psrlq $26,%xmm6
pand %xmm7,%xmm2
paddd %xmm3,%xmm6
movdqa %xmm5,%xmm0
psrlq $26,%xmm5
movdqa %xmm6,%xmm3
psrlq $26,%xmm6
pand %xmm7,%xmm0
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
/* preload and split the next two blocks for the following iteration */
movdqu 32(%esi),%xmm5
movdqu 48(%esi),%xmm6
leal 32(%esi),%esi
movdqa %xmm2,112(%esp)
movdqa %xmm3,128(%esp)
movdqa %xmm4,144(%esp)
movdqa %xmm5,%xmm2
movdqa %xmm6,%xmm3
psrldq $6,%xmm2
psrldq $6,%xmm3
movdqa %xmm5,%xmm4
punpcklqdq %xmm3,%xmm2
punpckhqdq %xmm6,%xmm4
punpcklqdq %xmm6,%xmm5
movdqa %xmm2,%xmm3
psrlq $4,%xmm2
psrlq $30,%xmm3
movdqa %xmm5,%xmm6
psrlq $40,%xmm4
psrlq $26,%xmm6
pand %xmm7,%xmm5
pand %xmm7,%xmm6
pand %xmm7,%xmm2
pand %xmm7,%xmm3
por (%ebx),%xmm4
movdqa %xmm0,80(%esp)
movdqa %xmm1,96(%esp)
ja L014loop
L013skip_loop:
/* tail: multiply the remaining one or two block pairs by the right power
 * of r (pshufd $16 broadcasts a single table lane) */
pshufd $16,-144(%edx),%xmm7
addl $32,%ecx
jnz L015long_tail
paddd %xmm0,%xmm5
paddd %xmm1,%xmm6
paddd 112(%esp),%xmm2
paddd 128(%esp),%xmm3
paddd 144(%esp),%xmm4
L015long_tail:
movdqa %xmm5,(%eax)
movdqa %xmm6,16(%eax)
movdqa %xmm2,32(%eax)
movdqa %xmm3,48(%eax)
movdqa %xmm4,64(%eax)
pmuludq %xmm7,%xmm5
pmuludq %xmm7,%xmm6
pmuludq %xmm7,%xmm2
movdqa %xmm5,%xmm0
pshufd $16,-128(%edx),%xmm5
pmuludq %xmm7,%xmm3
movdqa %xmm6,%xmm1
pmuludq %xmm7,%xmm4
movdqa %xmm5,%xmm6
pmuludq 48(%eax),%xmm5
movdqa %xmm6,%xmm7
pmuludq 32(%eax),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%eax),%xmm7
paddq %xmm6,%xmm3
pshufd $16,-64(%edx),%xmm6
pmuludq (%eax),%xmm5
paddq %xmm7,%xmm2
pmuludq 64(%eax),%xmm6
pshufd $16,-112(%edx),%xmm7
paddq %xmm5,%xmm1
movdqa %xmm7,%xmm5
pmuludq 32(%eax),%xmm7
paddq %xmm6,%xmm0
movdqa %xmm5,%xmm6
pmuludq 16(%eax),%xmm5
paddq %xmm7,%xmm4
pshufd $16,-48(%edx),%xmm7
pmuludq (%eax),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 64(%eax),%xmm7
paddq %xmm6,%xmm2
pmuludq 48(%eax),%xmm5
pshufd $16,-96(%edx),%xmm6
paddq %xmm7,%xmm1
movdqa %xmm6,%xmm7
pmuludq 16(%eax),%xmm6
paddq %xmm5,%xmm0
pshufd $16,-32(%edx),%xmm5
pmuludq (%eax),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 64(%eax),%xmm5
paddq %xmm7,%xmm3
movdqa %xmm6,%xmm7
pmuludq 48(%eax),%xmm6
paddq %xmm5,%xmm2
pmuludq 32(%eax),%xmm7
pshufd $16,-80(%edx),%xmm5
paddq %xmm6,%xmm1
pshufd $16,-16(%edx),%xmm6
pmuludq (%eax),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq 64(%eax),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%eax),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 32(%eax),%xmm5
paddq %xmm7,%xmm0
pmuludq 48(%eax),%xmm6
movdqa 64(%ebx),%xmm7
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
jz L016short_tail
/* long tail: absorb one more block pair with r^2:r before folding */
movdqu -32(%esi),%xmm5
movdqu -16(%esi),%xmm6
leal 32(%esi),%esi
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movdqa %xmm4,64(%esp)
movdqa %xmm5,%xmm2
movdqa %xmm6,%xmm3
psrldq $6,%xmm2
psrldq $6,%xmm3
movdqa %xmm5,%xmm4
punpcklqdq %xmm3,%xmm2
punpckhqdq %xmm6,%xmm4
punpcklqdq %xmm6,%xmm5
movdqa %xmm2,%xmm3
psrlq $4,%xmm2
psrlq $30,%xmm3
movdqa %xmm5,%xmm6
psrlq $40,%xmm4
psrlq $26,%xmm6
pand %xmm7,%xmm5
pand %xmm7,%xmm6
pand %xmm7,%xmm2
pand %xmm7,%xmm3
por (%ebx),%xmm4
pshufd $16,(%edx),%xmm7
paddd 80(%esp),%xmm5
paddd 96(%esp),%xmm6
paddd 112(%esp),%xmm2
paddd 128(%esp),%xmm3
paddd 144(%esp),%xmm4
movdqa %xmm5,(%esp)
pmuludq %xmm7,%xmm5
movdqa %xmm6,16(%esp)
pmuludq %xmm7,%xmm6
paddq %xmm5,%xmm0
movdqa %xmm2,%xmm5
pmuludq %xmm7,%xmm2
paddq %xmm6,%xmm1
movdqa %xmm3,%xmm6
pmuludq %xmm7,%xmm3
paddq 32(%esp),%xmm2
movdqa %xmm5,32(%esp)
pshufd $16,16(%edx),%xmm5
paddq 48(%esp),%xmm3
movdqa %xmm6,48(%esp)
movdqa %xmm4,%xmm6
pmuludq %xmm7,%xmm4
paddq 64(%esp),%xmm4
movdqa %xmm6,64(%esp)
movdqa %xmm5,%xmm6
pmuludq 48(%esp),%xmm5
movdqa %xmm6,%xmm7
pmuludq 32(%esp),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%esp),%xmm7
paddq %xmm6,%xmm3
pshufd $16,80(%edx),%xmm6
pmuludq (%esp),%xmm5
paddq %xmm7,%xmm2
pmuludq 64(%esp),%xmm6
pshufd $16,32(%edx),%xmm7
paddq %xmm5,%xmm1
movdqa %xmm7,%xmm5
pmuludq 32(%esp),%xmm7
paddq %xmm6,%xmm0
movdqa %xmm5,%xmm6
pmuludq 16(%esp),%xmm5
paddq %xmm7,%xmm4
pshufd $16,96(%edx),%xmm7
pmuludq (%esp),%xmm6
paddq %xmm5,%xmm3
movdqa %xmm7,%xmm5
pmuludq 64(%esp),%xmm7
paddq %xmm6,%xmm2
pmuludq 48(%esp),%xmm5
pshufd $16,48(%edx),%xmm6
paddq %xmm7,%xmm1
movdqa %xmm6,%xmm7
pmuludq 16(%esp),%xmm6
paddq %xmm5,%xmm0
pshufd $16,112(%edx),%xmm5
pmuludq (%esp),%xmm7
paddq %xmm6,%xmm4
movdqa %xmm5,%xmm6
pmuludq 64(%esp),%xmm5
paddq %xmm7,%xmm3
movdqa %xmm6,%xmm7
pmuludq 48(%esp),%xmm6
paddq %xmm5,%xmm2
pmuludq 32(%esp),%xmm7
pshufd $16,64(%edx),%xmm5
paddq %xmm6,%xmm1
pshufd $16,128(%edx),%xmm6
pmuludq (%esp),%xmm5
paddq %xmm7,%xmm0
movdqa %xmm6,%xmm7
pmuludq 64(%esp),%xmm6
paddq %xmm5,%xmm4
movdqa %xmm7,%xmm5
pmuludq 16(%esp),%xmm7
paddq %xmm6,%xmm3
movdqa %xmm5,%xmm6
pmuludq 32(%esp),%xmm5
paddq %xmm7,%xmm0
pmuludq 48(%esp),%xmm6
movdqa 64(%ebx),%xmm7
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
L016short_tail:
/* horizontal add of the two 64-bit lanes (pshufd $78 swaps qwords),
 * then a final lazy carry pass to canonical 26-bit limbs */
pshufd $78,%xmm4,%xmm6
pshufd $78,%xmm3,%xmm5
paddq %xmm6,%xmm4
paddq %xmm5,%xmm3
pshufd $78,%xmm0,%xmm6
pshufd $78,%xmm1,%xmm5
paddq %xmm6,%xmm0
paddq %xmm5,%xmm1
pshufd $78,%xmm2,%xmm6
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
paddq %xmm6,%xmm2
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
psrlq $26,%xmm6
movdqa %xmm5,%xmm4
paddq %xmm1,%xmm6
psrlq $26,%xmm5
pand %xmm7,%xmm4
movdqa %xmm6,%xmm1
psrlq $26,%xmm6
paddd %xmm5,%xmm0
psllq $2,%xmm5
paddq %xmm2,%xmm6
paddq %xmm0,%xmm5
pand %xmm7,%xmm1
movdqa %xmm6,%xmm2
psrlq $26,%xmm6
pand %xmm7,%xmm2
paddd %xmm3,%xmm6
movdqa %xmm5,%xmm0
psrlq $26,%xmm5
movdqa %xmm6,%xmm3
psrlq $26,%xmm6
pand %xmm7,%xmm0
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
L012done:
/* store the base-2^26 limbs back into ctx (edi is still ctx+48) */
movd %xmm0,-48(%edi)
movd %xmm1,-44(%edi)
movd %xmm2,-40(%edi)
movd %xmm3,-36(%edi)
movd %xmm4,-32(%edi)
movl %ebp,%esp
L006nodata:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * void __poly1305_emit_sse2(const void *ctx, u8 mac[16], const u32 nonce[4])
 * Emit entry used when the SSE2 blocks path may have left the accumulator
 * in base 2^26.  If the flag at ctx+20 is clear, the accumulator is still
 * base 2^32 and we jump into the scalar emitter (Lenter_emit).  Otherwise:
 * convert the five 26-bit limbs back to four 32-bit words (shift/or with
 * carry propagation), fold the bits above 2^130 back via *5, then perform
 * the same constant-time h vs h+5 selection as GFp_poly1305_emit — here
 * the pre-increment h is parked in xmm0..xmm3 while the flags-driven adds
 * run, and the mask selects between the two copies — and add the nonce.
 */
.align 5,0x90
.private_extern __poly1305_emit_sse2
.align 4
__poly1305_emit_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebp
cmpl $0,20(%ebp)
je Lenter_emit
/* base 2^26 -> base 2^32: h0..h4 are 26-bit limbs with spill-over */
movl (%ebp),%eax
movl 4(%ebp),%edi
movl 8(%ebp),%ecx
movl 12(%ebp),%edx
movl 16(%ebp),%esi
movl %edi,%ebx
shll $26,%edi
shrl $6,%ebx
addl %edi,%eax
movl %ecx,%edi
adcl $0,%ebx
shll $20,%edi
shrl $12,%ecx
addl %edi,%ebx
movl %edx,%edi
adcl $0,%ecx
shll $14,%edi
shrl $18,%edx
addl %edi,%ecx
movl %esi,%edi
adcl $0,%edx
shll $8,%edi
shrl $24,%esi
addl %edi,%edx
adcl $0,%esi
/* fold bits >= 2^130 back: h += (top>>2)*5, keep top&3 */
movl %esi,%edi
andl $3,%esi
shrl $2,%edi
leal (%edi,%edi,4),%ebp
movl 24(%esp),%edi
addl %ebp,%eax
movl 28(%esp),%ebp
adcl $0,%ebx
adcl $0,%ecx
adcl $0,%edx
adcl $0,%esi
/* save h in xmm0..xmm3, compute h+5, build the selection mask */
movd %eax,%xmm0
addl $5,%eax
movd %ebx,%xmm1
adcl $0,%ebx
movd %ecx,%xmm2
adcl $0,%ecx
movd %edx,%xmm3
adcl $0,%edx
adcl $0,%esi
shrl $2,%esi
negl %esi
/* mac = (h+5) & mask, merged below with h & ~mask, then += nonce */
andl %esi,%eax
andl %esi,%ebx
andl %esi,%ecx
andl %esi,%edx
movl %eax,(%edi)
movd %xmm0,%eax
movl %ebx,4(%edi)
movd %xmm1,%ebx
movl %ecx,8(%edi)
movd %xmm2,%ecx
movl %edx,12(%edi)
movd %xmm3,%edx
notl %esi
andl %esi,%eax
andl %esi,%ebx
orl (%edi),%eax
andl %esi,%ecx
orl 4(%edi),%ebx
andl %esi,%edx
orl 8(%edi),%ecx
orl 12(%edi),%edx
addl (%ebp),%eax
adcl 4(%ebp),%ebx
movl %eax,(%edi)
adcl 8(%ebp),%ecx
movl %ebx,4(%edi)
adcl 12(%ebp),%edx
movl %ecx,8(%edi)
movl %edx,12(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
/*
 * Constant pool for the SSE2 path, addressed PIC-relative via %ebx:
 *   +0  : 16777216 = 2^24 per lane — the padbit ORed into the top limb
 *   +16 : zero vector
 *   +64 : 67108863 = 0x03ffffff per lane — the 26-bit limb mask
 *   +96 : r-clamp words 0x0fffffff / 0x0ffffffc (268435455 / 268435452)
 * followed by the ASCII banner
 * "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>\0".
 */
.align 6,0x90
Lconst_sse2:
.long 16777216,0,16777216,0,16777216,0,16777216,0
.long 0,0,0,0,0,0,0,0
.long 67108863,0,67108863,0,67108863,0,67108863,0
.long 268435455,268435452,268435452,268435452
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.align 2,0x90
/* Mach-O non-lazy symbol pointer: the dynamic linker fills this slot with
 * the address of _GFp_ia32cap_P, letting the PIC code above load the CPU
 * capability vector without a text relocation. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_GFp_ia32cap_P$non_lazy_ptr:
.indirect_symbol _GFp_ia32cap_P
.long 0
#endif