| # This file is generated from a similarly-named Perl script in the BoringSSL |
| # source tree. Do not edit by hand. |
| |
| #if defined(__i386__) |
| #if defined(BORINGSSL_PREFIX) |
| #include <boringssl_prefix_symbols_asm.h> |
| #endif |
| .text |
| .align 6,0x90 |
| .globl _GFp_poly1305_init_asm |
| .private_extern _GFp_poly1305_init_asm |
| .align 4 |
| _GFp_poly1305_init_asm: |
| L_GFp_poly1305_init_asm_begin: |
| pushl %ebp |
| pushl %ebx |
| pushl %esi |
| pushl %edi |
| movl 20(%esp),%edi |
| movl 24(%esp),%esi |
| movl 28(%esp),%ebp |
| xorl %eax,%eax |
| movl %eax,(%edi) |
| movl %eax,4(%edi) |
| movl %eax,8(%edi) |
| movl %eax,12(%edi) |
| movl %eax,16(%edi) |
| movl %eax,20(%edi) |
| cmpl $0,%esi |
| je L000nokey |
| call L001pic_point |
| L001pic_point: |
| popl %ebx |
| leal _GFp_poly1305_blocks-L001pic_point(%ebx),%eax |
| leal _GFp_poly1305_emit-L001pic_point(%ebx),%edx |
| movl L_GFp_ia32cap_P$non_lazy_ptr-L001pic_point(%ebx),%edi |
| movl (%edi),%ecx |
| andl $83886080,%ecx |
| cmpl $83886080,%ecx |
| leal __poly1305_blocks_sse2-L001pic_point(%ebx),%eax |
| leal __poly1305_emit_sse2-L001pic_point(%ebx),%edx |
| movl 20(%esp),%edi |
| movl %eax,(%ebp) |
| movl %edx,4(%ebp) |
| movl (%esi),%eax |
| movl 4(%esi),%ebx |
| movl 8(%esi),%ecx |
| movl 12(%esi),%edx |
| andl $268435455,%eax |
| andl $268435452,%ebx |
| andl $268435452,%ecx |
| andl $268435452,%edx |
| movl %eax,24(%edi) |
| movl %ebx,28(%edi) |
| movl %ecx,32(%edi) |
| movl %edx,36(%edi) |
| movl $1,%eax |
| L000nokey: |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| .globl _GFp_poly1305_blocks |
| .private_extern _GFp_poly1305_blocks |
| .align 4 |
| _GFp_poly1305_blocks: |
| L_GFp_poly1305_blocks_begin: |
| pushl %ebp |
| pushl %ebx |
| pushl %esi |
| pushl %edi |
| movl 20(%esp),%edi |
| movl 24(%esp),%esi |
| movl 28(%esp),%ecx |
| Lenter_blocks: |
| andl $-15,%ecx |
| jz L002nodata |
| subl $64,%esp |
| movl 24(%edi),%eax |
| movl 28(%edi),%ebx |
| leal (%esi,%ecx,1),%ebp |
| movl 32(%edi),%ecx |
| movl 36(%edi),%edx |
| movl %ebp,92(%esp) |
| movl %esi,%ebp |
| movl %eax,36(%esp) |
| movl %ebx,%eax |
| shrl $2,%eax |
| movl %ebx,40(%esp) |
| addl %ebx,%eax |
| movl %ecx,%ebx |
| shrl $2,%ebx |
| movl %ecx,44(%esp) |
| addl %ecx,%ebx |
| movl %edx,%ecx |
| shrl $2,%ecx |
| movl %edx,48(%esp) |
| addl %edx,%ecx |
| movl %eax,52(%esp) |
| movl %ebx,56(%esp) |
| movl %ecx,60(%esp) |
| movl (%edi),%eax |
| movl 4(%edi),%ebx |
| movl 8(%edi),%ecx |
| movl 12(%edi),%esi |
| movl 16(%edi),%edi |
| jmp L003loop |
| .align 5,0x90 |
| L003loop: |
| addl (%ebp),%eax |
| adcl 4(%ebp),%ebx |
| adcl 8(%ebp),%ecx |
| adcl 12(%ebp),%esi |
| leal 16(%ebp),%ebp |
| adcl 96(%esp),%edi |
| movl %eax,(%esp) |
| movl %esi,12(%esp) |
| mull 36(%esp) |
| movl %edi,16(%esp) |
| movl %eax,%edi |
| movl %ebx,%eax |
| movl %edx,%esi |
| mull 60(%esp) |
| addl %eax,%edi |
| movl %ecx,%eax |
| adcl %edx,%esi |
| mull 56(%esp) |
| addl %eax,%edi |
| movl 12(%esp),%eax |
| adcl %edx,%esi |
| mull 52(%esp) |
| addl %eax,%edi |
| movl (%esp),%eax |
| adcl %edx,%esi |
| mull 40(%esp) |
| movl %edi,20(%esp) |
| xorl %edi,%edi |
| addl %eax,%esi |
| movl %ebx,%eax |
| adcl %edx,%edi |
| mull 36(%esp) |
| addl %eax,%esi |
| movl %ecx,%eax |
| adcl %edx,%edi |
| mull 60(%esp) |
| addl %eax,%esi |
| movl 12(%esp),%eax |
| adcl %edx,%edi |
| mull 56(%esp) |
| addl %eax,%esi |
| movl 16(%esp),%eax |
| adcl %edx,%edi |
| imull 52(%esp),%eax |
| addl %eax,%esi |
| movl (%esp),%eax |
| adcl $0,%edi |
| mull 44(%esp) |
| movl %esi,24(%esp) |
| xorl %esi,%esi |
| addl %eax,%edi |
| movl %ebx,%eax |
| adcl %edx,%esi |
| mull 40(%esp) |
| addl %eax,%edi |
| movl %ecx,%eax |
| adcl %edx,%esi |
| mull 36(%esp) |
| addl %eax,%edi |
| movl 12(%esp),%eax |
| adcl %edx,%esi |
| mull 60(%esp) |
| addl %eax,%edi |
| movl 16(%esp),%eax |
| adcl %edx,%esi |
| imull 56(%esp),%eax |
| addl %eax,%edi |
| movl (%esp),%eax |
| adcl $0,%esi |
| mull 48(%esp) |
| movl %edi,28(%esp) |
| xorl %edi,%edi |
| addl %eax,%esi |
| movl %ebx,%eax |
| adcl %edx,%edi |
| mull 44(%esp) |
| addl %eax,%esi |
| movl %ecx,%eax |
| adcl %edx,%edi |
| mull 40(%esp) |
| addl %eax,%esi |
| movl 12(%esp),%eax |
| adcl %edx,%edi |
| mull 36(%esp) |
| addl %eax,%esi |
| movl 16(%esp),%ecx |
| adcl %edx,%edi |
| movl %ecx,%edx |
| imull 60(%esp),%ecx |
| addl %ecx,%esi |
| movl 20(%esp),%eax |
| adcl $0,%edi |
| imull 36(%esp),%edx |
| addl %edi,%edx |
| movl 24(%esp),%ebx |
| movl 28(%esp),%ecx |
| movl %edx,%edi |
| shrl $2,%edx |
| andl $3,%edi |
| leal (%edx,%edx,4),%edx |
| addl %edx,%eax |
| adcl $0,%ebx |
| adcl $0,%ecx |
| adcl $0,%esi |
| adcl $0,%edi |
| cmpl 92(%esp),%ebp |
| jne L003loop |
| movl 84(%esp),%edx |
| addl $64,%esp |
| movl %eax,(%edx) |
| movl %ebx,4(%edx) |
| movl %ecx,8(%edx) |
| movl %esi,12(%edx) |
| movl %edi,16(%edx) |
| L002nodata: |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| .globl _GFp_poly1305_emit |
| .private_extern _GFp_poly1305_emit |
| .align 4 |
| _GFp_poly1305_emit: |
| L_GFp_poly1305_emit_begin: |
| pushl %ebp |
| pushl %ebx |
| pushl %esi |
| pushl %edi |
| movl 20(%esp),%ebp |
| Lenter_emit: |
| movl 24(%esp),%edi |
| movl (%ebp),%eax |
| movl 4(%ebp),%ebx |
| movl 8(%ebp),%ecx |
| movl 12(%ebp),%edx |
| movl 16(%ebp),%esi |
| addl $5,%eax |
| adcl $0,%ebx |
| adcl $0,%ecx |
| adcl $0,%edx |
| adcl $0,%esi |
| shrl $2,%esi |
| negl %esi |
| andl %esi,%eax |
| andl %esi,%ebx |
| andl %esi,%ecx |
| andl %esi,%edx |
| movl %eax,(%edi) |
| movl %ebx,4(%edi) |
| movl %ecx,8(%edi) |
| movl %edx,12(%edi) |
| notl %esi |
| movl (%ebp),%eax |
| movl 4(%ebp),%ebx |
| movl 8(%ebp),%ecx |
| movl 12(%ebp),%edx |
| movl 28(%esp),%ebp |
| andl %esi,%eax |
| andl %esi,%ebx |
| andl %esi,%ecx |
| andl %esi,%edx |
| orl (%edi),%eax |
| orl 4(%edi),%ebx |
| orl 8(%edi),%ecx |
| orl 12(%edi),%edx |
| addl (%ebp),%eax |
| adcl 4(%ebp),%ebx |
| adcl 8(%ebp),%ecx |
| adcl 12(%ebp),%edx |
| movl %eax,(%edi) |
| movl %ebx,4(%edi) |
| movl %ecx,8(%edi) |
| movl %edx,12(%edi) |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| .align 5,0x90 |
| .private_extern __poly1305_init_sse2 |
| .align 4 |
| __poly1305_init_sse2: |
| movdqu 24(%edi),%xmm4 |
| leal 48(%edi),%edi |
| movl %esp,%ebp |
| subl $224,%esp |
| andl $-16,%esp |
| movq 64(%ebx),%xmm7 |
| movdqa %xmm4,%xmm0 |
| movdqa %xmm4,%xmm1 |
| movdqa %xmm4,%xmm2 |
| pand %xmm7,%xmm0 |
| psrlq $26,%xmm1 |
| psrldq $6,%xmm2 |
| pand %xmm7,%xmm1 |
| movdqa %xmm2,%xmm3 |
| psrlq $4,%xmm2 |
| psrlq $30,%xmm3 |
| pand %xmm7,%xmm2 |
| pand %xmm7,%xmm3 |
| psrldq $13,%xmm4 |
| leal 144(%esp),%edx |
| movl $2,%ecx |
| L004square: |
| movdqa %xmm0,(%esp) |
| movdqa %xmm1,16(%esp) |
| movdqa %xmm2,32(%esp) |
| movdqa %xmm3,48(%esp) |
| movdqa %xmm4,64(%esp) |
| movdqa %xmm1,%xmm6 |
| movdqa %xmm2,%xmm5 |
| pslld $2,%xmm6 |
| pslld $2,%xmm5 |
| paddd %xmm1,%xmm6 |
| paddd %xmm2,%xmm5 |
| movdqa %xmm6,80(%esp) |
| movdqa %xmm5,96(%esp) |
| movdqa %xmm3,%xmm6 |
| movdqa %xmm4,%xmm5 |
| pslld $2,%xmm6 |
| pslld $2,%xmm5 |
| paddd %xmm3,%xmm6 |
| paddd %xmm4,%xmm5 |
| movdqa %xmm6,112(%esp) |
| movdqa %xmm5,128(%esp) |
| pshufd $68,%xmm0,%xmm6 |
| movdqa %xmm1,%xmm5 |
| pshufd $68,%xmm1,%xmm1 |
| pshufd $68,%xmm2,%xmm2 |
| pshufd $68,%xmm3,%xmm3 |
| pshufd $68,%xmm4,%xmm4 |
| movdqa %xmm6,(%edx) |
| movdqa %xmm1,16(%edx) |
| movdqa %xmm2,32(%edx) |
| movdqa %xmm3,48(%edx) |
| movdqa %xmm4,64(%edx) |
| pmuludq %xmm0,%xmm4 |
| pmuludq %xmm0,%xmm3 |
| pmuludq %xmm0,%xmm2 |
| pmuludq %xmm0,%xmm1 |
| pmuludq %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| pmuludq 48(%edx),%xmm5 |
| movdqa %xmm6,%xmm7 |
| pmuludq 32(%edx),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa 80(%esp),%xmm6 |
| pmuludq (%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| pmuludq 64(%edx),%xmm6 |
| movdqa 32(%esp),%xmm7 |
| paddq %xmm5,%xmm1 |
| movdqa %xmm7,%xmm5 |
| pmuludq 32(%edx),%xmm7 |
| paddq %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| pmuludq 16(%edx),%xmm5 |
| paddq %xmm7,%xmm4 |
| movdqa 96(%esp),%xmm7 |
| pmuludq (%edx),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 64(%edx),%xmm7 |
| paddq %xmm6,%xmm2 |
| pmuludq 48(%edx),%xmm5 |
| movdqa 48(%esp),%xmm6 |
| paddq %xmm7,%xmm1 |
| movdqa %xmm6,%xmm7 |
| pmuludq 16(%edx),%xmm6 |
| paddq %xmm5,%xmm0 |
| movdqa 112(%esp),%xmm5 |
| pmuludq (%edx),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 64(%edx),%xmm5 |
| paddq %xmm7,%xmm3 |
| movdqa %xmm6,%xmm7 |
| pmuludq 48(%edx),%xmm6 |
| paddq %xmm5,%xmm2 |
| pmuludq 32(%edx),%xmm7 |
| movdqa 64(%esp),%xmm5 |
| paddq %xmm6,%xmm1 |
| movdqa 128(%esp),%xmm6 |
| pmuludq (%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq 64(%edx),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 32(%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| pmuludq 48(%edx),%xmm6 |
| movdqa 64(%ebx),%xmm7 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| movdqa %xmm3,%xmm5 |
| pand %xmm7,%xmm3 |
| psrlq $26,%xmm5 |
| paddq %xmm4,%xmm5 |
| movdqa %xmm0,%xmm6 |
| pand %xmm7,%xmm0 |
| psrlq $26,%xmm6 |
| movdqa %xmm5,%xmm4 |
| paddq %xmm1,%xmm6 |
| psrlq $26,%xmm5 |
| pand %xmm7,%xmm4 |
| movdqa %xmm6,%xmm1 |
| psrlq $26,%xmm6 |
| paddd %xmm5,%xmm0 |
| psllq $2,%xmm5 |
| paddq %xmm2,%xmm6 |
| paddq %xmm0,%xmm5 |
| pand %xmm7,%xmm1 |
| movdqa %xmm6,%xmm2 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm2 |
| paddd %xmm3,%xmm6 |
| movdqa %xmm5,%xmm0 |
| psrlq $26,%xmm5 |
| movdqa %xmm6,%xmm3 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm0 |
| paddd %xmm5,%xmm1 |
| pand %xmm7,%xmm3 |
| paddd %xmm6,%xmm4 |
| decl %ecx |
| jz L005square_break |
| punpcklqdq (%esp),%xmm0 |
| punpcklqdq 16(%esp),%xmm1 |
| punpcklqdq 32(%esp),%xmm2 |
| punpcklqdq 48(%esp),%xmm3 |
| punpcklqdq 64(%esp),%xmm4 |
| jmp L004square |
| L005square_break: |
| psllq $32,%xmm0 |
| psllq $32,%xmm1 |
| psllq $32,%xmm2 |
| psllq $32,%xmm3 |
| psllq $32,%xmm4 |
| por (%esp),%xmm0 |
| por 16(%esp),%xmm1 |
| por 32(%esp),%xmm2 |
| por 48(%esp),%xmm3 |
| por 64(%esp),%xmm4 |
| pshufd $141,%xmm0,%xmm0 |
| pshufd $141,%xmm1,%xmm1 |
| pshufd $141,%xmm2,%xmm2 |
| pshufd $141,%xmm3,%xmm3 |
| pshufd $141,%xmm4,%xmm4 |
| movdqu %xmm0,(%edi) |
| movdqu %xmm1,16(%edi) |
| movdqu %xmm2,32(%edi) |
| movdqu %xmm3,48(%edi) |
| movdqu %xmm4,64(%edi) |
| movdqa %xmm1,%xmm6 |
| movdqa %xmm2,%xmm5 |
| pslld $2,%xmm6 |
| pslld $2,%xmm5 |
| paddd %xmm1,%xmm6 |
| paddd %xmm2,%xmm5 |
| movdqu %xmm6,80(%edi) |
| movdqu %xmm5,96(%edi) |
| movdqa %xmm3,%xmm6 |
| movdqa %xmm4,%xmm5 |
| pslld $2,%xmm6 |
| pslld $2,%xmm5 |
| paddd %xmm3,%xmm6 |
| paddd %xmm4,%xmm5 |
| movdqu %xmm6,112(%edi) |
| movdqu %xmm5,128(%edi) |
| movl %ebp,%esp |
| leal -48(%edi),%edi |
| ret |
| .align 5,0x90 |
| .private_extern __poly1305_blocks_sse2 |
| .align 4 |
| __poly1305_blocks_sse2: |
| pushl %ebp |
| pushl %ebx |
| pushl %esi |
| pushl %edi |
| movl 20(%esp),%edi |
| movl 24(%esp),%esi |
| movl 28(%esp),%ecx |
| movl 20(%edi),%eax |
| andl $-16,%ecx |
| jz L006nodata |
| cmpl $64,%ecx |
| jae L007enter_sse2 |
| testl %eax,%eax |
| jz Lenter_blocks |
| .align 4,0x90 |
| L007enter_sse2: |
| call L008pic_point |
| L008pic_point: |
| popl %ebx |
| leal Lconst_sse2-L008pic_point(%ebx),%ebx |
| testl %eax,%eax |
| jnz L009base2_26 |
| call __poly1305_init_sse2 |
| movl (%edi),%eax |
| movl 3(%edi),%ecx |
| movl 6(%edi),%edx |
| movl 9(%edi),%esi |
| movl 13(%edi),%ebp |
| movl $1,20(%edi) |
| shrl $2,%ecx |
| andl $67108863,%eax |
| shrl $4,%edx |
| andl $67108863,%ecx |
| shrl $6,%esi |
| andl $67108863,%edx |
| movd %eax,%xmm0 |
| movd %ecx,%xmm1 |
| movd %edx,%xmm2 |
| movd %esi,%xmm3 |
| movd %ebp,%xmm4 |
| movl 24(%esp),%esi |
| movl 28(%esp),%ecx |
| jmp L010base2_32 |
| .align 4,0x90 |
| L009base2_26: |
| movd (%edi),%xmm0 |
| movd 4(%edi),%xmm1 |
| movd 8(%edi),%xmm2 |
| movd 12(%edi),%xmm3 |
| movd 16(%edi),%xmm4 |
| movdqa 64(%ebx),%xmm7 |
| L010base2_32: |
| movl 32(%esp),%eax |
| movl %esp,%ebp |
| subl $528,%esp |
| andl $-16,%esp |
| leal 48(%edi),%edi |
| shll $24,%eax |
| testl $31,%ecx |
| jz L011even |
| movdqu (%esi),%xmm6 |
| leal 16(%esi),%esi |
| movdqa %xmm6,%xmm5 |
| pand %xmm7,%xmm6 |
| paddd %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| psrlq $26,%xmm5 |
| psrldq $6,%xmm6 |
| pand %xmm7,%xmm5 |
| paddd %xmm5,%xmm1 |
| movdqa %xmm6,%xmm5 |
| psrlq $4,%xmm6 |
| pand %xmm7,%xmm6 |
| paddd %xmm6,%xmm2 |
| movdqa %xmm5,%xmm6 |
| psrlq $30,%xmm5 |
| pand %xmm7,%xmm5 |
| psrldq $7,%xmm6 |
| paddd %xmm5,%xmm3 |
| movd %eax,%xmm5 |
| paddd %xmm6,%xmm4 |
| movd 12(%edi),%xmm6 |
| paddd %xmm5,%xmm4 |
| movdqa %xmm0,(%esp) |
| movdqa %xmm1,16(%esp) |
| movdqa %xmm2,32(%esp) |
| movdqa %xmm3,48(%esp) |
| movdqa %xmm4,64(%esp) |
| pmuludq %xmm6,%xmm0 |
| pmuludq %xmm6,%xmm1 |
| pmuludq %xmm6,%xmm2 |
| movd 28(%edi),%xmm5 |
| pmuludq %xmm6,%xmm3 |
| pmuludq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 48(%esp),%xmm5 |
| movdqa %xmm6,%xmm7 |
| pmuludq 32(%esp),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%esp),%xmm7 |
| paddq %xmm6,%xmm3 |
| movd 92(%edi),%xmm6 |
| pmuludq (%esp),%xmm5 |
| paddq %xmm7,%xmm2 |
| pmuludq 64(%esp),%xmm6 |
| movd 44(%edi),%xmm7 |
| paddq %xmm5,%xmm1 |
| movdqa %xmm7,%xmm5 |
| pmuludq 32(%esp),%xmm7 |
| paddq %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| pmuludq 16(%esp),%xmm5 |
| paddq %xmm7,%xmm4 |
| movd 108(%edi),%xmm7 |
| pmuludq (%esp),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 64(%esp),%xmm7 |
| paddq %xmm6,%xmm2 |
| pmuludq 48(%esp),%xmm5 |
| movd 60(%edi),%xmm6 |
| paddq %xmm7,%xmm1 |
| movdqa %xmm6,%xmm7 |
| pmuludq 16(%esp),%xmm6 |
| paddq %xmm5,%xmm0 |
| movd 124(%edi),%xmm5 |
| pmuludq (%esp),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 64(%esp),%xmm5 |
| paddq %xmm7,%xmm3 |
| movdqa %xmm6,%xmm7 |
| pmuludq 48(%esp),%xmm6 |
| paddq %xmm5,%xmm2 |
| pmuludq 32(%esp),%xmm7 |
| movd 76(%edi),%xmm5 |
| paddq %xmm6,%xmm1 |
| movd 140(%edi),%xmm6 |
| pmuludq (%esp),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq 64(%esp),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%esp),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 32(%esp),%xmm5 |
| paddq %xmm7,%xmm0 |
| pmuludq 48(%esp),%xmm6 |
| movdqa 64(%ebx),%xmm7 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| movdqa %xmm3,%xmm5 |
| pand %xmm7,%xmm3 |
| psrlq $26,%xmm5 |
| paddq %xmm4,%xmm5 |
| movdqa %xmm0,%xmm6 |
| pand %xmm7,%xmm0 |
| psrlq $26,%xmm6 |
| movdqa %xmm5,%xmm4 |
| paddq %xmm1,%xmm6 |
| psrlq $26,%xmm5 |
| pand %xmm7,%xmm4 |
| movdqa %xmm6,%xmm1 |
| psrlq $26,%xmm6 |
| paddd %xmm5,%xmm0 |
| psllq $2,%xmm5 |
| paddq %xmm2,%xmm6 |
| paddq %xmm0,%xmm5 |
| pand %xmm7,%xmm1 |
| movdqa %xmm6,%xmm2 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm2 |
| paddd %xmm3,%xmm6 |
| movdqa %xmm5,%xmm0 |
| psrlq $26,%xmm5 |
| movdqa %xmm6,%xmm3 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm0 |
| paddd %xmm5,%xmm1 |
| pand %xmm7,%xmm3 |
| paddd %xmm6,%xmm4 |
| subl $16,%ecx |
| jz L012done |
| L011even: |
| leal 384(%esp),%edx |
| leal -32(%esi),%eax |
| subl $64,%ecx |
| movdqu (%edi),%xmm5 |
| pshufd $68,%xmm5,%xmm6 |
| cmovbl %eax,%esi |
| pshufd $238,%xmm5,%xmm5 |
| movdqa %xmm6,(%edx) |
| leal 160(%esp),%eax |
| movdqu 16(%edi),%xmm6 |
| movdqa %xmm5,-144(%edx) |
| pshufd $68,%xmm6,%xmm5 |
| pshufd $238,%xmm6,%xmm6 |
| movdqa %xmm5,16(%edx) |
| movdqu 32(%edi),%xmm5 |
| movdqa %xmm6,-128(%edx) |
| pshufd $68,%xmm5,%xmm6 |
| pshufd $238,%xmm5,%xmm5 |
| movdqa %xmm6,32(%edx) |
| movdqu 48(%edi),%xmm6 |
| movdqa %xmm5,-112(%edx) |
| pshufd $68,%xmm6,%xmm5 |
| pshufd $238,%xmm6,%xmm6 |
| movdqa %xmm5,48(%edx) |
| movdqu 64(%edi),%xmm5 |
| movdqa %xmm6,-96(%edx) |
| pshufd $68,%xmm5,%xmm6 |
| pshufd $238,%xmm5,%xmm5 |
| movdqa %xmm6,64(%edx) |
| movdqu 80(%edi),%xmm6 |
| movdqa %xmm5,-80(%edx) |
| pshufd $68,%xmm6,%xmm5 |
| pshufd $238,%xmm6,%xmm6 |
| movdqa %xmm5,80(%edx) |
| movdqu 96(%edi),%xmm5 |
| movdqa %xmm6,-64(%edx) |
| pshufd $68,%xmm5,%xmm6 |
| pshufd $238,%xmm5,%xmm5 |
| movdqa %xmm6,96(%edx) |
| movdqu 112(%edi),%xmm6 |
| movdqa %xmm5,-48(%edx) |
| pshufd $68,%xmm6,%xmm5 |
| pshufd $238,%xmm6,%xmm6 |
| movdqa %xmm5,112(%edx) |
| movdqu 128(%edi),%xmm5 |
| movdqa %xmm6,-32(%edx) |
| pshufd $68,%xmm5,%xmm6 |
| pshufd $238,%xmm5,%xmm5 |
| movdqa %xmm6,128(%edx) |
| movdqa %xmm5,-16(%edx) |
| movdqu 32(%esi),%xmm5 |
| movdqu 48(%esi),%xmm6 |
| leal 32(%esi),%esi |
| movdqa %xmm2,112(%esp) |
| movdqa %xmm3,128(%esp) |
| movdqa %xmm4,144(%esp) |
| movdqa %xmm5,%xmm2 |
| movdqa %xmm6,%xmm3 |
| psrldq $6,%xmm2 |
| psrldq $6,%xmm3 |
| movdqa %xmm5,%xmm4 |
| punpcklqdq %xmm3,%xmm2 |
| punpckhqdq %xmm6,%xmm4 |
| punpcklqdq %xmm6,%xmm5 |
| movdqa %xmm2,%xmm3 |
| psrlq $4,%xmm2 |
| psrlq $30,%xmm3 |
| movdqa %xmm5,%xmm6 |
| psrlq $40,%xmm4 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm5 |
| pand %xmm7,%xmm6 |
| pand %xmm7,%xmm2 |
| pand %xmm7,%xmm3 |
| por (%ebx),%xmm4 |
| movdqa %xmm0,80(%esp) |
| movdqa %xmm1,96(%esp) |
| jbe L013skip_loop |
| jmp L014loop |
| .align 5,0x90 |
| L014loop: |
| movdqa -144(%edx),%xmm7 |
| movdqa %xmm6,16(%eax) |
| movdqa %xmm2,32(%eax) |
| movdqa %xmm3,48(%eax) |
| movdqa %xmm4,64(%eax) |
| movdqa %xmm5,%xmm1 |
| pmuludq %xmm7,%xmm5 |
| movdqa %xmm6,%xmm0 |
| pmuludq %xmm7,%xmm6 |
| pmuludq %xmm7,%xmm2 |
| pmuludq %xmm7,%xmm3 |
| pmuludq %xmm7,%xmm4 |
| pmuludq -16(%edx),%xmm0 |
| movdqa %xmm1,%xmm7 |
| pmuludq -128(%edx),%xmm1 |
| paddq %xmm5,%xmm0 |
| movdqa %xmm7,%xmm5 |
| pmuludq -112(%edx),%xmm7 |
| paddq %xmm6,%xmm1 |
| movdqa %xmm5,%xmm6 |
| pmuludq -96(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa 16(%eax),%xmm7 |
| pmuludq -80(%edx),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq -128(%edx),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq -112(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa 32(%eax),%xmm7 |
| pmuludq -96(%edx),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq -32(%edx),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq -16(%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq -128(%edx),%xmm6 |
| paddq %xmm5,%xmm1 |
| movdqa 48(%eax),%xmm5 |
| pmuludq -112(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq -48(%edx),%xmm5 |
| paddq %xmm7,%xmm4 |
| movdqa %xmm6,%xmm7 |
| pmuludq -32(%edx),%xmm6 |
| paddq %xmm5,%xmm0 |
| movdqa %xmm7,%xmm5 |
| pmuludq -16(%edx),%xmm7 |
| paddq %xmm6,%xmm1 |
| movdqa 64(%eax),%xmm6 |
| pmuludq -128(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa %xmm6,%xmm7 |
| pmuludq -16(%edx),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq -64(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq -48(%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa 64(%ebx),%xmm7 |
| pmuludq -32(%edx),%xmm6 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| movdqu -32(%esi),%xmm5 |
| movdqu -16(%esi),%xmm6 |
| leal 32(%esi),%esi |
| movdqa %xmm2,32(%esp) |
| movdqa %xmm3,48(%esp) |
| movdqa %xmm4,64(%esp) |
| movdqa %xmm5,%xmm2 |
| movdqa %xmm6,%xmm3 |
| psrldq $6,%xmm2 |
| psrldq $6,%xmm3 |
| movdqa %xmm5,%xmm4 |
| punpcklqdq %xmm3,%xmm2 |
| punpckhqdq %xmm6,%xmm4 |
| punpcklqdq %xmm6,%xmm5 |
| movdqa %xmm2,%xmm3 |
| psrlq $4,%xmm2 |
| psrlq $30,%xmm3 |
| movdqa %xmm5,%xmm6 |
| psrlq $40,%xmm4 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm5 |
| pand %xmm7,%xmm6 |
| pand %xmm7,%xmm2 |
| pand %xmm7,%xmm3 |
| por (%ebx),%xmm4 |
| leal -32(%esi),%eax |
| subl $64,%ecx |
| paddd 80(%esp),%xmm5 |
| paddd 96(%esp),%xmm6 |
| paddd 112(%esp),%xmm2 |
| paddd 128(%esp),%xmm3 |
| paddd 144(%esp),%xmm4 |
| cmovbl %eax,%esi |
| leal 160(%esp),%eax |
| movdqa (%edx),%xmm7 |
| movdqa %xmm1,16(%esp) |
| movdqa %xmm6,16(%eax) |
| movdqa %xmm2,32(%eax) |
| movdqa %xmm3,48(%eax) |
| movdqa %xmm4,64(%eax) |
| movdqa %xmm5,%xmm1 |
| pmuludq %xmm7,%xmm5 |
| paddq %xmm0,%xmm5 |
| movdqa %xmm6,%xmm0 |
| pmuludq %xmm7,%xmm6 |
| pmuludq %xmm7,%xmm2 |
| pmuludq %xmm7,%xmm3 |
| pmuludq %xmm7,%xmm4 |
| paddq 16(%esp),%xmm6 |
| paddq 32(%esp),%xmm2 |
| paddq 48(%esp),%xmm3 |
| paddq 64(%esp),%xmm4 |
| pmuludq 128(%edx),%xmm0 |
| movdqa %xmm1,%xmm7 |
| pmuludq 16(%edx),%xmm1 |
| paddq %xmm5,%xmm0 |
| movdqa %xmm7,%xmm5 |
| pmuludq 32(%edx),%xmm7 |
| paddq %xmm6,%xmm1 |
| movdqa %xmm5,%xmm6 |
| pmuludq 48(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa 16(%eax),%xmm7 |
| pmuludq 64(%edx),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%edx),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 32(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa 32(%eax),%xmm7 |
| pmuludq 48(%edx),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 112(%edx),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 128(%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq 16(%edx),%xmm6 |
| paddq %xmm5,%xmm1 |
| movdqa 48(%eax),%xmm5 |
| pmuludq 32(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 96(%edx),%xmm5 |
| paddq %xmm7,%xmm4 |
| movdqa %xmm6,%xmm7 |
| pmuludq 112(%edx),%xmm6 |
| paddq %xmm5,%xmm0 |
| movdqa %xmm7,%xmm5 |
| pmuludq 128(%edx),%xmm7 |
| paddq %xmm6,%xmm1 |
| movdqa 64(%eax),%xmm6 |
| pmuludq 16(%edx),%xmm5 |
| paddq %xmm7,%xmm2 |
| movdqa %xmm6,%xmm7 |
| pmuludq 128(%edx),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 80(%edx),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 96(%edx),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa 64(%ebx),%xmm7 |
| pmuludq 112(%edx),%xmm6 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| movdqa %xmm3,%xmm5 |
| pand %xmm7,%xmm3 |
| psrlq $26,%xmm5 |
| paddq %xmm4,%xmm5 |
| movdqa %xmm0,%xmm6 |
| pand %xmm7,%xmm0 |
| psrlq $26,%xmm6 |
| movdqa %xmm5,%xmm4 |
| paddq %xmm1,%xmm6 |
| psrlq $26,%xmm5 |
| pand %xmm7,%xmm4 |
| movdqa %xmm6,%xmm1 |
| psrlq $26,%xmm6 |
| paddd %xmm5,%xmm0 |
| psllq $2,%xmm5 |
| paddq %xmm2,%xmm6 |
| paddq %xmm0,%xmm5 |
| pand %xmm7,%xmm1 |
| movdqa %xmm6,%xmm2 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm2 |
| paddd %xmm3,%xmm6 |
| movdqa %xmm5,%xmm0 |
| psrlq $26,%xmm5 |
| movdqa %xmm6,%xmm3 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm0 |
| paddd %xmm5,%xmm1 |
| pand %xmm7,%xmm3 |
| paddd %xmm6,%xmm4 |
| movdqu 32(%esi),%xmm5 |
| movdqu 48(%esi),%xmm6 |
| leal 32(%esi),%esi |
| movdqa %xmm2,112(%esp) |
| movdqa %xmm3,128(%esp) |
| movdqa %xmm4,144(%esp) |
| movdqa %xmm5,%xmm2 |
| movdqa %xmm6,%xmm3 |
| psrldq $6,%xmm2 |
| psrldq $6,%xmm3 |
| movdqa %xmm5,%xmm4 |
| punpcklqdq %xmm3,%xmm2 |
| punpckhqdq %xmm6,%xmm4 |
| punpcklqdq %xmm6,%xmm5 |
| movdqa %xmm2,%xmm3 |
| psrlq $4,%xmm2 |
| psrlq $30,%xmm3 |
| movdqa %xmm5,%xmm6 |
| psrlq $40,%xmm4 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm5 |
| pand %xmm7,%xmm6 |
| pand %xmm7,%xmm2 |
| pand %xmm7,%xmm3 |
| por (%ebx),%xmm4 |
| movdqa %xmm0,80(%esp) |
| movdqa %xmm1,96(%esp) |
| ja L014loop |
| L013skip_loop: |
| pshufd $16,-144(%edx),%xmm7 |
| addl $32,%ecx |
| jnz L015long_tail |
| paddd %xmm0,%xmm5 |
| paddd %xmm1,%xmm6 |
| paddd 112(%esp),%xmm2 |
| paddd 128(%esp),%xmm3 |
| paddd 144(%esp),%xmm4 |
| L015long_tail: |
| movdqa %xmm5,(%eax) |
| movdqa %xmm6,16(%eax) |
| movdqa %xmm2,32(%eax) |
| movdqa %xmm3,48(%eax) |
| movdqa %xmm4,64(%eax) |
| pmuludq %xmm7,%xmm5 |
| pmuludq %xmm7,%xmm6 |
| pmuludq %xmm7,%xmm2 |
| movdqa %xmm5,%xmm0 |
| pshufd $16,-128(%edx),%xmm5 |
| pmuludq %xmm7,%xmm3 |
| movdqa %xmm6,%xmm1 |
| pmuludq %xmm7,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 48(%eax),%xmm5 |
| movdqa %xmm6,%xmm7 |
| pmuludq 32(%eax),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%eax),%xmm7 |
| paddq %xmm6,%xmm3 |
| pshufd $16,-64(%edx),%xmm6 |
| pmuludq (%eax),%xmm5 |
| paddq %xmm7,%xmm2 |
| pmuludq 64(%eax),%xmm6 |
| pshufd $16,-112(%edx),%xmm7 |
| paddq %xmm5,%xmm1 |
| movdqa %xmm7,%xmm5 |
| pmuludq 32(%eax),%xmm7 |
| paddq %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| pmuludq 16(%eax),%xmm5 |
| paddq %xmm7,%xmm4 |
| pshufd $16,-48(%edx),%xmm7 |
| pmuludq (%eax),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 64(%eax),%xmm7 |
| paddq %xmm6,%xmm2 |
| pmuludq 48(%eax),%xmm5 |
| pshufd $16,-96(%edx),%xmm6 |
| paddq %xmm7,%xmm1 |
| movdqa %xmm6,%xmm7 |
| pmuludq 16(%eax),%xmm6 |
| paddq %xmm5,%xmm0 |
| pshufd $16,-32(%edx),%xmm5 |
| pmuludq (%eax),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 64(%eax),%xmm5 |
| paddq %xmm7,%xmm3 |
| movdqa %xmm6,%xmm7 |
| pmuludq 48(%eax),%xmm6 |
| paddq %xmm5,%xmm2 |
| pmuludq 32(%eax),%xmm7 |
| pshufd $16,-80(%edx),%xmm5 |
| paddq %xmm6,%xmm1 |
| pshufd $16,-16(%edx),%xmm6 |
| pmuludq (%eax),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq 64(%eax),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%eax),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 32(%eax),%xmm5 |
| paddq %xmm7,%xmm0 |
| pmuludq 48(%eax),%xmm6 |
| movdqa 64(%ebx),%xmm7 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| jz L016short_tail |
| movdqu -32(%esi),%xmm5 |
| movdqu -16(%esi),%xmm6 |
| leal 32(%esi),%esi |
| movdqa %xmm2,32(%esp) |
| movdqa %xmm3,48(%esp) |
| movdqa %xmm4,64(%esp) |
| movdqa %xmm5,%xmm2 |
| movdqa %xmm6,%xmm3 |
| psrldq $6,%xmm2 |
| psrldq $6,%xmm3 |
| movdqa %xmm5,%xmm4 |
| punpcklqdq %xmm3,%xmm2 |
| punpckhqdq %xmm6,%xmm4 |
| punpcklqdq %xmm6,%xmm5 |
| movdqa %xmm2,%xmm3 |
| psrlq $4,%xmm2 |
| psrlq $30,%xmm3 |
| movdqa %xmm5,%xmm6 |
| psrlq $40,%xmm4 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm5 |
| pand %xmm7,%xmm6 |
| pand %xmm7,%xmm2 |
| pand %xmm7,%xmm3 |
| por (%ebx),%xmm4 |
| pshufd $16,(%edx),%xmm7 |
| paddd 80(%esp),%xmm5 |
| paddd 96(%esp),%xmm6 |
| paddd 112(%esp),%xmm2 |
| paddd 128(%esp),%xmm3 |
| paddd 144(%esp),%xmm4 |
| movdqa %xmm5,(%esp) |
| pmuludq %xmm7,%xmm5 |
| movdqa %xmm6,16(%esp) |
| pmuludq %xmm7,%xmm6 |
| paddq %xmm5,%xmm0 |
| movdqa %xmm2,%xmm5 |
| pmuludq %xmm7,%xmm2 |
| paddq %xmm6,%xmm1 |
| movdqa %xmm3,%xmm6 |
| pmuludq %xmm7,%xmm3 |
| paddq 32(%esp),%xmm2 |
| movdqa %xmm5,32(%esp) |
| pshufd $16,16(%edx),%xmm5 |
| paddq 48(%esp),%xmm3 |
| movdqa %xmm6,48(%esp) |
| movdqa %xmm4,%xmm6 |
| pmuludq %xmm7,%xmm4 |
| paddq 64(%esp),%xmm4 |
| movdqa %xmm6,64(%esp) |
| movdqa %xmm5,%xmm6 |
| pmuludq 48(%esp),%xmm5 |
| movdqa %xmm6,%xmm7 |
| pmuludq 32(%esp),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%esp),%xmm7 |
| paddq %xmm6,%xmm3 |
| pshufd $16,80(%edx),%xmm6 |
| pmuludq (%esp),%xmm5 |
| paddq %xmm7,%xmm2 |
| pmuludq 64(%esp),%xmm6 |
| pshufd $16,32(%edx),%xmm7 |
| paddq %xmm5,%xmm1 |
| movdqa %xmm7,%xmm5 |
| pmuludq 32(%esp),%xmm7 |
| paddq %xmm6,%xmm0 |
| movdqa %xmm5,%xmm6 |
| pmuludq 16(%esp),%xmm5 |
| paddq %xmm7,%xmm4 |
| pshufd $16,96(%edx),%xmm7 |
| pmuludq (%esp),%xmm6 |
| paddq %xmm5,%xmm3 |
| movdqa %xmm7,%xmm5 |
| pmuludq 64(%esp),%xmm7 |
| paddq %xmm6,%xmm2 |
| pmuludq 48(%esp),%xmm5 |
| pshufd $16,48(%edx),%xmm6 |
| paddq %xmm7,%xmm1 |
| movdqa %xmm6,%xmm7 |
| pmuludq 16(%esp),%xmm6 |
| paddq %xmm5,%xmm0 |
| pshufd $16,112(%edx),%xmm5 |
| pmuludq (%esp),%xmm7 |
| paddq %xmm6,%xmm4 |
| movdqa %xmm5,%xmm6 |
| pmuludq 64(%esp),%xmm5 |
| paddq %xmm7,%xmm3 |
| movdqa %xmm6,%xmm7 |
| pmuludq 48(%esp),%xmm6 |
| paddq %xmm5,%xmm2 |
| pmuludq 32(%esp),%xmm7 |
| pshufd $16,64(%edx),%xmm5 |
| paddq %xmm6,%xmm1 |
| pshufd $16,128(%edx),%xmm6 |
| pmuludq (%esp),%xmm5 |
| paddq %xmm7,%xmm0 |
| movdqa %xmm6,%xmm7 |
| pmuludq 64(%esp),%xmm6 |
| paddq %xmm5,%xmm4 |
| movdqa %xmm7,%xmm5 |
| pmuludq 16(%esp),%xmm7 |
| paddq %xmm6,%xmm3 |
| movdqa %xmm5,%xmm6 |
| pmuludq 32(%esp),%xmm5 |
| paddq %xmm7,%xmm0 |
| pmuludq 48(%esp),%xmm6 |
| movdqa 64(%ebx),%xmm7 |
| paddq %xmm5,%xmm1 |
| paddq %xmm6,%xmm2 |
| L016short_tail: |
| pshufd $78,%xmm4,%xmm6 |
| pshufd $78,%xmm3,%xmm5 |
| paddq %xmm6,%xmm4 |
| paddq %xmm5,%xmm3 |
| pshufd $78,%xmm0,%xmm6 |
| pshufd $78,%xmm1,%xmm5 |
| paddq %xmm6,%xmm0 |
| paddq %xmm5,%xmm1 |
| pshufd $78,%xmm2,%xmm6 |
| movdqa %xmm3,%xmm5 |
| pand %xmm7,%xmm3 |
| psrlq $26,%xmm5 |
| paddq %xmm6,%xmm2 |
| paddq %xmm4,%xmm5 |
| movdqa %xmm0,%xmm6 |
| pand %xmm7,%xmm0 |
| psrlq $26,%xmm6 |
| movdqa %xmm5,%xmm4 |
| paddq %xmm1,%xmm6 |
| psrlq $26,%xmm5 |
| pand %xmm7,%xmm4 |
| movdqa %xmm6,%xmm1 |
| psrlq $26,%xmm6 |
| paddd %xmm5,%xmm0 |
| psllq $2,%xmm5 |
| paddq %xmm2,%xmm6 |
| paddq %xmm0,%xmm5 |
| pand %xmm7,%xmm1 |
| movdqa %xmm6,%xmm2 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm2 |
| paddd %xmm3,%xmm6 |
| movdqa %xmm5,%xmm0 |
| psrlq $26,%xmm5 |
| movdqa %xmm6,%xmm3 |
| psrlq $26,%xmm6 |
| pand %xmm7,%xmm0 |
| paddd %xmm5,%xmm1 |
| pand %xmm7,%xmm3 |
| paddd %xmm6,%xmm4 |
| L012done: |
| movd %xmm0,-48(%edi) |
| movd %xmm1,-44(%edi) |
| movd %xmm2,-40(%edi) |
| movd %xmm3,-36(%edi) |
| movd %xmm4,-32(%edi) |
| movl %ebp,%esp |
| L006nodata: |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| .align 5,0x90 |
| .private_extern __poly1305_emit_sse2 |
| .align 4 |
| __poly1305_emit_sse2: |
| pushl %ebp |
| pushl %ebx |
| pushl %esi |
| pushl %edi |
| movl 20(%esp),%ebp |
| cmpl $0,20(%ebp) |
| je Lenter_emit |
| movl (%ebp),%eax |
| movl 4(%ebp),%edi |
| movl 8(%ebp),%ecx |
| movl 12(%ebp),%edx |
| movl 16(%ebp),%esi |
| movl %edi,%ebx |
| shll $26,%edi |
| shrl $6,%ebx |
| addl %edi,%eax |
| movl %ecx,%edi |
| adcl $0,%ebx |
| shll $20,%edi |
| shrl $12,%ecx |
| addl %edi,%ebx |
| movl %edx,%edi |
| adcl $0,%ecx |
| shll $14,%edi |
| shrl $18,%edx |
| addl %edi,%ecx |
| movl %esi,%edi |
| adcl $0,%edx |
| shll $8,%edi |
| shrl $24,%esi |
| addl %edi,%edx |
| adcl $0,%esi |
| movl %esi,%edi |
| andl $3,%esi |
| shrl $2,%edi |
| leal (%edi,%edi,4),%ebp |
| movl 24(%esp),%edi |
| addl %ebp,%eax |
| movl 28(%esp),%ebp |
| adcl $0,%ebx |
| adcl $0,%ecx |
| adcl $0,%edx |
| adcl $0,%esi |
| movd %eax,%xmm0 |
| addl $5,%eax |
| movd %ebx,%xmm1 |
| adcl $0,%ebx |
| movd %ecx,%xmm2 |
| adcl $0,%ecx |
| movd %edx,%xmm3 |
| adcl $0,%edx |
| adcl $0,%esi |
| shrl $2,%esi |
| negl %esi |
| andl %esi,%eax |
| andl %esi,%ebx |
| andl %esi,%ecx |
| andl %esi,%edx |
| movl %eax,(%edi) |
| movd %xmm0,%eax |
| movl %ebx,4(%edi) |
| movd %xmm1,%ebx |
| movl %ecx,8(%edi) |
| movd %xmm2,%ecx |
| movl %edx,12(%edi) |
| movd %xmm3,%edx |
| notl %esi |
| andl %esi,%eax |
| andl %esi,%ebx |
| orl (%edi),%eax |
| andl %esi,%ecx |
| orl 4(%edi),%ebx |
| andl %esi,%edx |
| orl 8(%edi),%ecx |
| orl 12(%edi),%edx |
| addl (%ebp),%eax |
| adcl 4(%ebp),%ebx |
| movl %eax,(%edi) |
| adcl 8(%ebp),%ecx |
| movl %ebx,4(%edi) |
| adcl 12(%ebp),%edx |
| movl %ecx,8(%edi) |
| movl %edx,12(%edi) |
| popl %edi |
| popl %esi |
| popl %ebx |
| popl %ebp |
| ret |
| .align 6,0x90 |
| Lconst_sse2: |
| .long 16777216,0,16777216,0,16777216,0,16777216,0 |
| .long 0,0,0,0,0,0,0,0 |
| .long 67108863,0,67108863,0,67108863,0,67108863,0 |
| .long 268435455,268435452,268435452,268435452 |
| .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 |
| .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 |
| .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 |
| .byte 114,103,62,0 |
| .align 2,0x90 |
| .section __IMPORT,__pointers,non_lazy_symbol_pointers |
| L_GFp_ia32cap_P$non_lazy_ptr: |
| .indirect_symbol _GFp_ia32cap_P |
| .long 0 |
| #endif |