| // This file is generated from a similarly-named Perl script in the BoringSSL |
| // source tree. Do not edit by hand. |
| |
| #if defined(__has_feature) |
| #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
| #define OPENSSL_NO_ASM |
| #endif |
| #endif |
| |
| #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
| #if defined(BORINGSSL_PREFIX) |
| #include <boringssl_prefix_symbols_asm.h> |
| #endif |
| .text |
| .extern OPENSSL_ia32cap_P |
| .hidden OPENSSL_ia32cap_P |
| |
| // Read-only constant pool shared by the SSE and AVX2 code paths below. |
| chacha20_poly1305_constants: |
| |
| .align 64 |
| // ChaCha20 "sigma" constant ("expand 32-byte k"), duplicated to 32 bytes |
| // so the same table also serves two-lane (256-bit) loads. |
| .Lchacha20_consts: |
| .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
| .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
| // pshufb shuffle mask rotating each 32-bit lane left by 8 bits. |
| .Lrol8: |
| .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 |
| .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 |
| // pshufb shuffle mask rotating each 32-bit lane left by 16 bits. |
| .Lrol16: |
| .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 |
| .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 |
| // Block-counter adjustments: zero initializer, +1 per SSE block, and |
| // +2 per 128-bit lane for the two-blocks-per-register AVX2 path. |
| .Lavx2_init: |
| .long 0,0,0,0 |
| .Lsse_inc: |
| .long 1,0,0,0 |
| .Lavx2_inc: |
| .long 2,0,0,0,2,0,0,0 |
| // Poly1305 key clamp: the first 16 bytes mask the "r" half of the key |
| // (RFC 8439 clamping); the all-ones quadwords leave the adjacent bytes |
| // unchanged when a wider mask is applied. |
| .Lclamp: |
| .quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC |
| .quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF |
| .align 16 |
| // Byte masks keeping the first 1..16 bytes of a 16-byte block (row N, |
| // 1-based, keeps N bytes). Used for masking partial blocks. |
| .Land_masks: |
| .byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 |
| .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff |
| |
| // --------------------------------------------------------------------- |
| // poly_hash_ad_internal: absorb the additional data (AD) into the |
| // Poly1305 accumulator held in registers. |
| //   In:   %rcx = AD pointer, %r8 = AD length in bytes, |
| //         %rbp = state frame; 0(%rbp) and 8(%rbp) hold the clamped |
| //         Poly1305 key limbs r0/r1 (stored there by the caller). |
| //   Out:  accumulator h in %r10 (low 64), %r11 (mid 64), %r12 (top). |
| //   Clobbers: %rax, %rdx, %r9, %r13-%r15, flags. |
| // --------------------------------------------------------------------- |
| .type poly_hash_ad_internal,@function |
| .align 64 |
| poly_hash_ad_internal: |
| .cfi_startproc |
| // CFA = %rsp + 8: only the return address is on the stack here. |
| .cfi_def_cfa rsp, 8 |
| // h = 0. |
| xorq %r10,%r10 |
| xorq %r11,%r11 |
| xorq %r12,%r12 |
| // Fast path for exactly 13 bytes of AD (the TLS record header length). |
| cmpq $13,%r8 |
| jne .Lhash_ad_loop |
| .Lpoly_fast_tls_ad: |
| |
| // Load 13 bytes with two overlapping 8-byte reads (bytes 0-7 and 5-12), |
| // then shift out the 3 duplicated low bytes of the second read. |
| movq (%rcx),%r10 |
| movq 5(%rcx),%r11 |
| shrq $24,%r11 |
| // Top limb = 1: the 2^128 padding bit for this zero-padded block. |
| movq $1,%r12 |
| // h = (h * r) mod 2^130 - 5: schoolbook 2x2-limb multiply with the |
| // clamped key limbs r0 (0(%rbp)) and r1 (8(%rbp)). |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| // Partial reduction: keep the low 130 bits and fold the excess back in |
| // as (top >> 2) * 5, computed as x4 (the $-4 mask) + x1 (the shift). |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| // Two-byte "rep ret" encoding. |
| .byte 0xf3,0xc3 |
| // General path: absorb one full 16-byte block per iteration. |
| .Lhash_ad_loop: |
| |
| cmpq $16,%r8 |
| jb .Lhash_ad_tail |
| // h += block, plus the 2^128 pad bit. |
| addq 0+0(%rcx),%r10 |
| adcq 8+0(%rcx),%r11 |
| adcq $1,%r12 |
| // h = (h * r) mod 2^130 - 5 (same multiply/reduce sequence as above). |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| leaq 16(%rcx),%rcx |
| subq $16,%r8 |
| jmp .Lhash_ad_loop |
| // 1-15 trailing bytes remain (or zero, in which case we are done). |
| .Lhash_ad_tail: |
| cmpq $0,%r8 |
| je .Lhash_ad_done |
| |
| // Assemble the tail into %r14:%r13, reading backwards from the end of |
| // the AD and shifting previously read bytes up by 8 bits each step. |
| xorq %r13,%r13 |
| xorq %r14,%r14 |
| xorq %r15,%r15 |
| addq %r8,%rcx |
| .Lhash_ad_tail_loop: |
| shldq $8,%r13,%r14 |
| shlq $8,%r13 |
| movzbq -1(%rcx),%r15 |
| xorq %r15,%r13 |
| decq %rcx |
| decq %r8 |
| jne .Lhash_ad_tail_loop |
| |
| // h += tail block (treated as zero-padded), plus the 2^128 pad bit. |
| addq %r13,%r10 |
| adcq %r14,%r11 |
| adcq $1,%r12 |
| // h = (h * r) mod 2^130 - 5 (same multiply/reduce sequence as above). |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| |
| .Lhash_ad_done: |
| // Two-byte "rep ret" encoding. |
| .byte 0xf3,0xc3 |
| .cfi_endproc |
| .size poly_hash_ad_internal, .-poly_hash_ad_internal |
| |
| .globl chacha20_poly1305_open |
| .hidden chacha20_poly1305_open |
| .type chacha20_poly1305_open,@function |
| .align 64 |
| chacha20_poly1305_open: |
| .cfi_startproc |
| pushq %rbp |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %rbp,-16 |
| pushq %rbx |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %rbx,-24 |
| pushq %r12 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r12,-32 |
| pushq %r13 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r13,-40 |
| pushq %r14 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r14,-48 |
| pushq %r15 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r15,-56 |
| |
| |
| pushq %r9 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r9,-64 |
| subq $288 + 0 + 32,%rsp |
| .cfi_adjust_cfa_offset 288 + 32 |
| |
| leaq 32(%rsp),%rbp |
| andq $-32,%rbp |
| |
| movq %rdx,%rbx |
| movq %r8,0+0+32(%rbp) |
| movq %rbx,8+0+32(%rbp) |
| |
| movl OPENSSL_ia32cap_P+8(%rip),%eax |
| andl $288,%eax |
| xorl $288,%eax |
| jz chacha20_poly1305_open_avx2 |
| |
| cmpq $128,%rbx |
| jbe .Lopen_sse_128 |
| |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqu 0(%r9),%xmm4 |
| movdqu 16(%r9),%xmm8 |
| movdqu 32(%r9),%xmm12 |
| |
| movdqa %xmm12,%xmm7 |
| |
| movdqa %xmm4,0+48(%rbp) |
| movdqa %xmm8,0+64(%rbp) |
| movdqa %xmm12,0+96(%rbp) |
| movq $10,%r10 |
| .Lopen_sse_init_rounds: |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| |
| decq %r10 |
| jne .Lopen_sse_init_rounds |
| |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| |
| pand .Lclamp(%rip),%xmm0 |
| movdqa %xmm0,0+0(%rbp) |
| movdqa %xmm4,0+16(%rbp) |
| |
| movq %r8,%r8 |
| call poly_hash_ad_internal |
| .Lopen_sse_main_loop: |
| cmpq $256,%rbx |
| jb .Lopen_sse_tail |
| |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm8,%xmm10 |
| movdqa %xmm0,%xmm3 |
| movdqa %xmm4,%xmm7 |
| movdqa %xmm8,%xmm11 |
| movdqa 0+96(%rbp),%xmm15 |
| paddd .Lsse_inc(%rip),%xmm15 |
| movdqa %xmm15,%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm14,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| movdqa %xmm15,0+144(%rbp) |
| |
| |
| |
| movq $4,%rcx |
| movq %rsi,%r8 |
| .Lopen_sse_main_loop_rounds: |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| addq 0+0(%r8),%r10 |
| adcq 8+0(%r8),%r11 |
| adcq $1,%r12 |
| |
| leaq 16(%r8),%r8 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| .byte 102,15,58,15,255,4 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,12 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| .byte 102,15,58,15,255,12 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,4 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| |
| decq %rcx |
| jge .Lopen_sse_main_loop_rounds |
| addq 0+0(%r8),%r10 |
| adcq 8+0(%r8),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| leaq 16(%r8),%r8 |
| cmpq $-6,%rcx |
| jg .Lopen_sse_main_loop_rounds |
| paddd .Lchacha20_consts(%rip),%xmm3 |
| paddd 0+48(%rbp),%xmm7 |
| paddd 0+64(%rbp),%xmm11 |
| paddd 0+144(%rbp),%xmm15 |
| paddd .Lchacha20_consts(%rip),%xmm2 |
| paddd 0+48(%rbp),%xmm6 |
| paddd 0+64(%rbp),%xmm10 |
| paddd 0+128(%rbp),%xmm14 |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd 0+48(%rbp),%xmm5 |
| paddd 0+64(%rbp),%xmm9 |
| paddd 0+112(%rbp),%xmm13 |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| movdqa %xmm12,0+80(%rbp) |
| movdqu 0 + 0(%rsi),%xmm12 |
| pxor %xmm3,%xmm12 |
| movdqu %xmm12,0 + 0(%rdi) |
| movdqu 16 + 0(%rsi),%xmm12 |
| pxor %xmm7,%xmm12 |
| movdqu %xmm12,16 + 0(%rdi) |
| movdqu 32 + 0(%rsi),%xmm12 |
| pxor %xmm11,%xmm12 |
| movdqu %xmm12,32 + 0(%rdi) |
| movdqu 48 + 0(%rsi),%xmm12 |
| pxor %xmm15,%xmm12 |
| movdqu %xmm12,48 + 0(%rdi) |
| movdqu 0 + 64(%rsi),%xmm3 |
| movdqu 16 + 64(%rsi),%xmm7 |
| movdqu 32 + 64(%rsi),%xmm11 |
| movdqu 48 + 64(%rsi),%xmm15 |
| pxor %xmm3,%xmm2 |
| pxor %xmm7,%xmm6 |
| pxor %xmm11,%xmm10 |
| pxor %xmm14,%xmm15 |
| movdqu %xmm2,0 + 64(%rdi) |
| movdqu %xmm6,16 + 64(%rdi) |
| movdqu %xmm10,32 + 64(%rdi) |
| movdqu %xmm15,48 + 64(%rdi) |
| movdqu 0 + 128(%rsi),%xmm3 |
| movdqu 16 + 128(%rsi),%xmm7 |
| movdqu 32 + 128(%rsi),%xmm11 |
| movdqu 48 + 128(%rsi),%xmm15 |
| pxor %xmm3,%xmm1 |
| pxor %xmm7,%xmm5 |
| pxor %xmm11,%xmm9 |
| pxor %xmm13,%xmm15 |
| movdqu %xmm1,0 + 128(%rdi) |
| movdqu %xmm5,16 + 128(%rdi) |
| movdqu %xmm9,32 + 128(%rdi) |
| movdqu %xmm15,48 + 128(%rdi) |
| movdqu 0 + 192(%rsi),%xmm3 |
| movdqu 16 + 192(%rsi),%xmm7 |
| movdqu 32 + 192(%rsi),%xmm11 |
| movdqu 48 + 192(%rsi),%xmm15 |
| pxor %xmm3,%xmm0 |
| pxor %xmm7,%xmm4 |
| pxor %xmm11,%xmm8 |
| pxor 0+80(%rbp),%xmm15 |
| movdqu %xmm0,0 + 192(%rdi) |
| movdqu %xmm4,16 + 192(%rdi) |
| movdqu %xmm8,32 + 192(%rdi) |
| movdqu %xmm15,48 + 192(%rdi) |
| |
| leaq 256(%rsi),%rsi |
| leaq 256(%rdi),%rdi |
| subq $256,%rbx |
| jmp .Lopen_sse_main_loop |
| .Lopen_sse_tail: |
| |
| testq %rbx,%rbx |
| jz .Lopen_sse_finalize |
| cmpq $192,%rbx |
| ja .Lopen_sse_tail_256 |
| cmpq $128,%rbx |
| ja .Lopen_sse_tail_192 |
| cmpq $64,%rbx |
| ja .Lopen_sse_tail_128 |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa 0+96(%rbp),%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| |
| xorq %r8,%r8 |
| movq %rbx,%rcx |
| cmpq $16,%rcx |
| jb .Lopen_sse_tail_64_rounds |
| .Lopen_sse_tail_64_rounds_and_x1hash: |
| addq 0+0(%rsi,%r8,1),%r10 |
| adcq 8+0(%rsi,%r8,1),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| subq $16,%rcx |
| .Lopen_sse_tail_64_rounds: |
| addq $16,%r8 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| |
| cmpq $16,%rcx |
| jae .Lopen_sse_tail_64_rounds_and_x1hash |
| cmpq $160,%r8 |
| jne .Lopen_sse_tail_64_rounds |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| |
| jmp .Lopen_sse_tail_64_dec_loop |
| |
| .Lopen_sse_tail_128: |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa 0+96(%rbp),%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| |
| movq %rbx,%rcx |
| andq $-16,%rcx |
| xorq %r8,%r8 |
| .Lopen_sse_tail_128_rounds_and_x1hash: |
| addq 0+0(%rsi,%r8,1),%r10 |
| adcq 8+0(%rsi,%r8,1),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| .Lopen_sse_tail_128_rounds: |
| addq $16,%r8 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| |
| cmpq %rcx,%r8 |
| jb .Lopen_sse_tail_128_rounds_and_x1hash |
| cmpq $160,%r8 |
| jne .Lopen_sse_tail_128_rounds |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd 0+48(%rbp),%xmm5 |
| paddd 0+64(%rbp),%xmm9 |
| paddd 0+112(%rbp),%xmm13 |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| movdqu 0 + 0(%rsi),%xmm3 |
| movdqu 16 + 0(%rsi),%xmm7 |
| movdqu 32 + 0(%rsi),%xmm11 |
| movdqu 48 + 0(%rsi),%xmm15 |
| pxor %xmm3,%xmm1 |
| pxor %xmm7,%xmm5 |
| pxor %xmm11,%xmm9 |
| pxor %xmm13,%xmm15 |
| movdqu %xmm1,0 + 0(%rdi) |
| movdqu %xmm5,16 + 0(%rdi) |
| movdqu %xmm9,32 + 0(%rdi) |
| movdqu %xmm15,48 + 0(%rdi) |
| |
| subq $64,%rbx |
| leaq 64(%rsi),%rsi |
| leaq 64(%rdi),%rdi |
| jmp .Lopen_sse_tail_64_dec_loop |
| |
| .Lopen_sse_tail_192: |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm8,%xmm10 |
| movdqa 0+96(%rbp),%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm14,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| |
| movq %rbx,%rcx |
| movq $160,%r8 |
| cmpq $160,%rcx |
| cmovgq %r8,%rcx |
| andq $-16,%rcx |
| xorq %r8,%r8 |
| .Lopen_sse_tail_192_rounds_and_x1hash: |
| addq 0+0(%rsi,%r8,1),%r10 |
| adcq 8+0(%rsi,%r8,1),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| .Lopen_sse_tail_192_rounds: |
| addq $16,%r8 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm6 |
| pxor %xmm3,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm6 |
| pxor %xmm3,%xmm6 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm6 |
| pxor %xmm3,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm6 |
| pxor %xmm3,%xmm6 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| |
| cmpq %rcx,%r8 |
| jb .Lopen_sse_tail_192_rounds_and_x1hash |
| cmpq $160,%r8 |
| jne .Lopen_sse_tail_192_rounds |
| cmpq $176,%rbx |
| jb .Lopen_sse_tail_192_finish |
| addq 0+160(%rsi),%r10 |
| adcq 8+160(%rsi),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| cmpq $192,%rbx |
| jb .Lopen_sse_tail_192_finish |
| addq 0+176(%rsi),%r10 |
| adcq 8+176(%rsi),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| .Lopen_sse_tail_192_finish: |
| paddd .Lchacha20_consts(%rip),%xmm2 |
| paddd 0+48(%rbp),%xmm6 |
| paddd 0+64(%rbp),%xmm10 |
| paddd 0+128(%rbp),%xmm14 |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd 0+48(%rbp),%xmm5 |
| paddd 0+64(%rbp),%xmm9 |
| paddd 0+112(%rbp),%xmm13 |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| movdqu 0 + 0(%rsi),%xmm3 |
| movdqu 16 + 0(%rsi),%xmm7 |
| movdqu 32 + 0(%rsi),%xmm11 |
| movdqu 48 + 0(%rsi),%xmm15 |
| pxor %xmm3,%xmm2 |
| pxor %xmm7,%xmm6 |
| pxor %xmm11,%xmm10 |
| pxor %xmm14,%xmm15 |
| movdqu %xmm2,0 + 0(%rdi) |
| movdqu %xmm6,16 + 0(%rdi) |
| movdqu %xmm10,32 + 0(%rdi) |
| movdqu %xmm15,48 + 0(%rdi) |
| movdqu 0 + 64(%rsi),%xmm3 |
| movdqu 16 + 64(%rsi),%xmm7 |
| movdqu 32 + 64(%rsi),%xmm11 |
| movdqu 48 + 64(%rsi),%xmm15 |
| pxor %xmm3,%xmm1 |
| pxor %xmm7,%xmm5 |
| pxor %xmm11,%xmm9 |
| pxor %xmm13,%xmm15 |
| movdqu %xmm1,0 + 64(%rdi) |
| movdqu %xmm5,16 + 64(%rdi) |
| movdqu %xmm9,32 + 64(%rdi) |
| movdqu %xmm15,48 + 64(%rdi) |
| |
| subq $128,%rbx |
| leaq 128(%rsi),%rsi |
| leaq 128(%rdi),%rdi |
| jmp .Lopen_sse_tail_64_dec_loop |
| |
| .Lopen_sse_tail_256: |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm8,%xmm10 |
| movdqa %xmm0,%xmm3 |
| movdqa %xmm4,%xmm7 |
| movdqa %xmm8,%xmm11 |
| movdqa 0+96(%rbp),%xmm15 |
| paddd .Lsse_inc(%rip),%xmm15 |
| movdqa %xmm15,%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm14,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| movdqa %xmm15,0+144(%rbp) |
| |
| xorq %r8,%r8 |
| .Lopen_sse_tail_256_rounds_and_x1hash: |
| addq 0+0(%rsi,%r8,1),%r10 |
| adcq 8+0(%rsi,%r8,1),%r11 |
| adcq $1,%r12 |
| movdqa %xmm11,0+80(%rbp) |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm4 |
| pxor %xmm11,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm4 |
| pxor %xmm11,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm5 |
| pxor %xmm11,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm5 |
| pxor %xmm11,%xmm5 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm6 |
| pxor %xmm11,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm6 |
| pxor %xmm11,%xmm6 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| movdqa 0+80(%rbp),%xmm11 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movdqa %xmm9,0+80(%rbp) |
| paddd %xmm7,%xmm3 |
| pxor %xmm3,%xmm15 |
| pshufb .Lrol16(%rip),%xmm15 |
| paddd %xmm15,%xmm11 |
| pxor %xmm11,%xmm7 |
| movdqa %xmm7,%xmm9 |
| pslld $12,%xmm9 |
| psrld $20,%xmm7 |
| pxor %xmm9,%xmm7 |
| paddd %xmm7,%xmm3 |
| pxor %xmm3,%xmm15 |
| pshufb .Lrol8(%rip),%xmm15 |
| paddd %xmm15,%xmm11 |
| pxor %xmm11,%xmm7 |
| movdqa %xmm7,%xmm9 |
| pslld $7,%xmm9 |
| psrld $25,%xmm7 |
| pxor %xmm9,%xmm7 |
| .byte 102,15,58,15,255,4 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,12 |
| movdqa 0+80(%rbp),%xmm9 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| movdqa %xmm11,0+80(%rbp) |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm4 |
| pxor %xmm11,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm4 |
| pxor %xmm11,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm5 |
| pxor %xmm11,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm5 |
| pxor %xmm11,%xmm5 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm11 |
| pslld $12,%xmm11 |
| psrld $20,%xmm6 |
| pxor %xmm11,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm11 |
| pslld $7,%xmm11 |
| psrld $25,%xmm6 |
| pxor %xmm11,%xmm6 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| movdqa 0+80(%rbp),%xmm11 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| movdqa %xmm9,0+80(%rbp) |
| paddd %xmm7,%xmm3 |
| pxor %xmm3,%xmm15 |
| pshufb .Lrol16(%rip),%xmm15 |
| paddd %xmm15,%xmm11 |
| pxor %xmm11,%xmm7 |
| movdqa %xmm7,%xmm9 |
| pslld $12,%xmm9 |
| psrld $20,%xmm7 |
| pxor %xmm9,%xmm7 |
| paddd %xmm7,%xmm3 |
| pxor %xmm3,%xmm15 |
| pshufb .Lrol8(%rip),%xmm15 |
| paddd %xmm15,%xmm11 |
| pxor %xmm11,%xmm7 |
| movdqa %xmm7,%xmm9 |
| pslld $7,%xmm9 |
| psrld $25,%xmm7 |
| pxor %xmm9,%xmm7 |
| .byte 102,15,58,15,255,12 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,4 |
| movdqa 0+80(%rbp),%xmm9 |
| |
| addq $16,%r8 |
| cmpq $160,%r8 |
| jb .Lopen_sse_tail_256_rounds_and_x1hash |
| |
| movq %rbx,%rcx |
| andq $-16,%rcx |
| .Lopen_sse_tail_256_hash: |
| addq 0+0(%rsi,%r8,1),%r10 |
| adcq 8+0(%rsi,%r8,1),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| addq $16,%r8 |
| cmpq %rcx,%r8 |
| jb .Lopen_sse_tail_256_hash |
| paddd .Lchacha20_consts(%rip),%xmm3 |
| paddd 0+48(%rbp),%xmm7 |
| paddd 0+64(%rbp),%xmm11 |
| paddd 0+144(%rbp),%xmm15 |
| paddd .Lchacha20_consts(%rip),%xmm2 |
| paddd 0+48(%rbp),%xmm6 |
| paddd 0+64(%rbp),%xmm10 |
| paddd 0+128(%rbp),%xmm14 |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd 0+48(%rbp),%xmm5 |
| paddd 0+64(%rbp),%xmm9 |
| paddd 0+112(%rbp),%xmm13 |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| movdqa %xmm12,0+80(%rbp) |
| movdqu 0 + 0(%rsi),%xmm12 |
| pxor %xmm3,%xmm12 |
| movdqu %xmm12,0 + 0(%rdi) |
| movdqu 16 + 0(%rsi),%xmm12 |
| pxor %xmm7,%xmm12 |
| movdqu %xmm12,16 + 0(%rdi) |
| movdqu 32 + 0(%rsi),%xmm12 |
| pxor %xmm11,%xmm12 |
| movdqu %xmm12,32 + 0(%rdi) |
| movdqu 48 + 0(%rsi),%xmm12 |
| pxor %xmm15,%xmm12 |
| movdqu %xmm12,48 + 0(%rdi) |
| movdqu 0 + 64(%rsi),%xmm3 |
| movdqu 16 + 64(%rsi),%xmm7 |
| movdqu 32 + 64(%rsi),%xmm11 |
| movdqu 48 + 64(%rsi),%xmm15 |
| pxor %xmm3,%xmm2 |
| pxor %xmm7,%xmm6 |
| pxor %xmm11,%xmm10 |
| pxor %xmm14,%xmm15 |
| movdqu %xmm2,0 + 64(%rdi) |
| movdqu %xmm6,16 + 64(%rdi) |
| movdqu %xmm10,32 + 64(%rdi) |
| movdqu %xmm15,48 + 64(%rdi) |
| movdqu 0 + 128(%rsi),%xmm3 |
| movdqu 16 + 128(%rsi),%xmm7 |
| movdqu 32 + 128(%rsi),%xmm11 |
| movdqu 48 + 128(%rsi),%xmm15 |
| pxor %xmm3,%xmm1 |
| pxor %xmm7,%xmm5 |
| pxor %xmm11,%xmm9 |
| pxor %xmm13,%xmm15 |
| movdqu %xmm1,0 + 128(%rdi) |
| movdqu %xmm5,16 + 128(%rdi) |
| movdqu %xmm9,32 + 128(%rdi) |
| movdqu %xmm15,48 + 128(%rdi) |
| |
| movdqa 0+80(%rbp),%xmm12 |
| subq $192,%rbx |
| leaq 192(%rsi),%rsi |
| leaq 192(%rdi),%rdi |
| |
| |
| .Lopen_sse_tail_64_dec_loop: |
| cmpq $16,%rbx |
| jb .Lopen_sse_tail_16_init |
| subq $16,%rbx |
| movdqu (%rsi),%xmm3 |
| pxor %xmm3,%xmm0 |
| movdqu %xmm0,(%rdi) |
| leaq 16(%rsi),%rsi |
| leaq 16(%rdi),%rdi |
| movdqa %xmm4,%xmm0 |
| movdqa %xmm8,%xmm4 |
| movdqa %xmm12,%xmm8 |
| jmp .Lopen_sse_tail_64_dec_loop |
| .Lopen_sse_tail_16_init: |
| movdqa %xmm0,%xmm1 |
| |
| |
| .Lopen_sse_tail_16: |
| testq %rbx,%rbx |
| jz .Lopen_sse_finalize |
| |
| |
| |
| pxor %xmm3,%xmm3 |
| leaq -1(%rsi,%rbx,1),%rsi |
| movq %rbx,%r8 |
| .Lopen_sse_tail_16_compose: |
| pslldq $1,%xmm3 |
| pinsrb $0,(%rsi),%xmm3 |
| subq $1,%rsi |
| subq $1,%r8 |
| jnz .Lopen_sse_tail_16_compose |
| |
| .byte 102,73,15,126,221 |
| pextrq $1,%xmm3,%r14 |
| |
| pxor %xmm1,%xmm3 |
| |
| |
| .Lopen_sse_tail_16_extract: |
| pextrb $0,%xmm3,(%rdi) |
| psrldq $1,%xmm3 |
| addq $1,%rdi |
| subq $1,%rbx |
| jne .Lopen_sse_tail_16_extract |
| |
| addq %r13,%r10 |
| adcq %r14,%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| |
| .Lopen_sse_finalize: |
| addq 0+0+32(%rbp),%r10 |
| adcq 8+0+32(%rbp),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| |
| movq %r10,%r13 |
| movq %r11,%r14 |
| movq %r12,%r15 |
| subq $-5,%r10 |
| sbbq $-1,%r11 |
| sbbq $3,%r12 |
| cmovcq %r13,%r10 |
| cmovcq %r14,%r11 |
| cmovcq %r15,%r12 |
| |
| addq 0+0+16(%rbp),%r10 |
| adcq 8+0+16(%rbp),%r11 |
| |
| .cfi_remember_state |
| addq $288 + 0 + 32,%rsp |
| .cfi_adjust_cfa_offset -(288 + 32) |
| |
| popq %r9 |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %r9 |
| movq %r10,(%r9) |
| movq %r11,8(%r9) |
| popq %r15 |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %r15 |
| popq %r14 |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %r14 |
| popq %r13 |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %r13 |
| popq %r12 |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %r12 |
| popq %rbx |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %rbx |
| popq %rbp |
| .cfi_adjust_cfa_offset -8 |
| .cfi_restore %rbp |
| .byte 0xf3,0xc3 |
| |
| .Lopen_sse_128: |
| .cfi_restore_state |
| movdqu .Lchacha20_consts(%rip),%xmm0 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm0,%xmm2 |
| movdqu 0(%r9),%xmm4 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm4,%xmm6 |
| movdqu 16(%r9),%xmm8 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm8,%xmm10 |
| movdqu 32(%r9),%xmm12 |
| movdqa %xmm12,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm4,%xmm7 |
| movdqa %xmm8,%xmm11 |
| movdqa %xmm13,%xmm15 |
| movq $10,%r10 |
| |
| .Lopen_sse_128_rounds: |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm6 |
| pxor %xmm3,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm6 |
| pxor %xmm3,%xmm6 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm6 |
| pxor %xmm3,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm6 |
| pxor %xmm3,%xmm6 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| |
| decq %r10 |
| jnz .Lopen_sse_128_rounds |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd .Lchacha20_consts(%rip),%xmm2 |
| paddd %xmm7,%xmm4 |
| paddd %xmm7,%xmm5 |
| paddd %xmm7,%xmm6 |
| paddd %xmm11,%xmm9 |
| paddd %xmm11,%xmm10 |
| paddd %xmm15,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm15 |
| paddd %xmm15,%xmm14 |
| |
| pand .Lclamp(%rip),%xmm0 |
| movdqa %xmm0,0+0(%rbp) |
| movdqa %xmm4,0+16(%rbp) |
| |
| movq %r8,%r8 |
| call poly_hash_ad_internal |
| .Lopen_sse_128_xor_hash: |
| cmpq $16,%rbx |
| jb .Lopen_sse_tail_16 |
| subq $16,%rbx |
| addq 0+0(%rsi),%r10 |
| adcq 8+0(%rsi),%r11 |
| adcq $1,%r12 |
| |
| |
| movdqu 0(%rsi),%xmm3 |
| pxor %xmm3,%xmm1 |
| movdqu %xmm1,0(%rdi) |
| leaq 16(%rsi),%rsi |
| leaq 16(%rdi),%rdi |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| |
| movdqa %xmm5,%xmm1 |
| movdqa %xmm9,%xmm5 |
| movdqa %xmm13,%xmm9 |
| movdqa %xmm2,%xmm13 |
| movdqa %xmm6,%xmm2 |
| movdqa %xmm10,%xmm6 |
| movdqa %xmm14,%xmm10 |
| jmp .Lopen_sse_128_xor_hash |
| .size chacha20_poly1305_open, .-chacha20_poly1305_open |
| .cfi_endproc |
| |
| |
| |
| |
| |
| |
| |
| .globl chacha20_poly1305_seal |
| .hidden chacha20_poly1305_seal |
| .type chacha20_poly1305_seal,@function |
| .align 64 |
| chacha20_poly1305_seal: |
| .cfi_startproc |
| pushq %rbp |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %rbp,-16 |
| pushq %rbx |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %rbx,-24 |
| pushq %r12 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r12,-32 |
| pushq %r13 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r13,-40 |
| pushq %r14 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r14,-48 |
| pushq %r15 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r15,-56 |
| |
| |
| pushq %r9 |
| .cfi_adjust_cfa_offset 8 |
| .cfi_offset %r9,-64 |
| subq $288 + 0 + 32,%rsp |
| .cfi_adjust_cfa_offset 288 + 32 |
| leaq 32(%rsp),%rbp |
| andq $-32,%rbp |
| |
| movq 56(%r9),%rbx |
| addq %rdx,%rbx |
| movq %r8,0+0+32(%rbp) |
| movq %rbx,8+0+32(%rbp) |
| movq %rdx,%rbx |
| |
| movl OPENSSL_ia32cap_P+8(%rip),%eax |
| andl $288,%eax |
| xorl $288,%eax |
| jz chacha20_poly1305_seal_avx2 |
| |
| cmpq $128,%rbx |
| jbe .Lseal_sse_128 |
| |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqu 0(%r9),%xmm4 |
| movdqu 16(%r9),%xmm8 |
| movdqu 32(%r9),%xmm12 |
| |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm0,%xmm3 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm4,%xmm7 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm8,%xmm10 |
| movdqa %xmm8,%xmm11 |
| movdqa %xmm12,%xmm15 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,%xmm14 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm12 |
| |
| movdqa %xmm4,0+48(%rbp) |
| movdqa %xmm8,0+64(%rbp) |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| movdqa %xmm15,0+144(%rbp) |
| movq $10,%r10 |
| .Lseal_sse_init_rounds: |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| .byte 102,15,58,15,255,4 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,12 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| .byte 102,15,58,15,255,12 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,4 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| |
| decq %r10 |
| jnz .Lseal_sse_init_rounds |
| paddd .Lchacha20_consts(%rip),%xmm3 |
| paddd 0+48(%rbp),%xmm7 |
| paddd 0+64(%rbp),%xmm11 |
| paddd 0+144(%rbp),%xmm15 |
| paddd .Lchacha20_consts(%rip),%xmm2 |
| paddd 0+48(%rbp),%xmm6 |
| paddd 0+64(%rbp),%xmm10 |
| paddd 0+128(%rbp),%xmm14 |
| paddd .Lchacha20_consts(%rip),%xmm1 |
| paddd 0+48(%rbp),%xmm5 |
| paddd 0+64(%rbp),%xmm9 |
| paddd 0+112(%rbp),%xmm13 |
| paddd .Lchacha20_consts(%rip),%xmm0 |
| paddd 0+48(%rbp),%xmm4 |
| paddd 0+64(%rbp),%xmm8 |
| paddd 0+96(%rbp),%xmm12 |
| |
| |
| pand .Lclamp(%rip),%xmm3 |
| movdqa %xmm3,0+0(%rbp) |
| movdqa %xmm7,0+16(%rbp) |
| |
| movq %r8,%r8 |
| call poly_hash_ad_internal |
| movdqu 0 + 0(%rsi),%xmm3 |
| movdqu 16 + 0(%rsi),%xmm7 |
| movdqu 32 + 0(%rsi),%xmm11 |
| movdqu 48 + 0(%rsi),%xmm15 |
| pxor %xmm3,%xmm2 |
| pxor %xmm7,%xmm6 |
| pxor %xmm11,%xmm10 |
| pxor %xmm14,%xmm15 |
| movdqu %xmm2,0 + 0(%rdi) |
| movdqu %xmm6,16 + 0(%rdi) |
| movdqu %xmm10,32 + 0(%rdi) |
| movdqu %xmm15,48 + 0(%rdi) |
| movdqu 0 + 64(%rsi),%xmm3 |
| movdqu 16 + 64(%rsi),%xmm7 |
| movdqu 32 + 64(%rsi),%xmm11 |
| movdqu 48 + 64(%rsi),%xmm15 |
| pxor %xmm3,%xmm1 |
| pxor %xmm7,%xmm5 |
| pxor %xmm11,%xmm9 |
| pxor %xmm13,%xmm15 |
| movdqu %xmm1,0 + 64(%rdi) |
| movdqu %xmm5,16 + 64(%rdi) |
| movdqu %xmm9,32 + 64(%rdi) |
| movdqu %xmm15,48 + 64(%rdi) |
| |
| cmpq $192,%rbx |
| ja .Lseal_sse_main_init |
| movq $128,%rcx |
| subq $128,%rbx |
| leaq 128(%rsi),%rsi |
| jmp .Lseal_sse_128_tail_hash |
| .Lseal_sse_main_init: |
| movdqu 0 + 128(%rsi),%xmm3 |
| movdqu 16 + 128(%rsi),%xmm7 |
| movdqu 32 + 128(%rsi),%xmm11 |
| movdqu 48 + 128(%rsi),%xmm15 |
| pxor %xmm3,%xmm0 |
| pxor %xmm7,%xmm4 |
| pxor %xmm11,%xmm8 |
| pxor %xmm12,%xmm15 |
| movdqu %xmm0,0 + 128(%rdi) |
| movdqu %xmm4,16 + 128(%rdi) |
| movdqu %xmm8,32 + 128(%rdi) |
| movdqu %xmm15,48 + 128(%rdi) |
| |
| movq $192,%rcx |
| subq $192,%rbx |
| leaq 192(%rsi),%rsi |
| movq $2,%rcx |
| movq $8,%r8 |
| cmpq $64,%rbx |
| jbe .Lseal_sse_tail_64 |
| cmpq $128,%rbx |
| jbe .Lseal_sse_tail_128 |
| cmpq $192,%rbx |
| jbe .Lseal_sse_tail_192 |
| |
| .Lseal_sse_main_loop: |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm8,%xmm10 |
| movdqa %xmm0,%xmm3 |
| movdqa %xmm4,%xmm7 |
| movdqa %xmm8,%xmm11 |
| movdqa 0+96(%rbp),%xmm15 |
| paddd .Lsse_inc(%rip),%xmm15 |
| movdqa %xmm15,%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm14,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| movdqa %xmm15,0+144(%rbp) |
| |
| .align 32 |
| .Lseal_sse_main_rounds: |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| addq 0+0(%rdi),%r10 |
| adcq 8+0(%rdi),%r11 |
| adcq $1,%r12 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| .byte 102,15,58,15,255,4 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,12 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa .Lrol16(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $20,%xmm8 |
| pslld $32-20,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa .Lrol8(%rip),%xmm8 |
| paddd %xmm7,%xmm3 |
| paddd %xmm6,%xmm2 |
| paddd %xmm5,%xmm1 |
| paddd %xmm4,%xmm0 |
| pxor %xmm3,%xmm15 |
| pxor %xmm2,%xmm14 |
| pxor %xmm1,%xmm13 |
| pxor %xmm0,%xmm12 |
| .byte 102,69,15,56,0,248 |
| .byte 102,69,15,56,0,240 |
| .byte 102,69,15,56,0,232 |
| .byte 102,69,15,56,0,224 |
| movdqa 0+80(%rbp),%xmm8 |
| paddd %xmm15,%xmm11 |
| paddd %xmm14,%xmm10 |
| paddd %xmm13,%xmm9 |
| paddd %xmm12,%xmm8 |
| pxor %xmm11,%xmm7 |
| pxor %xmm10,%xmm6 |
| pxor %xmm9,%xmm5 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm8,0+80(%rbp) |
| movdqa %xmm7,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm7 |
| pxor %xmm8,%xmm7 |
| movdqa %xmm6,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm6 |
| pxor %xmm8,%xmm6 |
| movdqa %xmm5,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm5 |
| pxor %xmm8,%xmm5 |
| movdqa %xmm4,%xmm8 |
| psrld $25,%xmm8 |
| pslld $32-25,%xmm4 |
| pxor %xmm8,%xmm4 |
| movdqa 0+80(%rbp),%xmm8 |
| .byte 102,15,58,15,255,12 |
| .byte 102,69,15,58,15,219,8 |
| .byte 102,69,15,58,15,255,4 |
| .byte 102,15,58,15,246,12 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,4 |
| .byte 102,15,58,15,237,12 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,4 |
| .byte 102,15,58,15,228,12 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,4 |
| |
| leaq 16(%rdi),%rdi |
| decq %r8 |
| jge .Lseal_sse_main_rounds |
| addq 0+0(%rdi),%r10 |
| adcq 8+0(%rdi),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| leaq 16(%rdi),%rdi |
| decq %rcx |
| jg .Lseal_sse_main_rounds |
// ---------------------------------------------------------------------
// Main-loop exit: add the saved input state back into all four keystream
// blocks (constants row, key rows at 0+48/0+64(%rbp), per-block counter
// rows at 0+96/112/128/144(%rbp)), then XOR the first 192 bytes of
// plaintext.  The xmm3/7/11/15 set uses the counter at 0+144(%rbp),
// the xmm0/4/8/12 set the one at 0+96(%rbp).
// ---------------------------------------------------------------------
paddd .Lchacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd .Lchacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd .Lchacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd .Lchacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12

// Stash %xmm14 so it can serve as the load/xor temporary for bytes 0-63.
// NOTE(review): this store is emitted twice - redundant but harmless
// (same value to the same slot); artifact of the generating Perl script.
movdqa %xmm14,0+80(%rbp)
movdqa %xmm14,0+80(%rbp)
movdqu 0 + 0(%rsi),%xmm14
pxor %xmm3,%xmm14
movdqu %xmm14,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm14
pxor %xmm7,%xmm14
movdqu %xmm14,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm14
pxor %xmm11,%xmm14
movdqu %xmm14,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm14
pxor %xmm15,%xmm14
movdqu %xmm14,48 + 0(%rdi)

// Bytes 64-127: keystream rows xmm2/6/10 and the reloaded %xmm14
// (that block's d-row, saved above).
movdqa 0+80(%rbp),%xmm14
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
// Bytes 128-191: keystream rows xmm1/5/9/13.
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)

cmpq $256,%rbx
ja .Lseal_sse_main_loop_xor             // more than 256 bytes: emit the 4th block too

// At most 256 bytes in this pass: keep the xmm0/4/8/12 keystream for the
// partial-block tail; hash the 192 ciphertext bytes just written.
movq $192,%rcx
subq $192,%rbx
leaq 192(%rsi),%rsi
jmp .Lseal_sse_128_tail_hash
.Lseal_sse_main_loop_xor:
// More than 256 bytes: encrypt bytes 192-255 with the fourth block's
// keystream (xmm0/4/8/12), then decide whether another full 4-block
// main-loop pass or one of the tail paths runs next.
movdqu 0 + 192(%rsi),%xmm3
movdqu 16 + 192(%rsi),%xmm7
movdqu 32 + 192(%rsi),%xmm11
movdqu 48 + 192(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor %xmm12,%xmm15
movdqu %xmm0,0 + 192(%rdi)
movdqu %xmm4,16 + 192(%rdi)
movdqu %xmm8,32 + 192(%rdi)
movdqu %xmm15,48 + 192(%rdi)

leaq 256(%rsi),%rsi
subq $256,%rbx
movq $6,%rcx                            // double-round iterations that also hash
movq $4,%r8                             // extra hash-only iterations
cmpq $192,%rbx
jg .Lseal_sse_main_loop
// At most 192 bytes remain: pick a tail path sized to the remainder.
movq %rbx,%rcx                          // NOTE(review): %rcx = remaining bytes for the hash-only path - confirm against .Lseal_sse_128_tail_hash (below this chunk)
testq %rbx,%rbx
je .Lseal_sse_128_tail_hash             // nothing left to encrypt
movq $6,%rcx                            // tail blocks run 6 double-round/hash iterations
cmpq $128,%rbx
ja .Lseal_sse_tail_192
cmpq $64,%rbx
ja .Lseal_sse_tail_128

.Lseal_sse_tail_64:
// At most 64 bytes remain: generate one final ChaCha20 block.
// xmm0/4/8/12 = constants / key-low / key-high / counter rows; the
// running counter at 0+96(%rbp) is incremented and written back.
movdqa .Lchacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa 0+96(%rbp),%xmm12
paddd .Lsse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)

// Each pass of the loop below runs one ChaCha20 double round; passes
// entered at _x2hash also absorb 16 bytes of ciphertext into Poly1305.
.Lseal_sse_tail_64_rounds_and_x2hash:
// Poly1305 block: h += m (pad bit via adcq $1); h *= r; partial
// reduction mod 2^130-5.  h = %r10:%r11:%r12, r = 0+0+0/8+0+0(%rbp).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

leaq 16(%rdi),%rdi
.Lseal_sse_tail_64_rounds_and_x1hash:
// Column quarter-round: a += b; d ^= a; d <<<= 16; c += d; b ^= c;
// b <<<= 12; a += b; d ^= a; d <<<= 8; c += d; b ^= c; b <<<= 7.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// Diagonalize (rotate b/c/d rows by 1/2/3 lanes)...
.byte 102,15,58,15,228,4                // palignr $4,%xmm4,%xmm4
.byte 102,69,15,58,15,192,8             // palignr $8,%xmm8,%xmm8
.byte 102,69,15,58,15,228,12            // palignr $12,%xmm12,%xmm12
// ...run the diagonal quarter-round...
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// ...and undo the diagonalization.
.byte 102,15,58,15,228,12               // palignr $12,%xmm4,%xmm4
.byte 102,69,15,58,15,192,8             // palignr $8,%xmm8,%xmm8
.byte 102,69,15,58,15,228,4             // palignr $4,%xmm12,%xmm12
// Poly1305 block (same as above).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

leaq 16(%rdi),%rdi
decq %rcx
jg .Lseal_sse_tail_64_rounds_and_x2hash
decq %r8
jge .Lseal_sse_tail_64_rounds_and_x1hash
// Add the input state back in to finish the keystream block; the actual
// XOR with plaintext happens at .Lseal_sse_128_tail_xor (below this chunk).
paddd .Lchacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12

jmp .Lseal_sse_128_tail_xor

.Lseal_sse_tail_128:
// 65-128 bytes remain: generate two ChaCha20 blocks.  xmm1/5/9/13 takes
// counter+1 (saved at 0+112(%rbp)) and encrypts the first 64 bytes;
// xmm0/4/8/12 takes counter+2 (written back to 0+96(%rbp) as the running
// counter) and is left as keystream for the final partial block.
movdqa .Lchacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa 0+96(%rbp),%xmm13
paddd .Lsse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd .Lsse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)

// Each pass below runs one double round over both blocks; passes entered
// at _x2hash also absorb 16 bytes of ciphertext into Poly1305.
.Lseal_sse_tail_128_rounds_and_x2hash:
// Poly1305 block: h += m (pad bit via adcq $1); h *= r; partial
// reduction mod 2^130-5.  h = %r10:%r11:%r12, r = 0+0+0/8+0+0(%rbp).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

leaq 16(%rdi),%rdi
.Lseal_sse_tail_128_rounds_and_x1hash:
// Column quarter-round + diagonalize, block 0 (xmm0/4/8/12).
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4                // palignr $4,%xmm4,%xmm4
.byte 102,69,15,58,15,192,8             // palignr $8,%xmm8,%xmm8
.byte 102,69,15,58,15,228,12            // palignr $12,%xmm12,%xmm12
// Column quarter-round + diagonalize, block 1 (xmm1/5/9/13).
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .Lrol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .Lrol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4                // palignr $4,%xmm5,%xmm5
.byte 102,69,15,58,15,201,8             // palignr $8,%xmm9,%xmm9
.byte 102,69,15,58,15,237,12            // palignr $12,%xmm13,%xmm13
// Poly1305 block (same as above).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Diagonal quarter-round + un-diagonalize, block 0.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb .Lrol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12               // palignr $12,%xmm4,%xmm4
.byte 102,69,15,58,15,192,8             // palignr $8,%xmm8,%xmm8
.byte 102,69,15,58,15,228,4             // palignr $4,%xmm12,%xmm12
// Diagonal quarter-round + un-diagonalize, block 1.
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .Lrol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb .Lrol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12               // palignr $12,%xmm5,%xmm5
.byte 102,69,15,58,15,201,8             // palignr $8,%xmm9,%xmm9
.byte 102,69,15,58,15,237,4             // palignr $4,%xmm13,%xmm13

leaq 16(%rdi),%rdi
decq %rcx
jg .Lseal_sse_tail_128_rounds_and_x2hash
decq %r8
jge .Lseal_sse_tail_128_rounds_and_x1hash
// Finish both keystream blocks by re-adding the input state.
paddd .Lchacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd .Lchacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
// XOR the first 64 bytes with block 1's keystream.
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 0(%rdi)
movdqu %xmm5,16 + 0(%rdi)
movdqu %xmm9,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)

// Hash those 64 bytes; the partial block uses the xmm0/4/8/12 keystream.
movq $64,%rcx
subq $64,%rbx
leaq 64(%rsi),%rsi
jmp .Lseal_sse_128_tail_hash

| .Lseal_sse_tail_192: |
| movdqa .Lchacha20_consts(%rip),%xmm0 |
| movdqa 0+48(%rbp),%xmm4 |
| movdqa 0+64(%rbp),%xmm8 |
| movdqa %xmm0,%xmm1 |
| movdqa %xmm4,%xmm5 |
| movdqa %xmm8,%xmm9 |
| movdqa %xmm0,%xmm2 |
| movdqa %xmm4,%xmm6 |
| movdqa %xmm8,%xmm10 |
| movdqa 0+96(%rbp),%xmm14 |
| paddd .Lsse_inc(%rip),%xmm14 |
| movdqa %xmm14,%xmm13 |
| paddd .Lsse_inc(%rip),%xmm13 |
| movdqa %xmm13,%xmm12 |
| paddd .Lsse_inc(%rip),%xmm12 |
| movdqa %xmm12,0+96(%rbp) |
| movdqa %xmm13,0+112(%rbp) |
| movdqa %xmm14,0+128(%rbp) |
| |
| .Lseal_sse_tail_192_rounds_and_x2hash: |
| addq 0+0(%rdi),%r10 |
| adcq 8+0(%rdi),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
| imulq %r12,%r15 |
| addq %rax,%r14 |
| adcq %rdx,%r15 |
| movq 8+0+0(%rbp),%rax |
| movq %rax,%r9 |
| mulq %r10 |
| addq %rax,%r14 |
| adcq $0,%rdx |
| movq %rdx,%r10 |
| movq 8+0+0(%rbp),%rax |
| mulq %r11 |
| addq %rax,%r15 |
| adcq $0,%rdx |
| imulq %r12,%r9 |
| addq %r10,%r15 |
| adcq %rdx,%r9 |
| movq %r13,%r10 |
| movq %r14,%r11 |
| movq %r15,%r12 |
| andq $3,%r12 |
| movq %r15,%r13 |
| andq $-4,%r13 |
| movq %r9,%r14 |
| shrdq $2,%r9,%r15 |
| shrq $2,%r9 |
| addq %r13,%r15 |
| adcq %r14,%r9 |
| addq %r15,%r10 |
| adcq %r9,%r11 |
| adcq $0,%r12 |
| |
| leaq 16(%rdi),%rdi |
| .Lseal_sse_tail_192_rounds_and_x1hash: |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol16(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm4 |
| pxor %xmm3,%xmm4 |
| paddd %xmm4,%xmm0 |
| pxor %xmm0,%xmm12 |
| pshufb .Lrol8(%rip),%xmm12 |
| paddd %xmm12,%xmm8 |
| pxor %xmm8,%xmm4 |
| movdqa %xmm4,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm4 |
| pxor %xmm3,%xmm4 |
| .byte 102,15,58,15,228,4 |
| .byte 102,69,15,58,15,192,8 |
| .byte 102,69,15,58,15,228,12 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol16(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm5 |
| pxor %xmm3,%xmm5 |
| paddd %xmm5,%xmm1 |
| pxor %xmm1,%xmm13 |
| pshufb .Lrol8(%rip),%xmm13 |
| paddd %xmm13,%xmm9 |
| pxor %xmm9,%xmm5 |
| movdqa %xmm5,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm5 |
| pxor %xmm3,%xmm5 |
| .byte 102,15,58,15,237,4 |
| .byte 102,69,15,58,15,201,8 |
| .byte 102,69,15,58,15,237,12 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol16(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $12,%xmm3 |
| psrld $20,%xmm6 |
| pxor %xmm3,%xmm6 |
| paddd %xmm6,%xmm2 |
| pxor %xmm2,%xmm14 |
| pshufb .Lrol8(%rip),%xmm14 |
| paddd %xmm14,%xmm10 |
| pxor %xmm10,%xmm6 |
| movdqa %xmm6,%xmm3 |
| pslld $7,%xmm3 |
| psrld $25,%xmm6 |
| pxor %xmm3,%xmm6 |
| .byte 102,15,58,15,246,4 |
| .byte 102,69,15,58,15,210,8 |
| .byte 102,69,15,58,15,246,12 |
| addq 0+0(%rdi),%r10 |
| adcq 8+0(%rdi),%r11 |
| adcq $1,%r12 |
| movq 0+0+0(%rbp),%rax |
| movq %rax,%r15 |
| mulq %r10 |
| movq %rax,%r13 |
| movq %rdx,%r14 |
| movq 0+0+0(%rbp),%rax |
| mulq %r11 |
|