| //+build !noasm !appengine |
| // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT |
| |
| // Constant table used only by ·_int8_max_min_sse4 (GLOBL flag 8 = RODATA): |
| // bytes 0..15 hold 0x80 (int8 -128), the starting value of the max accumulators; |
| // bytes 16..31 hold 0x7f (int8 127), the starting value of the min accumulators. |
| // The same two rows are reused as XOR masks during the horizontal reduction. |
| DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080 |
| DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080 |
| DATA LCDATA1<>+0x010(SB)/8, $0x7f7f7f7f7f7f7f7f |
| DATA LCDATA1<>+0x018(SB)/8, $0x7f7f7f7f7f7f7f7f |
| GLOBL LCDATA1<>(SB), 8, $32 |
| |
| // ·_int8_max_min_sse4 scans the int8 elements starting at `values` and stores |
| // the smallest through `minout` and the largest through `maxout`. |
| // |
| // Stack arguments (32 bytes): values+0, length+8, minout+16, maxout+24. |
| // Register roles: DI = values, R9 = element count, R11 = index of the first |
| // element not handled by the vector code, SIL = running min, R8B = running max, |
| // BP = address of LCDATA1. |
| // |
| // Only the low 32 bits of `length` are examined, as a signed value. When that |
| // value is <= 0 the sentinels 127 (min) and -128 (max) are stored. |
| // |
| // The frame size is 8 rather than 0 because the body overwrites BP: a non-zero |
| // frame makes the assembler emit a prologue/epilogue that saves and restores |
| // the caller's frame pointer. |
| TEXT ·_int8_max_min_sse4(SB), $8-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| LEAQ LCDATA1<>(SB), BP |
| |
| WORD $0xf685 // test esi, esi |
| JLE LBB0_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi |
| WORD $0xfe83; BYTE $0x1f // cmp esi, 31 |
| JA LBB0_4 |
| // Fewer than 32 elements: scalar loop over everything, starting from the sentinels. |
| WORD $0xb041; BYTE $0x80 // mov r8b, -128 |
| WORD $0xb640; BYTE $0x7f // mov sil, 127 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d |
| JMP LBB0_11 |
| |
| // Nothing to scan: store the sentinels. |
| LBB0_1: |
| WORD $0xb640; BYTE $0x7f // mov sil, 127 |
| WORD $0xb041; BYTE $0x80 // mov r8b, -128 |
| JMP LBB0_12 |
| |
| // Vector path. r11 = count rounded down to a multiple of 32; r8 = number of |
| // 32-byte chunks; r10 = -(r8 & -2) counts the chunks consumed two at a time. |
| LBB0_4: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xe0e38341 // and r11d, -32 |
| LONG $0xe0438d49 // lea rax, [r11 - 32] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x05e8c149 // shr r8, 5 |
| LONG $0x01c08349 // add r8, 1 |
| WORD $0x8548; BYTE $0xc0 // test rax, rax |
| JE LBB0_5 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| |
| // Main loop: 64 bytes per iteration. xmm0/xmm2 accumulate minima, xmm1/xmm3 maxima. |
| LBB0_7: |
| LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] |
| LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] |
| LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48] |
| LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4 |
| LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5 |
| LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4 |
| LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5 |
| LONG $0x38380f66; BYTE $0xc6 // pminsb xmm0, xmm6 |
| LONG $0x38380f66; BYTE $0xd7 // pminsb xmm2, xmm7 |
| LONG $0x3c380f66; BYTE $0xce // pmaxsb xmm1, xmm6 |
| LONG $0x3c380f66; BYTE $0xdf // pmaxsb xmm3, xmm7 |
| LONG $0x40c08348 // add rax, 64 |
| LONG $0x02c28349 // add r10, 2 |
| JNE LBB0_7 |
| LONG $0x01c0f641 // test r8b, 1 |
| JE LBB0_10 |
| |
| // Odd chunk count: fold in one more 32-byte chunk. |
| LBB0_9: |
| LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] |
| LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] |
| LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5 |
| LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4 |
| LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5 |
| LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4 |
| |
| // Horizontal reduction. XOR with 0x7f turns "signed max" into "unsigned min" and |
| // XOR with 0x80 turns "signed min" into "unsigned min"; psrlw+pminub folds each |
| // byte pair into the low byte of its word so phminposuw can pick the smallest |
| // word, and the final scalar XOR undoes the mapping. |
| LBB0_10: |
| LONG $0x38380f66; BYTE $0xc2 // pminsb xmm0, xmm2 |
| LONG $0x3c380f66; BYTE $0xcb // pmaxsb xmm1, xmm3 |
| LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI0_1] */ |
| LONG $0xd16f0f66 // movdqa xmm2, xmm1 |
| LONG $0xd2710f66; BYTE $0x08 // psrlw xmm2, 8 |
| LONG $0xd1da0f66 // pminub xmm2, xmm1 |
| LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2 |
| LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1 |
| LONG $0x7ff08041 // xor r8b, 127 |
| LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */ |
| LONG $0xc86f0f66 // movdqa xmm1, xmm0 |
| LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 |
| LONG $0xc8da0f66 // pminub xmm1, xmm0 |
| LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1 |
| LONG $0xc67e0f66 // movd esi, xmm0 |
| LONG $0x80f68040 // xor sil, -128 |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 |
| JE LBB0_12 |
| |
| // Scalar loop over elements r11..r9-1 (the tail, or the whole short input). |
| LBB0_11: |
| LONG $0x04b60f42; BYTE $0x1f // movzx eax, byte [rdi + r11] |
| WORD $0x3840; BYTE $0xc6 // cmp sil, al |
| LONG $0xf6b60f40 // movzx esi, sil |
| WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax |
| WORD $0x3841; BYTE $0xc0 // cmp r8b, al |
| LONG $0xc0b60f45 // movzx r8d, r8b |
| LONG $0xc04c0f44 // cmovl r8d, eax |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB0_11 |
| |
| // Store: max through maxout (rcx), min through minout (rdx). |
| LBB0_12: |
| WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b |
| WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil |
| RET |
| |
| // Exactly one 32-byte chunk: initialise the accumulators and skip the main loop. |
| LBB0_5: |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| LONG $0x01c0f641 // test r8b, 1 |
| JNE LBB0_9 |
| JMP LBB0_10 |
| |
| TEXT ·_uint8_max_min_sse4(SB), $0-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| |
| WORD $0xf685 // test esi, esi -- uint8 min/max of the bytes at values; only the low 32 bits of length are tested, as a signed value |
| JLE LBB1_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi -- r9 = element count |
| WORD $0xfe83; BYTE $0x1f // cmp esi, 31 -- more than 31 elements takes the vector path at LBB1_4 |
| JA LBB1_4 |
| WORD $0xb640; BYTE $0xff // mov sil, -1 -- running min starts at 255 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d -- r11 = index of the next element for the scalar loop |
| WORD $0xc031 // xor eax, eax -- running max starts at 0 |
| JMP LBB1_11 |
| |
| LBB1_1: |
| WORD $0xb640; BYTE $0xff // mov sil, -1 -- count <= 0: min sentinel 255 |
| WORD $0xc031 // xor eax, eax -- count <= 0: max sentinel 0 |
| JMP LBB1_12 |
| |
| LBB1_4: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xe0e38341 // and r11d, -32 -- r11 = count rounded down to a multiple of 32 (elements handled by vector code) |
| LONG $0xe0438d49 // lea rax, [r11 - 32] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x05e8c149 // shr r8, 5 |
| LONG $0x01c08349 // add r8, 1 -- r8 = number of 32-byte chunks |
| WORD $0x8548; BYTE $0xc0 // test rax, rax -- zero means exactly one chunk, handled via LBB1_5 |
| JE LBB1_5 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 -- r10 = -(even part of the chunk count); the loop adds 2 until it reaches zero |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- max accumulator A = all zero bytes |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 -- min accumulator A = all 0xff bytes |
| WORD $0xc031 // xor eax, eax -- rax = byte offset into values |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 -- min accumulator B = all 0xff bytes |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 -- max accumulator B = all zero bytes |
| |
| LBB1_7: |
| LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] -- main loop: four unaligned 16-byte loads per iteration |
| LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] |
| LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48] |
| LONG $0xc4da0f66 // pminub xmm0, xmm4 |
| LONG $0xd5da0f66 // pminub xmm2, xmm5 |
| LONG $0xccde0f66 // pmaxub xmm1, xmm4 |
| LONG $0xddde0f66 // pmaxub xmm3, xmm5 |
| LONG $0xc6da0f66 // pminub xmm0, xmm6 |
| LONG $0xd7da0f66 // pminub xmm2, xmm7 |
| LONG $0xcede0f66 // pmaxub xmm1, xmm6 |
| LONG $0xdfde0f66 // pmaxub xmm3, xmm7 |
| LONG $0x40c08348 // add rax, 64 -- advance by the 64 bytes just consumed |
| LONG $0x02c28349 // add r10, 2 -- two chunks done; sets ZF when the paired chunks are exhausted |
| JNE LBB1_7 |
| LONG $0x01c0f641 // test r8b, 1 -- odd chunk count leaves one more 32-byte chunk for LBB1_9 |
| JE LBB1_10 |
| |
| LBB1_9: |
| LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] -- leftover single 32-byte chunk |
| LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] |
| LONG $0xddde0f66 // pmaxub xmm3, xmm5 |
| LONG $0xccde0f66 // pmaxub xmm1, xmm4 |
| LONG $0xd5da0f66 // pminub xmm2, xmm5 |
| LONG $0xc4da0f66 // pminub xmm0, xmm4 |
| |
| LBB1_10: |
| LONG $0xc2da0f66 // pminub xmm0, xmm2 -- merge the two min accumulators |
| LONG $0xcbde0f66 // pmaxub xmm1, xmm3 -- merge the two max accumulators |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 -- all ones |
| LONG $0xd1ef0f66 // pxor xmm2, xmm1 -- complement the maxima so the largest byte becomes the smallest |
| LONG $0xca6f0f66 // movdqa xmm1, xmm2 |
| LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 -- bring each word's high byte down to its low byte |
| LONG $0xcada0f66 // pminub xmm1, xmm2 -- low byte of each word = min of that word's two bytes, high byte = 0 |
| LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1 -- smallest word lands in the low word |
| LONG $0xc87e0f66 // movd eax, xmm1 |
| WORD $0xd0f6 // not al -- undo the complement: al = max byte of the vector part |
| LONG $0xc86f0f66 // movdqa xmm1, xmm0 |
| LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 -- same byte-pair fold for the minima |
| LONG $0xc8da0f66 // pminub xmm1, xmm0 |
| LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1 |
| LONG $0xc67e0f66 // movd esi, xmm0 -- sil = min byte of the vector part |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 -- equal means there are no tail elements left |
| JE LBB1_12 |
| |
| LBB1_11: |
| LONG $0x04b60f46; BYTE $0x1f // movzx r8d, byte [rdi + r11] -- scalar loop: r8d = values[r11] |
| WORD $0x3844; BYTE $0xc6 // cmp sil, r8b |
| LONG $0xf6b60f40 // movzx esi, sil |
| LONG $0xf0430f41 // cmovae esi, r8d -- min = element when min >= element (unsigned) |
| WORD $0x3844; BYTE $0xc0 // cmp al, r8b |
| WORD $0xb60f; BYTE $0xc0 // movzx eax, al |
| LONG $0xc0460f41 // cmovbe eax, r8d -- max = element when max <= element (unsigned) |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB1_11 |
| |
| LBB1_12: |
| WORD $0x0188 // mov byte [rcx], al -- store max through maxout |
| WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil -- store min through minout |
| RET |
| |
| LBB1_5: |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- single-chunk case: same accumulator setup as before LBB1_7 |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 |
| LONG $0x01c0f641 // test r8b, 1 -- r8 is 1 on this path, so the chunk is processed at LBB1_9 |
| JNE LBB1_9 |
| JMP LBB1_10 |
| |
| // Constant table used only by ·_int16_max_min_sse4 (GLOBL flag 8 = RODATA): |
| // bytes 0..15 hold eight 0x8000 words (int16 -32768), the starting max; |
| // bytes 16..31 hold eight 0x7fff words (int16 32767), the starting min. |
| // The same two rows are reused as XOR masks during the horizontal reduction. |
| DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000 |
| DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000 |
| DATA LCDATA2<>+0x010(SB)/8, $0x7fff7fff7fff7fff |
| DATA LCDATA2<>+0x018(SB)/8, $0x7fff7fff7fff7fff |
| GLOBL LCDATA2<>(SB), 8, $32 |
| |
| // ·_int16_max_min_sse4 scans the int16 elements starting at `values` and stores |
| // the smallest through `minout` and the largest through `maxout`. |
| // |
| // Stack arguments (32 bytes): values+0, length+8, minout+16, maxout+24. |
| // Register roles: DI = values, R9 = element count, R11 = index of the first |
| // element not handled by the vector code, SI = running min, R8W = running max, |
| // BP = address of LCDATA2. |
| // |
| // Only the low 32 bits of `length` are examined, as a signed value. When that |
| // value is <= 0 the sentinels 32767 (min) and -32768 (max) are stored. |
| // |
| // The frame size is 8 rather than 0 because the body overwrites BP: a non-zero |
| // frame makes the assembler emit a prologue/epilogue that saves and restores |
| // the caller's frame pointer. |
| TEXT ·_int16_max_min_sse4(SB), $8-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| LEAQ LCDATA2<>(SB), BP |
| |
| WORD $0xf685 // test esi, esi |
| JLE LBB2_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi |
| WORD $0xfe83; BYTE $0x0f // cmp esi, 15 |
| JA LBB2_4 |
| // Fewer than 16 elements: scalar loop over everything, starting from the sentinels. |
| LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 |
| LONG $0x7fffbe66 // mov si, 32767 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d |
| JMP LBB2_11 |
| |
| // Nothing to scan: store the sentinels. |
| LBB2_1: |
| LONG $0x7fffbe66 // mov si, 32767 |
| LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 |
| JMP LBB2_12 |
| |
| // Vector path. r11 = count rounded down to a multiple of 16; r8 = number of |
| // 16-element chunks; r10 = -(r8 & -2) counts the chunks consumed two at a time. |
| LBB2_4: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xf0e38341 // and r11d, -16 |
| LONG $0xf0438d49 // lea rax, [r11 - 16] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x04e8c149 // shr r8, 4 |
| LONG $0x01c08349 // add r8, 1 |
| WORD $0x8548; BYTE $0xc0 // test rax, rax |
| JE LBB2_5 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| |
| // Main loop: 32 elements (64 bytes) per iteration; rax is an element index. |
| // xmm0/xmm2 accumulate minima, xmm1/xmm3 maxima. |
| LBB2_7: |
| LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] |
| LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] |
| LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48] |
| LONG $0xc4ea0f66 // pminsw xmm0, xmm4 |
| LONG $0xd5ea0f66 // pminsw xmm2, xmm5 |
| LONG $0xccee0f66 // pmaxsw xmm1, xmm4 |
| LONG $0xddee0f66 // pmaxsw xmm3, xmm5 |
| LONG $0xc6ea0f66 // pminsw xmm0, xmm6 |
| LONG $0xd7ea0f66 // pminsw xmm2, xmm7 |
| LONG $0xceee0f66 // pmaxsw xmm1, xmm6 |
| LONG $0xdfee0f66 // pmaxsw xmm3, xmm7 |
| LONG $0x20c08348 // add rax, 32 |
| LONG $0x02c28349 // add r10, 2 |
| JNE LBB2_7 |
| LONG $0x01c0f641 // test r8b, 1 |
| JE LBB2_10 |
| |
| // Odd chunk count: fold in one more 16-element chunk. |
| LBB2_9: |
| LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] |
| LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] |
| LONG $0xddee0f66 // pmaxsw xmm3, xmm5 |
| LONG $0xccee0f66 // pmaxsw xmm1, xmm4 |
| LONG $0xd5ea0f66 // pminsw xmm2, xmm5 |
| LONG $0xc4ea0f66 // pminsw xmm0, xmm4 |
| |
| // Horizontal reduction. XOR with 0x7fff turns "signed max" into "unsigned min" |
| // and XOR with 0x8000 turns "signed min" into "unsigned min", so phminposuw can |
| // pick the answer; the scalar XOR afterwards undoes the mapping. |
| LBB2_10: |
| LONG $0xc2ea0f66 // pminsw xmm0, xmm2 |
| LONG $0xcbee0f66 // pmaxsw xmm1, xmm3 |
| LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI2_1] */ |
| LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1 |
| LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1 |
| LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767 |
| LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI2_0] */ |
| LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0 |
| LONG $0xc67e0f66 // movd esi, xmm0 |
| LONG $0x8000f681; WORD $0x0000 // xor esi, 32768 |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 |
| JE LBB2_12 |
| |
| // Scalar loop over elements r11..r9-1 (the tail, or the whole short input). |
| LBB2_11: |
| LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11] |
| WORD $0x3966; BYTE $0xc6 // cmp si, ax |
| WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax |
| LONG $0xc0394166 // cmp r8w, ax |
| LONG $0xc04c0f44 // cmovl r8d, eax |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB2_11 |
| |
| // Store: max through maxout (rcx), min through minout (rdx). |
| LBB2_12: |
| LONG $0x01894466 // mov word [rcx], r8w |
| WORD $0x8966; BYTE $0x32 // mov word [rdx], si |
| RET |
| |
| // Exactly one 16-element chunk: initialise the accumulators and skip the main loop. |
| LBB2_5: |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| LONG $0x01c0f641 // test r8b, 1 |
| JNE LBB2_9 |
| JMP LBB2_10 |
| |
| TEXT ·_uint16_max_min_sse4(SB), $0-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| |
| WORD $0xf685 // test esi, esi -- uint16 min/max of the words at values; only the low 32 bits of length are tested, as a signed value |
| JLE LBB3_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi -- r9 = element count |
| WORD $0xfe83; BYTE $0x0f // cmp esi, 15 -- more than 15 elements takes the vector path at LBB3_4 |
| JA LBB3_4 |
| LONG $0xffb84166; BYTE $0xff // mov r8w, -1 -- running min starts at 65535 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d -- r11 = index of the next element for the scalar loop |
| WORD $0xf631 // xor esi, esi -- running max starts at 0 |
| JMP LBB3_11 |
| |
| LBB3_1: |
| LONG $0xffb84166; BYTE $0xff // mov r8w, -1 -- count <= 0: min sentinel 65535 |
| WORD $0xf631 // xor esi, esi -- count <= 0: max sentinel 0 |
| JMP LBB3_12 |
| |
| LBB3_4: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xf0e38341 // and r11d, -16 -- r11 = count rounded down to a multiple of 16 (elements handled by vector code) |
| LONG $0xf0438d49 // lea rax, [r11 - 16] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x04e8c149 // shr r8, 4 |
| LONG $0x01c08349 // add r8, 1 -- r8 = number of 16-element chunks |
| WORD $0x8548; BYTE $0xc0 // test rax, rax -- zero means exactly one chunk, handled via LBB3_5 |
| JE LBB3_5 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 -- r10 = -(even part of the chunk count); the loop adds 2 until it reaches zero |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- max accumulator A = all zero |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 -- min accumulator A = all ones |
| WORD $0xc031 // xor eax, eax -- rax = element index into values |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 -- min accumulator B = all ones |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 -- max accumulator B = all zero |
| |
| LBB3_7: |
| LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] -- main loop: four unaligned 16-byte loads per iteration |
| LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] |
| LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48] |
| LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4 |
| LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5 |
| LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4 |
| LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5 |
| LONG $0x3a380f66; BYTE $0xc6 // pminuw xmm0, xmm6 |
| LONG $0x3a380f66; BYTE $0xd7 // pminuw xmm2, xmm7 |
| LONG $0x3e380f66; BYTE $0xce // pmaxuw xmm1, xmm6 |
| LONG $0x3e380f66; BYTE $0xdf // pmaxuw xmm3, xmm7 |
| LONG $0x20c08348 // add rax, 32 -- advance by the 32 elements just consumed |
| LONG $0x02c28349 // add r10, 2 -- two chunks done; sets ZF when the paired chunks are exhausted |
| JNE LBB3_7 |
| LONG $0x01c0f641 // test r8b, 1 -- odd chunk count leaves one more chunk for LBB3_9 |
| JE LBB3_10 |
| |
| LBB3_9: |
| LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] -- leftover single 16-element chunk |
| LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] |
| LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5 |
| LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4 |
| LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5 |
| LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4 |
| |
| LBB3_10: |
| LONG $0x3a380f66; BYTE $0xc2 // pminuw xmm0, xmm2 -- merge the two min accumulators |
| LONG $0x3e380f66; BYTE $0xcb // pmaxuw xmm1, xmm3 -- merge the two max accumulators |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 -- all ones |
| LONG $0xd1ef0f66 // pxor xmm2, xmm1 -- complement the maxima so the largest word becomes the smallest |
| LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2 -- smallest complemented word lands in the low word |
| LONG $0xce7e0f66 // movd esi, xmm1 |
| WORD $0xd6f7 // not esi -- undo the complement: si = max of the vector part |
| LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0 -- smallest word of the minima |
| LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 -- r8w = min of the vector part |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 -- equal means there are no tail elements left |
| JE LBB3_12 |
| |
| LBB3_11: |
| LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11] -- scalar loop: eax = values[r11] |
| LONG $0xc0394166 // cmp r8w, ax |
| LONG $0xc0430f44 // cmovae r8d, eax -- min = element when min >= element (unsigned) |
| WORD $0x3966; BYTE $0xc6 // cmp si, ax |
| WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax -- max = element when max <= element (unsigned) |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB3_11 |
| |
| LBB3_12: |
| WORD $0x8966; BYTE $0x31 // mov word [rcx], si -- store max through maxout |
| LONG $0x02894466 // mov word [rdx], r8w -- store min through minout |
| RET |
| |
| LBB3_5: |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- single-chunk case: same accumulator setup as before LBB3_7 |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 |
| LONG $0x01c0f641 // test r8b, 1 -- r8 is 1 on this path, so the chunk is processed at LBB3_9 |
| JNE LBB3_9 |
| JMP LBB3_10 |
| |
| // Constant table used only by ·_int32_max_min_sse4 (GLOBL flag 8 = RODATA): |
| // bytes 0..15 hold four 0x80000000 dwords (int32 minimum), the starting max; |
| // bytes 16..31 hold four 0x7fffffff dwords (int32 maximum), the starting min. |
| DATA LCDATA3<>+0x000(SB)/8, $0x8000000080000000 |
| DATA LCDATA3<>+0x008(SB)/8, $0x8000000080000000 |
| DATA LCDATA3<>+0x010(SB)/8, $0x7fffffff7fffffff |
| DATA LCDATA3<>+0x018(SB)/8, $0x7fffffff7fffffff |
| GLOBL LCDATA3<>(SB), 8, $32 |
| |
| // ·_int32_max_min_sse4 scans the int32 elements starting at `values` and stores |
| // the smallest through `minout` and the largest through `maxout`. |
| // |
| // Stack arguments (32 bytes): values+0, length+8, minout+16, maxout+24. |
| // Register roles: DI = values, R9 = element count, R11 = index of the first |
| // element not handled by the vector code, R8D = running min, EAX/ESI = running |
| // max, BP = address of LCDATA3. |
| // |
| // Only the low 32 bits of `length` are examined, as a signed value. When that |
| // value is <= 0 the sentinels 2147483647 (min) and -2147483648 (max) are stored. |
| // |
| // The frame size is 8 rather than 0 because the body overwrites BP: a non-zero |
| // frame makes the assembler emit a prologue/epilogue that saves and restores |
| // the caller's frame pointer. |
| TEXT ·_int32_max_min_sse4(SB), $8-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| LEAQ LCDATA3<>(SB), BP |
| |
| WORD $0xf685 // test esi, esi |
| JLE LBB4_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi |
| WORD $0xfe83; BYTE $0x07 // cmp esi, 7 |
| JA LBB4_6 |
| // Fewer than 8 elements: scalar loop over everything, starting from the sentinels. |
| LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 |
| LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d |
| JMP LBB4_4 |
| |
| // Nothing to scan: store the sentinels. |
| LBB4_1: |
| LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647 |
| LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 |
| JMP LBB4_13 |
| |
| // Vector path. r11 = count rounded down to a multiple of 8; r8 = number of |
| // 8-element chunks; r10 = -(r8 & -2) counts the chunks consumed two at a time. |
| LBB4_6: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xf8e38341 // and r11d, -8 |
| LONG $0xf8438d49 // lea rax, [r11 - 8] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x03e8c149 // shr r8, 3 |
| LONG $0x01c08349 // add r8, 1 |
| WORD $0x8548; BYTE $0xc0 // test rax, rax |
| JE LBB4_7 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| |
| // Main loop: 16 elements (64 bytes) per iteration; rax is an element index. |
| // xmm0/xmm2 accumulate minima, xmm1/xmm3 maxima. |
| LBB4_9: |
| LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] |
| LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] |
| LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48] |
| LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4 |
| LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5 |
| LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4 |
| LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5 |
| LONG $0x39380f66; BYTE $0xc6 // pminsd xmm0, xmm6 |
| LONG $0x39380f66; BYTE $0xd7 // pminsd xmm2, xmm7 |
| LONG $0x3d380f66; BYTE $0xce // pmaxsd xmm1, xmm6 |
| LONG $0x3d380f66; BYTE $0xdf // pmaxsd xmm3, xmm7 |
| LONG $0x10c08348 // add rax, 16 |
| LONG $0x02c28349 // add r10, 2 |
| JNE LBB4_9 |
| LONG $0x01c0f641 // test r8b, 1 |
| JE LBB4_12 |
| |
| // Odd chunk count: fold in one more 8-element chunk. |
| LBB4_11: |
| LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] |
| LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] |
| LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5 |
| LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4 |
| LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5 |
| LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4 |
| |
| // Horizontal reduction: pshufd 78 swaps the 64-bit halves, pshufd 229 brings |
| // dword 1 into lane 0; after the two folds lane 0 holds the overall result. |
| LBB4_12: |
| LONG $0x39380f66; BYTE $0xc2 // pminsd xmm0, xmm2 |
| LONG $0x3d380f66; BYTE $0xcb // pmaxsd xmm1, xmm3 |
| LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78 |
| LONG $0x3d380f66; BYTE $0xd1 // pmaxsd xmm2, xmm1 |
| LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229 |
| LONG $0x3d380f66; BYTE $0xca // pmaxsd xmm1, xmm2 |
| LONG $0xc87e0f66 // movd eax, xmm1 |
| LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78 |
| LONG $0x39380f66; BYTE $0xc8 // pminsd xmm1, xmm0 |
| LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229 |
| LONG $0x39380f66; BYTE $0xc1 // pminsd xmm0, xmm1 |
| LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 |
| JE LBB4_13 |
| |
| // Scalar loop entry: move the running max into esi, because eax is reused for |
| // the loaded element inside the loop. |
| LBB4_4: |
| WORD $0xc689 // mov esi, eax |
| |
| // Scalar loop over elements r11..r9-1; on exit eax holds the max. |
| LBB4_5: |
| LONG $0x9f048b42 // mov eax, dword [rdi + 4*r11] |
| WORD $0x3941; BYTE $0xc0 // cmp r8d, eax |
| LONG $0xc04f0f44 // cmovg r8d, eax |
| WORD $0xc639 // cmp esi, eax |
| WORD $0x4d0f; BYTE $0xc6 // cmovge eax, esi |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0xc689 // mov esi, eax |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB4_5 |
| |
| // Store: max through maxout (rcx), min through minout (rdx). |
| LBB4_13: |
| WORD $0x0189 // mov dword [rcx], eax |
| WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d |
| RET |
| |
| // Exactly one 8-element chunk: initialise the accumulators and skip the main loop. |
| LBB4_7: |
| LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */ |
| LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd06f0f66 // movdqa xmm2, xmm0 |
| LONG $0xd96f0f66 // movdqa xmm3, xmm1 |
| LONG $0x01c0f641 // test r8b, 1 |
| JNE LBB4_11 |
| JMP LBB4_12 |
| |
| TEXT ·_uint32_max_min_sse4(SB), $0-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| |
| WORD $0xf685 // test esi, esi -- uint32 min/max of the dwords at values; only the low 32 bits of length are tested, as a signed value |
| JLE LBB5_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi -- r9 = element count |
| WORD $0xfe83; BYTE $0x07 // cmp esi, 7 -- more than 7 elements takes the vector path at LBB5_6 |
| JA LBB5_6 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d -- r11 = index of the next element for the scalar loop |
| LONG $0xffffb841; WORD $0xffff // mov r8d, -1 -- running min starts at 0xffffffff |
| WORD $0xf631 // xor esi, esi -- running max starts at 0 |
| JMP LBB5_4 |
| |
| LBB5_1: |
| LONG $0xffffb841; WORD $0xffff // mov r8d, -1 -- count <= 0: min sentinel 0xffffffff |
| WORD $0xf631 // xor esi, esi -- count <= 0: max sentinel 0 |
| JMP LBB5_13 |
| |
| LBB5_6: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xf8e38341 // and r11d, -8 -- r11 = count rounded down to a multiple of 8 (elements handled by vector code) |
| LONG $0xf8438d49 // lea rax, [r11 - 8] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x03e8c149 // shr r8, 3 |
| LONG $0x01c08349 // add r8, 1 -- r8 = number of 8-element chunks |
| WORD $0x8548; BYTE $0xc0 // test rax, rax -- zero means exactly one chunk, handled via LBB5_7 |
| JE LBB5_7 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 -- r10 = -(even part of the chunk count); the loop adds 2 until it reaches zero |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- max accumulator A = all zero |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 -- min accumulator A = all ones |
| WORD $0xc031 // xor eax, eax -- rax = element index into values |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 -- min accumulator B = all ones |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 -- max accumulator B = all zero |
| |
| LBB5_9: |
| LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] -- main loop: four unaligned 16-byte loads per iteration |
| LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] |
| LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32] |
| LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48] |
| LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4 |
| LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5 |
| LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4 |
| LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5 |
| LONG $0x3b380f66; BYTE $0xc6 // pminud xmm0, xmm6 |
| LONG $0x3b380f66; BYTE $0xd7 // pminud xmm2, xmm7 |
| LONG $0x3f380f66; BYTE $0xce // pmaxud xmm1, xmm6 |
| LONG $0x3f380f66; BYTE $0xdf // pmaxud xmm3, xmm7 |
| LONG $0x10c08348 // add rax, 16 -- advance by the 16 elements just consumed |
| LONG $0x02c28349 // add r10, 2 -- two chunks done; sets ZF when the paired chunks are exhausted |
| JNE LBB5_9 |
| LONG $0x01c0f641 // test r8b, 1 -- odd chunk count leaves one more chunk for LBB5_11 |
| JE LBB5_12 |
| |
| LBB5_11: |
| LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] -- leftover single 8-element chunk |
| LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] |
| LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5 |
| LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4 |
| LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5 |
| LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4 |
| |
| LBB5_12: |
| LONG $0x3b380f66; BYTE $0xc2 // pminud xmm0, xmm2 -- merge the two min accumulators |
| LONG $0x3f380f66; BYTE $0xcb // pmaxud xmm1, xmm3 -- merge the two max accumulators |
| LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78 -- swap the 64-bit halves |
| LONG $0x3f380f66; BYTE $0xd1 // pmaxud xmm2, xmm1 |
| LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229 -- bring dword 1 into lane 0 |
| LONG $0x3f380f66; BYTE $0xca // pmaxud xmm1, xmm2 |
| LONG $0xce7e0f66 // movd esi, xmm1 -- esi = max of the vector part |
| LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78 -- same two-step fold for the minima |
| LONG $0x3b380f66; BYTE $0xc8 // pminud xmm1, xmm0 |
| LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229 |
| LONG $0x3b380f66; BYTE $0xc1 // pminud xmm0, xmm1 |
| LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 -- r8d = min of the vector part |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 -- equal means there are no tail elements left |
| JE LBB5_13 |
| |
| LBB5_4: |
| WORD $0xf089 // mov eax, esi -- keep the running max in eax; esi is reused for the loaded element |
| |
| LBB5_5: |
| LONG $0x9f348b42 // mov esi, dword [rdi + 4*r11] -- scalar loop: esi = values[r11] |
| WORD $0x3941; BYTE $0xf0 // cmp r8d, esi |
| LONG $0xc6430f44 // cmovae r8d, esi -- min = element when min >= element (unsigned) |
| WORD $0xf039 // cmp eax, esi |
| WORD $0x470f; BYTE $0xf0 // cmova esi, eax -- esi = larger of running max and element (unsigned) |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0xf089 // mov eax, esi -- carry the max into the next iteration |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB5_5 |
| |
| LBB5_13: |
| WORD $0x3189 // mov dword [rcx], esi -- store max through maxout |
| WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d -- store min through minout |
| RET |
| |
| LBB5_7: |
| LONG $0xc9ef0f66 // pxor xmm1, xmm1 -- single-chunk case: same accumulator setup as before LBB5_9 |
| LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 |
| WORD $0xc031 // xor eax, eax |
| LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 |
| LONG $0xdbef0f66 // pxor xmm3, xmm3 |
| LONG $0x01c0f641 // test r8b, 1 -- r8 is 1 on this path, so the chunk is processed at LBB5_11 |
| JNE LBB5_11 |
| JMP LBB5_12 |
| |
| // Constant table used only by ·_int64_max_min_sse4 (GLOBL flag 8 = RODATA): |
| // bytes 0..15 hold two 0x8000000000000000 qwords (int64 minimum), the starting max; |
| // bytes 16..31 hold two 0x7fffffffffffffff qwords (int64 maximum), the starting min. |
| DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000 |
| DATA LCDATA4<>+0x008(SB)/8, $0x8000000000000000 |
| DATA LCDATA4<>+0x010(SB)/8, $0x7fffffffffffffff |
| DATA LCDATA4<>+0x018(SB)/8, $0x7fffffffffffffff |
| GLOBL LCDATA4<>(SB), 8, $32 |
| |
| // ·_int64_max_min_sse4 scans the int64 elements starting at `values` and stores |
| // the smallest through `minout` and the largest through `maxout`. |
| // |
| // Stack arguments (32 bytes): values+0, length+8, minout+16, maxout+24. |
| // Register roles: DI = values, R9 = element count, R11 = index of the first |
| // element not handled by the vector code, R8 = running min, RSI/RAX = running |
| // max, BP = address of LCDATA4. |
| // |
| // Only the low 32 bits of `length` are examined, as a signed value. When that |
| // value is <= 0 the sentinels 9223372036854775807 (min) and |
| // -9223372036854775808 (max) are stored. |
| // |
| // There is no packed 64-bit min/max here, so every vector min/max is built from |
| // pcmpgtq (writes the mask to xmm0) followed by blendvpd (implicitly reads xmm0). |
| // |
| // The frame size is 8 rather than 0 because the body overwrites BP: a non-zero |
| // frame makes the assembler emit a prologue/epilogue that saves and restores |
| // the caller's frame pointer. |
| TEXT ·_int64_max_min_sse4(SB), $8-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| LEAQ LCDATA4<>(SB), BP |
| |
| QUAD $0xffffffffffffb849; WORD $0x7fff // mov r8, 9223372036854775807 |
| WORD $0xf685 // test esi, esi |
| JLE LBB6_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi |
| WORD $0xfe83; BYTE $0x03 // cmp esi, 3 |
| JA LBB6_6 |
| // Fewer than 4 elements: scalar loop over everything. r8 + 1 wraps to the |
| // int64 minimum, which is the starting max. |
| LONG $0x01708d49 // lea rsi, [r8 + 1] |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d |
| JMP LBB6_4 |
| |
| // Nothing to scan: store the sentinels. |
| LBB6_1: |
| LONG $0x01708d49 // lea rsi, [r8 + 1] |
| JMP LBB6_13 |
| |
| // Vector path. r11 = count rounded down to a multiple of 4; r8 = number of |
| // 4-element chunks; r10 = -(r8 & -2) counts the chunks consumed two at a time. |
| LBB6_6: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xfce38341 // and r11d, -4 |
| LONG $0xfc438d49 // lea rax, [r11 - 4] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x02e8c149 // shr r8, 2 |
| LONG $0x01c08349 // add r8, 1 |
| WORD $0x8548; BYTE $0xc0 // test rax, rax |
| JE LBB6_7 |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 |
| LONG $0x6f0f4466; WORD $0x004d // movdqa xmm9, oword 0[rbp] /* [rip + .LCPI6_0] */ |
| LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8 |
| LONG $0x6f0f4166; BYTE $0xf1 // movdqa xmm6, xmm9 |
| |
| // Main loop: 8 elements (64 bytes) per iteration; rax is an element index. |
| // Loop-carried minima live in xmm8/xmm2 and maxima in xmm9/xmm6; at the bottom |
| // the maxima are also left in xmm3/xmm4, which is where LBB6_11/LBB6_12 read them. |
| LBB6_9: |
| LONG $0x3c6f0ff3; BYTE $0xc7 // movdqu xmm7, oword [rdi + 8*rax] |
| LONG $0xc76f0f66 // movdqa xmm0, xmm7 |
| LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 |
| LONG $0xe76f0f66 // movdqa xmm4, xmm7 |
| LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0 |
| LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16] |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0xe96f0f66 // movdqa xmm5, xmm1 |
| LONG $0x15380f66; BYTE $0xea // blendvpd xmm5, xmm2, xmm0 |
| LONG $0x6f0f4166; BYTE $0xc1 // movdqa xmm0, xmm9 |
| LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7 |
| LONG $0x380f4166; WORD $0xf915 // blendvpd xmm7, xmm9, xmm0 |
| LONG $0xc66f0f66 // movdqa xmm0, xmm6 |
| LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 |
| LONG $0x15380f66; BYTE $0xce // blendvpd xmm1, xmm6, xmm0 |
| LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu xmm3, oword [rdi + 8*rax + 32] |
| LONG $0xc36f0f66 // movdqa xmm0, xmm3 |
| LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 |
| LONG $0x6f0f4466; BYTE $0xc3 // movdqa xmm8, xmm3 |
| LONG $0x380f4466; WORD $0xc415 // blendvpd xmm8, xmm4, xmm0 |
| LONG $0x646f0ff3; WORD $0x30c7 // movdqu xmm4, oword [rdi + 8*rax + 48] |
| LONG $0xc46f0f66 // movdqa xmm0, xmm4 |
| LONG $0x37380f66; BYTE $0xc5 // pcmpgtq xmm0, xmm5 |
| LONG $0xd46f0f66 // movdqa xmm2, xmm4 |
| LONG $0x15380f66; BYTE $0xd5 // blendvpd xmm2, xmm5, xmm0 |
| LONG $0xc7280f66 // movapd xmm0, xmm7 |
| LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 |
| LONG $0x15380f66; BYTE $0xdf // blendvpd xmm3, xmm7, xmm0 |
| LONG $0xc1280f66 // movapd xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 |
| LONG $0x15380f66; BYTE $0xe1 // blendvpd xmm4, xmm1, xmm0 |
| LONG $0x08c08348 // add rax, 8 |
| LONG $0x280f4466; BYTE $0xcb // movapd xmm9, xmm3 |
| LONG $0xf4280f66 // movapd xmm6, xmm4 |
| LONG $0x02c28349 // add r10, 2 |
| JNE LBB6_9 |
| LONG $0x01c0f641 // test r8b, 1 |
| JE LBB6_12 |
| |
| // Odd chunk count: fold in one more 4-element chunk. |
| // On entry xmm3/xmm4 hold the maxima and xmm8/xmm2 the minima; the results are |
| // written back to the same registers. |
| LBB6_11: |
| LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16] |
| LONG $0xc4280f66 // movapd xmm0, xmm4 |
| LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 |
| LONG $0xe96f0f66 // movdqa xmm5, xmm1 |
| LONG $0x15380f66; BYTE $0xec // blendvpd xmm5, xmm4, xmm0 |
| LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax] |
| LONG $0xc3280f66 // movapd xmm0, xmm3 |
| LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 |
| LONG $0xf46f0f66 // movdqa xmm6, xmm4 |
| LONG $0x15380f66; BYTE $0xf3 // blendvpd xmm6, xmm3, xmm0 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0 |
| LONG $0xc46f0f66 // movdqa xmm0, xmm4 |
| LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 |
| LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0 |
| LONG $0x280f4466; BYTE $0xc4 // movapd xmm8, xmm4 |
| LONG $0xd1280f66 // movapd xmm2, xmm1 |
| LONG $0xde280f66 // movapd xmm3, xmm6 |
| LONG $0xe5280f66 // movapd xmm4, xmm5 |
| |
| // Horizontal reduction: merge the two max accumulators, then compare against a |
| // copy with the 64-bit halves swapped (pshufd 78) and take lane 0; do the same |
| // for the minima. rsi = max, r8 = min. |
| LBB6_12: |
| LONG $0xc3280f66 // movapd xmm0, xmm3 |
| LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 |
| LONG $0x15380f66; BYTE $0xe3 // blendvpd xmm4, xmm3, xmm0 |
| LONG $0xcc700f66; BYTE $0x4e // pshufd xmm1, xmm4, 78 |
| LONG $0xc46f0f66 // movdqa xmm0, xmm4 |
| LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 |
| LONG $0x15380f66; BYTE $0xcc // blendvpd xmm1, xmm4, xmm0 |
| LONG $0x7e0f4866; BYTE $0xce // movq rsi, xmm1 |
| LONG $0xc26f0f66 // movdqa xmm0, xmm2 |
| LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 |
| LONG $0x380f4166; WORD $0xd015 // blendvpd xmm2, xmm8, xmm0 |
| LONG $0xca700f66; BYTE $0x4e // pshufd xmm1, xmm2, 78 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0 |
| LONG $0x7e0f4966; BYTE $0xc8 // movq r8, xmm1 |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 |
| JE LBB6_13 |
| |
| // Scalar loop entry: move the running max into rax, because rsi is reused for |
| // the loaded element inside the loop. |
| LBB6_4: |
| WORD $0x8948; BYTE $0xf0 // mov rax, rsi |
| |
| // Scalar loop over elements r11..r9-1; on exit rsi holds the max. |
| LBB6_5: |
| LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11] |
| WORD $0x3949; BYTE $0xf0 // cmp r8, rsi |
| LONG $0xc64f0f4c // cmovg r8, rsi |
| WORD $0x3948; BYTE $0xf0 // cmp rax, rsi |
| LONG $0xf04d0f48 // cmovge rsi, rax |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x8948; BYTE $0xf0 // mov rax, rsi |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB6_5 |
| |
| // Store: max through maxout (rcx), min through minout (rdx). |
| LBB6_13: |
| WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi |
| WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8 |
| RET |
| |
| // Exactly one 4-element chunk: load the starting max into xmm3/xmm4 and the |
| // starting min into xmm8/xmm2 (the registers LBB6_11 expects) and skip the main loop. |
| LBB6_7: |
| LONG $0x5d280f66; BYTE $0x00 // movapd xmm3, oword 0[rbp] /* [rip + .LCPI6_0] */ |
| LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */ |
| WORD $0xc031 // xor eax, eax |
| LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8 |
| LONG $0xe3280f66 // movapd xmm4, xmm3 |
| LONG $0x01c0f641 // test r8b, 1 |
| JNE LBB6_11 |
| JMP LBB6_12 |
| |
| // LCDATA5: the sign bit (0x8000000000000000) replicated in both 64-bit lanes. |
| // The function below XORs it into its operands before every PCMPGTQ so that |
| // the signed 64-bit compare orders the values as unsigned integers. |
| DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 |
| DATA LCDATA5<>+0x008(SB)/8, $0x8000000000000000 |
| GLOBL LCDATA5<>(SB), 8, $16 |
| |
| // _uint64_max_min_sse4(values, length, minout, maxout) |
| // |
| // Scans `length` unsigned 64-bit integers starting at `values` and stores the |
| // smallest one at *minout and the largest one at *maxout. |
| // |
| // Arguments (stack offsets from FP): |
| //   values+0   address of the first element |
| //   length+8   element count; only the low 32 bits are examined and they are |
| //              interpreted as a signed value |
| //   minout+16  address receiving the minimum (8 bytes) |
| //   maxout+24  address receiving the maximum (8 bytes) |
| // |
| // When the (32-bit, signed) length is <= 0 the function stores |
| // 0xFFFFFFFFFFFFFFFF at *minout and 0 at *maxout. |
| // |
| // Register roles: |
| //   DI  = values            R9  = length, zero-extended from 32 bits |
| //   DX  = minout            R11 = number of elements covered by the vector code |
| //   CX  = maxout            R8  = group counter, later the running minimum |
| //   BP  = &LCDATA5          AX/SI = vector index, later the running maximum |
| //   X8  = sign-bit mask     X10/X11 = minimum accumulators |
| //   X9/X13 and X12/X6 = maximum accumulators |
| // |
| // Uses PCMPGTQ (SSE4.2) and BLENDVPD (SSE4.1). |
| // |
| // Frame size: the body uses BP as the base register for LCDATA5. BP is the |
| // callee-saved frame pointer on amd64, and the assembler only saves/restores |
| // it for functions that declare a non-zero frame. The frame is therefore $8 |
| // (not $0) so that the caller's BP is intact again after RET. The raw-encoded |
| // instructions never touch SP, so the extra frame does not affect them. |
| // NOTE: this file is generated by c2goasm; re-generating it will bring back |
| // the frameless $0-32 declaration unless the generator output is patched. |
| TEXT ·_uint64_max_min_sse4(SB), $8-32 |
| |
| MOVQ values+0(FP), DI |
| MOVQ length+8(FP), SI |
| MOVQ minout+16(FP), DX |
| MOVQ maxout+24(FP), CX |
| LEAQ LCDATA5<>(SB), BP |
| |
| // length <= 0 (as int32): nothing to scan. |
| WORD $0xf685 // test esi, esi |
| JLE LBB7_1 |
| WORD $0x8941; BYTE $0xf1 // mov r9d, esi |
| // Fewer than 4 elements: skip the vector code, start the scalar loop at |
| // index 0 with min = all ones and max = 0. |
| WORD $0xfe83; BYTE $0x03 // cmp esi, 3 |
| JA LBB7_6 |
| LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1 |
| WORD $0x3145; BYTE $0xdb // xor r11d, r11d |
| WORD $0xc031 // xor eax, eax |
| JMP LBB7_4 |
| |
| // Empty input: store the sentinel values min = all ones, max = 0. |
| LBB7_1: |
| LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1 |
| WORD $0xc031 // xor eax, eax |
| JMP LBB7_13 |
| |
| // Vector setup. R11 = length rounded down to a multiple of 4, |
| // R8 = number of 4-element groups. With exactly one group (rax == 0) |
| // the unrolled loop is skipped via LBB7_7. |
| LBB7_6: |
| WORD $0x8945; BYTE $0xcb // mov r11d, r9d |
| LONG $0xfce38341 // and r11d, -4 |
| LONG $0xfc438d49 // lea rax, [r11 - 4] |
| WORD $0x8949; BYTE $0xc0 // mov r8, rax |
| LONG $0x02e8c149 // shr r8, 2 |
| LONG $0x01c08349 // add r8, 1 |
| WORD $0x8548; BYTE $0xc0 // test rax, rax |
| JE LBB7_7 |
| // R10 = -(group count rounded down to even); it is stepped by 2 towards 0. |
| // Maximum accumulators start at 0, minimum accumulators at all ones. |
| WORD $0x894d; BYTE $0xc2 // mov r10, r8 |
| LONG $0xfee28349 // and r10, -2 |
| WORD $0xf749; BYTE $0xda // neg r10 |
| LONG $0xef0f4566; BYTE $0xc9 // pxor xmm9, xmm9 |
| LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10 |
| WORD $0xc031 // xor eax, eax |
| LONG $0x6f0f4466; WORD $0x0045 // movdqa xmm8, oword 0[rbp] /* [rip + .LCPI7_0] */ |
| LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11 |
| LONG $0xef0f4566; BYTE $0xe4 // pxor xmm12, xmm12 |
| |
| // Main loop: two 4-element groups (8 elements, 64 bytes) per iteration. |
| // Each compare is done on sign-flipped copies (XOR with xmm8); the blend |
| // then selects from the unmodified values. |
| LBB7_9: |
| LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10 |
| LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8 |
| LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax] |
| LONG $0x6c6f0ff3; WORD $0x10c7 // movdqu xmm5, oword [rdi + 8*rax + 16] |
| LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu xmm13, oword [rdi + 8*rax + 32] |
| LONG $0xc46f0f66 // movdqa xmm0, xmm4 |
| LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 |
| LONG $0x6f0f4166; BYTE $0xc9 // movdqa xmm1, xmm9 |
| LONG $0xef0f4166; BYTE $0xc8 // pxor xmm1, xmm8 |
| LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0xdc6f0f66 // movdqa xmm3, xmm4 |
| LONG $0x380f4166; WORD $0xda15 // blendvpd xmm3, xmm10, xmm0 |
| LONG $0x746f0ff3; WORD $0x30c7 // movdqu xmm6, oword [rdi + 8*rax + 48] |
| LONG $0x6f0f4166; BYTE $0xfb // movdqa xmm7, xmm11 |
| LONG $0xef0f4166; BYTE $0xf8 // pxor xmm7, xmm8 |
| LONG $0xc56f0f66 // movdqa xmm0, xmm5 |
| LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 |
| LONG $0x6f0f4166; BYTE $0xd4 // movdqa xmm2, xmm12 |
| LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8 |
| LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0 |
| LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7 |
| LONG $0xfd6f0f66 // movdqa xmm7, xmm5 |
| LONG $0x380f4166; WORD $0xfb15 // blendvpd xmm7, xmm11, xmm0 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x380f4166; WORD $0xe115 // blendvpd xmm4, xmm9, xmm0 |
| LONG $0xc26f0f66 // movdqa xmm0, xmm2 |
| LONG $0x380f4166; WORD $0xec15 // blendvpd xmm5, xmm12, xmm0 |
| LONG $0xd3280f66 // movapd xmm2, xmm3 |
| LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8 |
| LONG $0x6f0f4166; BYTE $0xc5 // movdqa xmm0, xmm13 |
| LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 |
| LONG $0xcc280f66 // movapd xmm1, xmm4 |
| LONG $0x570f4166; BYTE $0xc8 // xorpd xmm1, xmm8 |
| LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0x6f0f4566; BYTE $0xd5 // movdqa xmm10, xmm13 |
| LONG $0x380f4466; WORD $0xd315 // blendvpd xmm10, xmm3, xmm0 |
| LONG $0xdf280f66 // movapd xmm3, xmm7 |
| LONG $0x570f4166; BYTE $0xd8 // xorpd xmm3, xmm8 |
| LONG $0xc66f0f66 // movdqa xmm0, xmm6 |
| LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 |
| LONG $0xd5280f66 // movapd xmm2, xmm5 |
| LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8 |
| LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0 |
| LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 |
| LONG $0x6f0f4466; BYTE $0xde // movdqa xmm11, xmm6 |
| LONG $0x380f4466; WORD $0xdf15 // blendvpd xmm11, xmm7, xmm0 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x380f4466; WORD $0xec15 // blendvpd xmm13, xmm4, xmm0 |
| LONG $0xc26f0f66 // movdqa xmm0, xmm2 |
| LONG $0x15380f66; BYTE $0xf5 // blendvpd xmm6, xmm5, xmm0 |
| LONG $0x08c08348 // add rax, 8 |
| LONG $0x280f4566; BYTE $0xcd // movapd xmm9, xmm13 |
| LONG $0x280f4466; BYTE $0xe6 // movapd xmm12, xmm6 |
| LONG $0x02c28349 // add r10, 2 |
| JNE LBB7_9 |
| // An odd group count leaves one more 4-element group to fold in. |
| LONG $0x01c0f641 // test r8b, 1 |
| JE LBB7_12 |
| |
| // Fold one remaining 4-element group into the accumulators |
| // (max in xmm13/xmm6, min in xmm10/xmm11). |
| LBB7_11: |
| LONG $0x24100f66; BYTE $0xc7 // movupd xmm4, oword [rdi + 8*rax] |
| LONG $0x5c100f66; WORD $0x10c7 // movupd xmm3, oword [rdi + 8*rax + 16] |
| LONG $0x6d280f66; BYTE $0x00 // movapd xmm5, oword 0[rbp] /* [rip + .LCPI7_0] */ |
| LONG $0xc6280f66 // movapd xmm0, xmm6 |
| LONG $0xc5570f66 // xorpd xmm0, xmm5 |
| LONG $0xcb280f66 // movapd xmm1, xmm3 |
| LONG $0xcd570f66 // xorpd xmm1, xmm5 |
| LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 |
| LONG $0xfb280f66 // movapd xmm7, xmm3 |
| LONG $0x15380f66; BYTE $0xfe // blendvpd xmm7, xmm6, xmm0 |
| LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13 |
| LONG $0xc5570f66 // xorpd xmm0, xmm5 |
| LONG $0xd4280f66 // movapd xmm2, xmm4 |
| LONG $0xd5570f66 // xorpd xmm2, xmm5 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0xf4280f66 // movapd xmm6, xmm4 |
| LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0 |
| LONG $0x280f4166; BYTE $0xc3 // movapd xmm0, xmm11 |
| LONG $0xc5570f66 // xorpd xmm0, xmm5 |
| LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x380f4166; WORD $0xdb15 // blendvpd xmm3, xmm11, xmm0 |
| LONG $0x570f4166; BYTE $0xea // xorpd xmm5, xmm10 |
| LONG $0x37380f66; BYTE $0xd5 // pcmpgtq xmm2, xmm5 |
| LONG $0xc26f0f66 // movdqa xmm0, xmm2 |
| LONG $0x380f4166; WORD $0xe215 // blendvpd xmm4, xmm10, xmm0 |
| LONG $0x280f4466; BYTE $0xd4 // movapd xmm10, xmm4 |
| LONG $0x280f4466; BYTE $0xdb // movapd xmm11, xmm3 |
| LONG $0x280f4466; BYTE $0xee // movapd xmm13, xmm6 |
| LONG $0xf7280f66 // movapd xmm6, xmm7 |
| |
| // Horizontal reduction: merge the two maximum accumulators and their two |
| // lanes into RAX, then the two minimum accumulators into R8. |
| LBB7_12: |
| LONG $0x4d280f66; BYTE $0x00 // movapd xmm1, oword 0[rbp] /* [rip + .LCPI7_0] */ |
| LONG $0xd6280f66 // movapd xmm2, xmm6 |
| LONG $0xd1570f66 // xorpd xmm2, xmm1 |
| LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13 |
| LONG $0xc1570f66 // xorpd xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0 |
| LONG $0xd6700f66; BYTE $0x4e // pshufd xmm2, xmm6, 78 |
| LONG $0xc6280f66 // movapd xmm0, xmm6 |
| LONG $0xc1570f66 // xorpd xmm0, xmm1 |
| LONG $0xda6f0f66 // movdqa xmm3, xmm2 |
| LONG $0xd9ef0f66 // pxor xmm3, xmm1 |
| LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 |
| LONG $0x15380f66; BYTE $0xd6 // blendvpd xmm2, xmm6, xmm0 |
| LONG $0x7e0f4866; BYTE $0xd0 // movq rax, xmm2 |
| LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10 |
| LONG $0xd1ef0f66 // pxor xmm2, xmm1 |
| LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11 |
| LONG $0xc1ef0f66 // pxor xmm0, xmm1 |
| LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 |
| LONG $0x380f4566; WORD $0xda15 // blendvpd xmm11, xmm10, xmm0 |
| LONG $0x700f4166; WORD $0x4ed3 // pshufd xmm2, xmm11, 78 |
| LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11 |
| LONG $0xc1ef0f66 // pxor xmm0, xmm1 |
| LONG $0xcaef0f66 // pxor xmm1, xmm2 |
| LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 |
| LONG $0xc16f0f66 // movdqa xmm0, xmm1 |
| LONG $0x380f4166; WORD $0xd315 // blendvpd xmm2, xmm11, xmm0 |
| LONG $0x7e0f4966; BYTE $0xd0 // movq r8, xmm2 |
| // Done if the vector code covered every element (length multiple of 4). |
| WORD $0x394d; BYTE $0xcb // cmp r11, r9 |
| JE LBB7_13 |
| |
| // Scalar tail over elements [R11, R9): R8 = running min, RSI/RAX = running max. |
| LBB7_4: |
| WORD $0x8948; BYTE $0xc6 // mov rsi, rax |
| |
| LBB7_5: |
| LONG $0xdf048b4a // mov rax, qword [rdi + 8*r11] |
| WORD $0x3949; BYTE $0xc0 // cmp r8, rax |
| LONG $0xc0430f4c // cmovae r8, rax |
| WORD $0x3948; BYTE $0xc6 // cmp rsi, rax |
| LONG $0xc6470f48 // cmova rax, rsi |
| LONG $0x01c38349 // add r11, 1 |
| WORD $0x8948; BYTE $0xc6 // mov rsi, rax |
| WORD $0x394d; BYTE $0xd9 // cmp r9, r11 |
| JNE LBB7_5 |
| |
| // Store the results: *maxout = RAX, *minout = R8. |
| LBB7_13: |
| WORD $0x8948; BYTE $0x01 // mov qword [rcx], rax |
| WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8 |
| RET |
| |
| // Exactly one 4-element group (R8 == 1 here): initialise the accumulators |
| // and let LBB7_11 process that group. |
| LBB7_7: |
| LONG $0x570f4566; BYTE $0xed // xorpd xmm13, xmm13 |
| LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10 |
| WORD $0xc031 // xor eax, eax |
| LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11 |
| LONG $0xf6570f66 // xorpd xmm6, xmm6 |
| LONG $0x01c0f641 // test r8b, 1 |
| JNE LBB7_11 |
| JMP LBB7_12 |