| /* x86_functions.h -- x86 implementations for arch-specific functions. |
| * Copyright (C) 2013 Intel Corporation Jim Kukunas |
| * For conditions of distribution and use, see copyright notice in zlib.h |
| */ |
| |
| #ifndef X86_FUNCTIONS_H_ |
| #define X86_FUNCTIONS_H_ |
| |
| #include "x86_natives.h" |
| |
| /* Compiler workaround: 32-bit MSVC builds whose _MSC_VER lies in 1920..1929 |
| * hit a stack-corruption bug in the compiler. The affected code is still |
| * compiled; NO_CHORBA_SSE only marks that it must not be assigned in the |
| * functable. |
| * Further context: |
| * https://developercommunity.visualstudio.com/t/Stack-corruption-with-v142-toolchain-whe/10853479 */ |
| #if defined(_MSC_VER) && defined(ARCH_32BIT) |
| # if (_MSC_VER >= 1920) && (_MSC_VER <= 1929) |
| # define NO_CHORBA_SSE |
| # endif |
| #endif |
| |
| /* SSE2 variants: these prototypes are visible only when X86_SSE2 is defined. */ |
| #if defined(X86_SSE2) |
| uint8_t *chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, size_t len, size_t left); |
| void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start); |
| uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); |
| uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match); |
| uint32_t longest_match_slow_sse2(deflate_state *const s, uint32_t cur_match); |
| void slide_hash_sse2(deflate_state *s); |
| |
| /* The Chorba CRC32 prototypes are omitted when either WITHOUT_CHORBA or |
| * WITHOUT_CHORBA_SSE is defined. */ |
| # if !(defined(WITHOUT_CHORBA) || defined(WITHOUT_CHORBA_SSE)) |
| uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len); |
| uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); |
| uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint8_t *buf, size_t len); |
| # endif |
| #endif /* X86_SSE2 */ |
| |
| /* When X86_SSE2_NATIVE is not defined, set the chunkset, compare256 and |
| * slide_hash *_FALLBACK markers. They are not consumed in this header; |
| * presumably other files use them to keep the generic code -- confirm. */ |
| #if !defined(X86_SSE2_NATIVE) |
| # define CHUNKSET_FALLBACK |
| # define COMPARE256_FALLBACK |
| # define SLIDE_HASH_FALLBACK |
| #endif /* !X86_SSE2_NATIVE */ |
| |
| /* SSSE3 variants: these prototypes are visible only when X86_SSSE3 is defined. */ |
| #if defined(X86_SSSE3) |
| uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); |
| uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
| void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); |
| uint8_t *chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, size_t len, size_t left); |
| #endif /* X86_SSSE3 */ |
| |
| /* When X86_SSSE3_NATIVE is not defined, set the ADLER32_FALLBACK marker |
| * (not consumed in this header). */ |
| #if !defined(X86_SSSE3_NATIVE) |
| # define ADLER32_FALLBACK |
| #endif /* !X86_SSSE3_NATIVE */ |
| |
| /* SSE4.1 Chorba CRC32 variants: declared when X86_SSE41 is defined and neither |
| * WITHOUT_CHORBA nor WITHOUT_CHORBA_SSE is. */ |
| #if defined(X86_SSE41) && !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE) |
| uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len); |
| uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_SSE41 && Chorba SSE enabled */ |
| |
| /* SSE4.2 variant: only an adler32 copy routine is declared in this block. */ |
| #if defined(X86_SSE42) |
| uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_SSE42 */ |
| |
| /* AVX2 variants: these prototypes are visible only when X86_AVX2 is defined. */ |
| #if defined(X86_AVX2) |
| uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); |
| uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
| uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); |
| uint8_t *chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, size_t len, size_t left); |
| void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start); |
| uint32_t longest_match_avx2(deflate_state *const s, uint32_t cur_match); |
| uint32_t longest_match_slow_avx2(deflate_state *const s, uint32_t cur_match); |
| void slide_hash_avx2(deflate_state *s); |
| #endif /* X86_AVX2 */ |
| /* AVX512 variants: visible only when X86_AVX512 is defined. No slide_hash |
| * prototype is declared in this block. */ |
| #if defined(X86_AVX512) |
| uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); |
| uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
| uint32_t compare256_avx512(const uint8_t *src0, const uint8_t *src1); |
| uint8_t *chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, size_t len, size_t left); |
| void inflate_fast_avx512(PREFIX3(stream) *strm, uint32_t start); |
| uint32_t longest_match_avx512(deflate_state *const s, uint32_t cur_match); |
| uint32_t longest_match_slow_avx512(deflate_state *const s, uint32_t cur_match); |
| #endif /* X86_AVX512 */ |
| /* AVX512-VNNI variants: only the adler32 routines are declared in this block. */ |
| #if defined(X86_AVX512VNNI) |
| uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); |
| uint32_t adler32_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_AVX512VNNI */ |
| |
| /* PCLMULQDQ CRC32 variants: declared when X86_PCLMULQDQ_CRC is defined. */ |
| #if defined(X86_PCLMULQDQ_CRC) |
| uint32_t crc32_pclmulqdq(uint32_t crc, const uint8_t *buf, size_t len); |
| uint32_t crc32_copy_pclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_PCLMULQDQ_CRC */ |
| /* VPCLMULQDQ (AVX2) CRC32 variants: declared when X86_VPCLMULQDQ_AVX2 is defined. */ |
| #if defined(X86_VPCLMULQDQ_AVX2) |
| uint32_t crc32_vpclmulqdq_avx2(uint32_t crc, const uint8_t *buf, size_t len); |
| uint32_t crc32_copy_vpclmulqdq_avx2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_VPCLMULQDQ_AVX2 */ |
| /* VPCLMULQDQ (AVX512) CRC32 variants: declared when X86_VPCLMULQDQ_AVX512 is defined. */ |
| #if defined(X86_VPCLMULQDQ_AVX512) |
| uint32_t crc32_vpclmulqdq_avx512(uint32_t crc, const uint8_t *buf, size_t len); |
| uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); |
| #endif /* X86_VPCLMULQDQ_AVX512 */ |
| |
| /* Set the CRC32_BRAID_FALLBACK marker unless X86_PCLMULQDQ_NATIVE or |
| * X86_VPCLMULQDQ_NATIVE is defined. |
| * NOTE(review): the static-dispatch section of this header tests |
| * X86_VPCLMULQDQ_AVX2_NATIVE / X86_VPCLMULQDQ_AVX512_NATIVE rather than |
| * X86_VPCLMULQDQ_NATIVE -- confirm which names x86_natives.h provides. */ |
| #if !(defined(X86_PCLMULQDQ_NATIVE) || defined(X86_VPCLMULQDQ_NATIVE)) |
| # define CRC32_BRAID_FALLBACK |
| #endif |
| |
| /* Static dispatch: when DISABLE_RUNTIME_CPU_DETECTION is defined, each native_* |
| * macro is rebound to the variant selected by the *_NATIVE feature macros |
| * (presumably provided by x86_natives.h -- confirm). Sections are evaluated top |
| * to bottom, so a later section overrides an earlier one that rebinds the same |
| * native_* macro. Do not reorder them. */ |
| #ifdef DISABLE_RUNTIME_CPU_DETECTION |
| // X86 - SSE2 |
| # ifdef X86_SSE2_NATIVE |
| # undef native_chunkmemset_safe |
| # define native_chunkmemset_safe chunkmemset_safe_sse2 |
| # undef native_compare256 |
| # define native_compare256 compare256_sse2 |
| # undef native_inflate_fast |
| # define native_inflate_fast inflate_fast_sse2 |
| # undef native_longest_match |
| # define native_longest_match longest_match_sse2 |
| # undef native_longest_match_slow |
| # define native_longest_match_slow longest_match_slow_sse2 |
| // NO_CHORBA_SSE marks compilers on which the Chorba SSE routines must not be |
| // selected; honor it here too so static dispatch keeps the previous binding. |
| # if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE) && !defined(NO_CHORBA_SSE) |
| # undef native_crc32 |
| # define native_crc32 crc32_chorba_sse2 |
| # undef native_crc32_copy |
| # define native_crc32_copy crc32_copy_chorba_sse2 |
| # endif |
| # undef native_slide_hash |
| # define native_slide_hash slide_hash_sse2 |
| # endif |
| // X86 - SSSE3 |
| # ifdef X86_SSSE3_NATIVE |
| # undef native_adler32 |
| # define native_adler32 adler32_ssse3 |
| # undef native_adler32_copy |
| # define native_adler32_copy adler32_copy_ssse3 |
| # undef native_chunkmemset_safe |
| # define native_chunkmemset_safe chunkmemset_safe_ssse3 |
| # undef native_inflate_fast |
| # define native_inflate_fast inflate_fast_ssse3 |
| # endif |
| // X86 - SSE4.1 |
| # if defined(X86_SSE41_NATIVE) |
| // Same NO_CHORBA_SSE exclusion as in the SSE2 section. |
| # if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE) && !defined(NO_CHORBA_SSE) |
| # undef native_crc32 |
| # define native_crc32 crc32_chorba_sse41 |
| # undef native_crc32_copy |
| # define native_crc32_copy crc32_copy_chorba_sse41 |
| # endif |
| # endif |
| // X86 - SSE4.2 |
| # ifdef X86_SSE42_NATIVE |
| # undef native_adler32_copy |
| # define native_adler32_copy adler32_copy_sse42 |
| # endif |
| // X86 - PCLMUL (overrides any Chorba CRC32 binding made above) |
| # ifdef X86_PCLMULQDQ_NATIVE |
| # undef native_crc32 |
| # define native_crc32 crc32_pclmulqdq |
| # undef native_crc32_copy |
| # define native_crc32_copy crc32_copy_pclmulqdq |
| # endif |
| // X86 - AVX2 |
| # ifdef X86_AVX2_NATIVE |
| # undef native_adler32 |
| # define native_adler32 adler32_avx2 |
| # undef native_adler32_copy |
| # define native_adler32_copy adler32_copy_avx2 |
| # undef native_chunkmemset_safe |
| # define native_chunkmemset_safe chunkmemset_safe_avx2 |
| # undef native_compare256 |
| # define native_compare256 compare256_avx2 |
| # undef native_inflate_fast |
| # define native_inflate_fast inflate_fast_avx2 |
| # undef native_longest_match |
| # define native_longest_match longest_match_avx2 |
| # undef native_longest_match_slow |
| # define native_longest_match_slow longest_match_slow_avx2 |
| # undef native_slide_hash |
| # define native_slide_hash slide_hash_avx2 |
| # endif |
| // X86 - AVX512 (F,DQ,BW,VL) |
| // native_slide_hash is not rebound here, so it keeps its earlier binding. |
| # ifdef X86_AVX512_NATIVE |
| # undef native_adler32 |
| # define native_adler32 adler32_avx512 |
| # undef native_adler32_copy |
| # define native_adler32_copy adler32_copy_avx512 |
| # undef native_chunkmemset_safe |
| # define native_chunkmemset_safe chunkmemset_safe_avx512 |
| # undef native_compare256 |
| # define native_compare256 compare256_avx512 |
| # undef native_inflate_fast |
| # define native_inflate_fast inflate_fast_avx512 |
| # undef native_longest_match |
| # define native_longest_match longest_match_avx512 |
| # undef native_longest_match_slow |
| # define native_longest_match_slow longest_match_slow_avx512 |
| // X86 - AVX512 (VNNI); only considered when X86_AVX512_NATIVE is also defined |
| # ifdef X86_AVX512VNNI_NATIVE |
| # undef native_adler32 |
| # define native_adler32 adler32_avx512_vnni |
| # undef native_adler32_copy |
| # define native_adler32_copy adler32_copy_avx512_vnni |
| # endif |
| # endif |
| // X86 - VPCLMULQDQ (the AVX512 variant takes precedence over the AVX2 variant) |
| # ifdef X86_VPCLMULQDQ_AVX512_NATIVE |
| # undef native_crc32 |
| # define native_crc32 crc32_vpclmulqdq_avx512 |
| # undef native_crc32_copy |
| # define native_crc32_copy crc32_copy_vpclmulqdq_avx512 |
| # elif defined(X86_VPCLMULQDQ_AVX2_NATIVE) |
| # undef native_crc32 |
| # define native_crc32 crc32_vpclmulqdq_avx2 |
| # undef native_crc32_copy |
| # define native_crc32_copy crc32_copy_vpclmulqdq_avx2 |
| # endif |
| #endif |
| |
| #endif /* X86_FUNCTIONS_H_ */ |