/* Fallback definitions for compiler builtins and SIMD intrinsics that some toolchains lack. */
#ifndef FALLBACK_BUILTINS_H
#define FALLBACK_BUILTINS_H
#if defined(_MSC_VER) && !defined(__clang__)
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64)
#include <intrin.h>
#ifdef X86_FEATURES
# include "arch/x86/x86_features.h"
#endif
/* MSVC fallback for __builtin_ctz: counts the trailing zero bits of value.
 * This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0.
 * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward
 * is not checked.
 */
static __forceinline unsigned long __builtin_ctz(uint32_t value) {
#if defined(X86_FEATURES) && defined(X86_NOCHECK_TZCNT)
    /* TZCNT is used without a runtime check, so it is the only path. The BSF fallback is
     * deliberately not compiled here: it would be unreachable code after the return. */
    return _tzcnt_u32(value);
#else
    unsigned long trailing_zero;
#  ifdef X86_FEATURES
    /* x86_cpu_has_tzcnt comes from x86_features.h; presumably a runtime CPU feature flag. */
    if (x86_cpu_has_tzcnt) {
        return _tzcnt_u32(value);
    }
#  endif
    _BitScanForward(&trailing_zero, value);
    return trailing_zero;
#endif
}
#define HAVE_BUILTIN_CTZ
#ifdef _M_AMD64
/* MSVC (AMD64) fallback for __builtin_ctzll: counts the trailing zero bits of a 64-bit value.
 * This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0.
 * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward64
 * is not checked.
 */
static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
#if defined(X86_FEATURES) && defined(X86_NOCHECK_TZCNT)
    /* TZCNT is used without a runtime check, so it is the only path. The BSF fallback is
     * deliberately not compiled here: it would be unreachable code after the return. */
    return _tzcnt_u64(value);
#else
    unsigned long trailing_zero;
#  ifdef X86_FEATURES
    /* x86_cpu_has_tzcnt comes from x86_features.h; presumably a runtime CPU feature flag. */
    if (x86_cpu_has_tzcnt) {
        return _tzcnt_u64(value);
    }
#  endif
    _BitScanForward64(&trailing_zero, value);
    return trailing_zero;
#endif
}
#define HAVE_BUILTIN_CTZLL
#endif // Microsoft AMD64
#endif // Microsoft AMD64/IA64/x86/ARM/ARM64 test
#endif // _MSC_VER & !clang
/* GCC did not provide the _mm256_zextsi128_si256 / _mm512_zextsi128_si512
* zero-extension intrinsics until version 10.
* Similarly, AppleClang did not provide them in Xcode 9.2 but did in 9.3.
*/
#ifdef __AVX2__
#include <immintrin.h>
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \
|| (defined(__apple_build_version__) && __apple_build_version__ < 9020039)
/* Fallback for toolchains whose headers lack _mm256_zextsi128_si256.
* Moves a into r with vmovdqa, then reinterprets r as a 256-bit vector.
* NOTE(review): the cast alone does not define the upper bits; this relies on the
* VEX-encoded 128-bit move clearing the upper part of the destination register,
* which is what makes the result a zero extension -- confirm against the Intel SDM.
*/
static inline __m256i _mm256_zextsi128_si256(__m128i a) {
__m128i r;
/* Do not simplify: the instruction choice and operand constraints are the whole point. */
__asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a));
return _mm256_castsi128_si256(r);
}
#ifdef __AVX512F__
/* Fallback for toolchains whose headers lack _mm512_zextsi128_si512.
* Moves a into r with vmovdqa, then reinterprets r as a 512-bit vector.
* NOTE(review): the cast alone does not define the upper bits; this relies on the
* VEX-encoded 128-bit move clearing the upper part of the destination register,
* which is what makes the result a zero extension -- confirm against the Intel SDM.
*/
static inline __m512i _mm512_zextsi128_si512(__m128i a) {
__m128i r;
/* Do not simplify: the instruction choice and operand constraints are the whole point. */
__asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a));
return _mm512_castsi128_si512(r);
}
#endif // __AVX512F__
#endif // gcc/AppleClang version test
#endif // __AVX2__
/* Missing zero-extension AVX and AVX512 intrinsics.
* Fixed in Microsoft Visual Studio 2017 version 15.7
* https://developercommunity.visualstudio.com/t/missing-zero-extension-avx-and-avx512-intrinsics/175737
*/
#if defined(_MSC_VER) && _MSC_VER < 1914
#ifdef __AVX2__
/* Zero-extends a 128-bit vector to 256 bits by inserting a at
 * position 0 of an all-zero 256-bit vector.
 */
static inline __m256i _mm256_zextsi128_si256(__m128i a) {
    const __m256i zeroed = _mm256_setzero_si256();
    return _mm256_inserti128_si256(zeroed, a, 0);
}
#endif // __AVX2__
#ifdef __AVX512F__
/* Zero-extends a 128-bit vector to 512 bits by inserting a at
 * position 0 of an all-zero 512-bit vector.
 */
static inline __m512i _mm512_zextsi128_si512(__m128i a) {
    const __m512i zeroed = _mm512_setzero_si512();
    return _mm512_inserti32x4(zeroed, a, 0);
}
#endif // __AVX512F__
#endif // defined(_MSC_VER) && _MSC_VER < 1914
#endif // include guard FALLBACK_BUILTINS_H