Add fallback defines to skip generic C code when native intrinsics exist

Each arch header now sets *_FALLBACK defines (ADLER32_FALLBACK,
CHUNKSET_FALLBACK, COMPARE256_FALLBACK, CRC32_BRAID_FALLBACK,
SLIDE_HASH_FALLBACK) when no native SIMD implementation exists.
CRC32_CHORBA_FALLBACK is derived from CRC32_BRAID_FALLBACK unless
WITHOUT_CHORBA is defined. Generic C source files, declarations,
functable entries, tests, and benchmarks are guarded by these defines.

GitOrigin-RevId: 15c7569f1b38c5bca1f4ea9fee4e2b57482c9290
Change-Id: Ic80a3f033d17b74234487a2fa307a87d8c8ce394
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 81d7b07..ffd3c5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1297,7 +1297,7 @@
     zutil.c
 )
 
-set(ZLIB_ALL_FALLBACK_SRCS
+set(ZLIB_GENERIC_SRCS
     arch/generic/adler32_c.c
     arch/generic/chunkset_c.c
     arch/generic/compare256_c.c
@@ -1306,16 +1306,6 @@
 )
 
 if(WITH_ALL_FALLBACKS)
-    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
-    add_definitions(-DWITH_ALL_FALLBACKS)
-elseif(BASEARCH_X86_FOUND AND ARCH_64BIT AND WITH_SSE2)
-    # x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
-    list(APPEND ZLIB_GENERIC_SRCS
-        arch/generic/adler32_c.c
-        arch/generic/crc32_braid_c.c
-    )
-else()
-    list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
     add_definitions(-DWITH_ALL_FALLBACKS)
 endif()
 
diff --git a/arch/arm/arm_functions.h b/arch/arm/arm_functions.h
index bc77adb..d5ebd19 100644
--- a/arch/arm/arm_functions.h
+++ b/arch/arm/arm_functions.h
@@ -18,15 +18,29 @@
 void slide_hash_neon(deflate_state *s);
 #endif
 
+#ifndef ARM_NEON_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  ifndef ARM_SIMD_NATIVE
+#    define SLIDE_HASH_FALLBACK
+#  endif
+#endif
+
 #ifdef ARM_CRC32
 uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
+
 #ifdef ARM_PMULL_EOR3
 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#if !defined(ARM_CRC32_NATIVE) && !defined(ARM_PMULL_EOR3_NATIVE)
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef ARM_SIMD
 void slide_hash_armv6(deflate_state *s);
 #endif
diff --git a/arch/generic/adler32_c.c b/arch/generic/adler32_c.c
index 84c946f..8abfcd2 100644
--- a/arch/generic/adler32_c.c
+++ b/arch/generic/adler32_c.c
@@ -4,6 +4,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef ADLER32_FALLBACK
+
 #include "functable.h"
 #include "adler32_p.h"
 
@@ -53,3 +57,5 @@
     memcpy(dst, src, len);
     return adler;
 }
+
+#endif /* ADLER32_FALLBACK */
diff --git a/arch/generic/chunkset_c.c b/arch/generic/chunkset_c.c
index ff9b1cb..5cc6697 100644
--- a/arch/generic/chunkset_c.c
+++ b/arch/generic/chunkset_c.c
@@ -3,6 +3,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef CHUNKSET_FALLBACK
+
 #include "zmemory.h"
 
 typedef uint64_t chunk_t;
@@ -38,3 +42,5 @@
 #define INFLATE_FAST     inflate_fast_c
 
 #include "inffast_tpl.h"
+
+#endif /* CHUNKSET_FALLBACK */
diff --git a/arch/generic/compare256_c.c b/arch/generic/compare256_c.c
index a2b4775..e0b0165 100644
--- a/arch/generic/compare256_c.c
+++ b/arch/generic/compare256_c.c
@@ -4,6 +4,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef COMPARE256_FALLBACK
+
 #include "zendian.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
@@ -64,7 +68,6 @@
 #  define COMPARE256 compare256_64_static
 #endif
 
-#ifdef WITH_ALL_FALLBACKS
 Z_INTERNAL uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
     return compare256_8_static(src0, src1);
 }
@@ -72,7 +75,6 @@
 Z_INTERNAL uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
     return compare256_64_static(src0, src1);
 }
-#endif
 
 Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
     return COMPARE256(src0, src1);
@@ -86,3 +88,5 @@
 #define LONGEST_MATCH_SLOW
 #define LONGEST_MATCH       longest_match_slow_c
 #include "match_tpl.h"
+
+#endif /* COMPARE256_FALLBACK */
diff --git a/arch/generic/crc32_braid_c.c b/arch/generic/crc32_braid_c.c
index bda4a24..1e83543 100644
--- a/arch/generic/crc32_braid_c.c
+++ b/arch/generic/crc32_braid_c.c
@@ -8,6 +8,11 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+/* Braid is used by the chorba fallback and by arch-specific implementations (s390 vx, risc-v zbc). */
+#ifdef CRC32_BRAID_FALLBACK
+
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include "crc32_p.h"
@@ -211,3 +216,5 @@
     memcpy(dst, src, len);
     return crc;
 }
+
+#endif /* CRC32_BRAID_FALLBACK */
diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c
index 9f8427b..ded968a 100644
--- a/arch/generic/crc32_chorba_c.c
+++ b/arch/generic/crc32_chorba_c.c
@@ -1,5 +1,8 @@
 #include "zbuild.h"
-#include "zendian.h"
+#include "arch_functions.h"
+
+#ifdef CRC32_CHORBA_FALLBACK
+
 #if defined(__EMSCRIPTEN__)
 #  include "zutil_p.h"
 #endif
@@ -7,7 +10,6 @@
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
-#include "generic_functions.h"
 
 /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
 #define bitbuffer_size_bytes (16 * 1024 * sizeof(chorba_word_t))
@@ -1032,3 +1034,5 @@
     memcpy(dst, src, len);
     return crc;
 }
+
+#endif /* CRC32_CHORBA_FALLBACK */
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index c150a2f..0fcca56 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -5,9 +5,6 @@
 #ifndef GENERIC_FUNCTIONS_H_
 #define GENERIC_FUNCTIONS_H_
 
-#include "zendian.h"
-#include "deflate.h"
-
 typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
 typedef uint32_t (*adler32_copy_func)(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
@@ -15,50 +12,69 @@
 typedef uint32_t (*crc32_copy_func)(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 typedef void     (*slide_hash_func)(deflate_state *s);
 
-
+#ifdef ADLER32_FALLBACK
 uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-
+#endif
+#ifdef CHUNKSET_FALLBACK
 uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, size_t len, size_t left);
-
-#ifdef WITH_ALL_FALLBACKS
+#endif
+#ifdef COMPARE256_FALLBACK
 uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1);
 uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1);
-#endif
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
+#endif
 
+#ifdef CRC32_BRAID_FALLBACK
 uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
 
-#ifndef WITHOUT_CHORBA
+/* Chorba is available whenever braid is needed as a fallback and hasn't been disabled. */
+#if defined(CRC32_BRAID_FALLBACK) && !defined(WITHOUT_CHORBA)
+#  define CRC32_CHORBA_FALLBACK
+#endif
+
+#ifdef CRC32_CHORBA_FALLBACK
   uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len);
   uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
-
+#ifdef CHUNKSET_FALLBACK
 void     inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
-
+#endif
+#ifdef COMPARE256_FALLBACK
 uint32_t longest_match_c(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_slow_c(deflate_state *const s, uint32_t cur_match);
-
+#endif
+#ifdef SLIDE_HASH_FALLBACK
 void     slide_hash_c(deflate_state *s);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
-// Generic code
-#  define native_adler32 adler32_c
-#  define native_adler32_copy adler32_copy_c
-#  define native_chunkmemset_safe chunkmemset_safe_c
-#ifndef WITHOUT_CHORBA
-#  define native_crc32 crc32_chorba
-#  define native_crc32_copy crc32_copy_chorba
-#else
-#  define native_crc32 crc32_braid
-#  define native_crc32_copy crc32_copy_braid
-#endif
-#  define native_inflate_fast inflate_fast_c
-#  define native_slide_hash slide_hash_c
-#  define native_longest_match longest_match_c
-#  define native_longest_match_slow longest_match_slow_c
-#  define native_compare256 compare256_c
+// Generic fallbacks when no native implementation exists
+#  ifdef ADLER32_FALLBACK
+#    define native_adler32 adler32_c
+#    define native_adler32_copy adler32_copy_c
+#  endif
+#  ifdef CHUNKSET_FALLBACK
+#    define native_chunkmemset_safe chunkmemset_safe_c
+#    define native_inflate_fast inflate_fast_c
+#  endif
+#  ifdef COMPARE256_FALLBACK
+#    define native_compare256 compare256_c
+#    define native_longest_match longest_match_c
+#    define native_longest_match_slow longest_match_slow_c
+#  endif
+#  if defined(CRC32_CHORBA_FALLBACK) && !defined(native_crc32)  /* arch headers are included first; keep their choice */
+#    define native_crc32 crc32_chorba
+#    define native_crc32_copy crc32_copy_chorba
+#  elif defined(CRC32_BRAID_FALLBACK) && !defined(native_crc32)  /* braid only when chorba is unavailable */
+#    define native_crc32 crc32_braid
+#    define native_crc32_copy crc32_copy_braid
+#  endif
+#  ifdef SLIDE_HASH_FALLBACK
+#    define native_slide_hash slide_hash_c
+#  endif
 #endif
 
 #endif
diff --git a/arch/generic/slide_hash_c.c b/arch/generic/slide_hash_c.c
index 8345b9e..8fdc478 100644
--- a/arch/generic/slide_hash_c.c
+++ b/arch/generic/slide_hash_c.c
@@ -5,6 +5,10 @@
  */
 
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#ifdef SLIDE_HASH_FALLBACK
+
 #include "deflate.h"
 
 /* ===========================================================================
@@ -50,3 +54,5 @@
     slide_hash_c_chain(s->head, HASH_SIZE, wsize);
     slide_hash_c_chain(s->prev, wsize, wsize);
 }
+
+#endif /* SLIDE_HASH_FALLBACK */
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
index 0ec8bd6..980ebca 100644
--- a/arch/loongarch/loongarch_functions.h
+++ b/arch/loongarch/loongarch_functions.h
@@ -15,6 +15,10 @@
 uint32_t crc32_copy_loongarch64(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#ifndef LOONGARCH_CRC_NATIVE
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef LOONGARCH_LSX
 uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -26,6 +30,13 @@
 void slide_hash_lsx(deflate_state *s);
 #endif
 
+#ifndef LOONGARCH_LSX_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef LOONGARCH_LASX
 uint32_t adler32_lasx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
diff --git a/arch/power/power_functions.h b/arch/power/power_functions.h
index ccc7754..78bae4a 100644
--- a/arch/power/power_functions.h
+++ b/arch/power/power_functions.h
@@ -25,12 +25,28 @@
 void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#if !defined(PPC_VMX_NATIVE) && !defined(POWER8_VSX_NATIVE)
+#  define ADLER32_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
+#ifndef POWER8_VSX_NATIVE
+#  define CHUNKSET_FALLBACK
+#endif
+#ifndef POWER8_VSX_CRC32_NATIVE
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef POWER9
 uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
 uint32_t longest_match_power9(deflate_state *const s, uint32_t cur_match);
 uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
 #endif
 
+#ifndef POWER9_NATIVE
+#  define COMPARE256_FALLBACK
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // Power - VMX
 #  ifdef PPC_VMX_NATIVE
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index 89120ff..22f783c 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -11,6 +11,8 @@
 
 #include "riscv_natives.h"
 
+#define CRC32_BRAID_FALLBACK  /* used by crc32_zbc */
+
 #ifdef RISCV_RVV
 uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -23,6 +25,13 @@
 void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#ifndef RISCV_RVV_NATIVE
+#  define ADLER32_FALLBACK
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef RISCV_CRC32_ZBC
 uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
diff --git a/arch/s390/s390_functions.h b/arch/s390/s390_functions.h
index d5308c8..c0043a6 100644
--- a/arch/s390/s390_functions.h
+++ b/arch/s390/s390_functions.h
@@ -7,6 +7,15 @@
 
 #include "s390_natives.h"
 
+#define ADLER32_FALLBACK
+#define CHUNKSET_FALLBACK
+#define COMPARE256_FALLBACK
+#define CRC32_BRAID_FALLBACK  /* used by crc32_s390_vx */
+
+#ifndef S390_VX_NATIVE
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef S390_VX
 uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_s390_vx(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
diff --git a/arch/x86/crc32_chorba_sse2.c b/arch/x86/crc32_chorba_sse2.c
index 93ec5d5..5fbdbc6 100644
--- a/arch/x86/crc32_chorba_sse2.c
+++ b/arch/x86/crc32_chorba_sse2.c
@@ -1,12 +1,13 @@
-#if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE)
-
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include <emmintrin.h>
 #include "arch/x86/x86_intrins.h"
-#include "arch_functions.h"
 
 #define LSHIFT_QWORD(x)     _mm_unpacklo_epi64(_mm_setzero_si128(), (x))
 #define RSHIFT_QWORD(x)     _mm_unpackhi_epi64((x), _mm_setzero_si128())
diff --git a/arch/x86/crc32_chorba_sse41.c b/arch/x86/crc32_chorba_sse41.c
index a137c7a..d8cadc9 100644
--- a/arch/x86/crc32_chorba_sse41.c
+++ b/arch/x86/crc32_chorba_sse41.c
@@ -1,13 +1,14 @@
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
-
 #include "zbuild.h"
+#include "arch_functions.h"
+
+#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE) && defined(CRC32_CHORBA_FALLBACK)
+
 #include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include <emmintrin.h>
 #include <smmintrin.h>
 #include "arch/x86/x86_intrins.h"
-#include "arch_functions.h"
 
 #define READ_NEXT(in, off, a, b) \
     do { \
diff --git a/arch/x86/x86_functions.h b/arch/x86/x86_functions.h
index 881c6ef..0bcbdae 100644
--- a/arch/x86/x86_functions.h
+++ b/arch/x86/x86_functions.h
@@ -24,13 +24,19 @@
 uint32_t longest_match_slow_sse2(deflate_state *const s, uint32_t cur_match);
 void slide_hash_sse2(deflate_state *s);
 
-#  if !defined(WITHOUT_CHORBA_SSE)
+#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
     uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
     uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint8_t *buf, size_t len);
 #  endif
 #endif
 
+#ifndef X86_SSE2_NATIVE
+#  define CHUNKSET_FALLBACK
+#  define COMPARE256_FALLBACK
+#  define SLIDE_HASH_FALLBACK
+#endif
+
 #ifdef X86_SSSE3
 uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -38,9 +44,15 @@
 void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
+#ifndef X86_SSSE3_NATIVE
+#  define ADLER32_FALLBACK
+#endif
+
+#if defined(X86_SSE41)
+#  if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
     uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#  endif
 #endif
 
 #ifdef X86_SSE42
@@ -84,6 +96,10 @@
 uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
+#if !defined(X86_PCLMULQDQ_NATIVE) && !defined(X86_VPCLMULQDQ_NATIVE)
+#  define CRC32_BRAID_FALLBACK
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // X86 - SSE2
 #  ifdef X86_SSE2_NATIVE
@@ -97,7 +113,7 @@
 #    define native_longest_match longest_match_sse2
 #    undef native_longest_match_slow
 #    define native_longest_match_slow longest_match_slow_sse2
-#    if !defined(WITHOUT_CHORBA_SSE)
+#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
 #      undef native_crc32
 #      define native_crc32 crc32_chorba_sse2
 #      undef native_crc32_copy
@@ -118,11 +134,13 @@
 #    define native_inflate_fast inflate_fast_ssse3
 #  endif
 // X86 - SSE4.1
-#  if defined(X86_SSE41_NATIVE) && !defined(WITHOUT_CHORBA_SSE)
-#    undef native_crc32
-#    define native_crc32 crc32_chorba_sse41
-#    undef native_crc32_copy
-#    define native_crc32_copy crc32_copy_chorba_sse41
+#  if defined(X86_SSE41_NATIVE)
+#    if !defined(WITHOUT_CHORBA) && !defined(WITHOUT_CHORBA_SSE)
+#      undef native_crc32
+#      define native_crc32 crc32_chorba_sse41
+#      undef native_crc32_copy
+#      define native_crc32_copy crc32_copy_chorba_sse41
+#    endif
 #  endif
 // X86 - SSE4.2
 #  ifdef X86_SSE42_NATIVE
diff --git a/arch_functions.h b/arch_functions.h
index 979c968..d5b152e 100644
--- a/arch_functions.h
+++ b/arch_functions.h
@@ -11,8 +11,6 @@
 #include "deflate.h"
 #include "fallback_builtins.h"
 
-#include "arch/generic/generic_functions.h"
-
 #if defined(X86_FEATURES)
 #  include "arch/x86/x86_functions.h"
 #elif defined(ARM_FEATURES)
@@ -25,6 +23,34 @@
 #  include "arch/riscv/riscv_functions.h"
 #elif defined(LOONGARCH_FEATURES)
 #  include "arch/loongarch/loongarch_functions.h"
+#else
+/* No architecture detected - all fallbacks needed */
+#  ifndef WITH_ALL_FALLBACKS
+#    define WITH_ALL_FALLBACKS
+#  endif
 #endif
 
+#ifdef WITH_ALL_FALLBACKS
+#  ifndef ADLER32_FALLBACK
+#    define ADLER32_FALLBACK
+#  endif
+#  ifndef CHUNKSET_FALLBACK
+#    define CHUNKSET_FALLBACK
+#  endif
+#  ifndef COMPARE256_FALLBACK
+#    define COMPARE256_FALLBACK
+#  endif
+#  ifndef CRC32_BRAID_FALLBACK
+#    define CRC32_BRAID_FALLBACK
+#  endif
+#  if !defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA)
+#    define CRC32_CHORBA_FALLBACK
+#  endif
+#  ifndef SLIDE_HASH_FALLBACK
+#    define SLIDE_HASH_FALLBACK
+#  endif
+#endif
+
+#include "arch/generic/generic_functions.h"
+
 #endif
diff --git a/functable.c b/functable.c
index fad863e..4064c69 100644
--- a/functable.c
+++ b/functable.c
@@ -75,60 +75,25 @@
     cpu_check_features(&cf);
     ft.force_init = &force_init_empty;
 
-    // Set up generic C code fallbacks
-#ifndef WITH_ALL_FALLBACKS
     // Only use necessary generic functions when no suitable simd versions are available.
-#  ifdef X86_SSE2_NATIVE
-    // x86_64 always has SSE2
+#ifdef ADLER32_FALLBACK
     ft.adler32 = &adler32_c;
     ft.adler32_copy = &adler32_copy_c;
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#  elif defined(ARM_NEON_NATIVE)
-#    ifndef ARM_CRC32_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(POWER8_VSX_NATIVE)
-#    ifndef POWER9_NATIVE
-    ft.compare256 = &compare256_c;
-    ft.longest_match = &longest_match_c;
-    ft.longest_match_slow = &longest_match_slow_c;
-#    endif
-#    ifndef POWER8_VSX_CRC32_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(LOONGARCH_LSX_NATIVE)
-#    ifndef LOONGARCH_CRC
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(RISCV_RVV_NATIVE)
-#    ifndef RISCV_ZBC_NATIVE
-    ft.crc32 = &crc32_braid;
-    ft.crc32_copy = &crc32_copy_braid;
-#    endif
-#  elif defined(S390_VX_NATIVE)
-    ft.adler32 = &adler32_c;
-    ft.adler32_copy = &adler32_copy_c;
+#endif
+#ifdef CHUNKSET_FALLBACK
     ft.chunkmemset_safe = &chunkmemset_safe_c;
-    ft.compare256 = &compare256_c;
     ft.inflate_fast = &inflate_fast_c;
+#endif
+#ifdef COMPARE256_FALLBACK
+    ft.compare256 = &compare256_c;
     ft.longest_match = &longest_match_c;
     ft.longest_match_slow = &longest_match_slow_c;
-    ft.slide_hash = &slide_hash_c;
-#  endif
-#else // WITH_ALL_FALLBACKS
-    ft.adler32 = &adler32_c;
-    ft.adler32_copy = &adler32_copy_c;
-    ft.chunkmemset_safe = &chunkmemset_safe_c;
-    ft.compare256 = &compare256_c;
+#endif
+#ifdef CRC32_BRAID_FALLBACK
     ft.crc32 = &crc32_braid;
     ft.crc32_copy = &crc32_copy_braid;
-    ft.inflate_fast = &inflate_fast_c;
-    ft.longest_match = &longest_match_c;
-    ft.longest_match_slow = &longest_match_slow_c;
+#endif
+#ifdef SLIDE_HASH_FALLBACK
     ft.slide_hash = &slide_hash_c;
 #endif
 
@@ -136,7 +101,7 @@
 #ifdef WITH_OPTIM
 
     // Chorba generic C fallback
-#ifndef WITHOUT_CHORBA
+#ifdef CRC32_CHORBA_FALLBACK
     ft.crc32 = &crc32_chorba;
     ft.crc32_copy = &crc32_copy_chorba;
 #endif
diff --git a/test/benchmarks/benchmark_adler32.cc b/test/benchmarks/benchmark_adler32.cc
index 5ee9102..6916af7 100644
--- a/test/benchmarks/benchmark_adler32.cc
+++ b/test/benchmarks/benchmark_adler32.cc
@@ -77,7 +77,9 @@
     BENCHMARK_ADLER32_MISALIGNED(name, hashfunc, support_flag); \
     BENCHMARK_ADLER32_ALIGNED(name, hashfunc, support_flag);
 
+#ifdef ADLER32_FALLBACK
 BENCHMARK_ADLER32(c, adler32_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_ADLER32(native, native_adler32, 1);
diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc
index 6d913b1..ff6a1b0 100644
--- a/test/benchmarks/benchmark_adler32_copy.cc
+++ b/test/benchmarks/benchmark_adler32_copy.cc
@@ -128,7 +128,9 @@
     BENCHMARK_ADLER32_COPY_ONLY(name, copyfunc, support_flag)
 #endif
 
+#ifdef ADLER32_FALLBACK
 BENCHMARK_ADLER32_COPY(c, adler32_c, adler32_copy_c, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_ADLER32_COPY(native, native_adler32, native_adler32_copy, 1);
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc
index 2d83528..88929bf 100644
--- a/test/benchmarks/benchmark_compare256.cc
+++ b/test/benchmarks/benchmark_compare256.cc
@@ -73,7 +73,7 @@
 BENCHMARK_COMPARE256(native, native_compare256, 1);
 #else
 
-#ifdef WITH_ALL_FALLBACKS
+#ifdef COMPARE256_FALLBACK
 BENCHMARK_COMPARE256(8, compare256_8, 1);
 BENCHMARK_COMPARE256(64, compare256_64, 1);
 #endif
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc
index 772dbfd..b95f952 100644
--- a/test/benchmarks/benchmark_crc32.cc
+++ b/test/benchmarks/benchmark_crc32.cc
@@ -77,16 +77,18 @@
     BENCHMARK_CRC32_MISALIGNED(name, hashfunc, support_flag); \
     BENCHMARK_CRC32_ALIGNED(name, hashfunc, support_flag);
 
+#ifdef CRC32_BRAID_FALLBACK
 BENCHMARK_CRC32(braid, crc32_braid, 1);
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+BENCHMARK_CRC32(chorba_c, crc32_chorba, 1);
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 BENCHMARK_CRC32(native, native_crc32, 1);
 #else
 
-#ifndef WITHOUT_CHORBA
-BENCHMARK_CRC32(chorba_c, crc32_chorba, 1);
-#endif
-#ifndef WITHOUT_CHORBA_SSE
+#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #   ifdef X86_SSE2
     BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
 #   endif
diff --git a/test/benchmarks/benchmark_crc32_copy.cc b/test/benchmarks/benchmark_crc32_copy.cc
index b0f0704..2df1f57 100644
--- a/test/benchmarks/benchmark_crc32_copy.cc
+++ b/test/benchmarks/benchmark_crc32_copy.cc
@@ -128,17 +128,19 @@
 #endif
 
 // Base test
+#ifdef CRC32_BRAID_FALLBACK
 BENCHMARK_CRC32_COPY(braid, crc32_braid, crc32_copy_braid, 1);
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+BENCHMARK_CRC32_COPY(chorba, crc32_chorba, crc32_copy_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native
     BENCHMARK_CRC32_COPY(native, native_crc32, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  ifndef WITHOUT_CHORBA
-    BENCHMARK_CRC32_COPY(chorba, crc32_chorba, crc32_copy_chorba, 1)
-#  endif
-#  ifndef WITHOUT_CHORBA_SSE
+#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #    ifdef X86_SSE2
     BENCHMARK_CRC32_COPY(chorba_sse2, crc32_chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2);
 #    endif
diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc
index 6e8f455..34c8fbe 100644
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -77,7 +77,7 @@
     } \
     BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
 
-#if defined(WITH_ALL_FALLBACKS) || !(defined(__x86_64__) || defined(_M_X64))
+#ifdef SLIDE_HASH_FALLBACK
 BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
 #endif
 
diff --git a/test/test_adler32.cc b/test/test_adler32.cc
index c461f93..7fe8bd6 100644
--- a/test/test_adler32.cc
+++ b/test/test_adler32.cc
@@ -36,7 +36,9 @@
         hash(GetParam(), func); \
     }
 
+#ifdef ADLER32_FALLBACK
 TEST_ADLER32(c, adler32_c, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 TEST_ADLER32(native, native_adler32, 1)
diff --git a/test/test_adler32_copy.cc b/test/test_adler32_copy.cc
index 725d86a..47b6341 100644
--- a/test/test_adler32_copy.cc
+++ b/test/test_adler32_copy.cc
@@ -40,7 +40,9 @@
     }
 
 // Base test
+#ifdef ADLER32_FALLBACK
 TEST_ADLER32_COPY(c, adler32_copy_c, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native test
diff --git a/test/test_compare256.cc b/test/test_compare256.cc
index b3efe79..9978f9a 100644
--- a/test/test_compare256.cc
+++ b/test/test_compare256.cc
@@ -63,7 +63,7 @@
 TEST_COMPARE256(native, native_compare256, 1)
 #else
 
-#ifdef WITH_ALL_FALLBACKS
+#ifdef COMPARE256_FALLBACK
 TEST_COMPARE256(8, compare256_8, 1)
 TEST_COMPARE256(64, compare256_64, 1)
 #endif
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index 19eb439..3da7a34 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -77,7 +77,12 @@
         hash(func); \
     }
 
+#ifdef CRC32_BRAID_FALLBACK
 TEST_CRC32(braid, crc32_braid, 1)
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+TEST_CRC32(chorba_c, crc32_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 TEST_CRC32(native, native_crc32, 1)
@@ -99,9 +104,6 @@
     }
 #endif
 
-#ifndef WITHOUT_CHORBA
-TEST_CRC32(chorba_c, crc32_chorba, 1)
-#endif
 #ifdef ARM_CRC32
 INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets));
 TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32)
@@ -129,7 +131,7 @@
 #ifdef X86_VPCLMULQDQ_AVX512
 TEST_CRC32(vpclmulqdq_avx512, crc32_vpclmulqdq_avx512, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
 #endif
-#ifndef WITHOUT_CHORBA_SSE
+#if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #   ifdef X86_SSE2
     TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
 #   endif
diff --git a/test/test_crc32_copy.cc b/test/test_crc32_copy.cc
index 12b2be7..9edc8f6 100644
--- a/test/test_crc32_copy.cc
+++ b/test/test_crc32_copy.cc
@@ -40,17 +40,19 @@
     }
 
 // Base test
+#ifdef CRC32_BRAID_FALLBACK
 TEST_CRC32_COPY(braid, crc32_copy_braid, 1)
+#endif
+#ifdef CRC32_CHORBA_FALLBACK
+TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
+#endif
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
     // Native test
     TEST_CRC32_COPY(native, native_crc32_copy, 1)
 #else
     // Optimized functions
-#  ifndef WITHOUT_CHORBA
-    TEST_CRC32_COPY(chorba, crc32_copy_chorba, 1)
-#  endif
-#  ifndef WITHOUT_CHORBA_SSE
+#  if defined(CRC32_CHORBA_FALLBACK) && !defined(WITHOUT_CHORBA_SSE)
 #    ifdef X86_SSE2
     TEST_CRC32_COPY(chorba_sse2, crc32_copy_chorba_sse2, test_cpu_features.x86.has_sse2)
 #    endif