diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index a0f3bc57..02f98272 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -81,11 +81,11 @@ jobs:
           build-src-dir: ../zlib-ng/test/add-subdirectory-project
           readonly-project-dir: true
 
-        - name: Ubuntu GCC -O1 No Unaligned UBSAN
+        - name: Ubuntu GCC -O1 UBSAN
          os: ubuntu-latest
          compiler: gcc
          cxx-compiler: g++
-         cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
+         cmake-args: -DWITH_SANITIZER=Undefined
          codecov: ubuntu_gcc_o1
          cflags: -O1
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11e9ae55..01edc153 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,7 +93,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
 option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
 option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
 option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
-option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)
 
 set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
 Useful when embedding into a larger library.
@@ -147,7 +146,6 @@ mark_as_advanced(FORCE
     WITH_RVV
     WITH_INFLATE_STRICT
     WITH_INFLATE_ALLOW_INVALID_DIST
-    WITH_UNALIGNED
     INSTALL_UTILS
     )
 
@@ -336,12 +334,6 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
     endforeach()
 endif()
 
-# Set architecture alignment requirements
-if(NOT WITH_UNALIGNED)
-    add_definitions(-DNO_UNALIGNED)
-    message(STATUS "Unaligned reads manually disabled")
-endif()
-
 # Apply warning compiler flags
 if(WITH_MAINTAINER_WARNINGS)
     add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
diff --git a/README.md b/README.md
index 411621b5..28aad7f1 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Features
 * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
 * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
 * Support for hardware-accelerated deflate using IBM Z DFLTCC
-* Unaligned memory read/writes and large bit buffer improvements
+* Safe unaligned memory read/writes and large bit buffer improvements
 * Includes improvements from Cloudflare and Intel forks
 * Configure, CMake, and NMake build system support
 * Comprehensive set of CMake unit tests
@@ -213,7 +213,6 @@ Advanced Build Options
 | WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
 | WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
 | WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
-| WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch| ON |
 | WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
 | WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
 | INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
diff --git a/arch/generic/compare256_c.c b/arch/generic/compare256_c.c
index 0c12cb3a..3704c2f6 100644
--- a/arch/generic/compare256_c.c
+++ b/arch/generic/compare256_c.c
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
 
 #include "match_tpl.h"
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *
 
 #endif
 
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-/* UNALIGNED64_OK, 64-bit integer comparison */
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+/* 64-bit integer comparison */
 static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index e243f326..eaba70c3 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -28,13 +28,13 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
 
 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+    uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 #  ifdef HAVE_BUILTIN_CTZ
-  uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
+    uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
 #  endif
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-  uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+    uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
 #  endif
 #endif
@@ -43,29 +43,24 @@ typedef void (*slide_hash_func)(deflate_state *s);
 
 void slide_hash_c(deflate_state *s);
 
 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
-#  if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-    uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
-#    ifdef HAVE_BUILTIN_CTZ
-    uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
-#    endif
-#    if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-    uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
-#    endif
-#  endif
-
 uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-#  if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+    uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
     uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
-    uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
-#  ifdef UNALIGNED64_OK
-    uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
-#  endif
+#  ifdef HAVE_BUILTIN_CTZ
+    uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
+    uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
 #  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+    uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
+    uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
+#  endif
+#endif
 
 // Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define longest_match_generic longest_match_unaligned_64
 #    define longest_match_slow_generic longest_match_slow_unaligned_64
 #    define compare256_generic compare256_unaligned_64
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index 5d4cacbd..383b4d8f 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -227,17 +227,15 @@ rem_bytes:
 }
 
 Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
-#if !defined(UNALIGNED64_OK)
-#  if !defined(UNALIGNED_OK)
+#if OPTIMAL_CMP < 32
     static const uint32_t align_mask = 7;
-#  else
+#elif OPTIMAL_CMP == 32
     static const uint32_t align_mask = 3;
-#  endif
 #endif
 
     len = MIN(len, left);
 
-#if !defined(UNALIGNED64_OK)
+#if OPTIMAL_CMP < 64
     while (((uintptr_t)out & align_mask) && (len > 0)) {
         *out++ = *from++;
         --len;
diff --git a/cmake/detect-sanitizer.cmake b/cmake/detect-sanitizer.cmake
index f9521ec2..b71c1a37 100644
--- a/cmake/detect-sanitizer.cmake
+++ b/cmake/detect-sanitizer.cmake
@@ -111,6 +111,7 @@ endmacro()
 
 macro(add_undefined_sanitizer)
     set(known_checks
+        alignment
         array-bounds
         bool
         bounds
@@ -137,10 +138,6 @@ macro(add_undefined_sanitizer)
         vptr
         )
 
-    # Only check for alignment sanitizer flag if unaligned access is not supported
-    if(NOT WITH_UNALIGNED)
-        list(APPEND known_checks alignment)
-    endif()
     # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
     if(NOT CMAKE_C_FLAGS MATCHES "-O0")
         list(APPEND known_checks object-size)
@@ -153,12 +150,6 @@
         add_compile_options(-fsanitize=${supported_checks})
         add_link_options(-fsanitize=${supported_checks})
 
-        # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
-        # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
-        if(WITH_UNALIGNED)
-            add_compile_options(-fno-sanitize=alignment)
-        endif()
-
         add_common_sanitizer_flags()
     else()
         message(STATUS "Undefined behavior sanitizer is not supported")
diff --git a/compare256_rle.h b/compare256_rle.h
index 0f3998d4..ccfbeba2 100644
--- a/compare256_rle.h
+++ b/compare256_rle.h
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
     return 256;
 }
 
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
     uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
 
 #endif
 
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 /* 64-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
     uint32_t src0_cmp32, len = 0;
diff --git a/configure b/configure
index e6270ae0..5cd8163a 100755
--- a/configure
+++ b/configure
@@ -87,7 +87,6 @@ mandir=${mandir-'${prefix}/share/man'}
 shared_ext='.so'
 shared=1
 gzfileops=1
-unalignedok=1
 compat=0
 cover=0
 build32=0
@@ -164,7 +163,6 @@ case "$1" in
     echo '  [--warn]                     Enables extra compiler warnings' | tee -a configure.log
     echo '  [--debug]                    Enables extra debug prints during operation' | tee -a configure.log
     echo '  [--zlib-compat]              Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
-    echo '  [--without-unaligned]        Compiles without fast unaligned access' | tee -a configure.log
     echo '  [--without-gzfileops]        Compiles without the gzfile parts of the API enabled' | tee -a configure.log
     echo '  [--without-optimizations]    Compiles without support for optional instruction sets' | tee -a configure.log
     echo '  [--without-new-strategies]   Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -195,7 +193,6 @@ case "$1" in
     -s* | --shared | --enable-shared) shared=1; shift ;;
     -t | --static) shared=0; shift ;;
     --zlib-compat) compat=1; shift ;;
-    --without-unaligned) unalignedok=0; shift ;;
     --without-gzfileops) gzfileops=0; shift ;;
     --cover) cover=1; shift ;;
     -3* | --32) build32=1; shift ;;
@@ -876,13 +873,6 @@ else
     PIC_TESTOBJG="\$(OBJG)"
 fi
 
-# set architecture alignment requirements
-if test $unalignedok -eq 0; then
-    CFLAGS="${CFLAGS} -DNO_UNALIGNED"
-    SFLAGS="${SFLAGS} -DNO_UNALIGNED"
-    echo "Unaligned reads manually disabled." | tee -a configure.log
-fi
-
 # enable reduced memory configuration
 if test $reducedmem -eq 1; then
     echo "Configuring for reduced memory environment." | tee -a configure.log
diff --git a/deflate_rle.c b/deflate_rle.c
index e8e501b1..551fe02a 100644
--- a/deflate_rle.c
+++ b/deflate_rle.c
@@ -10,8 +10,8 @@
 #include "deflate_p.h"
 #include "functable.h"
 
-#ifdef UNALIGNED_OK
-#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if OPTIMAL_CMP >= 32
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 #    define compare256_rle compare256_rle_unaligned_64
 #  elif defined(HAVE_BUILTIN_CTZ)
 #    define compare256_rle compare256_rle_unaligned_32
diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index a5685c4e..e7037c04 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -29,21 +29,13 @@
 #  define HASH_CALC_MASK HASH_MASK
 #endif
 #ifndef HASH_CALC_READ
-#  ifdef UNALIGNED_OK
-#    if BYTE_ORDER == LITTLE_ENDIAN
-#      define HASH_CALC_READ \
-          memcpy(&val, strstart, sizeof(val));
-#    else
-#      define HASH_CALC_READ \
-          memcpy(&val, strstart, sizeof(val)); \
-          val = ZSWAP32(val);
-#    endif
+#  if BYTE_ORDER == LITTLE_ENDIAN
+#    define HASH_CALC_READ \
+        memcpy(&val, strstart, sizeof(val));
 #  else
 #    define HASH_CALC_READ \
-        val = ((uint32_t)(strstart[0])); \
-        val |= ((uint32_t)(strstart[1]) << 8); \
-        val |= ((uint32_t)(strstart[2]) << 16); \
-        val |= ((uint32_t)(strstart[3]) << 24);
+        memcpy(&val, strstart, sizeof(val)); \
+        val = ZSWAP32(val);
 #  endif
 #endif
diff --git a/match_tpl.h b/match_tpl.h
index 9c258242..f44da750 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
     uint32_t chain_length, nice_match, best_len, offset;
     uint32_t lookahead = s->lookahead;
     Pos match_offset = 0;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     uint8_t scan_start[8];
 #endif
     uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      * to find the next best match length.
      */
     offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     if (best_len >= sizeof(uint32_t)) {
         offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
         if (best_len >= sizeof(uint64_t))
             offset -= 4;
 #endif
     }
 #endif
 
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
     memcpy(scan_start, scan, sizeof(uint64_t));
     memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
     memcpy(scan_start, scan, sizeof(uint32_t));
     memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
@@ -138,7 +138,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
      * that depend on those values. However the length of the match is limited to the
      * lookahead, so the output of deflate is not affected by the uninitialized values.
      */
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     if (best_len < sizeof(uint32_t)) {
         for (;;) {
             if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
@@ -146,7 +146,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
                 break;
             GOTO_NEXT_CHAIN;
         }
-#  ifdef UNALIGNED64_OK
+#  if OPTIMAL_CMP >= 64
     } else if (best_len >= sizeof(uint64_t)) {
         for (;;) {
             if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
         return best_len;
 
     offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
     if (best_len >= sizeof(uint32_t)) {
         offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
         if (best_len >= sizeof(uint64_t))
             offset -= 4;
 #endif
     }
 #endif
 
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
     memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
     memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
     scan_end[0] = *(scan+offset);
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc
index efdbbacc..a9aa0fca 100644
--- a/test/benchmarks/benchmark_compare256.cc
+++ b/test/benchmarks/benchmark_compare256.cc
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
 BENCHMARK_COMPARE256(native, native_compare256, 1);
 #else
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
-#endif
+#  endif
 #endif
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
diff --git a/test/benchmarks/benchmark_compare256_rle.cc b/test/benchmarks/benchmark_compare256_rle.cc
index 3b1eced4..9eb299f3 100644
--- a/test/benchmarks/benchmark_compare256_rle.cc
+++ b/test/benchmarks/benchmark_compare256_rle.cc
@@ -61,12 +61,12 @@ public:
 
 BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);
 
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
-#endif
+#  endif
 #endif
diff --git a/test/test_compare256.cc b/test/test_compare256.cc
index e1662cdf..97e28470 100644
--- a/test/test_compare256.cc
+++ b/test/test_compare256.cc
@@ -65,15 +65,16 @@ TEST_COMPARE256(c, compare256_c, 1)
 TEST_COMPARE256(native, native_compare256, 1)
 #else
 
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(unaligned_32, compare256_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
+#  endif
 #endif
-#endif
+
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
 #endif
diff --git a/test/test_compare256_rle.cc b/test/test_compare256_rle.cc
index 5333ce76..6c5d9d4f 100644
--- a/test/test_compare256_rle.cc
+++ b/test/test_compare256_rle.cc
@@ -52,12 +52,12 @@ static inline void compare256_rle_match_check(compare256_rle_func compare256_rle
 
 TEST_COMPARE256_RLE(c, compare256_rle_c, 1)
 
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+#  if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#  endif
+#  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1)
-#endif
+#  endif
 #endif
diff --git a/zbuild.h b/zbuild.h
index 0023a235..4d3fc5f2 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -243,29 +243,31 @@
 #  define Tracecv(c, x)
 #endif
 
-#ifndef NO_UNALIGNED
-#  if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
-#    define UNALIGNED_OK
-#    define UNALIGNED64_OK
-#  elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
-      defined(__i686__) || defined(_X86_) || defined(_M_IX86)
-#    define UNALIGNED_OK
-#  elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
-#    if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-#      define UNALIGNED_OK
-#      define UNALIGNED64_OK
-#    endif
-#  elif defined(__arm__) || (_M_ARM >= 7)
-#    if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-#      define UNALIGNED_OK
-#    endif
-#  elif defined(__powerpc64__) || defined(__ppc64__)
-#    if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#      define UNALIGNED_OK
-#      define UNALIGNED64_OK
-#    endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
+#  define OPTIMAL_CMP 64
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+    defined(__i686__) || defined(_X86_) || defined(_M_IX86)
+#  define OPTIMAL_CMP 32
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+#  if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+#    define OPTIMAL_CMP 64
+#  endif
+#elif defined(__arm__) || (_M_ARM >= 7)
+#  if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+#    define OPTIMAL_CMP 32
+#  endif
+#elif defined(__powerpc64__) || defined(__ppc64__)
+#  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#    define OPTIMAL_CMP 64
 #  endif
 #endif
+#if defined(NO_UNALIGNED)
+#  undef OPTIMAL_CMP
+#endif
+#if !defined(OPTIMAL_CMP)
+#  define OPTIMAL_CMP 8
+#endif
+
 #if defined(__has_feature)
 #  if __has_feature(address_sanitizer)
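Note (illustrative, not part of the patch): OPTIMAL_CMP does not reintroduce raw unaligned pointer dereferences. It only tells the generic code how wide a comparison chunk is worth using on the target (64, 32, or the byte-wise fallback of 8), while the loads themselves always go through memcpy, which is well defined at any alignment and which compilers typically fold into a single load where the hardware allows it. That is why detect-sanitizer.cmake can now always list the alignment check and the UBSAN CI job no longer needs -DWITH_UNALIGNED=OFF. A minimal C sketch of the pattern follows; first_diff_byte() is a hypothetical helper for this note, not a symbol from the diff.

/* Sketch only: memcpy-based "unaligned" compare gated by OPTIMAL_CMP. */
#include <stdint.h>
#include <string.h>

#ifndef OPTIMAL_CMP            /* normally set by zbuild.h after this patch */
#  define OPTIMAL_CMP 64       /* assumption for this sketch */
#endif

static inline unsigned first_diff_byte(const uint8_t *a, const uint8_t *b) {
#if OPTIMAL_CMP >= 64 && defined(__GNUC__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    uint64_t va, vb;
    memcpy(&va, a, sizeof(va));    /* defined on any alignment; optimizes to one load */
    memcpy(&vb, b, sizeof(vb));
    uint64_t diff = va ^ vb;
    if (diff == 0)
        return 8;                  /* first 8 bytes are equal */
    return (unsigned)(__builtin_ctzll(diff) >> 3);  /* index of lowest differing byte */
#else
    /* Byte-at-a-time fallback, the OPTIMAL_CMP == 8 case. */
    unsigned i = 0;
    while (i < 8 && a[i] == b[i])
        i++;
    return i;
#endif
}

On x86-64 or AArch64 with unaligned support this compiles to one 8-byte load per side plus a count-trailing-zeros; with OPTIMAL_CMP at 8 it degrades to the plain byte loop, mirroring the fallback paths the patch keeps.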