From 9d4af458ea8a56fb208e3e89c903648c11460a3d Mon Sep 17 00:00:00 2001 From: Hans Kristian Rosbach Date: Mon, 17 Feb 2025 21:22:51 +0100 Subject: [PATCH] Make Chorba configurable, and add a few missing header files to CMake config. Add CI run without chorba enabled. --- .github/workflows/cmake.yml | 7 +++++++ CMakeLists.txt | 18 ++++++++++++++++-- Makefile.in | 4 ++-- arch/generic/crc32_c.c | 7 +++---- arch/x86/crc32_pclmulqdq_tpl.h | 2 ++ 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index d1ddb0f2..a59aca64 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -97,6 +97,13 @@ jobs: packages: gcc-multilib g++-multilib codecov: ubuntu_gcc_m32 + - name: Ubuntu GCC No Chorba + os: ubuntu-latest + compiler: gcc + cxx-compiler: g++ + cmake-args: -DWITH_CRC32_CHORBA=OFF + codecov: ubuntu_gcc_no_chorba + - name: Ubuntu GCC No CTZLL os: ubuntu-latest compiler: gcc diff --git a/CMakeLists.txt b/CMakeLists.txt index 96a37044..a4f07481 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,7 @@ option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF) option(WITH_OPTIM "Build with optimisation" ON) option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF) option(WITH_NEW_STRATEGIES "Use new strategies" ON) +option(WITH_CRC32_CHORBA "Enable optimized CRC32 algorithm Chorba" ON) option(WITH_NATIVE_INSTRUCTIONS "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF) option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON) @@ -137,6 +138,7 @@ option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF) mark_as_advanced(FORCE ZLIB_SYMBOL_PREFIX WITH_REDUCED_MEM + WITH_CRC32_CHORBA WITH_ARMV8 WITH_NEON WITH_ARMV6 WITH_DFLTCC_DEFLATE @@ -170,6 +172,10 @@ if(WITH_GZFILEOP) add_definitions(-DWITH_GZFILEOP) endif() +if(NOT WITH_CRC32_CHORBA) + 
add_definitions(-DWITHOUT_CHORBA) +endif() + if(CMAKE_C_COMPILER_ID MATCHES "^Intel") if(CMAKE_HOST_UNIX) set(WARNFLAGS -Wall) @@ -1092,14 +1098,16 @@ set(ZLIB_PRIVATE_HDRS arch/generic/compare256_p.h arch/generic/generic_functions.h adler32_p.h + arch_functions.h chunkset_tpl.h compare256_rle.h - arch_functions.h + crc32.h crc32_braid_p.h crc32_braid_comb_p.h crc32_braid_tbl.h deflate.h deflate_p.h + fallback_builtins.h functable.h inffast_tpl.h inffixed_tbl.h @@ -1113,7 +1121,9 @@ set(ZLIB_PRIVATE_HDRS trees_tbl.h zbuild.h zendian.h + zmemory.h zutil.h + zutil_p.h ) set(ZLIB_SRCS arch/generic/adler32_c.c @@ -1122,7 +1132,6 @@ set(ZLIB_SRCS arch/generic/compare256_c.c arch/generic/crc32_braid_c.c arch/generic/crc32_c.c - arch/generic/crc32_chorba_c.c arch/generic/crc32_fold_c.c arch/generic/slide_hash_c.c adler32.c @@ -1148,6 +1157,10 @@ set(ZLIB_SRCS zutil.c ) +if(WITH_CRC32_CHORBA) + list(APPEND ZLIB_SRCS arch/generic/crc32_chorba_c.c) +endif() + if(WITH_RUNTIME_CPU_DETECTION) list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h) list(APPEND ZLIB_SRCS cpu_features.c) @@ -1390,6 +1403,7 @@ add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks") add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks") add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation") add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") +add_feature_info(WITH_CRC32_CHORBA WITH_CRC32_CHORBA "Use optimized CRC32 algorithm Chorba") add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)") add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection") diff --git a/Makefile.in b/Makefile.in index 1d69cce9..04bcb96c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -81,7 +81,6 @@ OBJZ = \ arch/generic/compare256_c.o \ arch/generic/crc32_braid_c.o \ arch/generic/crc32_c.o \ - 
arch/generic/crc32_chorba_c.o \ arch/generic/crc32_fold_c.o \ arch/generic/slide_hash_c.o \ adler32.o \ @@ -105,6 +104,7 @@ OBJZ = \ trees.o \ uncompr.o \ zutil.o \ + arch/generic/crc32_chorba_c.o \ cpu_features.o \ $(ARCH_STATIC_OBJS) @@ -123,7 +123,6 @@ PIC_OBJZ = \ arch/generic/compare256_c.lo \ arch/generic/crc32_braid_c.lo \ arch/generic/crc32_c.lo \ - arch/generic/crc32_chorba_c.lo \ arch/generic/crc32_fold_c.lo \ arch/generic/slide_hash_c.lo \ adler32.lo \ @@ -147,6 +146,7 @@ PIC_OBJZ = \ trees.lo \ uncompr.lo \ zutil.lo \ + arch/generic/crc32_chorba_c.lo \ cpu_features.lo \ $(ARCH_SHARED_OBJS) diff --git a/arch/generic/crc32_c.c b/arch/generic/crc32_c.c index 5e8f0a73..e7394a8c 100644 --- a/arch/generic/crc32_c.c +++ b/arch/generic/crc32_c.c @@ -4,12 +4,11 @@ #include "generic_functions.h" Z_INTERNAL uint32_t crc32_c(uint32_t crc, const uint8_t *buf, size_t len) { - uint32_t c; + uint32_t c = (~crc) & 0xffffffff; + +#ifndef WITHOUT_CHORBA uint64_t* aligned_buf; size_t aligned_len; - - c = (~crc) & 0xffffffff; -#ifndef WITHOUT_CHORBA unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF; if (algn_diff < len) { if (algn_diff) { diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h index 53e56ead..933733af 100644 --- a/arch/x86/crc32_pclmulqdq_tpl.h +++ b/arch/x86/crc32_pclmulqdq_tpl.h @@ -168,6 +168,7 @@ static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m1 *xmm_crc3 = _mm_castps_si128(ps_res3); } +#ifndef WITHOUT_CHORBA static void fold_12(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) { const __m128i xmm_fold12 = _mm_set_epi64x(0x596C8D81, 0xF5E48C85); __m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3; @@ -209,6 +210,7 @@ static void fold_12(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m *xmm_crc2 = _mm_castps_si128(ps_res2); *xmm_crc3 = _mm_castps_si128(ps_res3); } +#endif static const unsigned ALIGNED_(32) pshufb_shf_table[60] = { 0x84838281, 0x88878685, 
0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */