diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2324ecda..7afa5319 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -124,6 +124,7 @@ elseif(BASEARCH_PPC_FOUND)
     option(WITH_POWER9 "Build with optimisations for POWER9" ON)
 elseif(BASEARCH_RISCV_FOUND)
     option(WITH_RVV "Build with RVV intrinsics" ON)
+    option(WITH_RISCV_ZBC "Build with RISCV ZBC" ON)
 elseif(BASEARCH_S360_FOUND)
     option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
     option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
@@ -158,6 +159,7 @@ mark_as_advanced(FORCE
     WITH_POWER8
     WITH_POWER9
     WITH_RVV
+    WITH_RISCV_ZBC
     WITH_INFLATE_STRICT
     WITH_INFLATE_ALLOW_INVALID_DIST
     INSTALL_UTILS
@@ -945,15 +947,34 @@ if(WITH_OPTIM)
                 # FIXME: we will not set compile flags for riscv_features.c when
                 # the kernels update hwcap or hwprobe for riscv
                 set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
-                if(WITH_RUNTIME_CPU_DETECTION)
-                    list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c)
-                endif()
                 list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
                 set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
             else()
                 set(WITH_RVV OFF)
             endif()
         endif()
+        if(WITH_RISCV_ZBC)
+            check_riscv_zbc_ext()
+            if(HAVE_RISCV_ZBC)
+                add_definitions(-DRISCV_CRC32_ZBC)
+                set(ZBC_SRCS ${ARCHDIR}/crc32_zbc.c)
+                list(APPEND ZLIB_ARCH_SRCS ${ZBC_SRCS})
+                set_property(SOURCE ${ZBC_SRCS} PROPERTY COMPILE_FLAGS "-march=rv64gc_zbc ${NOLTOFLAG}")
+                add_feature_info(RISCV_ZBC 1 "Support RISC-V Zbc extension for CRC32")
+            else()
+                set(WITH_RISCV_ZBC OFF)
+            endif()
+        endif()
+
+        if(WITH_RUNTIME_CPU_DETECTION AND BASEARCH_RISCV_FOUND)
+            if(WITH_RVV AND WITH_RISCV_ZBC AND HAVE_RVV_INTRIN AND HAVE_RISCV_ZBC)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG}_zbc ${NOLTOFLAG}")
+            elseif(WITH_RVV AND HAVE_RVV_INTRIN)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
+            elseif(WITH_RISCV_ZBC AND HAVE_RISCV_ZBC)
+                set_property(SOURCE ${ARCHDIR}/riscv_features.c PROPERTY COMPILE_FLAGS "${RISCVZBCFLAG} ${NOLTOFLAG}")
+            endif()
+        endif()
     elseif(BASEARCH_S360_FOUND)
         check_s390_intrinsics()
         if(HAVE_S390_INTRIN)
@@ -1528,6 +1549,7 @@ elseif(BASEARCH_PPC_FOUND)
     add_feature_info(WITH_POWER9 WITH_POWER9 "Build with optimisations for POWER9")
 elseif(BASEARCH_RISCV_FOUND)
     add_feature_info(WITH_RVV WITH_RVV "Build with RVV intrinsics")
+    add_feature_info(WITH_RISCV_ZBC WITH_RISCV_ZBC "Build with RISCV ZBC")
 elseif(BASEARCH_S360_FOUND)
     add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z")
     add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z")
diff --git a/arch/riscv/crc32_zbc.c b/arch/riscv/crc32_zbc.c
new file mode 100644
index 00000000..d5dc71cc
--- /dev/null
+++ b/arch/riscv/crc32_zbc.c
@@ -0,0 +1,101 @@
+/* crc32_zbc.c - RISCV Zbc version of crc32
+ * Copyright (C) 2025 ByteDance. All rights reserved.
+ * Contributed by Yin Tong
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(RISCV_CRC32_ZBC)
+#include "zbuild.h"
+#include <stdint.h>
+
+#define CLMUL_MIN_LEN 16 // Minimum size of buffer for _crc32_clmul
+#define CLMUL_CHUNK_LEN 16 // Length of chunk for clmul
+
+extern uint32_t crc32_c(uint32_t crc, const uint8_t *buf, size_t len);
+
+#define CONSTANT_R3 0x1751997d0ULL
+#define CONSTANT_R4 0x0ccaa009eULL
+#define CONSTANT_R5 0x163cd6124ULL
+#define MASK32 0xFFFFFFFF
+#define CRCPOLY_TRUE_LE_FULL 0x1DB710641ULL
+#define CONSTANT_RU 0x1F7011641ULL
+
+static inline uint64_t clmul(uint64_t a, uint64_t b) {
+    uint64_t res;
+    __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+    return res;
+}
+
+static inline uint64_t clmulh(uint64_t a, uint64_t b) {
+    uint64_t res;
+    __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
+    return res;
+}
+
+static inline uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf,
+                                        uint64_t len) {
+    const uint64_t *buf64 = (const uint64_t *)buf;
+    uint64_t low = buf64[0] ^ crc;
+    uint64_t high = buf64[1];
+
+    if (len < 16)
+        goto finish_fold;
+    len -= 16;
+    buf64 += 2;
+
+    // process each 16-byte block
+    while (len >= 16) {
+        uint64_t t2 = clmul(CONSTANT_R4, high);
+        uint64_t t3 = clmulh(CONSTANT_R4, high);
+
+        uint64_t t0_new = clmul(CONSTANT_R3, low);
+        uint64_t t1_new = clmulh(CONSTANT_R3, low);
+
+        // Combine the results and XOR with new data
+        low = t0_new ^ t2;
+        high = t1_new ^ t3;
+        low ^= buf64[0];
+        high ^= buf64[1];
+
+        buf64 += 2;
+        len -= 16;
+    }
+
+finish_fold:
+    // Fold the 128-bit result into 64 bits
+    uint64_t fold_t3 = clmulh(low, CONSTANT_R4);
+    uint64_t fold_t2 = clmul(low, CONSTANT_R4);
+    low = high ^ fold_t2;
+    high = fold_t3;
+
+    // Combine the low and high parts and perform polynomial reduction
+    uint64_t combined = (low >> 32) | ((high & MASK32) << 32);
+    uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined;
+
+    // Barrett reduction step
+    uint64_t barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32;
+    barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL);
+    uint64_t final = barrett ^ reduced_low;
+
+    // Return the high 32 bits as the final CRC
+    return (uint32_t)(final >> 32);
+}
+
+Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf,
+                                      size_t len) {
+    if (len < CLMUL_MIN_LEN) {
+        return crc32_c(crc, buf, len);
+    }
+
+    uint64_t unaligned_length = len % CLMUL_CHUNK_LEN;
+    if (unaligned_length) {
+        crc = crc32_c(crc, buf, unaligned_length);
+        buf += unaligned_length;
+        len -= unaligned_length;
+    }
+    crc ^= 0xFFFFFFFF;
+    crc = crc32_clmul_impl(crc, buf, len);
+    return crc ^ 0xFFFFFFFF;
+}
+
+#endif
diff --git a/arch/riscv/riscv_features.c b/arch/riscv/riscv_features.c
index f9957d19..da509a84 100644
--- a/arch/riscv/riscv_features.c
+++ b/arch/riscv/riscv_features.c
@@ -11,6 +11,7 @@
 #include "riscv_features.h"
 
 #define ISA_V_HWCAP (1 << ('v' - 'a'))
+#define ISA_ZBC_HWCAP (1 << 29)
 
 int Z_INTERNAL is_kernel_version_greater_or_equal_to_6_5() {
     struct utsname buffer;
@@ -36,6 +37,12 @@ void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *fea
 #else
     features->has_rvv = 0;
 #endif
+
+#if defined(__riscv_zbc) && defined(__linux__)
+    features->has_zbc = 1;
+#else
+    features->has_zbc = 0;
+#endif
 }
 
 void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) {
@@ -45,6 +52,7 @@ void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features
     unsigned long hw_cap = 0;
 #endif
     features->has_rvv = hw_cap & ISA_V_HWCAP;
+    features->has_zbc = hw_cap & ISA_ZBC_HWCAP;
 }
 
 void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features) {
diff --git a/arch/riscv/riscv_features.h b/arch/riscv/riscv_features.h
index b1593acc..42855a1b 100644
--- a/arch/riscv/riscv_features.h
+++ b/arch/riscv/riscv_features.h
@@ -11,6 +11,7 @@
 
 struct riscv_cpu_features {
     int has_rvv;
+    int has_zbc;
 };
 
 void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index 1792b9d2..86b68a6d 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -22,6 +22,10 @@ void slide_hash_rvv(deflate_state *s);
 void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
+#ifdef RISCV_CRC32_ZBC
+uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
+#endif
+
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 // RISCV - RVV
 #  if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
@@ -44,6 +48,12 @@ void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
 #    undef native_slide_hash
 #    define native_slide_hash slide_hash_rvv
 #  endif
+
+// RISCV - CRC32
+#  if (defined(RISCV_CRC32_ZBC) && defined (__riscv_zbc))
+#    undef native_crc32
+#    define native_crc32 crc32_riscv64_zbc
+#  endif
 #endif
 
 #endif /* RISCV_FUNCTIONS_H_ */
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index 66872766..d9e01f69 100644
--- a/cmake/detect-intrinsics.cmake
+++ b/cmake/detect-intrinsics.cmake
@@ -458,6 +458,28 @@ macro(check_rvv_intrinsics)
     set(CMAKE_REQUIRED_FLAGS)
 endmacro()
 
+macro(check_riscv_zbc_ext)
+    if(NOT NATIVEFLAG)
+        if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+            set(RISCVZBCFLAG "-march=rv64gc_zbc")
+        endif()
+    endif()
+    # Check whether compiler supports RISC-V Zbc inline asm
+    # gcc-11 / clang-14 at least
+    set(CMAKE_REQUIRED_FLAGS "${RISCVZBCFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+    check_c_source_compiles(
+        "#include <stdint.h>
+        uint64_t f(uint64_t a, uint64_t b) {
+            uint64_t c;
+            __asm__ __volatile__ (\"clmul %[result], %[input_a], %[input_b]\" : [result] \"=r\" (c) : [input_a] \"r\" (a), [input_b] \"r\" (b));
+            return c;
+        }
+        int main(void) { return f(1, 2); }"
+        HAVE_RISCV_ZBC
+    )
+    set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
 macro(check_s390_intrinsics)
     check_c_source_compiles(
         "#include <vecintrin.h>
diff --git a/functable.c b/functable.c
index 1d38637f..ef1fc31d 100644
--- a/functable.c
+++ b/functable.c
@@ -256,6 +256,12 @@ static void init_functable(void) {
     }
 #endif
 
+    // RISCV - ZBC
+#ifdef RISCV_CRC32_ZBC
+    if (cf.riscv.has_zbc) {
+        ft.crc32 = &crc32_riscv64_zbc;
+    }
+#endif
 
     // S390
 #ifdef S390_CRC32_VX
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc
index e6947715..23a1dc19 100644
--- a/test/benchmarks/benchmark_crc32.cc
+++ b/test/benchmarks/benchmark_crc32.cc
@@ -80,6 +80,9 @@ BENCHMARK_CRC32(native, native_crc32, 1);
 #ifdef ARM_CRC32
 BENCHMARK_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32);
 #endif
+#ifdef RISCV_CRC32_ZBC
+BENCHMARK_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc);
+#endif
 #ifdef POWER8_VSX_CRC32
 BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07);
 #endif
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index 56667f02..2f768d0c 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -270,6 +270,9 @@ INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_o
 TEST_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32)
 TEST_CRC32_ALIGN(armv8_align, crc32_armv8, test_cpu_features.arm.has_crc32)
 #endif
+#ifdef RISCV_CRC32_ZBC
+TEST_CRC32(riscv, crc32_riscv64_zbc, test_cpu_features.riscv.has_zbc)
+#endif
 #ifdef POWER8_VSX_CRC32
 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
 #endif