mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-18 11:35:35 -04:00
Split cpu_features.h by architecture.
This commit is contained in:
parent
598128f5d1
commit
8e0e24cd18
@ -540,10 +540,11 @@ if(WITH_REDUCED_MEM)
|
||||
message(STATUS "Configured for reduced memory environment")
|
||||
endif()
|
||||
|
||||
set(GENERIC_ARCHDIR "arch/generic")
|
||||
|
||||
set(ZLIB_ARCH_SRCS)
|
||||
set(ZLIB_ARCH_HDRS)
|
||||
set(ARCHDIR "arch/generic")
|
||||
set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_features.h)
|
||||
|
||||
if(BASEARCH_ARM_FOUND)
|
||||
set(ARCHDIR "arch/arm")
|
||||
elseif(BASEARCH_PPC_FOUND)
|
||||
@ -558,6 +559,7 @@ elseif(BASEARCH_X86_FOUND)
|
||||
add_feature_info(SSE2 1 "Support the SSE2 instruction set, using \"${SSE2FLAG}\"")
|
||||
endif()
|
||||
else()
|
||||
set(ARCHDIR ${GENERIC_ARCHDIR})
|
||||
message(STATUS "No optimized architecture: using ${ARCHDIR}")
|
||||
endif()
|
||||
|
||||
|
@ -13,4 +13,42 @@ struct arm_cpu_features {
|
||||
|
||||
void Z_INTERNAL arm_check_features(struct arm_cpu_features *features);
|
||||
|
||||
#ifdef CPU_FEATURES_H_
|
||||
|
||||
#ifdef ARM_NEON
|
||||
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t chunksize_neon(void);
|
||||
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZLL
|
||||
extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef DEFLATE_H_
|
||||
extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# endif
|
||||
# ifdef DEFLATE_H_
|
||||
extern void slide_hash_neon(deflate_state *s);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
|
||||
# ifdef DEFLATE_H_
|
||||
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
|
||||
extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef ARM_SIMD
|
||||
# ifdef DEFLATE_H_
|
||||
extern void slide_hash_armv6(deflate_state *s);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* ARM_H_ */
|
||||
|
60
arch/generic/generic_features.h
Normal file
60
arch/generic/generic_features.h
Normal file
@ -0,0 +1,60 @@
|
||||
/* generic_features.h -- generic C implementations for arch-specific features
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef GENERIC_FEATURES_H_
|
||||
#define GENERIC_FEATURES_H_
|
||||
|
||||
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
|
||||
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t chunksize_c(void);
|
||||
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
|
||||
# endif
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef DEFLATE_H_
|
||||
typedef void (*slide_hash_func)(deflate_state *s);
|
||||
|
||||
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
|
||||
extern void slide_hash_c(deflate_state *s);
|
||||
extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
|
||||
extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
|
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
# ifdef UNALIGNED64_OK
|
||||
extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@ -15,4 +15,35 @@ struct power_cpu_features {
|
||||
|
||||
void Z_INTERNAL power_check_features(struct power_cpu_features *features);
|
||||
|
||||
#ifdef CPU_FEATURES_H_
|
||||
|
||||
#ifdef PPC_VMX
|
||||
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
# ifdef DEFLATE_H_
|
||||
extern void slide_hash_vmx(deflate_state *s);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER8_VSX
|
||||
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t chunksize_power8(void);
|
||||
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
# ifdef DEFLATE_H_
|
||||
extern void slide_hash_power8(deflate_state *s);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER9
|
||||
extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef DEFLATE_H_
|
||||
extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* POWER_H_ */
|
||||
|
@ -15,4 +15,22 @@ struct riscv_cpu_features {
|
||||
|
||||
void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
|
||||
|
||||
#ifdef CPU_FEATURES_H_
|
||||
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern uint32_t chunksize_rvv(void);
|
||||
extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
|
||||
extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
|
||||
# ifdef DEFLATE_H_
|
||||
extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
|
||||
extern void slide_hash_rvv(deflate_state *s);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* RISCV_H_ */
|
||||
|
@ -7,4 +7,12 @@ struct s390_cpu_features {
|
||||
|
||||
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features);
|
||||
|
||||
#ifdef CPU_FEATURES_H_
|
||||
|
||||
#ifdef S390_CRC32_VX
|
||||
extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* x86_features.h -- check for CPU features
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef X86_FEATURES_H_
|
||||
#define X86_FEATURES_H_
|
||||
@ -21,4 +21,81 @@ struct x86_cpu_features {
|
||||
|
||||
void Z_INTERNAL x86_check_features(struct x86_cpu_features *features);
|
||||
|
||||
#ifdef CPU_FEATURES_H_
|
||||
|
||||
#include "fallback_builtins.h"
|
||||
#include "crc32.h"
|
||||
|
||||
#ifdef X86_SSE2
|
||||
extern uint32_t chunksize_sse2(void);
|
||||
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef DEFLATE_H_
|
||||
extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
|
||||
extern void slide_hash_sse2(deflate_state *s);
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSSE3
|
||||
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
# ifdef DEFLATE_H_
|
||||
extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
|
||||
extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern uint32_t chunksize_avx2(void);
|
||||
extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef DEFLATE_H_
|
||||
extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
|
||||
extern void slide_hash_avx2(deflate_state *s);
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_VPCLMULQDQ_CRC
|
||||
extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* X86_FEATURES_H_ */
|
||||
|
269
cpu_features.h
269
cpu_features.h
@ -6,11 +6,14 @@
|
||||
#ifndef CPU_FEATURES_H_
|
||||
#define CPU_FEATURES_H_
|
||||
|
||||
#include "crc32.h"
|
||||
#ifdef ZLIB_COMPAT
|
||||
typedef struct z_stream_s z_stream;
|
||||
#else
|
||||
typedef struct zng_stream_s zng_stream;
|
||||
#endif
|
||||
|
||||
#if defined(X86_FEATURES)
|
||||
# include "arch/x86/x86_features.h"
|
||||
# include "fallback_builtins.h"
|
||||
#elif defined(ARM_FEATURES)
|
||||
# include "arch/arm/arm_features.h"
|
||||
#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
|
||||
@ -21,6 +24,8 @@
|
||||
# include "arch/riscv/riscv_features.h"
|
||||
#endif
|
||||
|
||||
#include "arch/generic/generic_features.h"
|
||||
|
||||
struct cpu_features {
|
||||
#if defined(X86_FEATURES)
|
||||
struct x86_cpu_features x86;
|
||||
@ -39,264 +44,4 @@ struct cpu_features {
|
||||
|
||||
extern void cpu_check_features(struct cpu_features *features);
|
||||
|
||||
/* adler32 */
|
||||
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#ifdef ARM_NEON
|
||||
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef PPC_VMX
|
||||
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* adler32 folding */
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
|
||||
/* CRC32 folding */
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_VPCLMULQDQ_CRC
|
||||
extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* memory chunking */
|
||||
extern uint32_t chunksize_c(void);
|
||||
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#ifdef X86_SSE2
|
||||
extern uint32_t chunksize_sse2(void);
|
||||
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t chunksize_avx2(void);
|
||||
extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef ARM_NEON
|
||||
extern uint32_t chunksize_neon(void);
|
||||
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern uint32_t chunksize_power8(void);
|
||||
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t chunksize_rvv(void);
|
||||
extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
typedef struct z_stream_s z_stream;
|
||||
#else
|
||||
typedef struct zng_stream_s zng_stream;
|
||||
#endif
|
||||
|
||||
/* inflate fast loop */
|
||||
extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
|
||||
#ifdef X86_SSE2
|
||||
extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef ARM_NEON
|
||||
extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
/* CRC32 */
|
||||
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#ifdef ARM_ACLE
|
||||
extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#elif defined(POWER8_VSX)
|
||||
extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#elif defined(S390_CRC32_VX)
|
||||
extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* compare256 */
|
||||
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
|
||||
|
||||
extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
|
||||
#ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
|
||||
#ifdef DEFLATE_H_
|
||||
/* insert_string */
|
||||
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#ifdef X86_SSE42
|
||||
extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#endif
|
||||
|
||||
/* longest_match */
|
||||
extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
#ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
/* longest_match_slow */
|
||||
extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
#ifdef UNALIGNED64_OK
|
||||
extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
/* quick_insert_string */
|
||||
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
|
||||
#ifdef X86_SSE42
|
||||
extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
|
||||
#endif
|
||||
|
||||
/* slide_hash */
|
||||
typedef void (*slide_hash_func)(deflate_state *s);
|
||||
|
||||
#ifdef X86_SSE2
|
||||
extern void slide_hash_sse2(deflate_state *s);
|
||||
#endif
|
||||
#if defined(ARM_SIMD)
|
||||
extern void slide_hash_armv6(deflate_state *s);
|
||||
#endif
|
||||
#if defined(ARM_NEON)
|
||||
extern void slide_hash_neon(deflate_state *s);
|
||||
#endif
|
||||
#if defined(PPC_VMX)
|
||||
extern void slide_hash_vmx(deflate_state *s);
|
||||
#endif
|
||||
#if defined(POWER8_VSX)
|
||||
extern void slide_hash_power8(deflate_state *s);
|
||||
#endif
|
||||
#if defined(RISCV_RVV)
|
||||
extern void slide_hash_rvv(deflate_state *s);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern void slide_hash_avx2(deflate_state *s);
|
||||
#endif
|
||||
|
||||
/* update_hash */
|
||||
extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -5,9 +5,6 @@
|
||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
|
||||
#include <intrin.h>
|
||||
#ifdef X86_FEATURES
|
||||
# include "arch/x86/x86_features.h"
|
||||
#endif
|
||||
|
||||
/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0.
|
||||
* Because of that assumption trailing_zero is not initialized and the return value is not checked.
|
||||
|
Loading…
Reference in New Issue
Block a user