/* x86_functions.h -- x86 implementations for arch-specific functions.
 * Copyright (C) 2013 Intel Corporation Jim Kukunas
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#ifndef X86_FUNCTIONS_H_
#define X86_FUNCTIONS_H_

#ifdef X86_SSE2
uint32_t chunksize_sse2(void);
uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);

# ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
void slide_hash_sse2(deflate_state *s);
# endif
void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
#endif

#ifdef X86_SSSE3
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
#endif

#ifdef X86_SSE42
uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_AVX2
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t chunksize_avx2(void);
uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);

# ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
void slide_hash_avx2(deflate_state *s);
# endif
void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
#endif

#ifdef X86_AVX512
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_AVX512VNNI
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_PCLMULQDQ_CRC
uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif

#ifdef X86_VPCLMULQDQ_CRC
uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif

#endif /* X86_FUNCTIONS_H_ */
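For context, these per-ISA variants are not called directly by the generic code; zlib-ng selects among them at run time based on detected CPU features. The fragment below is only a hedged sketch of that idea, not zlib-ng's actual dispatch code: it uses GCC/Clang's __builtin_cpu_supports() and assumes a generic adler32_c() fallback exists elsewhere.

/* Hypothetical sketch (not part of x86_functions.h): picking the widest
 * available adler32 implementation at run time.  The adler32_c() fallback
 * name is an assumption for illustration only. */
#include <stddef.h>
#include <stdint.h>

typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);

extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); /* assumed generic fallback */

static adler32_func select_adler32(void) {
    __builtin_cpu_init();   /* initialize CPU feature data for the checks below (GCC/Clang) */
#ifdef X86_AVX2
    if (__builtin_cpu_supports("avx2"))
        return adler32_avx2;
#endif
#ifdef X86_SSSE3
    if (__builtin_cpu_supports("ssse3"))
        return adler32_ssse3;
#endif
    return adler32_c;       /* no usable extension: fall back to plain C */
}

The pointer returned by select_adler32() would typically be cached once at startup, so the per-call cost is a single indirect call rather than repeated feature checks.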