mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-18 11:35:35 -04:00
Move update_hash(), insert_string() and quick_insert_string() out of functable
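and remove their SSE4.2 and ACLE optimized variants (the SSE4.2 adler32 and ACLE crc32 code is kept). The overhead of calling these functions through functable is higher than the benefit gained from the optimized implementations.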
This commit is contained in:
parent
ef2f8d528c
commit
9953f12e21
@ -653,7 +653,7 @@ if(WITH_OPTIM)
|
||||
check_acle_compiler_flag()
|
||||
if(HAVE_ACLE_FLAG)
|
||||
add_definitions(-DARM_ACLE)
|
||||
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c)
|
||||
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c)
|
||||
set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
|
||||
list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
|
||||
add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
|
||||
@ -857,8 +857,8 @@ if(WITH_OPTIM)
|
||||
check_sse42_intrinsics()
|
||||
if(HAVE_SSE42_INTRIN AND WITH_SSSE3)
|
||||
add_definitions(-DX86_SSE42)
|
||||
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
|
||||
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
|
||||
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c)
|
||||
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized adler32 hash generation, using \"${SSE42FLAG}\"")
|
||||
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
|
||||
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
|
||||
else()
|
||||
@ -1034,7 +1034,6 @@ set(ZLIB_SRCS
|
||||
arch/generic/compare256_c.c
|
||||
arch/generic/crc32_braid_c.c
|
||||
arch/generic/crc32_fold_c.c
|
||||
arch/generic/insert_string_c.c
|
||||
arch/generic/slide_hash_c.c
|
||||
adler32.c
|
||||
compress.c
|
||||
@ -1053,6 +1052,7 @@ set(ZLIB_SRCS
|
||||
infback.c
|
||||
inflate.c
|
||||
inftrees.c
|
||||
insert_string.c
|
||||
insert_string_roll.c
|
||||
trees.c
|
||||
uncompr.c
|
||||
|
@ -80,7 +80,6 @@ OBJZ = \
|
||||
arch/generic/compare256_c.o \
|
||||
arch/generic/crc32_braid_c.o \
|
||||
arch/generic/crc32_fold_c.o \
|
||||
arch/generic/insert_string_c.o \
|
||||
arch/generic/slide_hash_c.o \
|
||||
adler32.o \
|
||||
compress.o \
|
||||
@ -99,6 +98,7 @@ OBJZ = \
|
||||
infback.o \
|
||||
inflate.o \
|
||||
inftrees.o \
|
||||
insert_string.o \
|
||||
insert_string_roll.o \
|
||||
trees.o \
|
||||
uncompr.o \
|
||||
@ -120,7 +120,6 @@ PIC_OBJZ = \
|
||||
arch/generic/compare256_c.lo \
|
||||
arch/generic/crc32_braid_c.lo \
|
||||
arch/generic/crc32_fold_c.lo \
|
||||
arch/generic/insert_string_c.lo \
|
||||
arch/generic/slide_hash_c.lo \
|
||||
adler32.lo \
|
||||
compress.lo \
|
||||
@ -139,6 +138,7 @@ PIC_OBJZ = \
|
||||
infback.lo \
|
||||
inflate.lo \
|
||||
inftrees.lo \
|
||||
insert_string.lo \
|
||||
insert_string_roll.lo \
|
||||
trees.lo \
|
||||
uncompr.lo \
|
||||
|
@ -21,7 +21,6 @@ Features
|
||||
* Support for CPU intrinsics when available
|
||||
* Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX
|
||||
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z
|
||||
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
|
||||
* Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX
|
||||
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
|
||||
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
|
||||
|
@ -25,7 +25,6 @@ all: \
|
||||
crc32_acle.o crc32_acle.lo \
|
||||
slide_hash_neon.o slide_hash_neon.lo \
|
||||
slide_hash_armv6.o slide_hash_armv6.lo \
|
||||
insert_string_acle.o insert_string_acle.lo
|
||||
|
||||
adler32_neon.o:
|
||||
$(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
@ -69,12 +68,6 @@ slide_hash_armv6.o:
|
||||
slide_hash_armv6.lo:
|
||||
$(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c
|
||||
|
||||
insert_string_acle.o:
|
||||
$(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
insert_string_acle.lo:
|
||||
$(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
rm -f *.o *.lo *~
|
||||
|
@ -22,10 +22,6 @@ void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
|
||||
void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
|
||||
uint32_t update_hash_acle(uint32_t h, uint32_t val);
|
||||
#endif
|
||||
|
||||
#ifdef ARM_SIMD
|
||||
|
@ -1,24 +0,0 @@
|
||||
/* insert_string_acle.c -- insert_string integer hash variant using ACLE's CRC instructions
|
||||
*
|
||||
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
#include "acle_intrins.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
#define HASH_CALC(s, h, val) \
|
||||
h = __crc32w(0, val)
|
||||
|
||||
#define HASH_CALC_VAR h
|
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
||||
|
||||
#define UPDATE_HASH Z_TARGET_CRC update_hash_acle
|
||||
#define INSERT_STRING Z_TARGET_CRC insert_string_acle
|
||||
#define QUICK_INSERT_STRING Z_TARGET_CRC quick_insert_string_acle
|
||||
|
||||
#include "insert_string_tpl.h"
|
||||
#endif
|
@ -19,7 +19,6 @@ all: \
|
||||
compare256_c.o compare256_c.lo \
|
||||
crc32_braid_c.o crc32_braid_c.lo \
|
||||
crc32_fold_c.o crc32_fold_c.lo \
|
||||
insert_string_c.o insert_string_c.lo \
|
||||
slide_hash_c.o slide_hash_c.lo
|
||||
|
||||
|
||||
@ -59,12 +58,6 @@ crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable
|
||||
crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
|
||||
|
||||
insert_string_c.o: $(SRCDIR)/insert_string_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h $(SRCTOP)/insert_string_tpl.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_c.c
|
||||
|
||||
insert_string_c.lo: $(SRCDIR)/insert_string_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h $(SRCTOP)/insert_string_tpl.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_c.c
|
||||
|
||||
slide_hash_c.o: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c
|
||||
|
||||
|
@ -40,10 +40,7 @@ uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
|
||||
|
||||
typedef void (*slide_hash_func)(deflate_state *s);
|
||||
|
||||
void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
|
||||
void slide_hash_c(deflate_state *s);
|
||||
uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
|
||||
uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
|
@ -35,7 +35,6 @@ all: \
|
||||
chunkset_ssse3.o chunkset_ssse3.lo \
|
||||
compare256_avx2.o compare256_avx2.lo \
|
||||
compare256_sse2.o compare256_sse2.lo \
|
||||
insert_string_sse42.o insert_string_sse42.lo \
|
||||
crc32_pclmulqdq.o crc32_pclmulqdq.lo \
|
||||
crc32_vpclmulqdq.o crc32_vpclmulqdq.lo \
|
||||
slide_hash_avx2.o slide_hash_avx2.lo \
|
||||
@ -77,12 +76,6 @@ compare256_sse2.o:
|
||||
compare256_sse2.lo:
|
||||
$(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c
|
||||
|
||||
insert_string_sse42.o:
|
||||
$(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c
|
||||
|
||||
insert_string_sse42.lo:
|
||||
$(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c
|
||||
|
||||
crc32_pclmulqdq.o:
|
||||
$(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
/* insert_string_sse42.c -- insert_string integer hash variant using SSE4.2's CRC instructions
|
||||
*
|
||||
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef X86_SSE42
|
||||
#include "zbuild.h"
|
||||
#include <nmmintrin.h>
|
||||
#include "deflate.h"
|
||||
|
||||
#define HASH_CALC(s, h, val)\
|
||||
h = _mm_crc32_u32(h, val)
|
||||
|
||||
#define HASH_CALC_VAR h
|
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
||||
|
||||
#define UPDATE_HASH update_hash_sse42
|
||||
#define INSERT_STRING insert_string_sse42
|
||||
#define QUICK_INSERT_STRING quick_insert_string_sse42
|
||||
|
||||
#include "insert_string_tpl.h"
|
||||
#endif
|
@ -27,9 +27,6 @@ void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
#ifdef X86_SSE42
|
||||
uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
|
||||
uint32_t update_hash_sse42(uint32_t h, uint32_t val);
|
||||
#endif
|
||||
|
||||
#ifdef X86_AVX2
|
||||
|
8
configure
vendored
8
configure
vendored
@ -1503,8 +1503,8 @@ case "${ARCH}" in
|
||||
if test ${HAVE_SSE42_INTRIN} -eq 1; then
|
||||
CFLAGS="${CFLAGS} -DX86_SSE42"
|
||||
SFLAGS="${SFLAGS} -DX86_SSE42"
|
||||
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
|
||||
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
|
||||
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o"
|
||||
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo"
|
||||
fi
|
||||
|
||||
check_pclmulqdq_intrinsics
|
||||
@ -1695,8 +1695,8 @@ EOF
|
||||
CFLAGS="${CFLAGS} -DARM_ACLE"
|
||||
SFLAGS="${SFLAGS} -DARM_ACLE"
|
||||
|
||||
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
|
||||
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
|
||||
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o"
|
||||
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
10
deflate.c
10
deflate.c
@ -120,10 +120,6 @@ static void lm_set_level (deflate_state *s, int level);
|
||||
static void lm_init (deflate_state *s);
|
||||
Z_INTERNAL unsigned read_buf (PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
|
||||
|
||||
extern uint32_t update_hash_roll (uint32_t h, uint32_t val);
|
||||
extern void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
extern Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
|
||||
|
||||
/* ===========================================================================
|
||||
* Local data
|
||||
*/
|
||||
@ -1144,9 +1140,9 @@ static void lm_set_level(deflate_state *s, int level) {
|
||||
s->insert_string = &insert_string_roll;
|
||||
s->quick_insert_string = &quick_insert_string_roll;
|
||||
} else {
|
||||
s->update_hash = functable.update_hash;
|
||||
s->insert_string = functable.insert_string;
|
||||
s->quick_insert_string = functable.quick_insert_string;
|
||||
s->update_hash = update_hash;
|
||||
s->insert_string = insert_string;
|
||||
s->quick_insert_string = quick_insert_string;
|
||||
}
|
||||
|
||||
s->level = level;
|
||||
|
@ -117,6 +117,14 @@ typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val);
|
||||
typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
|
||||
|
||||
uint32_t update_hash (uint32_t h, uint32_t val);
|
||||
void insert_string (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
Pos quick_insert_string (deflate_state *const s, uint32_t str);
|
||||
|
||||
uint32_t update_hash_roll (uint32_t h, uint32_t val);
|
||||
void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
|
||||
|
||||
struct ALIGNED_(16) internal_state {
|
||||
PREFIX3(stream) *strm; /* pointer back to this zlib stream */
|
||||
unsigned char *pending_buf; /* output still pending */
|
||||
|
@ -41,7 +41,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
|
||||
* dictionary, and set hash_head to the head of the hash chain:
|
||||
*/
|
||||
if (s->lookahead >= WANT_MIN_MATCH) {
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
hash_head = quick_insert_string(s, s->strstart);
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
@ -71,11 +71,11 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
|
||||
match_len--; /* string at strstart already in table */
|
||||
s->strstart++;
|
||||
|
||||
functable.insert_string(s, s->strstart, match_len);
|
||||
insert_string(s, s->strstart, match_len);
|
||||
s->strstart += match_len;
|
||||
} else {
|
||||
s->strstart += match_len;
|
||||
functable.quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH);
|
||||
quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH);
|
||||
|
||||
/* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not
|
||||
* matter since it will be recomputed at next deflate call.
|
||||
|
@ -52,9 +52,9 @@ static void insert_match(deflate_state *s, struct match match) {
|
||||
if (UNLIKELY(match.match_length > 0)) {
|
||||
if (match.strstart >= match.orgstart) {
|
||||
if (match.strstart + match.match_length - 1 >= match.orgstart) {
|
||||
functable.insert_string(s, match.strstart, match.match_length);
|
||||
insert_string(s, match.strstart, match.match_length);
|
||||
} else {
|
||||
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
|
||||
insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
|
||||
}
|
||||
match.strstart += match.match_length;
|
||||
match.match_length = 0;
|
||||
@ -72,12 +72,12 @@ static void insert_match(deflate_state *s, struct match match) {
|
||||
|
||||
if (LIKELY(match.strstart >= match.orgstart)) {
|
||||
if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
|
||||
functable.insert_string(s, match.strstart, match.match_length);
|
||||
insert_string(s, match.strstart, match.match_length);
|
||||
} else {
|
||||
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
|
||||
insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
|
||||
}
|
||||
} else if (match.orgstart < match.strstart + match.match_length) {
|
||||
functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
|
||||
insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
|
||||
}
|
||||
match.strstart += match.match_length;
|
||||
match.match_length = 0;
|
||||
@ -86,7 +86,7 @@ static void insert_match(deflate_state *s, struct match match) {
|
||||
match.match_length = 0;
|
||||
|
||||
if (match.strstart >= (STD_MIN_MATCH - 2))
|
||||
functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH);
|
||||
quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH);
|
||||
|
||||
/* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not
|
||||
* matter since it will be recomputed at next deflate call.
|
||||
@ -199,7 +199,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
} else {
|
||||
hash_head = 0;
|
||||
if (s->lookahead >= WANT_MIN_MATCH) {
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
hash_head = quick_insert_string(s, s->strstart);
|
||||
}
|
||||
|
||||
current_match.strstart = (uint16_t)s->strstart;
|
||||
@ -235,7 +235,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
/* now, look ahead one */
|
||||
if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
|
||||
s->strstart = current_match.strstart + current_match.match_length;
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
hash_head = quick_insert_string(s, s->strstart);
|
||||
|
||||
next_match.strstart = (uint16_t)s->strstart;
|
||||
next_match.orgstart = next_match.strstart;
|
||||
|
@ -86,7 +86,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
|
||||
}
|
||||
|
||||
if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
hash_head = quick_insert_string(s, s->strstart);
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
|
||||
if (dist <= MAX_DIST(s) && dist > 0) {
|
||||
|
30
functable.c
30
functable.c
@ -58,10 +58,7 @@ static void init_functable(void) {
|
||||
ft.crc32_fold_final = &crc32_fold_final_c;
|
||||
ft.crc32_fold_reset = &crc32_fold_reset_c;
|
||||
ft.inflate_fast = &inflate_fast_c;
|
||||
ft.insert_string = &insert_string_c;
|
||||
ft.quick_insert_string = &quick_insert_string_c;
|
||||
ft.slide_hash = &slide_hash_c;
|
||||
ft.update_hash = &update_hash_c;
|
||||
ft.longest_match = &longest_match_generic;
|
||||
ft.longest_match_slow = &longest_match_slow_generic;
|
||||
ft.compare256 = &compare256_generic;
|
||||
@ -97,9 +94,6 @@ static void init_functable(void) {
|
||||
#ifdef X86_SSE42
|
||||
if (cf.x86.has_sse42) {
|
||||
ft.adler32_fold_copy = &adler32_fold_copy_sse42;
|
||||
ft.insert_string = &insert_string_sse42;
|
||||
ft.quick_insert_string = &quick_insert_string_sse42;
|
||||
ft.update_hash = &update_hash_sse42;
|
||||
}
|
||||
#endif
|
||||
// X86 - PCLMUL
|
||||
@ -183,9 +177,6 @@ static void init_functable(void) {
|
||||
#ifdef ARM_ACLE
|
||||
if (cf.arm.has_crc32) {
|
||||
ft.crc32 = &crc32_acle;
|
||||
ft.insert_string = &insert_string_acle;
|
||||
ft.quick_insert_string = &quick_insert_string_acle;
|
||||
ft.update_hash = &update_hash_acle;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -256,12 +247,9 @@ static void init_functable(void) {
|
||||
FUNCTABLE_ASSIGN(ft, crc32_fold_final);
|
||||
FUNCTABLE_ASSIGN(ft, crc32_fold_reset);
|
||||
FUNCTABLE_ASSIGN(ft, inflate_fast);
|
||||
FUNCTABLE_ASSIGN(ft, insert_string);
|
||||
FUNCTABLE_ASSIGN(ft, longest_match);
|
||||
FUNCTABLE_ASSIGN(ft, longest_match_slow);
|
||||
FUNCTABLE_ASSIGN(ft, quick_insert_string);
|
||||
FUNCTABLE_ASSIGN(ft, slide_hash);
|
||||
FUNCTABLE_ASSIGN(ft, update_hash);
|
||||
|
||||
// Memory barrier for weak memory order CPUs
|
||||
FUNCTABLE_BARRIER();
|
||||
@ -327,11 +315,6 @@ static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) {
|
||||
functable.inflate_fast(strm, start);
|
||||
}
|
||||
|
||||
static void insert_string_stub(deflate_state* const s, uint32_t str, uint32_t count) {
|
||||
init_functable();
|
||||
functable.insert_string(s, str, count);
|
||||
}
|
||||
|
||||
static uint32_t longest_match_stub(deflate_state* const s, Pos cur_match) {
|
||||
init_functable();
|
||||
return functable.longest_match(s, cur_match);
|
||||
@ -342,21 +325,11 @@ static uint32_t longest_match_slow_stub(deflate_state* const s, Pos cur_match) {
|
||||
return functable.longest_match_slow(s, cur_match);
|
||||
}
|
||||
|
||||
static Pos quick_insert_string_stub(deflate_state* const s, const uint32_t str) {
|
||||
init_functable();
|
||||
return functable.quick_insert_string(s, str);
|
||||
}
|
||||
|
||||
static void slide_hash_stub(deflate_state* s) {
|
||||
init_functable();
|
||||
functable.slide_hash(s);
|
||||
}
|
||||
|
||||
static uint32_t update_hash_stub(uint32_t h, uint32_t val) {
|
||||
init_functable();
|
||||
return functable.update_hash(h, val);
|
||||
}
|
||||
|
||||
/* functable init */
|
||||
Z_INTERNAL struct functable_s functable = {
|
||||
force_init_stub,
|
||||
@ -371,10 +344,7 @@ Z_INTERNAL struct functable_s functable = {
|
||||
crc32_fold_final_stub,
|
||||
crc32_fold_reset_stub,
|
||||
inflate_fast_stub,
|
||||
insert_string_stub,
|
||||
longest_match_stub,
|
||||
longest_match_slow_stub,
|
||||
quick_insert_string_stub,
|
||||
slide_hash_stub,
|
||||
update_hash_stub
|
||||
};
|
||||
|
@ -22,12 +22,9 @@ struct functable_s {
|
||||
uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc);
|
||||
uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc);
|
||||
void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start);
|
||||
void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
|
||||
uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
|
||||
Pos (* quick_insert_string)(deflate_state *const s, uint32_t str);
|
||||
void (* slide_hash) (deflate_state *s);
|
||||
uint32_t (* update_hash) (uint32_t h, uint32_t val);
|
||||
};
|
||||
|
||||
Z_INTERNAL extern struct functable_s functable;
|
||||
|
@ -14,8 +14,8 @@
|
||||
#define HASH_CALC_VAR h
|
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
||||
|
||||
#define UPDATE_HASH update_hash_c
|
||||
#define INSERT_STRING insert_string_c
|
||||
#define QUICK_INSERT_STRING quick_insert_string_c
|
||||
#define UPDATE_HASH update_hash
|
||||
#define INSERT_STRING insert_string
|
||||
#define QUICK_INSERT_STRING quick_insert_string
|
||||
|
||||
#include "insert_string_tpl.h"
|
@ -67,7 +67,7 @@ OBJS = \
|
||||
infback.obj \
|
||||
inflate.obj \
|
||||
inftrees.obj \
|
||||
insert_string_c.obj \
|
||||
insert_string.obj \
|
||||
insert_string_roll.obj \
|
||||
slide_hash_c.obj \
|
||||
trees.obj \
|
||||
@ -99,7 +99,7 @@ WFLAGS = $(WFLAGS) \
|
||||
-DARM_NEON \
|
||||
-DARM_NOCHECK_NEON \
|
||||
#
|
||||
OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
|
||||
OBJS = $(OBJS) crc32_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj
|
||||
|
||||
# targets
|
||||
all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \
|
||||
@ -208,7 +208,7 @@ deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/defl
|
||||
infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
|
||||
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
|
||||
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
|
||||
insert_string_c.obj: $(SRCDIR)/arch/generic/insert_string_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
insert_string.obj: $(SRCDIR)/insert_string.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
slide_hash_c.obj: $(SRCDIR)/arch/generic/slide_hash_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
slide_hash_neon.obj: $(SRCDIR)/arch/arm/slide_hash_neon.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h
|
||||
|
@ -72,7 +72,7 @@ OBJS = \
|
||||
infback.obj \
|
||||
inflate.obj \
|
||||
inftrees.obj \
|
||||
insert_string_c.obj \
|
||||
insert_string.obj \
|
||||
insert_string_roll.obj \
|
||||
slide_hash_c.obj \
|
||||
trees.obj \
|
||||
@ -100,7 +100,7 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj
|
||||
|
||||
!if "$(WITH_ACLE)" != ""
|
||||
WFLAGS = $(WFLAGS) -DARM_ACLE
|
||||
OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj
|
||||
OBJS = $(OBJS) crc32_acle.obj
|
||||
!endif
|
||||
!if "$(WITH_VFPV3)" != ""
|
||||
NEON_ARCH = /arch:VFPv3
|
||||
@ -229,7 +229,7 @@ deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/defl
|
||||
infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
|
||||
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
|
||||
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
|
||||
insert_string_c.obj: $(SRCDIR)/arch/generic/insert_string_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
insert_string.obj: $(SRCDIR)/insert_string.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
slide_hash_c.obj: $(SRCDIR)/arch/generic/slide_hash_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h
|
||||
zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h
|
||||
|
@ -80,9 +80,8 @@ OBJS = \
|
||||
infback.obj \
|
||||
inflate.obj \
|
||||
inftrees.obj \
|
||||
insert_string_c.obj \
|
||||
insert_string.obj \
|
||||
insert_string_roll.obj \
|
||||
insert_string_sse42.obj \
|
||||
slide_hash_c.obj \
|
||||
slide_hash_avx2.obj \
|
||||
slide_hash_sse2.obj \
|
||||
@ -231,9 +230,8 @@ deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/defl
|
||||
infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h
|
||||
inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h
|
||||
inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h
|
||||
insert_string_c.obj: $(SRCDIR)/arch/generic/insert_string_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
insert_string.obj: $(SRCDIR)/insert_string.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
insert_string_roll.obj: $(SRCDIR)/insert_string_roll.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
insert_string_sse42.obj: $(SRCDIR)/arch/x86/insert_string_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/insert_string_tpl.h
|
||||
slide_hash_c.obj: $(SRCDIR)/arch/generic/slide_hash_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
slide_hash_avx2.obj: $(SRCDIR)/arch/x86/slide_hash_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
slide_hash_sse2.obj: $(SRCDIR)/arch/x86/slide_hash_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h
|
||||
|
Loading…
Reference in New Issue
Block a user