Move adler32 C fallbacks to arch/generic

This commit is contained in:
Hans Kristian Rosbach 2024-01-03 15:22:10 +01:00 committed by Hans Kristian Rosbach
parent 6f38b4c5fc
commit 4e132cc0ec
19 changed files with 131 additions and 74 deletions

View File

@ -977,6 +977,7 @@ set(ZLIB_PUBLIC_HDRS
${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h
)
set(ZLIB_PRIVATE_HDRS
arch/generic/adler32_fold_c.h
adler32_p.h
chunkset_tpl.h
compare256_rle.h
@ -1003,8 +1004,9 @@ set(ZLIB_PRIVATE_HDRS
zutil.h
)
set(ZLIB_SRCS
arch/generic/adler32_c.c
arch/generic/adler32_fold_c.c
adler32.c
adler32_fold.c
chunkset.c
compare256.c
compress.c

View File

@ -74,8 +74,9 @@ man3dir = ${mandir}/man3
pkgconfigdir = ${libdir}/pkgconfig
OBJZ = \
arch/generic/adler32_c.o \
arch/generic/adler32_fold_c.o \
adler32.o \
adler32_fold.o \
chunkset.o \
compare256.o \
compress.o \
@ -112,8 +113,9 @@ TESTOBJG =
OBJC = $(OBJZ) $(OBJG)
PIC_OBJZ = \
arch/generic/adler32_c.lo \
arch/generic/adler32_fold_c.lo \
adler32.lo \
adler32_fold.lo \
chunkset.lo \
compare256.lo \
compress.lo \
@ -169,6 +171,12 @@ $(ARCHDIR)/%.o: $(SRCDIR)/$(ARCHDIR)/%.c
$(ARCHDIR)/%.lo: $(SRCDIR)/$(ARCHDIR)/%.c
$(MAKE) -C $(ARCHDIR) $(notdir $@)
# Objects for the portable C fallbacks are not compiled here; they are built
# by the Makefile inside arch/generic. $(notdir $@) strips the directory part
# so the sub-make is asked for a target name local to that directory.
arch/generic/%.o: $(SRCDIR)/arch/generic/%.c
$(MAKE) -C arch/generic $(notdir $@)
# Same delegation for the .lo flavour of each object.
arch/generic/%.lo: $(SRCDIR)/arch/generic/%.c
$(MAKE) -C arch/generic $(notdir $@)
%.o: $(ARCHDIR)/%.o
-cp $< $@

View File

@ -7,52 +7,6 @@
#include "functable.h"
#include "adler32_p.h"
/* ========================================================================= */
/*
 * Portable C implementation of the Adler-32 update step.
 *
 * adler: running checksum; the low 16 bits and the high 16 bits are treated
 *        as two separate sums (see the split below)
 * buf:   input bytes; a NULL pointer (with len != 1) returns the value 1
 * len:   number of bytes available at buf
 *
 * Returns the value produced by one of the adler32_len_* helpers, or 1 for
 * the NULL-buffer case.
 */
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t sum2;
unsigned n;
/* split Adler-32 into component sums */
sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
/* in case user likes doing a byte at a time, keep it fast */
/* NOTE(review): buf is forwarded to adler32_len_1 before the NULL check
 * below; presumably callers never combine buf == NULL with len == 1 -- confirm */
if (UNLIKELY(len == 1))
return adler32_len_1(adler, buf, sum2);
/* initial Adler-32 value (deferred check for len == 1 speed) */
if (UNLIKELY(buf == NULL))
return 1L;
/* in case short lengths are provided, keep it somewhat fast */
if (UNLIKELY(len < 16))
return adler32_len_16(adler, buf, len, sum2);
/* do length NMAX blocks -- requires just one modulo operation */
while (len >= NMAX) {
len -= NMAX;
/* n counts the unrolled steps needed to cover one NMAX-byte block */
#ifdef UNROLL_MORE
n = NMAX / 16; /* NMAX is divisible by 16 */
#else
n = NMAX / 8; /* NMAX is divisible by 8 */
#endif
do {
#ifdef UNROLL_MORE
DO16(adler, sum2, buf); /* 16 sums unrolled */
buf += 16;
#else
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
buf += 8;
#endif
} while (--n);
/* both sums are reduced once per block rather than once per byte */
adler %= BASE;
sum2 %= BASE;
}
/* do remaining bytes (less than NMAX, still just one modulo) */
return adler32_len_64(adler, buf, len, sum2);
}
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) {
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);

View File

@ -1,5 +1,6 @@
# Makefile for zlib
# Makefile for zlib-ng
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
# Copyright (C) 2024 Hans Kristian Rosbach
# For conditions of distribution and use, see copyright notice in zlib.h
CC=
@ -11,12 +12,27 @@ SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all:
all: \
adler32_c.o adler32_c.lo \
adler32_fold_c.o adler32_fold_c.lo
# Objects for the portable C Adler-32 fallbacks.
# Each source is built twice: the .o variant with $(CFLAGS) and the .lo
# variant with $(SFLAGS).
#
# adler32_c.c includes zbuild.h, functable.h and adler32_p.h, so all three
# are listed as prerequisites; leaving functable.h out would let a change to
# that header go unnoticed by make.
adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCTOP)/adler32_p.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c

adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCTOP)/adler32_p.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c

# The fold-copy fallback depends on its own header, which lives next to the
# source in this directory rather than in the top-level source tree.
adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c

adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
mostlyclean: clean
clean:
rm -f *.o *.lo *~ \
rm -f *.o *.lo *~
rm -rf objs
rm -f *.gcda *.gcno *.gcov

54
arch/generic/adler32_c.c Normal file
View File

@ -0,0 +1,54 @@
/* adler32_c.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2011, 2016 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "functable.h"
#include "adler32_p.h"
/* ========================================================================= */
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
    /* The incoming checksum carries two 16-bit sums: keep the low half in
     * adler and move the high half into sum2. */
    uint32_t sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* One-byte updates go to a dedicated helper. This test deliberately
     * comes before the NULL check so the single-byte path stays short. */
    if (UNLIKELY(len == 1))
        return adler32_len_1(adler, buf, sum2);

    /* A NULL buffer asks for the initial Adler-32 value. */
    if (UNLIKELY(buf == NULL))
        return 1L;

    /* Inputs shorter than 16 bytes are handed to the short-input helper. */
    if (UNLIKELY(len < 16))
        return adler32_len_16(adler, buf, len, sum2);

    /* Consume whole NMAX-byte blocks; each block needs only a single
     * modulo reduction per sum. */
    for (; len >= NMAX; len -= NMAX) {
#ifdef UNROLL_MORE
        unsigned n = NMAX / 16;         /* NMAX is divisible by 16 */
        do {
            DO16(adler, sum2, buf);     /* 16 sums unrolled */
            buf += 16;
        } while (--n);
#else
        unsigned n = NMAX / 8;          /* NMAX is divisible by 8 */
        do {
            DO8(adler, sum2, buf, 0);   /* 8 sums unrolled */
            buf += 8;
        } while (--n);
#endif
        adler %= BASE;
        sum2 %= BASE;
    }

    /* Fewer than NMAX bytes are left; the tail helper finishes them
     * (still just one modulo). */
    return adler32_len_64(adler, buf, len, sum2);
}

View File

@ -5,7 +5,7 @@
#include "zbuild.h"
#include "functable.h"
#include "adler32_fold.h"
#include "adler32_fold_c.h"
#include <limits.h>

View File

@ -3,8 +3,8 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef ADLER32_FOLD_H_
#define ADLER32_FOLD_H_
#ifndef ADLER32_FOLD_C_H_
#define ADLER32_FOLD_C_H_
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);

View File

@ -11,7 +11,6 @@
#include "zbuild.h"
#include <immintrin.h>
#include "adler32_fold.h"
#include "adler32_p.h"
#include "adler32_avx2_p.h"
#include "x86_intrins.h"

View File

@ -10,7 +10,6 @@
#include "zbuild.h"
#include "adler32_p.h"
#include "adler32_fold.h"
#include "cpu_features.h"
#include <immintrin.h>
#include "x86_intrins.h"

View File

@ -13,7 +13,6 @@
#include "adler32_p.h"
#include "cpu_features.h"
#include <immintrin.h>
#include "adler32_fold.h"
#include "x86_intrins.h"
#include "adler32_avx512_p.h"
#include "adler32_avx2_p.h"

View File

@ -8,7 +8,6 @@
#include "zbuild.h"
#include "adler32_p.h"
#include "adler32_fold.h"
#include "adler32_ssse3_p.h"
#include <immintrin.h>

15
configure vendored
View File

@ -2109,6 +2109,21 @@ for file in $SRCDIR/$ARCHDIR/*.c; do
fi
done
# Generate Makefile in generic arch dir
# The directory is created in the current (build) directory first so the
# output redirection at the end of the sed command has somewhere to write.
mkdir -p arch/generic
# Makefile.in is read from the source tree and filtered through sed: for each
# variable listed below, everything from the '=' onwards is replaced with the
# value of the corresponding shell variable. SRCDIR is pointed at the
# arch/generic subdirectory of the source tree and SRCTOP at the source root.
# '#' serves as the s-command delimiter, so a '#' inside any substituted
# value would break the expression.
sed < $SRCDIR/arch/generic/Makefile.in "
/^CC *=/s#=.*#=$CC#
/^CFLAGS *=/s#=.*#=$CFLAGS#
/^SFLAGS *=/s#=.*#=$SFLAGS#
/^INCLUDES *=/s#=.*#=$INCLUDES#
/^SRCDIR *=/s#=.*#=$SRCDIR/arch/generic#
/^SRCTOP *=/s#=.*#=$SRCDIR#
/^BUILDDIR *=/s#=.*#=$BUILDDIR#
" > arch/generic/Makefile
## TODO: Process header dependencies
# Emscripten does not support large amounts of data via stdin/out
# https://github.com/emscripten-core/emscripten/issues/16755#issuecomment-1102732849
if test "$CHOST" != "wasm32"; then

View File

@ -6,7 +6,6 @@
#ifndef CPU_FEATURES_H_
#define CPU_FEATURES_H_
#include "adler32_fold.h"
#include "crc32_fold.h"
#if defined(X86_FEATURES)

View File

@ -12,7 +12,6 @@
#include "zutil.h"
#include "zendian.h"
#include "adler32_fold.h"
#include "crc32_fold.h"
/* define NO_GZIP when compiling if you want to disable gzip header and

View File

@ -8,7 +8,7 @@
#include "deflate.h"
#include "crc32_fold.h"
#include "adler32_fold.h"
#include "adler32_fold_c.h"
#ifdef ZLIB_COMPAT
typedef struct z_stream_s z_stream;

View File

@ -11,7 +11,6 @@
#ifndef INFLATE_H_
#define INFLATE_H_
#include "adler32_fold.h"
#include "crc32_fold.h"
/* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate().

View File

@ -44,7 +44,8 @@ SUFFIX =
OBJS = \
adler32.obj \
adler32_fold.obj \
adler32_c.obj \
adler32_fold_c.obj \
arm_features.obj \
chunkset.obj \
compare256.obj \
@ -169,6 +170,9 @@ gzwrite2.obj: gzwrite.c
{$(TOP)/arch/arm}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/arch/generic}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/test}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $<
@ -176,8 +180,9 @@ $(TOP)/zconf$(SUFFIX).h: zconf
SRCDIR = $(TOP)
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h

View File

@ -49,7 +49,8 @@ SUFFIX =
OBJS = \
adler32.obj \
adler32_fold.obj \
adler32_c.obj \
adler32_fold_c.obj \
arm_features.obj \
chunkset.obj \
compare256.obj \
@ -190,6 +191,9 @@ gzwrite2.obj: gzwrite.c
{$(TOP)/arch/arm}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/arch/generic}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/test}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $<
@ -197,8 +201,9 @@ $(TOP)/zconf$(SUFFIX).h: zconf
SRCDIR = $(TOP)
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h

View File

@ -47,12 +47,13 @@ SUFFIX =
OBJS = \
adler32.obj \
adler32_c.obj \
adler32_avx2.obj \
adler32_avx512.obj \
adler32_avx512_vnni.obj \
adler32_sse42.obj \
adler32_ssse3.obj \
adler32_fold.obj \
adler32_fold_c.obj \
chunkset.obj \
chunkset_avx2.obj \
chunkset_sse2.obj \
@ -178,6 +179,9 @@ gzwrite2.obj: gzwrite.c
{$(TOP)/arch/x86}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/arch/generic}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $<
{$(TOP)/test}.c.obj:
$(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $<
@ -185,15 +189,16 @@ $(TOP)/zconf$(SUFFIX).h: zconf
SRCDIR = $(TOP)
# Keep the dependences in sync with top-level Makefile.in
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_c.obj: $(SRCDIR)/arch/generic/adler32_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h
adler32_avx2.obj: $(SRCDIR)/arch/x86/adler32_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/fallback_builtins.h
adler32_avx512.obj: $(SRCDIR)/arch/x86/adler32_avx512.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h
adler32_avx512_vnni.obj: $(SRCDIR)/arch/x86/adler32_avx512_vnni.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h
adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \
adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h \
$(SRCDIR)/arch/x86/adler32_ssse3_p.h
adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \
adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h \
$(SRCDIR)/arch/x86/adler32_ssse3_p.h
adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h
adler32_fold_c.obj: $(SRCDIR)/arch/generic/adler32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/adler32_fold_c.h
functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h
gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h
gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h