mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-18 11:35:35 -04:00
Move crc32 C fallbacks to arch/generic
This commit is contained in:
parent
4e132cc0ec
commit
06895bc1b3
@ -978,6 +978,7 @@ set(ZLIB_PUBLIC_HDRS
|
||||
)
|
||||
set(ZLIB_PRIVATE_HDRS
|
||||
arch/generic/adler32_fold_c.h
|
||||
arch/generic/crc32_fold_c.h
|
||||
adler32_p.h
|
||||
chunkset_tpl.h
|
||||
compare256_rle.h
|
||||
@ -985,7 +986,6 @@ set(ZLIB_PRIVATE_HDRS
|
||||
crc32_braid_p.h
|
||||
crc32_braid_comb_p.h
|
||||
crc32_braid_tbl.h
|
||||
crc32_fold.h
|
||||
deflate.h
|
||||
deflate_p.h
|
||||
functable.h
|
||||
@ -1006,6 +1006,8 @@ set(ZLIB_PRIVATE_HDRS
|
||||
set(ZLIB_SRCS
|
||||
arch/generic/adler32_c.c
|
||||
arch/generic/adler32_fold_c.c
|
||||
arch/generic/crc32_braid_c.c
|
||||
arch/generic/crc32_fold_c.c
|
||||
adler32.c
|
||||
chunkset.c
|
||||
compare256.c
|
||||
@ -1013,7 +1015,6 @@ set(ZLIB_SRCS
|
||||
cpu_features.c
|
||||
crc32_braid.c
|
||||
crc32_braid_comb.c
|
||||
crc32_fold.c
|
||||
deflate.c
|
||||
deflate_fast.c
|
||||
deflate_huff.c
|
||||
|
@ -76,6 +76,8 @@ pkgconfigdir = ${libdir}/pkgconfig
|
||||
OBJZ = \
|
||||
arch/generic/adler32_c.o \
|
||||
arch/generic/adler32_fold_c.o \
|
||||
arch/generic/crc32_braid_c.o \
|
||||
arch/generic/crc32_fold_c.o \
|
||||
adler32.o \
|
||||
chunkset.o \
|
||||
compare256.o \
|
||||
@ -83,7 +85,6 @@ OBJZ = \
|
||||
cpu_features.o \
|
||||
crc32_braid.o \
|
||||
crc32_braid_comb.o \
|
||||
crc32_fold.o \
|
||||
deflate.o \
|
||||
deflate_fast.o \
|
||||
deflate_huff.o \
|
||||
@ -115,6 +116,8 @@ OBJC = $(OBJZ) $(OBJG)
|
||||
PIC_OBJZ = \
|
||||
arch/generic/adler32_c.lo \
|
||||
arch/generic/adler32_fold_c.lo \
|
||||
arch/generic/crc32_braid_c.lo \
|
||||
arch/generic/crc32_fold_c.lo \
|
||||
adler32.lo \
|
||||
chunkset.lo \
|
||||
compare256.lo \
|
||||
@ -122,7 +125,6 @@ PIC_OBJZ = \
|
||||
cpu_features.lo \
|
||||
crc32_braid.lo \
|
||||
crc32_braid_comb.lo \
|
||||
crc32_fold.lo \
|
||||
deflate.lo \
|
||||
deflate_fast.lo \
|
||||
deflate_huff.lo \
|
||||
|
@ -14,7 +14,9 @@ TOPDIR=$(SRCTOP)
|
||||
|
||||
all: \
|
||||
adler32_c.o adler32_c.lo \
|
||||
adler32_fold_c.o adler32_fold_c.lo
|
||||
adler32_fold_c.o adler32_fold_c.lo \
|
||||
crc32_braid_c.o crc32_braid_c.lo \
|
||||
crc32_fold_c.o crc32_fold_c.lo
|
||||
|
||||
|
||||
adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
|
||||
@ -29,6 +31,18 @@ adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/funct
|
||||
adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
|
||||
|
||||
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c
|
||||
|
||||
crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c
|
||||
|
||||
crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
|
||||
|
||||
crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
|
||||
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
|
235
arch/generic/crc32_braid_c.c
Normal file
235
arch/generic/crc32_braid_c.c
Normal file
@ -0,0 +1,235 @@
|
||||
/* crc32_braid.c -- compute the CRC-32 of a data stream
|
||||
* Copyright (C) 1995-2022 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
* This interleaved implementation of a CRC makes use of pipelined multiple
|
||||
* arithmetic-logic units, commonly found in modern CPU cores. It is due to
|
||||
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "crc32_braid_tbl.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
/*
|
||||
A CRC of a message is computed on N braids of words in the message, where
|
||||
each word consists of W bytes (4 or 8). If N is 3, for example, then three
|
||||
running sparse CRCs are calculated respectively on each braid, at these
|
||||
indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
|
||||
This is done starting at a word boundary, and continues until as many blocks
|
||||
of N * W bytes as are available have been processed. The results are combined
|
||||
into a single CRC at the end. For this code, N must be in the range 1..6 and
|
||||
W must be 4 or 8. The upper limit on N can be increased if desired by adding
|
||||
more #if blocks, extending the patterns apparent in the code. In addition,
|
||||
crc32 tables would need to be regenerated, if the maximum N value is increased.
|
||||
|
||||
N and W are chosen empirically by benchmarking the execution time on a given
|
||||
processor. The choices for N and W below were based on testing on Intel Kaby
|
||||
Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
|
||||
Octeon II processors. The Intel, AMD, and ARM processors were all fastest
|
||||
with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
|
||||
They were all tested with either gcc or clang, all using the -O3 optimization
|
||||
level. Your mileage may vary.
|
||||
*/
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define ZSWAPWORD(word) (word)
|
||||
# define BRAID_TABLE crc_braid_table
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
# if W == 8
|
||||
# define ZSWAPWORD(word) ZSWAP64(word)
|
||||
# elif W == 4
|
||||
# define ZSWAPWORD(word) ZSWAP32(word)
|
||||
# endif
|
||||
# define BRAID_TABLE crc_braid_big_table
|
||||
#else
|
||||
# error "No endian defined"
|
||||
#endif
|
||||
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
|
||||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef W
|
||||
/*
|
||||
Return the CRC of the W bytes in the word_t data, taking the
|
||||
least-significant byte of the word as the first byte of data, without any pre
|
||||
or post conditioning. This is used to combine the CRCs of each braid.
|
||||
*/
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
static uint32_t crc_word(z_word_t data) {
|
||||
int k;
|
||||
for (k = 0; k < W; k++)
|
||||
data = (data >> 8) ^ crc_table[data & 0xff];
|
||||
return (uint32_t)data;
|
||||
}
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
static z_word_t crc_word(z_word_t data) {
|
||||
int k;
|
||||
for (k = 0; k < W; k++)
|
||||
data = (data << 8) ^
|
||||
crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
|
||||
return data;
|
||||
}
|
||||
#endif /* BYTE_ORDER */
|
||||
|
||||
#endif /* W */
|
||||
|
||||
/* ========================================================================= */
|
||||
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
|
||||
/* Pre-condition the CRC */
|
||||
c = (~crc) & 0xffffffff;
|
||||
|
||||
#ifdef W
|
||||
/* If provided enough bytes, do a braided CRC calculation. */
|
||||
if (len >= N * W + W - 1) {
|
||||
size_t blks;
|
||||
z_word_t const *words;
|
||||
int k;
|
||||
|
||||
/* Compute the CRC up to a z_word_t boundary. */
|
||||
while (len && ((uintptr_t)buf & (W - 1)) != 0) {
|
||||
len--;
|
||||
DO1;
|
||||
}
|
||||
|
||||
/* Compute the CRC on as many N z_word_t blocks as are available. */
|
||||
blks = len / (N * W);
|
||||
len -= blks * N * W;
|
||||
words = (z_word_t const *)buf;
|
||||
|
||||
z_word_t crc0, word0, comb;
|
||||
#if N > 1
|
||||
z_word_t crc1, word1;
|
||||
#if N > 2
|
||||
z_word_t crc2, word2;
|
||||
#if N > 3
|
||||
z_word_t crc3, word3;
|
||||
#if N > 4
|
||||
z_word_t crc4, word4;
|
||||
#if N > 5
|
||||
z_word_t crc5, word5;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
/* Initialize the CRC for each braid. */
|
||||
crc0 = ZSWAPWORD(c);
|
||||
#if N > 1
|
||||
crc1 = 0;
|
||||
#if N > 2
|
||||
crc2 = 0;
|
||||
#if N > 3
|
||||
crc3 = 0;
|
||||
#if N > 4
|
||||
crc4 = 0;
|
||||
#if N > 5
|
||||
crc5 = 0;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
/* Process the first blks-1 blocks, computing the CRCs on each braid independently. */
|
||||
while (--blks) {
|
||||
/* Load the word for each braid into registers. */
|
||||
word0 = crc0 ^ words[0];
|
||||
#if N > 1
|
||||
word1 = crc1 ^ words[1];
|
||||
#if N > 2
|
||||
word2 = crc2 ^ words[2];
|
||||
#if N > 3
|
||||
word3 = crc3 ^ words[3];
|
||||
#if N > 4
|
||||
word4 = crc4 ^ words[4];
|
||||
#if N > 5
|
||||
word5 = crc5 ^ words[5];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
words += N;
|
||||
|
||||
/* Compute and update the CRC for each word. The loop should get unrolled. */
|
||||
crc0 = BRAID_TABLE[0][word0 & 0xff];
|
||||
#if N > 1
|
||||
crc1 = BRAID_TABLE[0][word1 & 0xff];
|
||||
#if N > 2
|
||||
crc2 = BRAID_TABLE[0][word2 & 0xff];
|
||||
#if N > 3
|
||||
crc3 = BRAID_TABLE[0][word3 & 0xff];
|
||||
#if N > 4
|
||||
crc4 = BRAID_TABLE[0][word4 & 0xff];
|
||||
#if N > 5
|
||||
crc5 = BRAID_TABLE[0][word5 & 0xff];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
for (k = 1; k < W; k++) {
|
||||
crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff];
|
||||
#if N > 1
|
||||
crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff];
|
||||
#if N > 2
|
||||
crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff];
|
||||
#if N > 3
|
||||
crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff];
|
||||
#if N > 4
|
||||
crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff];
|
||||
#if N > 5
|
||||
crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* Process the last block, combining the CRCs of the N braids at the same time. */
|
||||
comb = crc_word(crc0 ^ words[0]);
|
||||
#if N > 1
|
||||
comb = crc_word(crc1 ^ words[1] ^ comb);
|
||||
#if N > 2
|
||||
comb = crc_word(crc2 ^ words[2] ^ comb);
|
||||
#if N > 3
|
||||
comb = crc_word(crc3 ^ words[3] ^ comb);
|
||||
#if N > 4
|
||||
comb = crc_word(crc4 ^ words[4] ^ comb);
|
||||
#if N > 5
|
||||
comb = crc_word(crc5 ^ words[5] ^ comb);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
words += N;
|
||||
c = ZSWAPWORD(comb);
|
||||
|
||||
/* Update the pointer to the remaining bytes to process. */
|
||||
buf = (const unsigned char *)words;
|
||||
}
|
||||
|
||||
#endif /* W */
|
||||
|
||||
/* Complete the computation of the CRC on any remaining bytes. */
|
||||
while (len >= 8) {
|
||||
len -= 8;
|
||||
DO8;
|
||||
}
|
||||
while (len) {
|
||||
len--;
|
||||
DO1;
|
||||
}
|
||||
|
||||
/* Return the CRC, post-conditioned. */
|
||||
return c ^ 0xffffffff;
|
||||
}
|
@ -4,10 +4,9 @@
|
||||
*/
|
||||
#include "zbuild.h"
|
||||
#include "functable.h"
|
||||
#include "crc32.h"
|
||||
|
||||
#include "crc32_fold.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include "crc32_fold_c.h"
|
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
|
||||
crc->value = CRC32_INITIAL_VALUE;
|
@ -2,16 +2,8 @@
|
||||
* Copyright (C) 2021 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#ifndef CRC32_FOLD_H_
|
||||
#define CRC32_FOLD_H_
|
||||
|
||||
#define CRC32_FOLD_BUFFER_SIZE (16 * 4)
|
||||
/* sizeof(__m128i) * (4 folds) */
|
||||
|
||||
typedef struct crc32_fold_s {
|
||||
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];
|
||||
uint32_t value;
|
||||
} crc32_fold;
|
||||
#ifndef CRC32_FOLD_C_H_
|
||||
#define CRC32_FOLD_C_H_
|
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
|
||||
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
@ -26,7 +26,7 @@
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "crc32_fold.h"
|
||||
#include "crc32.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "x86_intrins.h"
|
||||
#include <assert.h>
|
||||
|
@ -6,7 +6,7 @@
|
||||
#ifndef CPU_FEATURES_H_
|
||||
#define CPU_FEATURES_H_
|
||||
|
||||
#include "crc32_fold.h"
|
||||
#include "crc32.h"
|
||||
|
||||
#if defined(X86_FEATURES)
|
||||
# include "arch/x86/x86_features.h"
|
||||
|
16
crc32.h
Normal file
16
crc32.h
Normal file
@ -0,0 +1,16 @@
|
||||
/* crc32.h -- crc32 folding interface
|
||||
* Copyright (C) 2021 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#ifndef CRC32_H_
|
||||
#define CRC32_H_
|
||||
|
||||
#define CRC32_FOLD_BUFFER_SIZE (16 * 4)
|
||||
/* sizeof(__m128i) * (4 folds) */
|
||||
|
||||
typedef struct crc32_fold_s {
|
||||
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];
|
||||
uint32_t value;
|
||||
} crc32_fold;
|
||||
|
||||
#endif
|
225
crc32_braid.c
225
crc32_braid.c
@ -8,9 +8,7 @@
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "functable.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "crc32_braid_tbl.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
@ -42,226 +40,3 @@ uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t
|
||||
return PREFIX(crc32_z)(crc, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
/*
|
||||
A CRC of a message is computed on N braids of words in the message, where
|
||||
each word consists of W bytes (4 or 8). If N is 3, for example, then three
|
||||
running sparse CRCs are calculated respectively on each braid, at these
|
||||
indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
|
||||
This is done starting at a word boundary, and continues until as many blocks
|
||||
of N * W bytes as are available have been processed. The results are combined
|
||||
into a single CRC at the end. For this code, N must be in the range 1..6 and
|
||||
W must be 4 or 8. The upper limit on N can be increased if desired by adding
|
||||
more #if blocks, extending the patterns apparent in the code. In addition,
|
||||
crc32 tables would need to be regenerated, if the maximum N value is increased.
|
||||
|
||||
N and W are chosen empirically by benchmarking the execution time on a given
|
||||
processor. The choices for N and W below were based on testing on Intel Kaby
|
||||
Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
|
||||
Octeon II processors. The Intel, AMD, and ARM processors were all fastest
|
||||
with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
|
||||
They were all tested with either gcc or clang, all using the -O3 optimization
|
||||
level. Your mileage may vary.
|
||||
*/
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define ZSWAPWORD(word) (word)
|
||||
# define BRAID_TABLE crc_braid_table
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
# if W == 8
|
||||
# define ZSWAPWORD(word) ZSWAP64(word)
|
||||
# elif W == 4
|
||||
# define ZSWAPWORD(word) ZSWAP32(word)
|
||||
# endif
|
||||
# define BRAID_TABLE crc_braid_big_table
|
||||
#else
|
||||
# error "No endian defined"
|
||||
#endif
|
||||
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
|
||||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef W
|
||||
/*
|
||||
Return the CRC of the W bytes in the word_t data, taking the
|
||||
least-significant byte of the word as the first byte of data, without any pre
|
||||
or post conditioning. This is used to combine the CRCs of each braid.
|
||||
*/
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
static uint32_t crc_word(z_word_t data) {
|
||||
int k;
|
||||
for (k = 0; k < W; k++)
|
||||
data = (data >> 8) ^ crc_table[data & 0xff];
|
||||
return (uint32_t)data;
|
||||
}
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
static z_word_t crc_word(z_word_t data) {
|
||||
int k;
|
||||
for (k = 0; k < W; k++)
|
||||
data = (data << 8) ^
|
||||
crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
|
||||
return data;
|
||||
}
|
||||
#endif /* BYTE_ORDER */
|
||||
|
||||
#endif /* W */
|
||||
|
||||
/* ========================================================================= */
|
||||
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
|
||||
/* Pre-condition the CRC */
|
||||
c = (~crc) & 0xffffffff;
|
||||
|
||||
#ifdef W
|
||||
/* If provided enough bytes, do a braided CRC calculation. */
|
||||
if (len >= N * W + W - 1) {
|
||||
size_t blks;
|
||||
z_word_t const *words;
|
||||
int k;
|
||||
|
||||
/* Compute the CRC up to a z_word_t boundary. */
|
||||
while (len && ((uintptr_t)buf & (W - 1)) != 0) {
|
||||
len--;
|
||||
DO1;
|
||||
}
|
||||
|
||||
/* Compute the CRC on as many N z_word_t blocks as are available. */
|
||||
blks = len / (N * W);
|
||||
len -= blks * N * W;
|
||||
words = (z_word_t const *)buf;
|
||||
|
||||
z_word_t crc0, word0, comb;
|
||||
#if N > 1
|
||||
z_word_t crc1, word1;
|
||||
#if N > 2
|
||||
z_word_t crc2, word2;
|
||||
#if N > 3
|
||||
z_word_t crc3, word3;
|
||||
#if N > 4
|
||||
z_word_t crc4, word4;
|
||||
#if N > 5
|
||||
z_word_t crc5, word5;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
/* Initialize the CRC for each braid. */
|
||||
crc0 = ZSWAPWORD(c);
|
||||
#if N > 1
|
||||
crc1 = 0;
|
||||
#if N > 2
|
||||
crc2 = 0;
|
||||
#if N > 3
|
||||
crc3 = 0;
|
||||
#if N > 4
|
||||
crc4 = 0;
|
||||
#if N > 5
|
||||
crc5 = 0;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
/* Process the first blks-1 blocks, computing the CRCs on each braid independently. */
|
||||
while (--blks) {
|
||||
/* Load the word for each braid into registers. */
|
||||
word0 = crc0 ^ words[0];
|
||||
#if N > 1
|
||||
word1 = crc1 ^ words[1];
|
||||
#if N > 2
|
||||
word2 = crc2 ^ words[2];
|
||||
#if N > 3
|
||||
word3 = crc3 ^ words[3];
|
||||
#if N > 4
|
||||
word4 = crc4 ^ words[4];
|
||||
#if N > 5
|
||||
word5 = crc5 ^ words[5];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
words += N;
|
||||
|
||||
/* Compute and update the CRC for each word. The loop should get unrolled. */
|
||||
crc0 = BRAID_TABLE[0][word0 & 0xff];
|
||||
#if N > 1
|
||||
crc1 = BRAID_TABLE[0][word1 & 0xff];
|
||||
#if N > 2
|
||||
crc2 = BRAID_TABLE[0][word2 & 0xff];
|
||||
#if N > 3
|
||||
crc3 = BRAID_TABLE[0][word3 & 0xff];
|
||||
#if N > 4
|
||||
crc4 = BRAID_TABLE[0][word4 & 0xff];
|
||||
#if N > 5
|
||||
crc5 = BRAID_TABLE[0][word5 & 0xff];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
for (k = 1; k < W; k++) {
|
||||
crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff];
|
||||
#if N > 1
|
||||
crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff];
|
||||
#if N > 2
|
||||
crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff];
|
||||
#if N > 3
|
||||
crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff];
|
||||
#if N > 4
|
||||
crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff];
|
||||
#if N > 5
|
||||
crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff];
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* Process the last block, combining the CRCs of the N braids at the same time. */
|
||||
comb = crc_word(crc0 ^ words[0]);
|
||||
#if N > 1
|
||||
comb = crc_word(crc1 ^ words[1] ^ comb);
|
||||
#if N > 2
|
||||
comb = crc_word(crc2 ^ words[2] ^ comb);
|
||||
#if N > 3
|
||||
comb = crc_word(crc3 ^ words[3] ^ comb);
|
||||
#if N > 4
|
||||
comb = crc_word(crc4 ^ words[4] ^ comb);
|
||||
#if N > 5
|
||||
comb = crc_word(crc5 ^ words[5] ^ comb);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
words += N;
|
||||
c = ZSWAPWORD(comb);
|
||||
|
||||
/* Update the pointer to the remaining bytes to process. */
|
||||
buf = (const unsigned char *)words;
|
||||
}
|
||||
|
||||
#endif /* W */
|
||||
|
||||
/* Complete the computation of the CRC on any remaining bytes. */
|
||||
while (len >= 8) {
|
||||
len -= 8;
|
||||
DO8;
|
||||
}
|
||||
while (len) {
|
||||
len--;
|
||||
DO1;
|
||||
}
|
||||
|
||||
/* Return the CRC, post-conditioned. */
|
||||
return c ^ 0xffffffff;
|
||||
}
|
||||
|
@ -7,7 +7,6 @@
|
||||
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "crc32_braid_tbl.h"
|
||||
|
@ -1,7 +1,6 @@
|
||||
#ifndef CRC32_BRAID_P_H_
|
||||
#define CRC32_BRAID_P_H_
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zendian.h"
|
||||
|
||||
/* Define N */
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "zutil.h"
|
||||
#include "zendian.h"
|
||||
#include "crc32_fold.h"
|
||||
#include "crc32.h"
|
||||
|
||||
/* define NO_GZIP when compiling if you want to disable gzip header and
|
||||
trailer creation by deflate(). NO_GZIP would be used to avoid linking in
|
||||
|
@ -7,7 +7,7 @@
|
||||
#define FUNCTABLE_H_
|
||||
|
||||
#include "deflate.h"
|
||||
#include "crc32_fold.h"
|
||||
#include "crc32_fold_c.h"
|
||||
#include "adler32_fold_c.h"
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
|
@ -11,7 +11,7 @@
|
||||
#ifndef INFLATE_H_
|
||||
#define INFLATE_H_
|
||||
|
||||
#include "crc32_fold.h"
|
||||
#include "crc32.h"
|
||||
|
||||
/* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate().
|
||||
NO_GZIP would be used to avoid linking in the crc code when it is not needed.
|
||||
|
@ -52,8 +52,9 @@ OBJS = \
|
||||
compress.obj \
|
||||
cpu_features.obj \
|
||||
crc32_braid.obj \
|
||||
crc32_braid_c.obj \
|
||||
crc32_braid_comb.obj \
|
||||
crc32_fold.obj \
|
||||
crc32_fold_c.obj \
|
||||
deflate.obj \
|
||||
deflate_fast.obj \
|
||||
deflate_huff.obj \
|
||||
@ -191,9 +192,10 @@ gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)
|
||||
compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
|
||||
uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
|
||||
cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
|
||||
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
|
@ -57,8 +57,9 @@ OBJS = \
|
||||
compress.obj \
|
||||
cpu_features.obj \
|
||||
crc32_braid.obj \
|
||||
crc32_braid_c.obj \
|
||||
crc32_braid_comb.obj \
|
||||
crc32_fold.obj \
|
||||
crc32_fold_c.obj \
|
||||
deflate.obj \
|
||||
deflate_fast.obj \
|
||||
deflate_huff.obj \
|
||||
@ -212,9 +213,10 @@ compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
|
||||
uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h
|
||||
chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
|
||||
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
|
@ -64,8 +64,9 @@ OBJS = \
|
||||
compress.obj \
|
||||
cpu_features.obj \
|
||||
crc32_braid.obj \
|
||||
crc32_braid_c.obj \
|
||||
crc32_braid_comb.obj \
|
||||
crc32_fold.obj \
|
||||
crc32_fold_c.obj \
|
||||
crc32_pclmulqdq.obj \
|
||||
deflate.obj \
|
||||
deflate_fast.obj \
|
||||
@ -210,11 +211,12 @@ chunkset_avx2.obj: $(SRCDIR)/arch/x86/chunkset_avx2.c $(SRCDIR)/zbuild.h $(SRCDI
|
||||
chunkset_sse2.obj: $(SRCDIR)/arch/x86/chunkset_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
chunkset_ssse3.obj: $(SRCDIR)/arch/x86/chunkset_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h
|
||||
crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_c.obj: $(SRCDIR)/arch/generic/crc32_braid_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h
|
||||
crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h
|
||||
crc32_fold_c.obj: $(SRCDIR)/arch/generic/crc32_fold_c.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/arch/generic/crc32_fold_c.h
|
||||
crc32_pclmulqdq.obj: $(SRCDIR)/arch/x86/crc32_pclmulqdq.c $(SRCDIR)/arch/x86/crc32_pclmulqdq_tpl.h $(SRCDIR)/arch/x86/crc32_fold_pclmulqdq_tpl.h \
|
||||
$(SRCDIR)/crc32_fold.h $(SRCDIR)/zbuild.h
|
||||
$(SRCDIR)/arch/generic/crc32_fold_c.h $(SRCDIR)/zbuild.h
|
||||
deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h
|
||||
|
Loading…
Reference in New Issue
Block a user