mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-19 03:55:39 -04:00
Use GCC's may_alias attribute for unaligned memory access
This commit is contained in:
parent
fc90e7b3fc
commit
d7e121e56b
@ -5,6 +5,7 @@
|
|||||||
#ifdef ARM_NEON
|
#ifdef ARM_NEON
|
||||||
#include "neon_intrins.h"
|
#include "neon_intrins.h"
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
#include "arch/generic/chunk_permute_table.h"
|
#include "arch/generic/chunk_permute_table.h"
|
||||||
|
|
||||||
typedef uint8x16_t chunk_t;
|
typedef uint8x16_t chunk_t;
|
||||||
@ -31,21 +32,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
uint16_t tmp;
|
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(zng_memread_2(from)));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
uint32_t tmp;
|
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(zng_memread_4(from)));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
uint64_t tmp;
|
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(zng_memread_8(from)));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHUNKSIZE chunksize_neon
|
#define CHUNKSIZE chunksize_neon
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
|
|
||||||
|
@ -40,10 +40,10 @@ chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.
|
|||||||
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
|
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
|
||||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
|
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
|
||||||
|
|
||||||
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
||||||
|
|
||||||
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
||||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
||||||
|
|
||||||
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
typedef uint64_t chunk_t;
|
typedef uint64_t chunk_t;
|
||||||
|
|
||||||
@ -12,21 +13,20 @@ typedef uint64_t chunk_t;
|
|||||||
#define HAVE_CHUNKMEMSET_8
|
#define HAVE_CHUNKMEMSET_8
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
uint8_t *dest = (uint8_t *)chunk;
|
uint32_t tmp = zng_memread_4(from);
|
||||||
memcpy(dest, from, sizeof(uint32_t));
|
*chunk = tmp | ((chunk_t)tmp << 32);
|
||||||
memcpy(dest+4, from, sizeof(uint32_t));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
memcpy(chunk, from, sizeof(uint64_t));
|
*chunk = zng_memread_8(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||||
memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
|
*chunk = zng_memread_8(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
|
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
|
||||||
memcpy(out, chunk, sizeof(uint64_t));
|
zng_memwrite_8(out, *chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHUNKSIZE chunksize_c
|
#define CHUNKSIZE chunksize_c
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
|
|
||||||
@ -107,8 +107,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
|
|||||||
do {
|
do {
|
||||||
uint32_t sv, mv, diff;
|
uint32_t sv, mv, diff;
|
||||||
|
|
||||||
memcpy(&sv, src0, sizeof(sv));
|
sv = zng_memread_4(src0);
|
||||||
memcpy(&mv, src1, sizeof(mv));
|
mv = zng_memread_4(src1);
|
||||||
|
|
||||||
diff = sv ^ mv;
|
diff = sv ^ mv;
|
||||||
if (diff) {
|
if (diff) {
|
||||||
@ -151,8 +151,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
|
|||||||
do {
|
do {
|
||||||
uint64_t sv, mv, diff;
|
uint64_t sv, mv, diff;
|
||||||
|
|
||||||
memcpy(&sv, src0, sizeof(sv));
|
sv = zng_memread_8(src0);
|
||||||
memcpy(&mv, src1, sizeof(mv));
|
mv = zng_memread_8(src1);
|
||||||
|
|
||||||
diff = sv ^ mv;
|
diff = sv ^ mv;
|
||||||
if (diff) {
|
if (diff) {
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#ifdef POWER8_VSX
|
#ifdef POWER8_VSX
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
typedef vector unsigned char chunk_t;
|
typedef vector unsigned char chunk_t;
|
||||||
|
|
||||||
@ -15,21 +16,15 @@ typedef vector unsigned char chunk_t;
|
|||||||
#define HAVE_CHUNKMEMSET_8
|
#define HAVE_CHUNKMEMSET_8
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
uint16_t tmp;
|
*chunk = (vector unsigned char)vec_splats(zng_memread_2(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = (vector unsigned char)vec_splats(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
uint32_t tmp;
|
*chunk = (vector unsigned char)vec_splats(zng_memread_4(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = (vector unsigned char)vec_splats(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
uint64_t tmp;
|
*chunk = (vector unsigned char)vec_splats((unsigned long long)zng_memread_8(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = (vector unsigned char)vec_splats((unsigned long long)tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#ifdef POWER9
|
#ifdef POWER9
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "zendian.h"
|
#include "zendian.h"
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#ifdef RISCV_RVV
|
#ifdef RISCV_RVV
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
*/
|
*/
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
#ifdef X86_AVX2
|
#ifdef X86_AVX2
|
||||||
#include "avx2_tables.h"
|
#include "avx2_tables.h"
|
||||||
@ -19,21 +20,15 @@ typedef __m128i halfchunk_t;
|
|||||||
#define HAVE_HALF_CHUNK
|
#define HAVE_HALF_CHUNK
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
int16_t tmp;
|
*chunk = _mm256_set1_epi16(zng_memread_2(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi16(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
int32_t tmp;
|
*chunk = _mm256_set1_epi32(zng_memread_4(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi32(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
int64_t tmp;
|
*chunk = _mm256_set1_epi64x(zng_memread_8(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi64x(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
*/
|
*/
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
#ifdef X86_AVX512
|
#ifdef X86_AVX512
|
||||||
|
|
||||||
@ -33,21 +34,15 @@ static inline mask_t gen_mask(unsigned len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
int16_t tmp;
|
*chunk = _mm256_set1_epi16(zng_memread_2(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi16(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
int32_t tmp;
|
*chunk = _mm256_set1_epi32(zng_memread_4(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi32(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
int64_t tmp;
|
*chunk = _mm256_set1_epi64x(zng_memread_8(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm256_set1_epi64x(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
#ifdef X86_SSE2
|
#ifdef X86_SSE2
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
@ -14,21 +15,15 @@ typedef __m128i chunk_t;
|
|||||||
#define HAVE_CHUNKMEMSET_8
|
#define HAVE_CHUNKMEMSET_8
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
int16_t tmp;
|
*chunk = _mm_set1_epi16(zng_memread_2(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi16(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
int32_t tmp;
|
*chunk = _mm_set1_epi32(zng_memread_4(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi32(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
int64_t tmp;
|
*chunk = _mm_set1_epi64x(zng_memread_8(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi64x(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
#if defined(X86_SSSE3)
|
#if defined(X86_SSSE3)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
@ -33,21 +34,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
|
|||||||
|
|
||||||
|
|
||||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||||
int16_t tmp;
|
*chunk = _mm_set1_epi16(zng_memread_2(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi16(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||||
int32_t tmp;
|
*chunk = _mm_set1_epi32(zng_memread_4(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi32(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||||
int64_t tmp;
|
*chunk = _mm_set1_epi64x(zng_memread_8(from));
|
||||||
memcpy(&tmp, from, sizeof(tmp));
|
|
||||||
*chunk = _mm_set1_epi64x(tmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
|
#include "zmemory.h"
|
||||||
#include "fallback_builtins.h"
|
#include "fallback_builtins.h"
|
||||||
#include "zendian.h"
|
#include "zendian.h"
|
||||||
|
|
||||||
@ -47,25 +48,21 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
|
|||||||
/* 16-bit unaligned integer comparison */
|
/* 16-bit unaligned integer comparison */
|
||||||
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
|
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
|
||||||
uint32_t len = 0;
|
uint32_t len = 0;
|
||||||
uint16_t src0_cmp, src1_cmp;
|
uint16_t src0_cmp;
|
||||||
|
|
||||||
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
|
src0_cmp = zng_memread_2(src0);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
|
if (src0_cmp != zng_memread_2(src1))
|
||||||
if (src0_cmp != src1_cmp)
|
|
||||||
return len + (*src0 == *src1);
|
return len + (*src0 == *src1);
|
||||||
src1 += 2, len += 2;
|
src1 += 2, len += 2;
|
||||||
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
|
if (src0_cmp != zng_memread_2(src1))
|
||||||
if (src0_cmp != src1_cmp)
|
|
||||||
return len + (*src0 == *src1);
|
return len + (*src0 == *src1);
|
||||||
src1 += 2, len += 2;
|
src1 += 2, len += 2;
|
||||||
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
|
if (src0_cmp != zng_memread_2(src1))
|
||||||
if (src0_cmp != src1_cmp)
|
|
||||||
return len + (*src0 == *src1);
|
return len + (*src0 == *src1);
|
||||||
src1 += 2, len += 2;
|
src1 += 2, len += 2;
|
||||||
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
|
if (src0_cmp != zng_memread_2(src1))
|
||||||
if (src0_cmp != src1_cmp)
|
|
||||||
return len + (*src0 == *src1);
|
return len + (*src0 == *src1);
|
||||||
src1 += 2, len += 2;
|
src1 += 2, len += 2;
|
||||||
} while (len < 256);
|
} while (len < 256);
|
||||||
@ -79,13 +76,13 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
|
|||||||
uint32_t sv, len = 0;
|
uint32_t sv, len = 0;
|
||||||
uint16_t src0_cmp;
|
uint16_t src0_cmp;
|
||||||
|
|
||||||
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
|
src0_cmp = zng_memread_2(src0);
|
||||||
sv = ((uint32_t)src0_cmp << 16) | src0_cmp;
|
sv = ((uint32_t)src0_cmp << 16) | src0_cmp;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint32_t mv, diff;
|
uint32_t mv, diff;
|
||||||
|
|
||||||
memcpy(&mv, src1, sizeof(mv));
|
mv = zng_memread_4(src1);
|
||||||
|
|
||||||
diff = sv ^ mv;
|
diff = sv ^ mv;
|
||||||
if (diff) {
|
if (diff) {
|
||||||
@ -112,14 +109,14 @@ static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const ui
|
|||||||
uint16_t src0_cmp;
|
uint16_t src0_cmp;
|
||||||
uint64_t sv;
|
uint64_t sv;
|
||||||
|
|
||||||
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
|
src0_cmp = zng_memread_2(src0);
|
||||||
src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp;
|
src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp;
|
||||||
sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32;
|
sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t mv, diff;
|
uint64_t mv, diff;
|
||||||
|
|
||||||
memcpy(&mv, src1, sizeof(mv));
|
mv = zng_memread_8(src1);
|
||||||
|
|
||||||
diff = sv ^ mv;
|
diff = sv ^ mv;
|
||||||
if (diff) {
|
if (diff) {
|
||||||
|
11
deflate.h
11
deflate.h
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
#include "zutil.h"
|
#include "zutil.h"
|
||||||
#include "zendian.h"
|
#include "zendian.h"
|
||||||
|
#include "zmemory.h"
|
||||||
#include "crc32.h"
|
#include "crc32.h"
|
||||||
|
|
||||||
#ifdef S390_DFLTCC_DEFLATE
|
#ifdef S390_DFLTCC_DEFLATE
|
||||||
@ -355,7 +356,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
|
|||||||
#if BYTE_ORDER == BIG_ENDIAN
|
#if BYTE_ORDER == BIG_ENDIAN
|
||||||
w = ZSWAP16(w);
|
w = ZSWAP16(w);
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
|
zng_memwrite_2(&s->pending_buf[s->pending], w);
|
||||||
s->pending += 2;
|
s->pending += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -367,7 +368,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
|
|||||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||||
w = ZSWAP16(w);
|
w = ZSWAP16(w);
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
|
zng_memwrite_2(&s->pending_buf[s->pending], w);
|
||||||
s->pending += 2;
|
s->pending += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -379,7 +380,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
|
|||||||
#if BYTE_ORDER == BIG_ENDIAN
|
#if BYTE_ORDER == BIG_ENDIAN
|
||||||
dw = ZSWAP32(dw);
|
dw = ZSWAP32(dw);
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
|
zng_memwrite_4(&s->pending_buf[s->pending], dw);
|
||||||
s->pending += 4;
|
s->pending += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -391,7 +392,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
|
|||||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||||
dw = ZSWAP32(dw);
|
dw = ZSWAP32(dw);
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
|
zng_memwrite_4(&s->pending_buf[s->pending], dw);
|
||||||
s->pending += 4;
|
s->pending += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,7 +404,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
|
|||||||
#if BYTE_ORDER == BIG_ENDIAN
|
#if BYTE_ORDER == BIG_ENDIAN
|
||||||
lld = ZSWAP64(lld);
|
lld = ZSWAP64(lld);
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
|
zng_memwrite_8(&s->pending_buf[s->pending], lld);
|
||||||
s->pending += 8;
|
s->pending += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zbuild.h"
|
#include "zbuild.h"
|
||||||
#include "zutil_p.h"
|
#include "zmemory.h"
|
||||||
#include "deflate.h"
|
#include "deflate.h"
|
||||||
#include "deflate_p.h"
|
#include "deflate_p.h"
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#define INFLATE_P_H
|
#define INFLATE_P_H
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
/* Architecture-specific hooks. */
|
/* Architecture-specific hooks. */
|
||||||
#ifdef S390_DFLTCC_INFLATE
|
#ifdef S390_DFLTCC_INFLATE
|
||||||
@ -138,8 +139,7 @@
|
|||||||
|
|
||||||
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
|
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
|
||||||
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
|
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
|
||||||
uint64_t chunk;
|
uint64_t chunk = zng_memread_8(in);
|
||||||
memcpy(&chunk, in, sizeof(chunk));
|
|
||||||
|
|
||||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||||
return chunk << bits;
|
return chunk << bits;
|
||||||
@ -175,7 +175,10 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
|
|||||||
uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
|
uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
|
||||||
|
|
||||||
/* So this doesn't give us a worst case scenario of function calls in a loop,
|
/* So this doesn't give us a worst case scenario of function calls in a loop,
|
||||||
* we want to instead break this down into copy blocks of fixed lengths */
|
* we want to instead break this down into copy blocks of fixed lengths
|
||||||
|
*
|
||||||
|
* TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
|
||||||
|
*/
|
||||||
while (len) {
|
while (len) {
|
||||||
tocopy = MIN(non_olap_size, len);
|
tocopy = MIN(non_olap_size, len);
|
||||||
len -= tocopy;
|
len -= tocopy;
|
||||||
|
@ -22,6 +22,8 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "zmemory.h"
|
||||||
|
|
||||||
#ifndef HASH_CALC_OFFSET
|
#ifndef HASH_CALC_OFFSET
|
||||||
# define HASH_CALC_OFFSET 0
|
# define HASH_CALC_OFFSET 0
|
||||||
#endif
|
#endif
|
||||||
@ -31,11 +33,10 @@
|
|||||||
#ifndef HASH_CALC_READ
|
#ifndef HASH_CALC_READ
|
||||||
# if BYTE_ORDER == LITTLE_ENDIAN
|
# if BYTE_ORDER == LITTLE_ENDIAN
|
||||||
# define HASH_CALC_READ \
|
# define HASH_CALC_READ \
|
||||||
memcpy(&val, strstart, sizeof(val));
|
val = zng_memread_4(strstart);
|
||||||
# else
|
# else
|
||||||
# define HASH_CALC_READ \
|
# define HASH_CALC_READ \
|
||||||
memcpy(&val, strstart, sizeof(val)); \
|
val = ZSWAP32(zng_memread_4(strstart));
|
||||||
val = ZSWAP32(val);
|
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
35
match_tpl.h
35
match_tpl.h
@ -40,10 +40,15 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
|||||||
uint32_t chain_length, nice_match, best_len, offset;
|
uint32_t chain_length, nice_match, best_len, offset;
|
||||||
uint32_t lookahead = s->lookahead;
|
uint32_t lookahead = s->lookahead;
|
||||||
Pos match_offset = 0;
|
Pos match_offset = 0;
|
||||||
#if OPTIMAL_CMP >= 32
|
#if OPTIMAL_CMP >= 64
|
||||||
uint8_t scan_start[8];
|
uint64_t scan_start;
|
||||||
#endif
|
uint64_t scan_end;
|
||||||
|
#elif OPTIMAL_CMP >= 32
|
||||||
|
uint32_t scan_start;
|
||||||
|
uint32_t scan_end;
|
||||||
|
#else
|
||||||
uint8_t scan_end[8];
|
uint8_t scan_end[8];
|
||||||
|
#endif
|
||||||
|
|
||||||
#define GOTO_NEXT_CHAIN \
|
#define GOTO_NEXT_CHAIN \
|
||||||
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
|
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
|
||||||
@ -70,11 +75,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if OPTIMAL_CMP >= 64
|
#if OPTIMAL_CMP >= 64
|
||||||
memcpy(scan_start, scan, sizeof(uint64_t));
|
scan_start = zng_memread_8(scan);
|
||||||
memcpy(scan_end, scan+offset, sizeof(uint64_t));
|
scan_end = zng_memread_8(scan+offset);
|
||||||
#elif OPTIMAL_CMP >= 32
|
#elif OPTIMAL_CMP >= 32
|
||||||
memcpy(scan_start, scan, sizeof(uint32_t));
|
scan_start = zng_memread_4(scan);
|
||||||
memcpy(scan_end, scan+offset, sizeof(uint32_t));
|
scan_end = zng_memread_4(scan+offset);
|
||||||
#else
|
#else
|
||||||
scan_end[0] = *(scan+offset);
|
scan_end[0] = *(scan+offset);
|
||||||
scan_end[1] = *(scan+offset+1);
|
scan_end[1] = *(scan+offset+1);
|
||||||
@ -141,24 +146,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
|||||||
#if OPTIMAL_CMP >= 32
|
#if OPTIMAL_CMP >= 32
|
||||||
if (best_len < sizeof(uint32_t)) {
|
if (best_len < sizeof(uint32_t)) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
|
if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
|
||||||
zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
|
zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
|
||||||
break;
|
break;
|
||||||
GOTO_NEXT_CHAIN;
|
GOTO_NEXT_CHAIN;
|
||||||
}
|
}
|
||||||
# if OPTIMAL_CMP >= 64
|
# if OPTIMAL_CMP >= 64
|
||||||
} else if (best_len >= sizeof(uint64_t)) {
|
} else if (best_len >= sizeof(uint64_t)) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
|
if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
|
||||||
zng_memcmp_8(mbase_start+cur_match, scan_start) == 0)
|
zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
|
||||||
break;
|
break;
|
||||||
GOTO_NEXT_CHAIN;
|
GOTO_NEXT_CHAIN;
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
} else {
|
} else {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 &&
|
if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
|
||||||
zng_memcmp_4(mbase_start+cur_match, scan_start) == 0)
|
zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
|
||||||
break;
|
break;
|
||||||
GOTO_NEXT_CHAIN;
|
GOTO_NEXT_CHAIN;
|
||||||
}
|
}
|
||||||
@ -197,9 +202,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if OPTIMAL_CMP >= 64
|
#if OPTIMAL_CMP >= 64
|
||||||
memcpy(scan_end, scan+offset, sizeof(uint64_t));
|
scan_end = zng_memread_8(scan+offset);
|
||||||
#elif OPTIMAL_CMP >= 32
|
#elif OPTIMAL_CMP >= 32
|
||||||
memcpy(scan_end, scan+offset, sizeof(uint32_t));
|
scan_end = zng_memread_4(scan+offset);
|
||||||
#else
|
#else
|
||||||
scan_end[0] = *(scan+offset);
|
scan_end[0] = *(scan+offset);
|
||||||
scan_end[1] = *(scan+offset+1);
|
scan_end[1] = *(scan+offset+1);
|
||||||
|
@ -183,7 +183,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
|
|||||||
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
|
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
|
||||||
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
|
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
|
||||||
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
||||||
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
||||||
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
||||||
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
||||||
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
||||||
@ -194,7 +194,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
|
|||||||
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
|
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
|
||||||
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
||||||
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
|
@ -204,7 +204,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
|
|||||||
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
|
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
|
||||||
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
|
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
|
||||||
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
||||||
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
||||||
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
||||||
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
||||||
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
||||||
@ -215,7 +215,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
|
|||||||
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
|
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
|
||||||
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
||||||
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
|
@ -212,9 +212,9 @@ chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset
|
|||||||
chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
|
chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
|
||||||
chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
|
||||||
chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
|
chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
|
||||||
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
||||||
compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
||||||
compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
|
||||||
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
|
||||||
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
|
||||||
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
|
||||||
@ -226,7 +226,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
|
|||||||
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
|
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
|
||||||
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
|
||||||
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
|
||||||
|
99
zmemory.h
Normal file
99
zmemory.h
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/* zmemory.h -- Private inline functions used internally in zlib-ng
|
||||||
|
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ZMEMORY_H
|
||||||
|
#define _ZMEMORY_H
|
||||||
|
|
||||||
|
/* Compilers that report __GNUC__ >= 4 get HAVE_MAY_ALIAS, which selects the
   __attribute__((__packed__, __may_alias__)) code paths in this header. Everything
   else uses the memcpy-based fallbacks. */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#  define HAVE_MAY_ALIAS
#endif
|
||||||
|
|
||||||
|
/* Read a 16-bit value in host byte order from ptr. The address does not have to be
   aligned for uint16_t. */
static inline uint16_t zng_memread_2(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* __packed__ lowers the struct's alignment requirement to one byte and __may_alias__
       exempts the access from type-based alias analysis, so the load through the cast
       pointer is well defined for the compiler. */
    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
    return ((const unaligned_uint16_t *)ptr)->val;
#else
    /* Portable fallback: copy the bytes into a correctly aligned local. */
    uint16_t val;
    memcpy(&val, ptr, sizeof(val));
    return val;
#endif
}
|
||||||
|
|
||||||
|
/* Read a 32-bit value in host byte order from ptr. The address does not have to be
   aligned for uint32_t. */
static inline uint32_t zng_memread_4(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* Packed, may_alias wrapper: one-byte alignment and no type-based aliasing
       assumptions on the load. */
    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
    return ((const unaligned_uint32_t *)ptr)->val;
#else
    /* Portable fallback: copy the bytes into a correctly aligned local. */
    uint32_t val;
    memcpy(&val, ptr, sizeof(val));
    return val;
#endif
}
|
||||||
|
|
||||||
|
/* Read a 64-bit value in host byte order from ptr. The address does not have to be
   aligned for uint64_t. */
static inline uint64_t zng_memread_8(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* Packed, may_alias wrapper: one-byte alignment and no type-based aliasing
       assumptions on the load. */
    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
    return ((const unaligned_uint64_t *)ptr)->val;
#else
    /* Portable fallback: copy the bytes into a correctly aligned local. */
    uint64_t val;
    memcpy(&val, ptr, sizeof(val));
    return val;
#endif
}
|
||||||
|
|
||||||
|
/* Store the 16-bit value val in host byte order at ptr. The address does not have to
   be aligned for uint16_t. Exactly sizeof(uint16_t) bytes are written. */
static inline void zng_memwrite_2(void *ptr, uint16_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* Packed, may_alias wrapper: one-byte alignment and no type-based aliasing
       assumptions on the store. */
    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
    ((unaligned_uint16_t *)ptr)->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(val));
#endif
}
|
||||||
|
|
||||||
|
/* Store the 32-bit value val in host byte order at ptr. The address does not have to
   be aligned for uint32_t. Exactly sizeof(uint32_t) bytes are written. */
static inline void zng_memwrite_4(void *ptr, uint32_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* Packed, may_alias wrapper: one-byte alignment and no type-based aliasing
       assumptions on the store. */
    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
    ((unaligned_uint32_t *)ptr)->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(val));
#endif
}
|
||||||
|
|
||||||
|
/* Store the 64-bit value val in host byte order at ptr. The address does not have to
   be aligned for uint64_t. Exactly sizeof(uint64_t) bytes are written. */
static inline void zng_memwrite_8(void *ptr, uint64_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* Packed, may_alias wrapper: one-byte alignment and no type-based aliasing
       assumptions on the store. */
    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
    ((unaligned_uint64_t *)ptr)->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(val));
#endif
}
|
||||||
|
|
||||||
|
/* Use zng_memread_* instead of memcmp to avoid older compilers not converting memcmp
   calls to unaligned comparisons when unaligned access is supported. Use memcmp only when
   unaligned support is not available to avoid an extra call to memcpy. */

/* Compare the 2 bytes at src0 with the 2 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. This is an equality test only:
   the result carries no ordering information. Neither pointer needs to be aligned. */
static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 16
    return zng_memread_2(src0) != zng_memread_2(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both build configurations agree. */
    return memcmp(src0, src1, 2) != 0;
#endif
}
|
||||||
|
|
||||||
|
/* Compare the 4 bytes at src0 with the 4 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. This is an equality test only:
   the result carries no ordering information. Neither pointer needs to be aligned. */
static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 32
    return zng_memread_4(src0) != zng_memread_4(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both build configurations agree. */
    return memcmp(src0, src1, 4) != 0;
#endif
}
|
||||||
|
|
||||||
|
/* Compare the 8 bytes at src0 with the 8 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. This is an equality test only:
   the result carries no ordering information. Neither pointer needs to be aligned. */
static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 64
    return zng_memread_8(src0) != zng_memread_8(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both build configurations agree. */
    return memcmp(src0, src1, 8) != 0;
#endif
}
|
||||||
|
|
||||||
|
#endif
|
29
zutil_p.h
29
zutil_p.h
@ -43,33 +43,4 @@ static inline void zng_free(void *ptr) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to
   unaligned comparisons when unaligned access is supported. */

/* Compare the 2 bytes at src0 with the 2 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. Neither pointer needs to be
   aligned, because both operands are first copied into local uint16_t variables. */
static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
    uint16_t src0_cmp, src1_cmp;

    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
    memcpy(&src1_cmp, src1, sizeof(src1_cmp));

    return src0_cmp != src1_cmp;
}
|
|
||||||
|
|
||||||
/* Compare the 4 bytes at src0 with the 4 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. Neither pointer needs to be
   aligned, because both operands are first copied into local uint32_t variables. */
static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
    uint32_t src0_cmp, src1_cmp;

    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
    memcpy(&src1_cmp, src1, sizeof(src1_cmp));

    return src0_cmp != src1_cmp;
}
|
|
||||||
|
|
||||||
/* Compare the 8 bytes at src0 with the 8 bytes at src1.
   Returns 0 when they are equal and 1 when they differ. Neither pointer needs to be
   aligned, because both operands are first copied into local uint64_t variables. */
static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
    uint64_t src0_cmp, src1_cmp;

    memcpy(&src0_cmp, src0, sizeof(src0_cmp));
    memcpy(&src1_cmp, src1, sizeof(src1_cmp));

    return src0_cmp != src1_cmp;
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user