Use GCC's may_alias attribute for unaligned memory access

This commit is contained in:
Cameron Cawley 2023-07-27 21:07:29 +01:00 committed by Hans Kristian Rosbach
parent fc90e7b3fc
commit d7e121e56b
25 changed files with 197 additions and 150 deletions

View File

@ -5,6 +5,7 @@
#ifdef ARM_NEON
#include "neon_intrins.h"
#include "zbuild.h"
#include "zmemory.h"
#include "arch/generic/chunk_permute_table.h"
typedef uint8x16_t chunk_t;
@ -31,21 +32,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
};
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(zng_memread_2(from)));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(zng_memread_4(from)));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(zng_memread_8(from)));
}
#define CHUNKSIZE chunksize_neon

View File

@ -4,7 +4,7 @@
*/
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

View File

@ -40,10 +40,10 @@ chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h

View File

@ -3,6 +3,7 @@
*/
#include "zbuild.h"
#include "zmemory.h"
typedef uint64_t chunk_t;
@ -12,21 +13,20 @@ typedef uint64_t chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint8_t *dest = (uint8_t *)chunk;
memcpy(dest, from, sizeof(uint32_t));
memcpy(dest+4, from, sizeof(uint32_t));
uint32_t tmp = zng_memread_4(from);
*chunk = tmp | ((chunk_t)tmp << 32);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
memcpy(chunk, from, sizeof(uint64_t));
*chunk = zng_memread_8(from);
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
*chunk = zng_memread_8(s);
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
memcpy(out, chunk, sizeof(uint64_t));
zng_memwrite_8(out, *chunk);
}
#define CHUNKSIZE chunksize_c

View File

@ -4,7 +4,7 @@
*/
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"
@ -107,8 +107,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
do {
uint32_t sv, mv, diff;
memcpy(&sv, src0, sizeof(sv));
memcpy(&mv, src1, sizeof(mv));
sv = zng_memread_4(src0);
mv = zng_memread_4(src1);
diff = sv ^ mv;
if (diff) {
@ -151,8 +151,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
do {
uint64_t sv, mv, diff;
memcpy(&sv, src0, sizeof(sv));
memcpy(&mv, src1, sizeof(mv));
sv = zng_memread_8(src0);
mv = zng_memread_8(src1);
diff = sv ^ mv;
if (diff) {

View File

@ -5,6 +5,7 @@
#ifdef POWER8_VSX
#include <altivec.h>
#include "zbuild.h"
#include "zmemory.h"
typedef vector unsigned char chunk_t;
@ -15,21 +16,15 @@ typedef vector unsigned char chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats(tmp);
*chunk = (vector unsigned char)vec_splats(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats(tmp);
*chunk = (vector unsigned char)vec_splats(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats((unsigned long long)tmp);
*chunk = (vector unsigned char)vec_splats((unsigned long long)zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {

View File

@ -6,7 +6,7 @@
#ifdef POWER9
#include <altivec.h>
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "zendian.h"

View File

@ -7,7 +7,7 @@
#ifdef RISCV_RVV
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

View File

@ -2,6 +2,7 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zmemory.h"
#ifdef X86_AVX2
#include "avx2_tables.h"
@ -19,21 +20,15 @@ typedef __m128i halfchunk_t;
#define HAVE_HALF_CHUNK
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi16(tmp);
*chunk = _mm256_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi32(tmp);
*chunk = _mm256_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi64x(tmp);
*chunk = _mm256_set1_epi64x(zng_memread_8(from));
}
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {

View File

@ -2,6 +2,7 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zmemory.h"
#ifdef X86_AVX512
@ -33,21 +34,15 @@ static inline mask_t gen_mask(unsigned len) {
}
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi16(tmp);
*chunk = _mm256_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi32(tmp);
*chunk = _mm256_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi64x(tmp);
*chunk = _mm256_set1_epi64x(zng_memread_8(from));
}
static inline void chunkmemset_16(uint8_t *from, chunk_t *chunk) {

View File

@ -3,6 +3,7 @@
*/
#include "zbuild.h"
#include "zmemory.h"
#ifdef X86_SSE2
#include <immintrin.h>
@ -14,21 +15,15 @@ typedef __m128i chunk_t;
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi16(tmp);
*chunk = _mm_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi32(tmp);
*chunk = _mm_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi64x(tmp);
*chunk = _mm_set1_epi64x(zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {

View File

@ -3,6 +3,7 @@
*/
#include "zbuild.h"
#include "zmemory.h"
#if defined(X86_SSSE3)
#include <immintrin.h>
@ -33,21 +34,15 @@ static const lut_rem_pair perm_idx_lut[13] = {
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi16(tmp);
*chunk = _mm_set1_epi16(zng_memread_2(from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi32(tmp);
*chunk = _mm_set1_epi32(zng_memread_4(from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi64x(tmp);
*chunk = _mm_set1_epi64x(zng_memread_8(from));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {

View File

@ -4,7 +4,7 @@
*/
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

View File

@ -4,7 +4,7 @@
*/
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

View File

@ -4,6 +4,7 @@
*/
#include "zbuild.h"
#include "zmemory.h"
#include "fallback_builtins.h"
#include "zendian.h"
@ -47,25 +48,21 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
uint16_t src0_cmp, src1_cmp;
uint16_t src0_cmp;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
src0_cmp = zng_memread_2(src0);
do {
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
if (src0_cmp != src1_cmp)
if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
if (src0_cmp != src1_cmp)
if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
if (src0_cmp != src1_cmp)
if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
if (src0_cmp != src1_cmp)
if (src0_cmp != zng_memread_2(src1))
return len + (*src0 == *src1);
src1 += 2, len += 2;
} while (len < 256);
@ -79,13 +76,13 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
uint32_t sv, len = 0;
uint16_t src0_cmp;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
src0_cmp = zng_memread_2(src0);
sv = ((uint32_t)src0_cmp << 16) | src0_cmp;
do {
uint32_t mv, diff;
memcpy(&mv, src1, sizeof(mv));
mv = zng_memread_4(src1);
diff = sv ^ mv;
if (diff) {
@ -112,14 +109,14 @@ static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const ui
uint16_t src0_cmp;
uint64_t sv;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
src0_cmp = zng_memread_2(src0);
src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp;
sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32;
do {
uint64_t mv, diff;
memcpy(&mv, src1, sizeof(mv));
mv = zng_memread_8(src1);
diff = sv ^ mv;
if (diff) {

View File

@ -12,6 +12,7 @@
#include "zutil.h"
#include "zendian.h"
#include "zmemory.h"
#include "crc32.h"
#ifdef S390_DFLTCC_DEFLATE
@ -355,7 +356,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == BIG_ENDIAN
w = ZSWAP16(w);
#endif
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
zng_memwrite_2(&s->pending_buf[s->pending], w);
s->pending += 2;
}
@ -367,7 +368,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == LITTLE_ENDIAN
w = ZSWAP16(w);
#endif
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
zng_memwrite_2(&s->pending_buf[s->pending], w);
s->pending += 2;
}
@ -379,7 +380,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == BIG_ENDIAN
dw = ZSWAP32(dw);
#endif
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
zng_memwrite_4(&s->pending_buf[s->pending], dw);
s->pending += 4;
}
@ -391,7 +392,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == LITTLE_ENDIAN
dw = ZSWAP32(dw);
#endif
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
zng_memwrite_4(&s->pending_buf[s->pending], dw);
s->pending += 4;
}
@ -403,7 +404,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
#if BYTE_ORDER == BIG_ENDIAN
lld = ZSWAP64(lld);
#endif
memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
zng_memwrite_8(&s->pending_buf[s->pending], lld);
s->pending += 8;
}

View File

@ -18,7 +18,7 @@
*/
#include "zbuild.h"
#include "zutil_p.h"
#include "zmemory.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"

View File

@ -6,6 +6,7 @@
#define INFLATE_P_H
#include <stdlib.h>
#include "zmemory.h"
/* Architecture-specific hooks. */
#ifdef S390_DFLTCC_INFLATE
@ -138,8 +139,7 @@
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
uint64_t chunk;
memcpy(&chunk, in, sizeof(chunk));
uint64_t chunk = zng_memread_8(in);
#if BYTE_ORDER == LITTLE_ENDIAN
return chunk << bits;
@ -175,7 +175,10 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
/* So this doesn't give us a worst case scenario of function calls in a loop,
* we want to instead break this down into copy blocks of fixed lengths */
* we want to instead break this down into copy blocks of fixed lengths
*
* TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
*/
while (len) {
tocopy = MIN(non_olap_size, len);
len -= tocopy;

View File

@ -22,6 +22,8 @@
*
*/
#include "zmemory.h"
#ifndef HASH_CALC_OFFSET
# define HASH_CALC_OFFSET 0
#endif
@ -31,11 +33,10 @@
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val));
val = zng_memread_4(strstart);
# else
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val)); \
val = ZSWAP32(val);
val = ZSWAP32(zng_memread_4(strstart));
# endif
#endif

View File

@ -40,10 +40,15 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
#if OPTIMAL_CMP >= 32
uint8_t scan_start[8];
#endif
#if OPTIMAL_CMP >= 64
uint64_t scan_start;
uint64_t scan_end;
#elif OPTIMAL_CMP >= 32
uint32_t scan_start;
uint32_t scan_end;
#else
uint8_t scan_end[8];
#endif
#define GOTO_NEXT_CHAIN \
if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
@ -70,11 +75,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
#if OPTIMAL_CMP >= 64
memcpy(scan_start, scan, sizeof(uint64_t));
memcpy(scan_end, scan+offset, sizeof(uint64_t));
scan_start = zng_memread_8(scan);
scan_end = zng_memread_8(scan+offset);
#elif OPTIMAL_CMP >= 32
memcpy(scan_start, scan, sizeof(uint32_t));
memcpy(scan_end, scan+offset, sizeof(uint32_t));
scan_start = zng_memread_4(scan);
scan_end = zng_memread_4(scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
@ -141,24 +146,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#if OPTIMAL_CMP >= 32
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# if OPTIMAL_CMP >= 64
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
zng_memcmp_8(mbase_start+cur_match, scan_start) == 0)
if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 &&
zng_memcmp_4(mbase_start+cur_match, scan_start) == 0)
if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
@ -197,9 +202,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
#if OPTIMAL_CMP >= 64
memcpy(scan_end, scan+offset, sizeof(uint64_t));
scan_end = zng_memread_8(scan+offset);
#elif OPTIMAL_CMP >= 32
memcpy(scan_end, scan+offset, sizeof(uint32_t));
scan_end = zng_memread_4(scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);

View File

@ -183,7 +183,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@ -194,7 +194,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h

View File

@ -204,7 +204,7 @@ adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_
adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h
adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h
chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@ -215,7 +215,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h

View File

@ -212,9 +212,9 @@ chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset
chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h
chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zmemory.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h
compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h
cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h
crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h
@ -226,7 +226,7 @@ deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.
deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h
deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zmemory.h
deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h
deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h
deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h

99
zmemory.h Normal file
View File

@ -0,0 +1,99 @@
/* zmemory.h -- Private inline functions used internally in zlib-ng
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef _ZMEMORY_H
#define _ZMEMORY_H
#if defined(__GNUC__) && (__GNUC__ >= 4)
# define HAVE_MAY_ALIAS
#endif
/* Load a 16-bit value from an address that may be unaligned.
   The bytes are returned as stored in memory (no byte swapping is done). */
static inline uint16_t zng_memread_2(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member load is valid here. */
    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
    const unaligned_uint16_t *src = (const unaligned_uint16_t *)ptr;
    return src->val;
#else
    /* Portable fallback: copy the bytes into a properly typed local. */
    uint16_t result;
    memcpy(&result, ptr, sizeof(result));
    return result;
#endif
}
/* Load a 32-bit value from an address that may be unaligned.
   The bytes are returned as stored in memory (no byte swapping is done). */
static inline uint32_t zng_memread_4(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member load is valid here. */
    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
    const unaligned_uint32_t *src = (const unaligned_uint32_t *)ptr;
    return src->val;
#else
    /* Portable fallback: copy the bytes into a properly typed local. */
    uint32_t result;
    memcpy(&result, ptr, sizeof(result));
    return result;
#endif
}
/* Load a 64-bit value from an address that may be unaligned.
   The bytes are returned as stored in memory (no byte swapping is done). */
static inline uint64_t zng_memread_8(const void *ptr) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member load is valid here. */
    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
    const unaligned_uint64_t *src = (const unaligned_uint64_t *)ptr;
    return src->val;
#else
    /* Portable fallback: copy the bytes into a properly typed local. */
    uint64_t result;
    memcpy(&result, ptr, sizeof(result));
    return result;
#endif
}
/* Store a 16-bit value to an address that may be unaligned.
   The value is written in its in-memory representation (no byte swapping). */
static inline void zng_memwrite_2(void *ptr, uint16_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member store is valid here. */
    typedef struct { uint16_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint16_t;
    unaligned_uint16_t *dst = (unaligned_uint16_t *)ptr;
    dst->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(uint16_t));
#endif
}
/* Store a 32-bit value to an address that may be unaligned.
   The value is written in its in-memory representation (no byte swapping). */
static inline void zng_memwrite_4(void *ptr, uint32_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member store is valid here. */
    typedef struct { uint32_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint32_t;
    unaligned_uint32_t *dst = (unaligned_uint32_t *)ptr;
    dst->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(uint32_t));
#endif
}
/* Store a 64-bit value to an address that may be unaligned.
   The value is written in its in-memory representation (no byte swapping). */
static inline void zng_memwrite_8(void *ptr, uint64_t val) {
#if defined(HAVE_MAY_ALIAS)
    /* A packed struct has no alignment requirement and may_alias exempts it
       from type-based alias analysis, so a plain member store is valid here. */
    typedef struct { uint64_t val; } __attribute__ ((__packed__, __may_alias__)) unaligned_uint64_t;
    unaligned_uint64_t *dst = (unaligned_uint64_t *)ptr;
    dst->val = val;
#else
    /* Portable fallback: copy the value's bytes to the destination. */
    memcpy(ptr, &val, sizeof(uint64_t));
#endif
}
/* Use zng_memread_* instead of memcmp to avoid older compilers not converting memcmp
   calls to unaligned comparisons when unaligned access is supported. Use memcmp only when
   unaligned support is not available to avoid an extra call to memcpy.

   Equality test of two 2-byte regions, either of which may be unaligned.
   Returns 0 when the regions are identical and 1 otherwise. Unlike memcmp the
   result carries no ordering information, and it is the same 0/1 value in
   every build configuration. */
static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 16
    return zng_memread_2(src0) != zng_memread_2(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both branches agree. */
    return memcmp(src0, src1, 2) != 0;
#endif
}
/* Equality test of two 4-byte regions, either of which may be unaligned.
   Returns 0 when the regions are identical and 1 otherwise. Unlike memcmp the
   result carries no ordering information, and it is the same 0/1 value in
   every build configuration. */
static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 32
    return zng_memread_4(src0) != zng_memread_4(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both branches agree. */
    return memcmp(src0, src1, 4) != 0;
#endif
}
/* Equality test of two 8-byte regions, either of which may be unaligned.
   Returns 0 when the regions are identical and 1 otherwise. Unlike memcmp the
   result carries no ordering information, and it is the same 0/1 value in
   every build configuration. */
static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
#if defined(HAVE_MAY_ALIAS) || OPTIMAL_CMP >= 64
    return zng_memread_8(src0) != zng_memread_8(src1);
#else
    /* Collapse memcmp's signed result to 0/1 so both branches agree. */
    return memcmp(src0, src1, 8) != 0;
#endif
}
#endif

View File

@ -43,33 +43,4 @@ static inline void zng_free(void *ptr) {
#endif
}
/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to
unaligned comparisons when unaligned access is supported. */
static inline int32_t zng_memcmp_2(const void *src0, const void *src1) {
uint16_t src0_cmp, src1_cmp;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
return src0_cmp != src1_cmp;
}
static inline int32_t zng_memcmp_4(const void *src0, const void *src1) {
uint32_t src0_cmp, src1_cmp;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
return src0_cmp != src1_cmp;
}
static inline int32_t zng_memcmp_8(const void *src0, const void *src1) {
uint64_t src0_cmp, src1_cmp;
memcpy(&src0_cmp, src0, sizeof(src0_cmp));
memcpy(&src1_cmp, src1, sizeof(src1_cmp));
return src0_cmp != src1_cmp;
}
#endif