Mirror of https://github.com/GerbilSoft/zlib-ng.git, synced 2025-06-18 11:35:35 -04:00
Since we long ago made unaligned reads safe (by using memcpy or intrinsics),
it is time to replace the UNALIGNED_OK checks, which have since really only been used to select the optimal comparison sizes for the arch.
parent 4fa76be6c0
commit 509f6b5818
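The pattern the tree moves to looks roughly like the sketch below: loads always go through memcpy (safe at any alignment, and compiled down to a plain load where the target allows it), while OPTIMAL_CMP only decides how wide each comparison step should be. This is a minimal illustration, not zlib-ng's code; it assumes a little-endian target and GCC/Clang for __builtin_ctzll, and the function name is made up.

#include <stdint.h>
#include <string.h>

#define OPTIMAL_CMP 64   /* stand-in for the per-arch value from zbuild.h */

/* Return the index of the first differing byte, or n if the buffers match. */
static uint32_t first_mismatch(const uint8_t *a, const uint8_t *b, uint32_t n) {
    uint32_t len = 0;
#if OPTIMAL_CMP >= 64
    while (len + 8 <= n) {
        uint64_t va, vb;
        memcpy(&va, a + len, 8);                /* safe unaligned load */
        memcpy(&vb, b + len, 8);
        uint64_t diff = va ^ vb;
        if (diff != 0)                          /* on little-endian, the lowest
                                                   set bit marks the first
                                                   mismatching byte */
            return len + (uint32_t)(__builtin_ctzll(diff) / 8);
        len += 8;
    }
#endif
    while (len < n && a[len] == b[len])         /* byte-wise fallback */
        len++;
    return len;
}

With that in place, UNALIGNED_OK ("may I read unaligned?") collapses into OPTIMAL_CMP ("how wide should I compare?"), which is what the diff below does throughout.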
.github/workflows/cmake.yml (4 changes)
@@ -81,11 +81,11 @@ jobs:
 build-src-dir: ../zlib-ng/test/add-subdirectory-project
 readonly-project-dir: true

-- name: Ubuntu GCC -O1 No Unaligned UBSAN
+- name: Ubuntu GCC -O1 UBSAN
 os: ubuntu-latest
 compiler: gcc
 cxx-compiler: g++
-cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
+cmake-args: -DWITH_SANITIZER=Undefined
 codecov: ubuntu_gcc_o1
 cflags: -O1

CMakeLists.txt
@@ -93,7 +93,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
 option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
 option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
 option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
-option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)

 set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
 Useful when embedding into a larger library.
@@ -147,7 +146,6 @@ mark_as_advanced(FORCE
 WITH_RVV
 WITH_INFLATE_STRICT
 WITH_INFLATE_ALLOW_INVALID_DIST
-WITH_UNALIGNED
 INSTALL_UTILS
 )

@@ -336,12 +334,6 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
 endforeach()
 endif()

-# Set architecture alignment requirements
-if(NOT WITH_UNALIGNED)
-add_definitions(-DNO_UNALIGNED)
-message(STATUS "Unaligned reads manually disabled")
-endif()
-
 # Apply warning compiler flags
 if(WITH_MAINTAINER_WARNINGS)
 add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
README.md
@@ -25,7 +25,7 @@ Features
 * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
 * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
 * Support for hardware-accelerated deflate using IBM Z DFLTCC
-* Unaligned memory read/writes and large bit buffer improvements
+* Safe unaligned memory read/writes and large bit buffer improvements
 * Includes improvements from Cloudflare and Intel forks
 * Configure, CMake, and NMake build system support
 * Comprehensive set of CMake unit tests
@@ -213,7 +213,6 @@ Advanced Build Options
 | WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
 | WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
 | WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
-| WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch | ON |
 | WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
 | WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
 | INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
compare256.c
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {

 #include "match_tpl.h"

-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
 uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *

 #endif

-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-/* UNALIGNED64_OK, 64-bit integer comparison */
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+/* 64-bit integer comparison */
 static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
 uint32_t len = 0;

@@ -28,13 +28,13 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
 uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);

 uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
 # ifdef HAVE_BUILTIN_CTZ
 uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
 # endif
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
 # endif
 #endif

@@ -43,29 +43,24 @@ typedef void (*slide_hash_func)(deflate_state *s);
 void slide_hash_c(deflate_state *s);

 uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
-# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
-# ifdef HAVE_BUILTIN_CTZ
-uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
-# endif
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
-# endif
-# endif

 uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
 uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
-uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
-# ifdef UNALIGNED64_OK
-uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
-# endif
+# ifdef HAVE_BUILTIN_CTZ
+uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
+uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
 # endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
+uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
+# endif
+#endif


 // Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 # define longest_match_generic longest_match_unaligned_64
 # define longest_match_slow_generic longest_match_slow_unaligned_64
 # define compare256_generic compare256_unaligned_64

chunkset_tpl.h
@@ -227,17 +227,15 @@ rem_bytes:
 }

 Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
-#if !defined(UNALIGNED64_OK)
-# if !defined(UNALIGNED_OK)
+#if OPTIMAL_CMP < 32
 static const uint32_t align_mask = 7;
-# else
+#elif OPTIMAL_CMP == 32
 static const uint32_t align_mask = 3;
-# endif
 #endif

 len = MIN(len, left);

-#if !defined(UNALIGNED64_OK)
+#if OPTIMAL_CMP < 64
 while (((uintptr_t)out & align_mask) && (len > 0)) {
 *out++ = *from++;
 --len;
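The prologue above can be read in isolation: when the arch's optimal width is below 64 bits, `out` is first walked forward byte-by-byte to the chosen boundary (mask 7 for 8-byte alignment, mask 3 for 4-byte) before the wide copy loop runs. A standalone sketch of just that step, with a hypothetical function name:

#include <stdint.h>

/* Copy single bytes until *out hits the alignment boundary implied by
 * align_mask, returning how many bytes remain for the wide loop. */
static unsigned align_prologue(uint8_t **out, uint8_t **from,
                               unsigned len, uint32_t align_mask) {
    while (((uintptr_t)*out & align_mask) && len > 0) {
        *(*out)++ = *(*from)++;
        --len;
    }
    return len;
}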
cmake/detect-sanitizer.cmake
@@ -111,6 +111,7 @@ endmacro()

 macro(add_undefined_sanitizer)
 set(known_checks
+alignment
 array-bounds
 bool
 bounds
@@ -137,10 +138,6 @@ macro(add_undefined_sanitizer)
 vptr
 )

-# Only check for alignment sanitizer flag if unaligned access is not supported
-if(NOT WITH_UNALIGNED)
-list(APPEND known_checks alignment)
-endif()
 # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
 if(NOT CMAKE_C_FLAGS MATCHES "-O0")
 list(APPEND known_checks object-size)
@@ -153,12 +150,6 @@ macro(add_undefined_sanitizer)
 add_compile_options(-fsanitize=${supported_checks})
 add_link_options(-fsanitize=${supported_checks})

-# Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
-# it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
-if(WITH_UNALIGNED)
-add_compile_options(-fno-sanitize=alignment)
-endif()
-
 add_common_sanitizer_flags()
 else()
 message(STATUS "Undefined behavior sanitizer is not supported")
compare256_rle.h
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
 return 256;
 }

-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 /* 16-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
 uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui

 #endif

-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 /* 64-bit unaligned integer comparison */
 static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
 uint32_t src0_cmp32, len = 0;
configure (10 changes)
@@ -87,7 +87,6 @@ mandir=${mandir-'${prefix}/share/man'}
 shared_ext='.so'
 shared=1
 gzfileops=1
-unalignedok=1
 compat=0
 cover=0
 build32=0
@@ -164,7 +163,6 @@ case "$1" in
 echo ' [--warn] Enables extra compiler warnings' | tee -a configure.log
 echo ' [--debug] Enables extra debug prints during operation' | tee -a configure.log
 echo ' [--zlib-compat] Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
-echo ' [--without-unaligned] Compiles without fast unaligned access' | tee -a configure.log
 echo ' [--without-gzfileops] Compiles without the gzfile parts of the API enabled' | tee -a configure.log
 echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log
 echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -195,7 +193,6 @@ case "$1" in
 -s* | --shared | --enable-shared) shared=1; shift ;;
 -t | --static) shared=0; shift ;;
 --zlib-compat) compat=1; shift ;;
---without-unaligned) unalignedok=0; shift ;;
 --without-gzfileops) gzfileops=0; shift ;;
 --cover) cover=1; shift ;;
 -3* | --32) build32=1; shift ;;
@@ -876,13 +873,6 @@ else
 PIC_TESTOBJG="\$(OBJG)"
 fi

-# set architecture alignment requirements
-if test $unalignedok -eq 0; then
-CFLAGS="${CFLAGS} -DNO_UNALIGNED"
-SFLAGS="${SFLAGS} -DNO_UNALIGNED"
-echo "Unaligned reads manually disabled." | tee -a configure.log
-fi
-
 # enable reduced memory configuration
 if test $reducedmem -eq 1; then
 echo "Configuring for reduced memory environment." | tee -a configure.log
deflate_rle.c
@@ -10,8 +10,8 @@
 #include "deflate_p.h"
 #include "functable.h"

-#ifdef UNALIGNED_OK
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if OPTIMAL_CMP >= 32
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 # define compare256_rle compare256_rle_unaligned_64
 # elif defined(HAVE_BUILTIN_CTZ)
 # define compare256_rle compare256_rle_unaligned_32
insert_string_tpl.h
@@ -29,21 +29,13 @@
 # define HASH_CALC_MASK HASH_MASK
 #endif
 #ifndef HASH_CALC_READ
-# ifdef UNALIGNED_OK
-# if BYTE_ORDER == LITTLE_ENDIAN
-# define HASH_CALC_READ \
-memcpy(&val, strstart, sizeof(val));
-# else
-# define HASH_CALC_READ \
-memcpy(&val, strstart, sizeof(val)); \
-val = ZSWAP32(val);
-# endif
+# if BYTE_ORDER == LITTLE_ENDIAN
+# define HASH_CALC_READ \
+memcpy(&val, strstart, sizeof(val));
 # else
 # define HASH_CALC_READ \
-val = ((uint32_t)(strstart[0])); \
-val |= ((uint32_t)(strstart[1]) << 8); \
-val |= ((uint32_t)(strstart[2]) << 16); \
-val |= ((uint32_t)(strstart[3]) << 24);
+memcpy(&val, strstart, sizeof(val)); \
+val = ZSWAP32(val);
 # endif
 #endif

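The removed big-endian branch assembled the hash input byte-by-byte; the new one relies on memcpy plus ZSWAP32 producing the same little-endian interpretation. A quick standalone check of that equivalence (hypothetical helper names, with __builtin_bswap32 standing in for ZSWAP32):

#include <stdint.h>
#include <string.h>

/* Old fallback: build the value in little-endian order, one byte at a time. */
static uint32_t read_le32_bytes(const uint8_t *p) {
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* New path: native-order load, byte-swapped on big-endian targets. Both
 * functions return the same value on either endianness. */
static uint32_t read_le32_memcpy(const uint8_t *p) {
    uint32_t v;
    memcpy(&v, p, sizeof(v));
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    v = __builtin_bswap32(v);   /* plays the role of ZSWAP32 */
#endif
    return v;
}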
match_tpl.h (22 changes)
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 uint32_t chain_length, nice_match, best_len, offset;
 uint32_t lookahead = s->lookahead;
 Pos match_offset = 0;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 uint8_t scan_start[8];
 #endif
 uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 * to find the next best match length.
 */
 offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 if (best_len >= sizeof(uint32_t)) {
 offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
 if (best_len >= sizeof(uint64_t))
 offset -= 4;
 #endif
 }
 #endif

-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
 memcpy(scan_start, scan, sizeof(uint64_t));
 memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
 memcpy(scan_start, scan, sizeof(uint32_t));
 memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
@@ -138,7 +138,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 * that depend on those values. However the length of the match is limited to the
 * lookahead, so the output of deflate is not affected by the uninitialized values.
 */
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 if (best_len < sizeof(uint32_t)) {
 for (;;) {
 if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
@@ -146,7 +146,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 break;
 GOTO_NEXT_CHAIN;
 }
-# ifdef UNALIGNED64_OK
+# if OPTIMAL_CMP >= 64
 } else if (best_len >= sizeof(uint64_t)) {
 for (;;) {
 if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
 return best_len;

 offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
 if (best_len >= sizeof(uint32_t)) {
 offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
 if (best_len >= sizeof(uint64_t))
 offset -= 4;
 #endif
 }
 #endif

-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
 memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
 memcpy(scan_end, scan+offset, sizeof(uint32_t));
 #else
 scan_end[0] = *(scan+offset);
benchmark_compare256.cc
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
 BENCHMARK_COMPARE256(native, native_compare256, 1);
 #else

-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
-#endif
+# endif
 #endif
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
benchmark_compare256_rle.cc
@@ -61,12 +61,12 @@ public:

 BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);

-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
 BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
-#endif
+# endif
 #endif
test_compare256.cc
@@ -65,15 +65,16 @@ TEST_COMPARE256(c, compare256_c, 1)
 TEST_COMPARE256(native, native_compare256, 1)
 #else

-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(unaligned_32, compare256_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
+# endif
 #endif
-#endif
+
 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
 #endif
test_compare256_rle.cc
@@ -52,12 +52,12 @@ static inline void compare256_rle_match_check(compare256_rle_func compare256_rle

 TEST_COMPARE256_RLE(c, compare256_rle_c, 1)

-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
 TEST_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
 TEST_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
 TEST_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1)
-#endif
+# endif
 #endif
zbuild.h (44 changes)
@@ -243,29 +243,31 @@
 # define Tracecv(c, x)
 #endif

-#ifndef NO_UNALIGNED
-# if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
-      defined(__i686__) || defined(_X86_) || defined(_M_IX86)
-# define UNALIGNED_OK
-# elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
-# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# endif
-# elif defined(__arm__) || (_M_ARM >= 7)
-# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-# define UNALIGNED_OK
-# endif
-# elif defined(__powerpc64__) || defined(__ppc64__)
-# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
+# define OPTIMAL_CMP 64
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+     defined(__i686__) || defined(_X86_) || defined(_M_IX86)
+# define OPTIMAL_CMP 32
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+# define OPTIMAL_CMP 64
+# endif
+#elif defined(__arm__) || (_M_ARM >= 7)
+# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+# define OPTIMAL_CMP 32
+# endif
+#elif defined(__powerpc64__) || defined(__ppc64__)
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define OPTIMAL_CMP 64
 # endif
 #endif
+#if defined(NO_UNALIGNED)
+# undef OPTIMAL_CMP
+#endif
+#if !defined(OPTIMAL_CMP)
+# define OPTIMAL_CMP 8
+#endif


 #if defined(__has_feature)
 # if __has_feature(address_sanitizer)
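Note the escape hatch that survives the removal of the build option: defining NO_UNALIGNED by hand still voids the detected value, and anything without a detected width lands on the byte-wise default. A tiny self-contained probe (illustrative only, mirroring the chain above rather than including zbuild.h, and trimmed to two arches):

#include <stdio.h>

/* Illustrative copy of the zbuild.h fallback chain. */
#if defined(__x86_64__) || defined(_M_X64)
# define OPTIMAL_CMP 64
#elif defined(__i386__) || defined(_M_IX86)
# define OPTIMAL_CMP 32
#endif
#if defined(NO_UNALIGNED)   /* e.g. cc -DNO_UNALIGNED probe.c */
# undef OPTIMAL_CMP
#endif
#ifndef OPTIMAL_CMP
# define OPTIMAL_CMP 8      /* byte-wise comparisons everywhere */
#endif

int main(void) {
    printf("OPTIMAL_CMP = %d\n", OPTIMAL_CMP);
    return 0;
}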