Move UNALIGNED_OK detection to compile time instead of configure time.

This commit is contained in:
Nathan Moinvaziri 2022-01-17 18:47:23 -08:00 committed by Hans Kristian Rosbach
parent 276a3ec3b3
commit e38c493337
10 changed files with 28 additions and 78 deletions

View File

@ -41,21 +41,20 @@ jobs:
cmake-args: -DZLIB_COMPAT=ON -DZLIB_SYMBOL_PREFIX=zTest_ cmake-args: -DZLIB_COMPAT=ON -DZLIB_SYMBOL_PREFIX=zTest_
codecov: ubuntu_gcc_compat_sprefix codecov: ubuntu_gcc_compat_sprefix
- name: Ubuntu GCC OSB -O1 No Unaligned64 UBSAN - name: Ubuntu GCC OSB -O1 UBSAN
os: ubuntu-latest os: ubuntu-latest
compiler: gcc compiler: gcc
cxx-compiler: g++ cxx-compiler: g++
cmake-args: -DWITH_UNALIGNED=ON -DUNALIGNED64_OK=OFF -DWITH_SANITIZER=Undefined cmake-args: -DWITH_SANITIZER=Undefined
build-dir: ../build build-dir: ../build
build-src-dir: ../zlib-ng build-src-dir: ../zlib-ng
codecov: ubuntu_gcc_osb codecov: ubuntu_gcc_osb
cflags: -O1 -g3 cflags: -O1 -g3
- name: Ubuntu GCC -O3 No Unaligned - name: Ubuntu GCC -O3
os: ubuntu-latest os: ubuntu-latest
compiler: gcc compiler: gcc
cxx-compiler: g++ cxx-compiler: g++
cmake-args: -DWITH_UNALIGNED=OFF
codecov: ubuntu_gcc_o3 codecov: ubuntu_gcc_o3
cflags: -O3 cflags: -O3

View File

@ -87,7 +87,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF) option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF) option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF) option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)
set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols. set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
Useful when embedding into a larger library. Useful when embedding into a larger library.
@ -138,7 +137,6 @@ mark_as_advanced(FORCE
WITH_POWER8 WITH_POWER8
WITH_INFLATE_STRICT WITH_INFLATE_STRICT
WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST
WITH_UNALIGNED
INSTALL_UTILS INSTALL_UTILS
) )
@ -249,46 +247,6 @@ else()
endif() endif()
endif() endif()
# Work out whether unaligned memory reads may be assumed for the target
# architecture and, if so, publish that as compile definitions.
# UNALIGNED_OK / UNALIGNED64_OK values that are already defined (for example
# passed with -D on the command line) are never overwritten.
if(NOT WITH_UNALIGNED)
    message(STATUS "Unaligned reads manually disabled")
else()
    # Plain unaligned reads: default to TRUE on ARM other than armv2-armv7,
    # on powerpc64le and on x86.
    if((BASEARCH_ARM_FOUND AND NOT "${ARCH}" MATCHES "armv[2-7]") OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "powerpc64le") OR BASEARCH_X86_FOUND)
        if(NOT DEFINED UNALIGNED_OK)
            set(UNALIGNED_OK TRUE)
        endif()
    endif()
    if(UNALIGNED_OK)
        add_definitions(-DUNALIGNED_OK)
        message(STATUS "Architecture supports unaligned reads")
    endif()

    # Unaligned reads wider than 4 bytes: choose a per-architecture default.
    # ARM: explicitly off for armv2-armv7, on for arm/armv8/armv9/aarch64.
    if(BASEARCH_ARM_FOUND AND NOT DEFINED UNALIGNED64_OK)
        if("${ARCH}" MATCHES "armv[2-7]")
            set(UNALIGNED64_OK FALSE)
        elseif("${ARCH}" MATCHES "(arm(v[8-9])?|aarch64)")
            set(UNALIGNED64_OK TRUE)
        endif()
    endif()
    # PowerPC: only powerpc64le gets a default.
    if(BASEARCH_PPC_FOUND AND NOT DEFINED UNALIGNED64_OK)
        if("${ARCH}" MATCHES "powerpc64le")
            set(UNALIGNED64_OK TRUE)
        endif()
    endif()
    # x86: always defaults to on.
    if(BASEARCH_X86_FOUND AND NOT DEFINED UNALIGNED64_OK)
        set(UNALIGNED64_OK TRUE)
    endif()
    if(UNALIGNED64_OK)
        add_definitions(-DUNALIGNED64_OK)
        message(STATUS "Architecture supports unaligned reads of > 4 bytes")
    endif()
endif()
# Apply warning compiler flags # Apply warning compiler flags
if(WITH_MAINTAINER_WARNINGS) if(WITH_MAINTAINER_WARNINGS)
add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE}) add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
@ -1459,8 +1417,6 @@ add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with p
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting") add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking") add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")
add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances") add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances")
add_feature_info(WITH_UNALIGNED UNALIGNED_OK "Support unaligned reads on platforms that support it")
add_feature_info(WITH_UNALIGNED64 UNALIGNED64_OK "Support unaligned 64-bit reads on platforms that support it")
if(BASEARCH_ARM_FOUND) if(BASEARCH_ARM_FOUND)
add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE") add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE")

View File

@ -194,7 +194,6 @@ Advanced Build Options
| CMake | configure | Description | Default | | CMake | configure | Description | Default |
|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------| |:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF | | ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF |
| UNALIGNED_OK | | Allow unaligned reads | ON (x86, arm) |
| FORCE_SSE2 | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) | | FORCE_SSE2 | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) |
| FORCE_TZCNT | --force-tzcnt | Skip runtime check for TZCNT instructions | OFF | | FORCE_TZCNT | --force-tzcnt | Skip runtime check for TZCNT instructions | OFF |
| WITH_AVX2 | | Build with AVX2 intrinsics | ON | | WITH_AVX2 | | Build with AVX2 intrinsics | ON |
@ -212,7 +211,6 @@ Advanced Build Options
| WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON | | WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF | | WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF | | WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
| WITH_UNALIGNED | | Allow optimizations that use unaligned reads if safe on current arch| ON |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF | | WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF | | WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF | | INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |

View File

@ -14,7 +14,7 @@
# include <nmmintrin.h> # include <nmmintrin.h>
#endif #endif
/* UNALIGNED_OK, AVX2 intrinsic comparison */ /* AVX2 unaligned intrinsic comparison */
static inline uint32_t compare256_unaligned_avx2_static(const uint8_t *src0, const uint8_t *src1) { static inline uint32_t compare256_unaligned_avx2_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0; uint32_t len = 0;

View File

@ -56,7 +56,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
#include "match_tpl.h" #include "match_tpl.h"
#ifdef UNALIGNED_OK #ifdef UNALIGNED_OK
/* UNALIGNED_OK, 16-bit integer comparison */ /* 16-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) { static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0; uint32_t len = 0;
@ -94,7 +94,7 @@ Z_INTERNAL uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *
#include "match_tpl.h" #include "match_tpl.h"
#ifdef HAVE_BUILTIN_CTZ #ifdef HAVE_BUILTIN_CTZ
/* UNALIGNED_OK, 32-bit integer comparison */ /* 32-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const uint8_t *src1) { static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0; uint32_t len = 0;

20
configure vendored
View File

@ -1481,9 +1481,6 @@ case "${ARCH}" in
i386 | i486 | i586 | i686 |x86_64) i386 | i486 | i586 | i686 |x86_64)
ARCHDIR=arch/x86 ARCHDIR=arch/x86
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
# Enable arch-specific optimizations # Enable arch-specific optimizations
if test $without_optimizations -eq 0; then if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DX86_FEATURES" CFLAGS="${CFLAGS} -DX86_FEATURES"
@ -1704,9 +1701,6 @@ EOF
fi fi
;; ;;
armv6l | armv6hl) armv6l | armv6hl)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
if test $without_optimizations -eq 0; then if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then if test $buildacle -eq 1; then
echo ACLE support not available echo ACLE support not available
@ -1718,9 +1712,6 @@ EOF
fi fi
;; ;;
arm | armv7*) arm | armv7*)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
if test $without_optimizations -eq 0; then if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then if test $buildacle -eq 1; then
echo ACLE support not available echo ACLE support not available
@ -1745,9 +1736,6 @@ EOF
fi fi
;; ;;
armv8-a | armv8-a+simd) armv8-a | armv8-a+simd)
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
if test $without_optimizations -eq 0; then if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then if test $buildacle -eq 1; then
echo ACLE support not available echo ACLE support not available
@ -1772,9 +1760,6 @@ EOF
fi fi
;; ;;
armv8-a+crc | armv8-a+crc+simd | armv8.[1234]-a | armv8.[1234]-a+simd) armv8-a+crc | armv8-a+crc+simd | armv8.[1234]-a | armv8.[1234]-a+simd)
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
acleflag="-march=${ARCH}" acleflag="-march=${ARCH}"
if test $without_optimizations -eq 0; then if test $without_optimizations -eq 0; then
@ -1869,9 +1854,6 @@ EOF
neonflag="-march=${ARCH}" neonflag="-march=${ARCH}"
acleflag="-march=${ARCH}" acleflag="-march=${ARCH}"
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;; ;;
powerpc*) powerpc*)
case "${ARCH}" in case "${ARCH}" in
@ -1883,8 +1865,6 @@ EOF
;; ;;
powerpc64le) powerpc64le)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;; ;;
esac esac

View File

@ -30,8 +30,6 @@ WFLAGS = \
-D_CRT_NONSTDC_NO_DEPRECATE \ -D_CRT_NONSTDC_NO_DEPRECATE \
-DARM_NEON_HASLD4 \ -DARM_NEON_HASLD4 \
-DARM_FEATURES \ -DARM_FEATURES \
-DUNALIGNED_OK \
-DUNALIGNED64_OK \
# #
LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
ARFLAGS = -nologo ARFLAGS = -nologo

View File

@ -30,7 +30,6 @@ WFLAGS = \
-D_CRT_NONSTDC_NO_DEPRECATE \ -D_CRT_NONSTDC_NO_DEPRECATE \
-DARM_FEATURES \ -DARM_FEATURES \
-DARM_NEON_HASLD4 \ -DARM_NEON_HASLD4 \
-DUNALIGNED_OK \
# #
LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
ARFLAGS = -nologo ARFLAGS = -nologo

View File

@ -35,8 +35,6 @@ WFLAGS = \
-DX86_AVX2 \ -DX86_AVX2 \
-DX86_AVX_CHUNKSET \ -DX86_AVX_CHUNKSET \
-DX86_SSE2_CHUNKSET \ -DX86_SSE2_CHUNKSET \
-DUNALIGNED_OK \
-DUNALIGNED64_OK \
# #
LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
ARFLAGS = -nologo ARFLAGS = -nologo

View File

@ -194,6 +194,28 @@
# define Tracecv(c, x) # define Tracecv(c, x)
#endif #endif
/* Compile-time detection of unaligned memory access support, based purely on
 * compiler-predefined macros:
 *
 *   UNALIGNED_OK    defined when the target architecture is assumed to support
 *                   unaligned memory accesses.
 *   UNALIGNED64_OK  additionally defined on the 64-bit targets listed below
 *                   (x86-64, AArch64, little-endian powerpc64).
 *
 * Neither macro is defined on architectures that are not listed here.
 */
#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
#  define UNALIGNED_OK
#  define UNALIGNED64_OK
#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
      defined(__i686__) || defined(_X86_) || defined(_M_IX86)
#  define UNALIGNED_OK
#elif defined(__aarch64__) || defined(_M_ARM64)
/* GNU-compatible compilers must advertise __ARM_FEATURE_UNALIGNED; any other
   compiler is taken to allow unaligned access on this architecture. */
#  if !defined(__GNUC__) || defined(__ARM_FEATURE_UNALIGNED)
#    define UNALIGNED_OK
#    define UNALIGNED64_OK
#  endif
/* Test defined(_M_ARM) before comparing it so that compilers which do not
   provide the macro never evaluate an undefined identifier (-Wundef clean). */
#elif defined(__arm__) || (defined(_M_ARM) && _M_ARM >= 7)
#  if !defined(__GNUC__) || defined(__ARM_FEATURE_UNALIGNED)
#    define UNALIGNED_OK
#  endif
#elif defined(__powerpc64__) || defined(__ppc64__)
/* Both byte-order macros must exist before they are compared: undefined
   identifiers evaluate to 0 in #if, so a bare comparison would be 0 == 0 and
   would wrongly report a big-endian target as little-endian. */
#  if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
      (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define UNALIGNED_OK
#    define UNALIGNED64_OK
#  endif
#endif
/* Force compiler to emit unaligned memory accesses if unaligned access is supported /* Force compiler to emit unaligned memory accesses if unaligned access is supported
on the architecture, otherwise don't assume unaligned access is supported. Older on the architecture, otherwise don't assume unaligned access is supported. Older
compilers don't optimize memcpy and memcmp calls to unaligned access instructions compilers don't optimize memcpy and memcmp calls to unaligned access instructions