mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-18 19:45:37 -04:00
Remove chunkmemset_3 and chunkmemset_6 on ARM/AArch64 as they need 3 chunks...
* Don't unroll distances smaller than chunk size.
This commit is contained in:
parent
6539b769e6
commit
6575fbffea
@ -1252,6 +1252,12 @@ if(ZLIB_ENABLE_TESTS)
|
|||||||
"-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}"
|
"-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}"
|
||||||
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt
|
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt
|
||||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
|
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
|
||||||
|
|
||||||
|
add_test(NAME GH-751
|
||||||
|
COMMAND ${CMAKE_COMMAND}
|
||||||
|
"-DTARGET=${MINIGZIP_COMMAND}"
|
||||||
|
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-751/test.txt
|
||||||
|
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES)
|
FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES)
|
||||||
|
@ -15,7 +15,6 @@ typedef uint8x16_t chunk_t;
|
|||||||
|
|
||||||
#define HAVE_CHUNKMEMSET_1
|
#define HAVE_CHUNKMEMSET_1
|
||||||
#define HAVE_CHUNKMEMSET_2
|
#define HAVE_CHUNKMEMSET_2
|
||||||
#define HAVE_CHUNKMEMSET_3
|
|
||||||
#define HAVE_CHUNKMEMSET_4
|
#define HAVE_CHUNKMEMSET_4
|
||||||
#define HAVE_CHUNKMEMSET_8
|
#define HAVE_CHUNKMEMSET_8
|
||||||
|
|
||||||
@ -42,77 +41,6 @@ static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
|||||||
#define CHUNKMEMSET chunkmemset_neon
|
#define CHUNKMEMSET chunkmemset_neon
|
||||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
|
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
|
||||||
|
|
||||||
uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len);
|
|
||||||
uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len);
|
|
||||||
|
|
||||||
static inline uint8_t *chunkmemset_3(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
|
|
||||||
uint8x8x3_t chunks;
|
|
||||||
unsigned sz = sizeof(chunks);
|
|
||||||
if (len < sz) {
|
|
||||||
out = CHUNKUNROLL(out, &dist, &len);
|
|
||||||
return CHUNKCOPY(out, out - dist, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Load 3 bytes 'a,b,c' from FROM and duplicate across all lanes:
|
|
||||||
chunks[0] = {a,a,a,a,a,a,a,a}
|
|
||||||
chunks[1] = {b,b,b,b,b,b,b,b}
|
|
||||||
chunks[2] = {c,c,c,c,c,c,c,c}. */
|
|
||||||
chunks = vld3_dup_u8(from);
|
|
||||||
|
|
||||||
unsigned rem = len % sz;
|
|
||||||
len -= rem;
|
|
||||||
while (len) {
|
|
||||||
/* Store "a,b,c, ..., a,b,c". */
|
|
||||||
vst3_u8(out, chunks);
|
|
||||||
out += sz;
|
|
||||||
len -= sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!rem)
|
|
||||||
return out;
|
|
||||||
|
|
||||||
/* Last, deal with the case when LEN is not a multiple of SZ. */
|
|
||||||
out = CHUNKUNROLL(out, &dist, &rem);
|
|
||||||
return CHUNKCOPY(out, out - dist, rem);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
|
||||||
|
|
||||||
#define HAVE_CHUNKMEMSET_6
|
|
||||||
|
|
||||||
static inline uint8_t *chunkmemset_6(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
|
|
||||||
uint16x8x3_t chunks;
|
|
||||||
unsigned sz = sizeof(chunks);
|
|
||||||
if (len < sz) {
|
|
||||||
out = CHUNKUNROLL(out, &dist, &len);
|
|
||||||
return CHUNKCOPY(out, out - dist, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Load 6 bytes 'ab,cd,ef' from FROM and duplicate across all lanes:
|
|
||||||
chunks[0] = {ab,ab,ab,ab,ab,ab,ab,ab}
|
|
||||||
chunks[1] = {cd,cd,cd,cd,cd,cd,cd,cd}
|
|
||||||
chunks[2] = {ef,ef,ef,ef,ef,ef,ef,ef}. */
|
|
||||||
chunks = vld3q_dup_u16((unsigned short *)from);
|
|
||||||
|
|
||||||
unsigned rem = len % sz;
|
|
||||||
len -= rem;
|
|
||||||
while (len) {
|
|
||||||
/* Store "ab,cd,ef, ..., ab,cd,ef". */
|
|
||||||
vst3q_u16((unsigned short *)out, chunks);
|
|
||||||
out += sz;
|
|
||||||
len -= sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!rem)
|
|
||||||
return out;
|
|
||||||
|
|
||||||
/* Last, deal with the case when LEN is not a multiple of SZ. */
|
|
||||||
out = CHUNKUNROLL(out, &dist, &rem);
|
|
||||||
return CHUNKCOPY(out, out - dist, rem);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||||
*chunk = vld1q_u8(s);
|
*chunk = vld1q_u8(s);
|
||||||
}
|
}
|
||||||
|
@ -115,21 +115,11 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
|||||||
chunkmemset_2(from, &chunk);
|
chunkmemset_2(from, &chunk);
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_CHUNKMEMSET_3
|
|
||||||
if (dist == 3) {
|
|
||||||
return chunkmemset_3(out, from, dist, len);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_CHUNKMEMSET_4
|
#ifdef HAVE_CHUNKMEMSET_4
|
||||||
if (dist == 4) {
|
if (dist == 4) {
|
||||||
chunkmemset_4(from, &chunk);
|
chunkmemset_4(from, &chunk);
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_CHUNKMEMSET_6
|
|
||||||
if (dist == 6) {
|
|
||||||
return chunkmemset_6(out, from, dist, len);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
#ifdef HAVE_CHUNKMEMSET_8
|
#ifdef HAVE_CHUNKMEMSET_8
|
||||||
if (dist == 8) {
|
if (dist == 8) {
|
||||||
chunkmemset_8(from, &chunk);
|
chunkmemset_8(from, &chunk);
|
||||||
@ -137,6 +127,16 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
|||||||
#endif
|
#endif
|
||||||
if (dist == sz) {
|
if (dist == sz) {
|
||||||
loadchunk(from, &chunk);
|
loadchunk(from, &chunk);
|
||||||
|
} else if (dist < sz) {
|
||||||
|
unsigned char *end = out + len - 1;
|
||||||
|
while (len > dist) {
|
||||||
|
out = CHUNKCOPY_SAFE(out, from, dist, end);
|
||||||
|
len -= dist;
|
||||||
|
}
|
||||||
|
if (len > 0) {
|
||||||
|
out = CHUNKCOPY_SAFE(out, from, len, end);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
} else {
|
} else {
|
||||||
out = CHUNKUNROLL(out, &dist, &len);
|
out = CHUNKUNROLL(out, &dist, &len);
|
||||||
return CHUNKCOPY(out, out - dist, len);
|
return CHUNKCOPY(out, out - dist, len);
|
||||||
|
1
test/GH-751/test.txt
Normal file
1
test/GH-751/test.txt
Normal file
File diff suppressed because one or more lines are too long
@ -91,7 +91,7 @@ CVE-2003-0107$(EXE): CVE-2003-0107.o
|
|||||||
$(CC) $(CFLAGS) -o $@ CVE-2003-0107.o $(TEST_LDFLAGS)
|
$(CC) $(CFLAGS) -o $@ CVE-2003-0107.o $(TEST_LDFLAGS)
|
||||||
|
|
||||||
.PHONY: ghtests
|
.PHONY: ghtests
|
||||||
ghtests: testGH-361 testGH-364
|
ghtests: testGH-361 testGH-364 testGH-751
|
||||||
|
|
||||||
.PHONY: testGH-361
|
.PHONY: testGH-361
|
||||||
testGH-361:
|
testGH-361:
|
||||||
@ -104,6 +104,10 @@ switchlevels$(EXE): $(SRCDIR)/switchlevels.c
|
|||||||
testGH-364: switchlevels$(EXE)
|
testGH-364: switchlevels$(EXE)
|
||||||
$(QEMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null
|
$(QEMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null
|
||||||
|
|
||||||
|
.PHONY: testGH-751
|
||||||
|
testGH-751:
|
||||||
|
$(QEMU_RUN) ../minigzip$(EXE) <$(SRCDIR)/GH-751/test.txt | $(QEMU_RUN) ../minigzip$(EXE) -d >/dev/null
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o *.gcda *.gcno *.gcov
|
rm -f *.o *.gcda *.gcno *.gcov
|
||||||
rm -f CVE-2003-0107$(EXE) switchlevels$(EXE)
|
rm -f CVE-2003-0107$(EXE) switchlevels$(EXE)
|
||||||
|
Loading…
Reference in New Issue
Block a user