mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-18 11:35:35 -04:00
Remove chunkmemset_3 and chunkmemset_6 on ARM/AArch64 as they need 3 chunks...
* Don't unroll distances smaller than chunk size.
This commit is contained in:
parent
6539b769e6
commit
6575fbffea
@ -1252,6 +1252,12 @@ if(ZLIB_ENABLE_TESTS)
|
||||
"-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}"
|
||||
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt
|
||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
|
||||
|
||||
# Registers a CTest case named GH-751 (presumably the upstream issue number - confirm).
# It runs cmake/test-compress.cmake via `cmake -P`, passing the minigzip command as
# TARGET and test/GH-751/test.txt as INPUT.
add_test(NAME GH-751
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DTARGET=${MINIGZIP_COMMAND}"
|
||||
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-751/test.txt
|
||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
|
||||
endif()
|
||||
|
||||
FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES)
|
||||
|
@ -15,7 +15,6 @@ typedef uint8x16_t chunk_t;
|
||||
|
||||
#define HAVE_CHUNKMEMSET_1
|
||||
#define HAVE_CHUNKMEMSET_2
|
||||
#define HAVE_CHUNKMEMSET_3
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
|
||||
@ -42,77 +41,6 @@ static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||
#define CHUNKMEMSET chunkmemset_neon
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
|
||||
|
||||
uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
|
||||
/* Repeat the 3-byte pattern found at FROM into OUT (dispatched for dist == 3).
   Requests shorter than sizeof(chunks) are handed to CHUNKUNROLL + CHUNKCOPY.
   Otherwise whole sizeof(chunks)-byte groups are written with vst3_u8 and any
   remainder is handed to CHUNKUNROLL + CHUNKCOPY as well.
   Returns OUT advanced past the stored groups when LEN is a multiple of
   sizeof(chunks); otherwise returns whatever CHUNKCOPY returns. */
static inline uint8_t *chunkmemset_3(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
|
||||
uint8x8x3_t chunks;
|
||||
unsigned sz = sizeof(chunks);
|
||||
/* Too short for even one full vector-group store: use the generic helpers.
   dist and len are passed by address, so CHUNKUNROLL may update them. */
if (len < sz) {
|
||||
out = CHUNKUNROLL(out, &dist, &len);
|
||||
return CHUNKCOPY(out, out - dist, len);
|
||||
}
|
||||
|
||||
/* Load 3 bytes 'a,b,c' from FROM and duplicate across all lanes:
|
||||
chunks[0] = {a,a,a,a,a,a,a,a}
|
||||
chunks[1] = {b,b,b,b,b,b,b,b}
|
||||
chunks[2] = {c,c,c,c,c,c,c,c}. */
|
||||
chunks = vld3_dup_u8(from);
|
||||
|
||||
/* Split LEN into a multiple of sz (handled by the loop) and a remainder. */
unsigned rem = len % sz;
|
||||
len -= rem;
|
||||
/* len is now a multiple of sz, so the countdown reaches exactly zero. */
while (len) {
|
||||
/* Store "a,b,c, ..., a,b,c". */
|
||||
vst3_u8(out, chunks);
|
||||
out += sz;
|
||||
len -= sz;
|
||||
}
|
||||
|
||||
if (!rem)
|
||||
return out;
|
||||
|
||||
/* Last, deal with the case when LEN is not a multiple of SZ. */
|
||||
out = CHUNKUNROLL(out, &dist, &rem);
|
||||
return CHUNKCOPY(out, out - dist, rem);
|
||||
}
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
#define HAVE_CHUNKMEMSET_6
|
||||
|
||||
/* Repeat the 6-byte pattern found at FROM into OUT (dispatched for dist == 6).
   Same structure as the 3-byte variant, but works on three 16-bit elements.
   Requests shorter than sizeof(chunks) and any trailing remainder are handed
   to CHUNKUNROLL + CHUNKCOPY; full groups are written with vst3q_u16.
   Returns OUT advanced past the stored groups when LEN is a multiple of
   sizeof(chunks); otherwise returns whatever CHUNKCOPY returns. */
static inline uint8_t *chunkmemset_6(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
|
||||
uint16x8x3_t chunks;
|
||||
unsigned sz = sizeof(chunks);
|
||||
/* Too short for even one full vector-group store: use the generic helpers.
   dist and len are passed by address, so CHUNKUNROLL may update them. */
if (len < sz) {
|
||||
out = CHUNKUNROLL(out, &dist, &len);
|
||||
return CHUNKCOPY(out, out - dist, len);
|
||||
}
|
||||
|
||||
/* Load 6 bytes 'ab,cd,ef' from FROM and duplicate across all lanes:
|
||||
chunks[0] = {ab,ab,ab,ab,ab,ab,ab,ab}
|
||||
chunks[1] = {cd,cd,cd,cd,cd,cd,cd,cd}
|
||||
chunks[2] = {ef,ef,ef,ef,ef,ef,ef,ef}. */
|
||||
/* NOTE(review): FROM is a byte pointer reinterpreted as unsigned short *;
   confirm that a possibly unaligned 16-bit access is acceptable on every target. */
chunks = vld3q_dup_u16((unsigned short *)from);
|
||||
|
||||
/* Split LEN into a multiple of sz (handled by the loop) and a remainder. */
unsigned rem = len % sz;
|
||||
len -= rem;
|
||||
/* len is now a multiple of sz, so the countdown reaches exactly zero. */
while (len) {
|
||||
/* Store "ab,cd,ef, ..., ab,cd,ef". */
|
||||
/* NOTE(review): OUT is likewise cast from uint8_t * to unsigned short *. */
vst3q_u16((unsigned short *)out, chunks);
|
||||
out += sz;
|
||||
len -= sz;
|
||||
}
|
||||
|
||||
if (!rem)
|
||||
return out;
|
||||
|
||||
/* Last, deal with the case when LEN is not a multiple of SZ. */
|
||||
out = CHUNKUNROLL(out, &dist, &rem);
|
||||
return CHUNKCOPY(out, out - dist, rem);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Read one chunk_t (a uint8x16_t vector) from S with vld1q_u8 and store it in *CHUNK. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||
*chunk = vld1q_u8(s);
|
||||
}
|
||||
|
@ -115,21 +115,11 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
||||
chunkmemset_2(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_3
|
||||
if (dist == 3) {
|
||||
return chunkmemset_3(out, from, dist, len);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_4
|
||||
if (dist == 4) {
|
||||
chunkmemset_4(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_6
|
||||
if (dist == 6) {
|
||||
return chunkmemset_6(out, from, dist, len);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_8
|
||||
if (dist == 8) {
|
||||
chunkmemset_8(from, &chunk);
|
||||
@ -137,6 +127,16 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
||||
#endif
|
||||
if (dist == sz) {
|
||||
loadchunk(from, &chunk);
|
||||
} else if (dist < sz) {
|
||||
unsigned char *end = out + len - 1;
|
||||
while (len > dist) {
|
||||
out = CHUNKCOPY_SAFE(out, from, dist, end);
|
||||
len -= dist;
|
||||
}
|
||||
if (len > 0) {
|
||||
out = CHUNKCOPY_SAFE(out, from, len, end);
|
||||
}
|
||||
return out;
|
||||
} else {
|
||||
out = CHUNKUNROLL(out, &dist, &len);
|
||||
return CHUNKCOPY(out, out - dist, len);
|
||||
|
1
test/GH-751/test.txt
Normal file
1
test/GH-751/test.txt
Normal file
File diff suppressed because one or more lines are too long
@ -91,7 +91,7 @@ CVE-2003-0107$(EXE): CVE-2003-0107.o
|
||||
$(CC) $(CFLAGS) -o $@ CVE-2003-0107.o $(TEST_LDFLAGS)
|
||||
|
||||
.PHONY: ghtests
|
||||
ghtests: testGH-361 testGH-364
|
||||
ghtests: testGH-361 testGH-364 testGH-751
|
||||
|
||||
.PHONY: testGH-361
|
||||
testGH-361:
|
||||
@ -104,6 +104,10 @@ switchlevels$(EXE): $(SRCDIR)/switchlevels.c
|
||||
testGH-364: switchlevels$(EXE)
|
||||
$(QEMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null
|
||||
|
||||
# Round-trip check for GH-751/test.txt: compress it with minigzip, pipe the result
# straight into `minigzip -d`, and discard the decompressed output.
.PHONY: testGH-751
|
||||
testGH-751:
|
||||
$(QEMU_RUN) ../minigzip$(EXE) <$(SRCDIR)/GH-751/test.txt | $(QEMU_RUN) ../minigzip$(EXE) -d >/dev/null
|
||||
|
||||
clean:
|
||||
rm -f *.o *.gcda *.gcno *.gcov
|
||||
rm -f CVE-2003-0107$(EXE) switchlevels$(EXE)
|
||||
|
Loading…
Reference in New Issue
Block a user