Remove chunkmemset_3 and chunkmemset_6 on ARM/AArch64 as they need 3 chunks...

* Don't unroll distances smaller than chunk size.
This commit is contained in:
Mika Lindqvist 2020-09-14 18:40:35 +03:00 committed by Hans Kristian Rosbach
parent 6539b769e6
commit 6575fbffea
5 changed files with 22 additions and 83 deletions

View File

@ -1252,6 +1252,12 @@ if(ZLIB_ENABLE_TESTS)
"-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}"
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
# Regression test named GH-751 (presumably GitHub issue #751 -- confirm):
# runs cmake/test-compress.cmake with the minigzip command as TARGET and
# test/GH-751/test.txt as INPUT.
add_test(NAME GH-751
COMMAND ${CMAKE_COMMAND}
"-DTARGET=${MINIGZIP_COMMAND}"
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-751/test.txt
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake)
endif()
FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES)

View File

@ -15,7 +15,6 @@ typedef uint8x16_t chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_3
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
@ -42,77 +41,6 @@ static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
#define CHUNKMEMSET chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len);
uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len);
/* Repeat the 3-byte pattern located at FROM into OUT for LEN bytes.
 *
 * Whole multiples of sizeof(uint8x8x3_t) are emitted with interleaved NEON
 * stores; anything shorter than that (including the remainder after the bulk
 * stores) is delegated to CHUNKUNROLL followed by CHUNKCOPY.
 *
 * Returns the advanced output pointer: either the position just past the
 * last interleaved store, or whatever CHUNKCOPY returns for the remainder.
 */
static inline uint8_t *chunkmemset_3(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
    uint8x8x3_t pattern;
    const unsigned stride = sizeof(pattern);

    if (len >= stride) {
        /* Broadcast bytes 'a,b,c' from FROM so that
             pattern.val[0] = {a,a,a,a,a,a,a,a}
             pattern.val[1] = {b,b,b,b,b,b,b,b}
             pattern.val[2] = {c,c,c,c,c,c,c,c}. */
        pattern = vld3_dup_u8(from);

        const unsigned tail = len % stride;
        for (unsigned bulk = len - tail; bulk != 0; bulk -= stride) {
            /* The interleaving store writes "a,b,c, ..., a,b,c". */
            vst3_u8(out, pattern);
            out += stride;
        }

        if (tail == 0)
            return out;

        /* Only the part that is not a multiple of STRIDE is left. */
        len = tail;
    }

    /* Short request, or leftover bytes after the bulk stores. */
    out = CHUNKUNROLL(out, &dist, &len);
    return CHUNKCOPY(out, out - dist, len);
}
#if defined(__aarch64__) || defined(_M_ARM64)
#define HAVE_CHUNKMEMSET_6
/* Repeat the 6-byte pattern located at FROM into OUT for LEN bytes.
 *
 * The pattern is treated as three 16-bit units. Whole multiples of
 * sizeof(uint16x8x3_t) are emitted with interleaved NEON stores; anything
 * shorter than that (including the remainder after the bulk stores) is
 * delegated to CHUNKUNROLL followed by CHUNKCOPY.
 *
 * Returns the advanced output pointer: either the position just past the
 * last interleaved store, or whatever CHUNKCOPY returns for the remainder.
 */
static inline uint8_t *chunkmemset_6(uint8_t *out, uint8_t *from, unsigned dist, unsigned len) {
    uint16x8x3_t pattern;
    const unsigned stride = sizeof(pattern);

    if (len >= stride) {
        /* Broadcast the 16-bit units 'ab,cd,ef' from FROM so that
             pattern.val[0] = {ab,ab,ab,ab,ab,ab,ab,ab}
             pattern.val[1] = {cd,cd,cd,cd,cd,cd,cd,cd}
             pattern.val[2] = {ef,ef,ef,ef,ef,ef,ef,ef}. */
        pattern = vld3q_dup_u16((unsigned short *)from);

        const unsigned tail = len % stride;
        for (unsigned bulk = len - tail; bulk != 0; bulk -= stride) {
            /* The interleaving store writes "ab,cd,ef, ..., ab,cd,ef". */
            vst3q_u16((unsigned short *)out, pattern);
            out += stride;
        }

        if (tail == 0)
            return out;

        /* Only the part that is not a multiple of STRIDE is left. */
        len = tail;
    }

    /* Short request, or leftover bytes after the bulk stores. */
    out = CHUNKUNROLL(out, &dist, &len);
    return CHUNKCOPY(out, out - dist, len);
}
#endif
/* Read one vector's worth of bytes starting at S with vld1q_u8 and place the
   result in *CHUNK. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const chunk_t loaded = vld1q_u8(s);
    *chunk = loaded;
}

View File

@ -115,21 +115,11 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
chunkmemset_2(from, &chunk);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_3
if (dist == 3) {
return chunkmemset_3(out, from, dist, len);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_4
if (dist == 4) {
chunkmemset_4(from, &chunk);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_6
if (dist == 6) {
return chunkmemset_6(out, from, dist, len);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_8
if (dist == 8) {
chunkmemset_8(from, &chunk);
@ -137,6 +127,16 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
#endif
if (dist == sz) {
loadchunk(from, &chunk);
} else if (dist < sz) {
unsigned char *end = out + len - 1;
while (len > dist) {
out = CHUNKCOPY_SAFE(out, from, dist, end);
len -= dist;
}
if (len > 0) {
out = CHUNKCOPY_SAFE(out, from, len, end);
}
return out;
} else {
out = CHUNKUNROLL(out, &dist, &len);
return CHUNKCOPY(out, out - dist, len);

1
test/GH-751/test.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -91,7 +91,7 @@ CVE-2003-0107$(EXE): CVE-2003-0107.o
$(CC) $(CFLAGS) -o $@ CVE-2003-0107.o $(TEST_LDFLAGS)
.PHONY: ghtests
ghtests: testGH-361 testGH-364
ghtests: testGH-361 testGH-364 testGH-751
.PHONY: testGH-361
testGH-361:
@ -104,6 +104,10 @@ switchlevels$(EXE): $(SRCDIR)/switchlevels.c
testGH-364: switchlevels$(EXE)
$(QEMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null
# Regression test named GH-751 (presumably GitHub issue #751 -- confirm):
# feeds GH-751/test.txt to minigzip, pipes the result into `minigzip -d`,
# and discards the decompressed output.
.PHONY: testGH-751
testGH-751:
$(QEMU_RUN) ../minigzip$(EXE) <$(SRCDIR)/GH-751/test.txt | $(QEMU_RUN) ../minigzip$(EXE) -d >/dev/null
clean:
rm -f *.o *.gcda *.gcno *.gcov
rm -f CVE-2003-0107$(EXE) switchlevels$(EXE)