diff --git a/adler32.c b/adler32.c index feff67bc..af410477 100644 --- a/adler32.c +++ b/adler32.c @@ -13,9 +13,6 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len); static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2); -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - #define DO1(buf, i) {adler += (buf)[i]; sum2 += adler;} #define DO2(buf, i) DO1(buf, i); DO1(buf, i+1); #define DO4(buf, i) DO2(buf, i); DO2(buf, i+2); diff --git a/adler32_p.h b/adler32_p.h index 131513a8..67665304 100644 --- a/adler32_p.h +++ b/adler32_p.h @@ -9,6 +9,8 @@ #define ADLER32_P_H #define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ /* use NO_DIVIDE if your processor does not do division in hardware -- try it both ways to see which is faster */ diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c index f8573ec5..3e0ee4dd 100644 --- a/arch/arm/adler32_neon.c +++ b/arch/arm/adler32_neon.c @@ -72,7 +72,6 @@ static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) { } static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, size_t len) { - /* Oldie K&R code integration. */ unsigned int i; for (i = 0; i < len; ++i) { pair[0] += buf[i]; @@ -97,15 +96,9 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { if (len < 16) return adler32_len_16(adler, buf, len, sum2); - /* The largest prime smaller than 65536. */ - const uint32_t M_BASE = 65521; - /* This is the threshold where doing accumulation may overflow. */ - const int M_NMAX = 5552; - uint32_t pair[2]; - int n = M_NMAX; + int n = NMAX; unsigned int done = 0; - /* Oldie K&R code integration. */ unsigned int i; /* Split Adler-32 into component sums, it can be supplied by @@ -122,8 +115,8 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { break; NEON_accum32(pair, buf + i, n / 16); - pair[0] %= M_BASE; - pair[1] %= M_BASE; + pair[0] %= BASE; + pair[1] %= BASE; done += (n / 16) * 16; } @@ -131,8 +124,8 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { /* Handle the tail elements. */ if (done < len) { NEON_handle_tail(pair, (buf + done), len - done); - pair[0] %= M_BASE; - pair[1] %= M_BASE; + pair[0] %= BASE; + pair[1] %= BASE; } /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */