mirror of
https://github.com/GerbilSoft/zlib-ng.git
synced 2025-06-19 03:55:39 -04:00
Remove the "avx512_well_suited" cpu flag
Now that we have confirmation that the AVX512 variants have, so far, been universally better on every capable CPU we've tested them on, there's no sense in trying to maintain a whitelist.
This commit is contained in:
parent
8437a02b93
commit
429bc4f5d5
@ -28,7 +28,6 @@ Z_INTERNAL int x86_cpu_has_sse42;
|
|||||||
Z_INTERNAL int x86_cpu_has_pclmulqdq;
|
Z_INTERNAL int x86_cpu_has_pclmulqdq;
|
||||||
Z_INTERNAL int x86_cpu_has_vpclmulqdq;
|
Z_INTERNAL int x86_cpu_has_vpclmulqdq;
|
||||||
Z_INTERNAL int x86_cpu_has_tzcnt;
|
Z_INTERNAL int x86_cpu_has_tzcnt;
|
||||||
Z_INTERNAL int x86_cpu_well_suited_avx512;
|
|
||||||
|
|
||||||
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
|
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
@ -61,32 +60,14 @@ static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigne
|
|||||||
void Z_INTERNAL x86_check_features(void) {
|
void Z_INTERNAL x86_check_features(void) {
|
||||||
unsigned eax, ebx, ecx, edx;
|
unsigned eax, ebx, ecx, edx;
|
||||||
unsigned maxbasic;
|
unsigned maxbasic;
|
||||||
unsigned family, model, extended_model;
|
|
||||||
int intel_cpu;
|
|
||||||
char cpu_identity[13];
|
|
||||||
|
|
||||||
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
|
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
/* NULL terminate the string */
|
|
||||||
memset(cpu_identity, 0, 13);
|
|
||||||
memcpy(cpu_identity, (char*)&ebx, sizeof(int));
|
|
||||||
memcpy(cpu_identity + 4, (char*)&edx, sizeof(int));
|
|
||||||
memcpy(cpu_identity + 8, (char*)&ecx, sizeof(int));
|
|
||||||
|
|
||||||
intel_cpu = strncmp(cpu_identity, "GenuineIntel", 12) == 0;
|
|
||||||
|
|
||||||
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
|
|
||||||
|
|
||||||
x86_cpu_has_sse2 = edx & 0x4000000;
|
x86_cpu_has_sse2 = edx & 0x4000000;
|
||||||
x86_cpu_has_ssse3 = ecx & 0x200;
|
x86_cpu_has_ssse3 = ecx & 0x200;
|
||||||
x86_cpu_has_sse41 = ecx & 0x80000;
|
x86_cpu_has_sse41 = ecx & 0x80000;
|
||||||
x86_cpu_has_sse42 = ecx & 0x100000;
|
x86_cpu_has_sse42 = ecx & 0x100000;
|
||||||
x86_cpu_has_pclmulqdq = ecx & 0x2;
|
x86_cpu_has_pclmulqdq = ecx & 0x2;
|
||||||
x86_cpu_well_suited_avx512 = 0;
|
|
||||||
|
|
||||||
model = (eax & 0xf0) >> 4;
|
|
||||||
family = (eax & 0xf00) >> 8;
|
|
||||||
extended_model = (eax & 0xf0000) >> 16;
|
|
||||||
|
|
||||||
if (maxbasic >= 7) {
|
if (maxbasic >= 7) {
|
||||||
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
|
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
|
||||||
@ -104,30 +85,4 @@ void Z_INTERNAL x86_check_features(void) {
|
|||||||
x86_cpu_has_avx2 = 0;
|
x86_cpu_has_avx2 = 0;
|
||||||
x86_cpu_has_vpclmulqdq = 0;
|
x86_cpu_has_vpclmulqdq = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (intel_cpu) {
|
|
||||||
/* All of the Knights Landing and Knights Ferry _likely_ benefit
|
|
||||||
* from the AVX512 adler checksum implementation */
|
|
||||||
if (family == 0xb) {
|
|
||||||
x86_cpu_well_suited_avx512 = 1;
|
|
||||||
} else if (family == 0x6) {
|
|
||||||
if (model == 0x5 && extended_model == 0x5) {
|
|
||||||
/* Experimentally, on skylake-x and cascadelake-x, it has been
|
|
||||||
* unwaveringly faster to use avx512 and avx512 vnni */
|
|
||||||
x86_cpu_well_suited_avx512 = 1;
|
|
||||||
} else if (model == 0xa && extended_model == 0x6) {
|
|
||||||
/* Icelake server */
|
|
||||||
x86_cpu_well_suited_avx512 = 1;
|
|
||||||
} else if (model == 0xf && extended_model == 0x8) {
|
|
||||||
/* Sapphire Rapids */
|
|
||||||
x86_cpu_well_suited_avx512 = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Still need to check whether Rocket Lake and/or Alder Lake
|
|
||||||
* benefit from the AVX512VNNI accelerated adler32 implementations.
|
|
||||||
* For now this working list is probably safe */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,6 @@ extern int x86_cpu_has_sse42;
|
|||||||
extern int x86_cpu_has_pclmulqdq;
|
extern int x86_cpu_has_pclmulqdq;
|
||||||
extern int x86_cpu_has_vpclmulqdq;
|
extern int x86_cpu_has_vpclmulqdq;
|
||||||
extern int x86_cpu_has_tzcnt;
|
extern int x86_cpu_has_tzcnt;
|
||||||
extern int x86_cpu_well_suited_avx512;
|
|
||||||
|
|
||||||
void Z_INTERNAL x86_check_features(void);
|
void Z_INTERNAL x86_check_features(void);
|
||||||
|
|
||||||
|
@ -42,8 +42,7 @@ static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Unfortunately GCC _and_ clang didn't support these things until version
|
/* Unfortunately GCC didn't support these things until version 10 */
|
||||||
* 10 and 12, respectively */
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
@ -61,7 +60,7 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) {
|
|||||||
return _mm512_castsi128_si512(r);
|
return _mm512_castsi128_si512(r);
|
||||||
}
|
}
|
||||||
#endif // __AVX512F__
|
#endif // __AVX512F__
|
||||||
#endif // __AVX2__
|
#endif // gcc version 10 test
|
||||||
|
|
||||||
#endif // clang/gcc test
|
#endif // __AVX2__
|
||||||
#endif // include guard FALLBACK_BUILTINS_H
|
#endif // include guard FALLBACK_BUILTINS_H
|
||||||
|
@ -120,11 +120,11 @@ Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_
|
|||||||
functable.adler32 = &adler32_avx2;
|
functable.adler32 = &adler32_avx2;
|
||||||
#endif
|
#endif
|
||||||
#ifdef X86_AVX512_ADLER32
|
#ifdef X86_AVX512_ADLER32
|
||||||
if (x86_cpu_has_avx512 && x86_cpu_well_suited_avx512)
|
if (x86_cpu_has_avx512)
|
||||||
functable.adler32 = &adler32_avx512;
|
functable.adler32 = &adler32_avx512;
|
||||||
#endif
|
#endif
|
||||||
#ifdef X86_AVX512VNNI_ADLER32
|
#ifdef X86_AVX512VNNI_ADLER32
|
||||||
if (x86_cpu_has_avx512vnni && x86_cpu_well_suited_avx512) {
|
if (x86_cpu_has_avx512vnni) {
|
||||||
functable.adler32 = &adler32_avx512_vnni;
|
functable.adler32 = &adler32_avx512_vnni;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user