Remove the "avx512_well_suited" cpu flag

Now that we have confirmed that the AVX512 variants have, so far, been
faster on every capable CPU we've tested, there's no sense in trying to
maintain a whitelist.
This commit is contained in:
Adam Stylinski 2022-01-18 09:47:45 -05:00 committed by Hans Kristian Rosbach
parent 8437a02b93
commit 429bc4f5d5
5 changed files with 7 additions and 54 deletions

View File

@ -28,7 +28,6 @@ Z_INTERNAL int x86_cpu_has_sse42;
Z_INTERNAL int x86_cpu_has_pclmulqdq;
Z_INTERNAL int x86_cpu_has_vpclmulqdq;
Z_INTERNAL int x86_cpu_has_tzcnt;
Z_INTERNAL int x86_cpu_well_suited_avx512;
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifdef _MSC_VER
@ -61,32 +60,14 @@ static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigne
void Z_INTERNAL x86_check_features(void) {
unsigned eax, ebx, ecx, edx;
unsigned maxbasic;
unsigned family, model, extended_model;
int intel_cpu;
char cpu_identity[13];
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
/* NULL terminate the string */
memset(cpu_identity, 0, 13);
memcpy(cpu_identity, (char*)&ebx, sizeof(int));
memcpy(cpu_identity + 4, (char*)&edx, sizeof(int));
memcpy(cpu_identity + 8, (char*)&ecx, sizeof(int));
intel_cpu = strncmp(cpu_identity, "GenuineIntel", 12) == 0;
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
x86_cpu_has_sse2 = edx & 0x4000000;
x86_cpu_has_ssse3 = ecx & 0x200;
x86_cpu_has_sse41 = ecx & 0x80000;
x86_cpu_has_sse42 = ecx & 0x100000;
x86_cpu_has_pclmulqdq = ecx & 0x2;
x86_cpu_well_suited_avx512 = 0;
model = (eax & 0xf0) >> 4;
family = (eax & 0xf00) >> 8;
extended_model = (eax & 0xf0000) >> 16;
if (maxbasic >= 7) {
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
@ -104,30 +85,4 @@ void Z_INTERNAL x86_check_features(void) {
x86_cpu_has_avx2 = 0;
x86_cpu_has_vpclmulqdq = 0;
}
if (intel_cpu) {
/* All of the Knights Landing and Knights Ferry _likely_ benefit
* from the AVX512 adler checksum implementation */
if (family == 0xb) {
x86_cpu_well_suited_avx512 = 1;
} else if (family == 0x6) {
if (model == 0x5 && extended_model == 0x5) {
/* Experimentally, on skylake-x and cascadelake-x, it has been
* unwaveringly faster to use avx512 and avx512 vnni */
x86_cpu_well_suited_avx512 = 1;
} else if (model == 0xa && extended_model == 0x6) {
/* Icelake server */
x86_cpu_well_suited_avx512 = 1;
} else if (model == 0xf && extended_model == 0x8) {
/* Sapphire Rapids */
x86_cpu_well_suited_avx512 = 1;
}
/* Still need to check whether Rocket Lake and/or Alder Lake
* benefit from the AVX512VNNI accelerated adler32 implementations.
* For now this working list is probably safe */
}
}
}

View File

@ -16,7 +16,6 @@ extern int x86_cpu_has_sse42;
extern int x86_cpu_has_pclmulqdq;
extern int x86_cpu_has_vpclmulqdq;
extern int x86_cpu_has_tzcnt;
extern int x86_cpu_well_suited_avx512;
void Z_INTERNAL x86_check_features(void);

View File

@ -42,8 +42,7 @@ static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
#endif
#endif
/* Unfortunately GCC _and_ clang didn't support these things until version
* 10 and 12, respectively */
/* Unfortunately GCC didn't support these things until version 10 */
#ifdef __AVX2__
#include <immintrin.h>
@ -61,7 +60,7 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) {
return _mm512_castsi128_si512(r);
}
#endif // __AVX512F__
#endif // __AVX2__
#endif // gcc version 10 test
#endif // clang/gcc test
#endif // __AVX2__
#endif // include guard FALLBACK_BUILTINS_H

View File

@ -120,11 +120,11 @@ Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_
functable.adler32 = &adler32_avx2;
#endif
#ifdef X86_AVX512_ADLER32
if (x86_cpu_has_avx512 && x86_cpu_well_suited_avx512)
if (x86_cpu_has_avx512)
functable.adler32 = &adler32_avx512;
#endif
#ifdef X86_AVX512VNNI_ADLER32
if (x86_cpu_has_avx512vnni && x86_cpu_well_suited_avx512) {
if (x86_cpu_has_avx512vnni) {
functable.adler32 = &adler32_avx512_vnni;
}
#endif