From 0829baf902dbf5982732aa54454178f55b50bdc6 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 17 Jul 2015 14:25:37 -0700 Subject: Expand reporting of the Intel instruction set extensions Detection for most of them is free because we're loading the entire registers anyway. The only exception is AVX512VBMI, which is in a new register we hadn't yet read from. I've also added the new GCC names so they can be used with QT_FUNCTION_TARGET. The only two exceptions are "movbe" and "popcnt", which are extremely restricted in use and we are not likely to have code dedicated to using them. Change-Id: Ib306f8f647014b399b87ffff13f1d8fd29e58be0 Reviewed-by: Oswald Buddenhagen Reviewed-by: Thiago Macieira --- src/corelib/tools/qsimd.cpp | 75 ++++++++++++++++++++++++++++++------ src/corelib/tools/qsimd_p.h | 93 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 155 insertions(+), 13 deletions(-) (limited to 'src/corelib/tools') diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 825028037f..d0c65a04b1 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -205,21 +205,24 @@ static void cpuidFeatures01(uint &ecx, uint &edx) inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} #endif -static void cpuidFeatures07_00(uint &ebx) +static void cpuidFeatures07_00(uint &ebx, uint &ecx) { #if defined(Q_CC_GNU) qregisteruint rbx; // in case it's 64-bit + qregisteruint rcx = 0; asm ("xchg " PICreg", %0\n" "cpuid\n" "xchg " PICreg", %0\n" - : "=&r" (rbx) - : "a" (7), "c" (0) + : "=&r" (rbx), "+&c" (rcx) + : "a" (7) : "%edx"); ebx = rbx; + ecx = rcx; #elif defined(Q_OS_WIN) int info[4]; __cpuidex(info, 7, 0); ebx = info[1]; + ecx = info[2]; #endif } @@ -257,7 +260,12 @@ static quint64 detectProcessorFeatures() AVXState = XMM0_15 | YMM0_15Hi128, AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 }; - static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2); + static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) | + (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) | + (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) | + (Q_UINT64_C(1) << CpuFeatureAVX512VL) | + (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI); + static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512; static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2; quint64 features = 0; @@ -295,8 +303,9 @@ static quint64 detectProcessorFeatures() } uint cpuid0700EBX = 0; + uint cpuid0700ECX = 0; if (cpuidLevel >= 7) { - cpuidFeatures07_00(cpuid0700EBX); + cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX); // the high 32-bits of features is cpuid0700EBX features |= quint64(cpuid0700EBX) << 32; @@ -305,6 +314,15 @@ static quint64 detectProcessorFeatures() if ((xgetbvA & AVXState) != AVXState) { // support for YMM registers is disabled, disable all AVX features &= ~AllAVX; + } else if ((xgetbvA & AVX512State) != AVX512State) { + // support for ZMM registers or mask registers is disabled, disable all AVX512 + features &= ~AllAVX512; + } else { + // this feature is out of order + if (cpuid0700ECX & (1u << 1)) + features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI; + else + features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI); } return features; @@ -484,7 +502,7 @@ static const int features_indices[] = { /* Data: sse3 sse2 - + avx512vbmi @@ -526,10 +544,31 @@ static const int features_indices[] = { rtm + + + + + avx512f + avx512dq + rdseed + + + avx512ifma + + + + + avx512pf + avx512er + avx512cd + sha + avx512bw + avx512vl */ static const char features_string[] = " sse3\0" " sse2\0" + " avx512vbmi\0" " ssse3\0" " fma\0" " cmpxchg16b\0" @@ -546,15 +585,27 @@ static const char features_string[] = " avx2\0" " bmi2\0" " rtm\0" + " avx512f\0" + " avx512dq\0" + " rdseed\0" + " avx512ifma\0" + " avx512pf\0" + " avx512er\0" + " avx512cd\0" + " sha\0" + " avx512bw\0" + " avx512vl\0" "\0"; static const quint8 features_indices[] = { - 0, 6, 5, 5, 5, 5, 5, 5, - 5, 12, 5, 5, 19, 24, 5, 5, - 5, 5, 5, 36, 44, 5, 52, 59, - 5, 67, 5, 5, 72, 77, 83, 5, - 5, 5, 5, 91, 96, 101, 5, 5, - 107, 5, 5, 113 + 0, 6, 12, 5, 5, 5, 5, 5, + 5, 24, 5, 5, 31, 36, 5, 5, + 5, 5, 5, 48, 56, 5, 64, 71, + 5, 79, 5, 5, 84, 89, 95, 5, + 5, 5, 5, 103, 108, 113, 5, 5, + 119, 5, 5, 125, 5, 5, 5, 5, + 130, 139, 149, 5, 5, 157, 5, 5, + 5, 5, 169, 179, 189, 199, 204, 214 }; #else static const char features_string[] = ""; diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index b815e976a7..be003f6c6d 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -218,6 +218,23 @@ # endif #endif +#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" +#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" +#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" +#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" +#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" +#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" +#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" + +#define QT_FUNCTION_TARGET_STRING_F16C "f16c" +#define QT_FUNCTION_TARGET_STRING_RDRAND "rdrnd" +#define QT_FUNCTION_TARGET_STRING_BMI "bmi" +#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" +#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" +#define QT_FUNCTION_TARGET_STRING_SHA "sha" + // other x86 intrinsics #if defined(Q_PROCESSOR_X86) && ((defined(Q_CC_GNU) && (Q_CC_GNU >= 404)) \ || (defined(Q_CC_CLANG) && (Q_CC_CLANG >= 208)) \ @@ -260,23 +277,97 @@ enum CPUFeatures { CpuFeatureSSSE3 = (0 + 9), CpuFeatureSSE4_1 = (0 + 19), CpuFeatureSSE4_2 = (0 + 20), + CpuFeatureMOVBE = (0 + 22), + CpuFeaturePOPCNT = (0 + 23), CpuFeatureAES = (0 + 25), CpuFeatureAVX = (0 + 28), + CpuFeatureF16C = (0 + 29), + CpuFeatureRDRAND = (0 + 30), + // 31 is always zero and we've used it for the QSimdInitialized // in level 7, leaf 0, EBX + CpuFeatureBMI = (32 + 3), CpuFeatureHLE = (32 + 4), CpuFeatureAVX2 = (32 + 5), + CpuFeatureBMI2 = (32 + 8), CpuFeatureRTM = (32 + 11), + CpuFeatureAVX512F = (32 + 16), + CpuFeatureAVX512DQ = (32 + 17), + CpuFeatureRDSEED = (32 + 18), + CpuFeatureAVX512IFMA = (32 + 21), + CpuFeatureAVX512PF = (32 + 26), + CpuFeatureAVX512ER = (32 + 27), + CpuFeatureAVX512CD = (32 + 28), + CpuFeatureSHA = (32 + 29), + CpuFeatureAVX512BW = (32 + 30), + CpuFeatureAVX512VL = (32 + 31), + + // in level 7, leaf 0, ECX (out of order, for now) + CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64 #endif // used only to indicate that the CPU detection was initialised QSimdInitialized = 0x80000000 }; -static const uint qCompilerCpuFeatures = 0 +static const quint64 qCompilerCpuFeatures = 0 +#if defined __SHA__ + | (Q_UINT64_C(1) << CpuFeatureSHA) +#endif +#if defined __AES__ + | (Q_UINT64_C(1) << CpuFeatureAES) +#endif #if defined __RTM__ | (Q_UINT64_C(1) << CpuFeatureRTM) #endif +#ifdef __RDRND__ + | (Q_UINT64_C(1) << CpuFeatureRDRAND) +#endif +#ifdef __RDSEED__ + | (Q_UINT64_C(1) << CpuFeatureRDSEED) +#endif +#if defined __BMI__ + | (Q_UINT64_C(1) << CpuFeatureBMI) +#endif +#if defined __BMI2__ + | (Q_UINT64_C(1) << CpuFeatureBMI2) +#endif +#if defined __F16C__ + | (Q_UINT64_C(1) << CpuFeatureF16C) +#endif +#if defined __POPCNT__ + | (Q_UINT64_C(1) << CpuFeaturePOPCNT) +#endif +#if defined __MOVBE__ // GCC and Clang don't seem to define this + | (Q_UINT64_C(1) << CpuFeatureMOVBE) +#endif +#if defined __AVX512F__ + | (Q_UINT64_C(1) << CpuFeatureAVX512F) +#endif +#if defined __AVX512CD__ + | (Q_UINT64_C(1) << CpuFeatureAVX512CD) +#endif +#if defined __AVX512ER__ + | (Q_UINT64_C(1) << CpuFeatureAVX512ER) +#endif +#if defined __AVX512PF__ + | (Q_UINT64_C(1) << CpuFeatureAVX512PF) +#endif +#if defined __AVX512BW__ + | (Q_UINT64_C(1) << CpuFeatureAVX512BW) +#endif +#if defined __AVX512DQ__ + | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) +#endif +#if defined __AVX512VL__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VL) +#endif +#if defined __AVX512IFMA__ + | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) +#endif +#if defined __AVX512VBMI__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) +#endif #if defined __AVX2__ | (Q_UINT64_C(1) << CpuFeatureAVX2) #endif -- cgit v1.2.3