diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 252 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 144 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_x86.cpp | 98 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_x86_p.h | 227 |
4 files changed, 393 insertions, 328 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index c4d7117449..fd9c6a7079 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -80,6 +80,43 @@ QT_BEGIN_NAMESPACE +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note + * we remove the terminating -1 that the script adds. + */ + +// begin generated +#if defined(Q_PROCESSOR_ARM) +/* Data: + neon + crc32 + */ +static const char features_string[] = + " neon\0" + " crc32\0" + "\0"; +static const int features_indices[] = { 0, 6 }; +#elif defined(Q_PROCESSOR_MIPS) +/* Data: + dsp + dspr2 +*/ +static const char features_string[] = + " dsp\0" + " dspr2\0" + "\0"; + +static const int features_indices[] = { + 0, 5 +}; +#elif defined(Q_PROCESSOR_X86) +# include "qsimd_x86.cpp" // generated by util/x86simdgen +#else +static const char features_string[] = ""; +static const int features_indices[] = { }; +#endif +// end generated + #if defined (Q_OS_NACL) static inline uint detectProcessorFeatures() { @@ -222,29 +259,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx) inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} #endif -static void cpuidFeatures07_00(uint &ebx, uint &ecx) +static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) { #if defined(Q_CC_GNU) qregisteruint rbx; // in case it's 64-bit qregisteruint rcx = 0; + qregisteruint rdx = 0; asm ("xchg " PICreg", %0\n" "cpuid\n" "xchg " PICreg", %0\n" - : "=&r" (rbx), "+&c" (rcx) - : "a" (7) - : "%edx"); + : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx) + : "a" (7)); ebx = rbx; ecx = rcx; + edx = rdx; #elif defined(Q_OS_WIN) int info[4]; __cpuidex(info, 7, 0); ebx = info[1]; ecx = info[2]; + edx = info[3]; #elif defined(Q_CC_GHS) unsigned int info[4]; __CPUIDEX(7, 0, info); ebx = info[1]; ecx = info[2]; + edx = info[3]; #endif } @@ -285,8 +325,11 @@ static quint64 detectProcessorFeatures() static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) | (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) | (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) | - (Q_UINT64_C(1) << CpuFeatureAVX512VL) | - (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI); + (Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | + (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) | + (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) | + (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) | + (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS); static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512; static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2; @@ -299,52 +342,33 @@ static quint64 detectProcessorFeatures() Q_ASSERT(cpuidLevel >= 1); #endif - uint cpuid01ECX = 0, cpuid01EDX = 0; - cpuidFeatures01(cpuid01ECX, cpuid01EDX); - - // the low 32-bits of features is cpuid01ECX - // note: we need to check OS support for saving the AVX register state - features = cpuid01ECX; - -#if defined(Q_PROCESSOR_X86_32) - // x86 might not have SSE2 support - if (cpuid01EDX & (1u << 26)) - features |= Q_UINT64_C(1) << CpuFeatureSSE2; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2); - // we should verify that the OS enabled saving of the SSE state... -#else - // x86-64 or x32 - features |= Q_UINT64_C(1) << CpuFeatureSSE2; -#endif + uint results[X86CpuidMaxLeaf] = {}; + cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); + if (cpuidLevel >= 7) + cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); + + // populate our feature list + for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { + uint word = x86_locators[i] / 32; + uint bit = 1U << (x86_locators[i] % 32); + quint64 feature = Q_UINT64_C(1) << (i + 1); + if (results[word] & bit) + features |= feature; + } + // now check the AVX state uint xgetbvA = 0, xgetbvD = 0; - if (cpuid01ECX & (1u << 27)) { + if (results[Leaf1ECX] & (1u << 27)) { // XGETBV enabled xgetbv(0, xgetbvA, xgetbvD); } - uint cpuid0700EBX = 0; - uint cpuid0700ECX = 0; - if (cpuidLevel >= 7) { - cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX); - - // the high 32-bits of features is cpuid0700EBX - features |= quint64(cpuid0700EBX) << 32; - } - if ((xgetbvA & AVXState) != AVXState) { // support for YMM registers is disabled, disable all AVX features &= ~AllAVX; } else if ((xgetbvA & AVX512State) != AVX512State) { // support for ZMM registers or mask registers is disabled, disable all AVX512 features &= ~AllAVX512; - } else { - // this feature is out of order - if (cpuid0700ECX & (1u << 1)) - features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI; - else - features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI); } return features; @@ -493,152 +517,6 @@ static inline uint detectProcessorFeatures() } #endif -/* - * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note - * that the x86 version has a lot of blanks that must be kept and that the - * offset table's type is changed to make the table smaller. We also remove the - * terminating -1 that the script adds. - */ - -// begin generated -#if defined(Q_PROCESSOR_ARM) -/* Data: - neon - crc32 - */ -static const char features_string[] = - " neon\0" - " crc32\0" - "\0"; -static const int features_indices[] = { 0, 6 }; -#elif defined(Q_PROCESSOR_MIPS) -/* Data: - dsp - dspr2 -*/ -static const char features_string[] = - " dsp\0" - " dspr2\0" - "\0"; - -static const int features_indices[] = { - 0, 5 -}; -#elif defined(Q_PROCESSOR_X86) -/* Data: - sse3 - sse2 - avx512vbmi - - - - - - - ssse3 - - - fma - cmpxchg16b - - - - - - sse4.1 - sse4.2 - - movbe - popcnt - - aes - - - avx - f16c - rdrand - - - - - bmi - hle - avx2 - - - bmi2 - - - rtm - - - - - avx512f - avx512dq - rdseed - - - avx512ifma - - - - - avx512pf - avx512er - avx512cd - sha - avx512bw - avx512vl - */ -static const char features_string[] = - " sse3\0" - " sse2\0" - " avx512vbmi\0" - " ssse3\0" - " fma\0" - " cmpxchg16b\0" - " sse4.1\0" - " sse4.2\0" - " movbe\0" - " popcnt\0" - " aes\0" - " avx\0" - " f16c\0" - " rdrand\0" - " bmi\0" - " hle\0" - " avx2\0" - " bmi2\0" - " rtm\0" - " avx512f\0" - " avx512dq\0" - " rdseed\0" - " avx512ifma\0" - " avx512pf\0" - " avx512er\0" - " avx512cd\0" - " sha\0" - " avx512bw\0" - " avx512vl\0" - "\0"; - -static const quint8 features_indices[] = { - 0, 6, 12, 5, 5, 5, 5, 5, - 5, 24, 5, 5, 31, 36, 5, 5, - 5, 5, 5, 48, 56, 5, 64, 71, - 5, 79, 5, 5, 84, 89, 95, 5, - 5, 5, 5, 103, 108, 113, 5, 5, - 119, 5, 5, 125, 5, 5, 5, 5, - 130, 139, 149, 5, 5, 157, 5, 5, - 5, 5, 169, 179, 189, 199, 204, 214 -}; -#else -static const char features_string[] = ""; -static const int features_indices[] = { }; -#endif -// end generated - static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); // record what CPU features were enabled by default in this Qt build diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 3161ee7412..1b7ed57fa8 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -179,6 +179,7 @@ #ifdef Q_PROCESSOR_X86 /* -- x86 intrinsic support -- */ +# include "qsimd_x86_p.h" # if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2) // MSVC doesn't define __SSE2__, so do it ourselves @@ -232,33 +233,6 @@ # define __RDRND__ 1 # endif -#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" -#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" -#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" -#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" -#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" -#define QT_FUNCTION_TARGET_STRING_AVX "avx" -#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2" -#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" -#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" -#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" -#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" -#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" -#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" -#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" -#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" -#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" - -#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" -#define QT_FUNCTION_TARGET_STRING_PCLMUL "pclmul,sse4.2" -#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" -#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx" -#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" -#define QT_FUNCTION_TARGET_STRING_BMI "bmi" -#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" -#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" -#define QT_FUNCTION_TARGET_STRING_SHA "sha" - #endif /* Q_PROCESSOR_X86 */ // Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html @@ -292,6 +266,7 @@ QT_BEGIN_NAMESPACE +#ifndef Q_PROCESSOR_X86 enum CPUFeatures { #if defined(Q_PROCESSOR_ARM) CpuFeatureNEON = 0, @@ -300,42 +275,6 @@ enum CPUFeatures { #elif defined(Q_PROCESSOR_MIPS) CpuFeatureDSP = 0, CpuFeatureDSPR2 = 1, -#elif defined(Q_PROCESSOR_X86) - // The order of the flags is jumbled so it matches most closely the bits in CPUID - // Out of order: - CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ - // in level 1, ECX - CpuFeatureSSE3 = (0 + 0), - CpuFeatureSSSE3 = (0 + 9), - CpuFeatureSSE4_1 = (0 + 19), - CpuFeatureSSE4_2 = (0 + 20), - CpuFeatureMOVBE = (0 + 22), - CpuFeaturePOPCNT = (0 + 23), - CpuFeatureAES = (0 + 25), - CpuFeatureAVX = (0 + 28), - CpuFeatureF16C = (0 + 29), - CpuFeatureRDRND = (0 + 30), - // 31 is always zero and we've used it for the QSimdInitialized - - // in level 7, leaf 0, EBX - CpuFeatureBMI = (32 + 3), - CpuFeatureHLE = (32 + 4), - CpuFeatureAVX2 = (32 + 5), - CpuFeatureBMI2 = (32 + 8), - CpuFeatureRTM = (32 + 11), - CpuFeatureAVX512F = (32 + 16), - CpuFeatureAVX512DQ = (32 + 17), - CpuFeatureRDSEED = (32 + 18), - CpuFeatureAVX512IFMA = (32 + 21), - CpuFeatureAVX512PF = (32 + 26), - CpuFeatureAVX512ER = (32 + 27), - CpuFeatureAVX512CD = (32 + 28), - CpuFeatureSHA = (32 + 29), - CpuFeatureAVX512BW = (32 + 30), - CpuFeatureAVX512VL = (32 + 31), - - // in level 7, leaf 0, ECX (out of order, for now) - CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64 #endif // used only to indicate that the CPU detection was initialised @@ -343,84 +282,6 @@ enum CPUFeatures { }; static const quint64 qCompilerCpuFeatures = 0 -#if defined __SHA__ - | (Q_UINT64_C(1) << CpuFeatureSHA) -#endif -#if defined __AES__ - | (Q_UINT64_C(1) << CpuFeatureAES) -#endif -#if defined __RTM__ - | (Q_UINT64_C(1) << CpuFeatureRTM) -#endif -#ifdef __RDRND__ - | (Q_UINT64_C(1) << CpuFeatureRDRND) -#endif -#ifdef __RDSEED__ - | (Q_UINT64_C(1) << CpuFeatureRDSEED) -#endif -#if defined __BMI__ - | (Q_UINT64_C(1) << CpuFeatureBMI) -#endif -#if defined __BMI2__ - | (Q_UINT64_C(1) << CpuFeatureBMI2) -#endif -#if defined __F16C__ - | (Q_UINT64_C(1) << CpuFeatureF16C) -#endif -#if defined __POPCNT__ - | (Q_UINT64_C(1) << CpuFeaturePOPCNT) -#endif -#if defined __MOVBE__ // GCC and Clang don't seem to define this - | (Q_UINT64_C(1) << CpuFeatureMOVBE) -#endif -#if defined __AVX512F__ - | (Q_UINT64_C(1) << CpuFeatureAVX512F) -#endif -#if defined __AVX512CD__ - | (Q_UINT64_C(1) << CpuFeatureAVX512CD) -#endif -#if defined __AVX512ER__ - | (Q_UINT64_C(1) << CpuFeatureAVX512ER) -#endif -#if defined __AVX512PF__ - | (Q_UINT64_C(1) << CpuFeatureAVX512PF) -#endif -#if defined __AVX512BW__ - | (Q_UINT64_C(1) << CpuFeatureAVX512BW) -#endif -#if defined __AVX512DQ__ - | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) -#endif -#if defined __AVX512VL__ - | (Q_UINT64_C(1) << CpuFeatureAVX512VL) -#endif -#if defined __AVX512IFMA__ - | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) -#endif -#if defined __AVX512VBMI__ - | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) -#endif -#if defined __AVX2__ - | (Q_UINT64_C(1) << CpuFeatureAVX2) -#endif -#if defined __AVX__ - | (Q_UINT64_C(1) << CpuFeatureAVX) -#endif -#if defined __SSE4_2__ - | (Q_UINT64_C(1) << CpuFeatureSSE4_2) -#endif -#if defined __SSE4_1__ - | (Q_UINT64_C(1) << CpuFeatureSSE4_1) -#endif -#if defined __SSSE3__ - | (Q_UINT64_C(1) << CpuFeatureSSSE3) -#endif -#if defined __SSE3__ - | (Q_UINT64_C(1) << CpuFeatureSSE3) -#endif -#if defined __SSE2__ - | (Q_UINT64_C(1) << CpuFeatureSSE2) -#endif #if defined __ARM_NEON__ | (Q_UINT64_C(1) << CpuFeatureNEON) #endif @@ -434,6 +295,7 @@ static const quint64 qCompilerCpuFeatures = 0 | (Q_UINT64_C(1) << CpuFeatureDSPR2) #endif ; +#endif #ifdef QT_BOOTSTRAPPED static inline quint64 qCpuFeatures() diff --git a/src/corelib/tools/qsimd_x86.cpp b/src/corelib/tools/qsimd_x86.cpp new file mode 100644 index 0000000000..8275f964d8 --- /dev/null +++ b/src/corelib/tools/qsimd_x86.cpp @@ -0,0 +1,98 @@ +// This is a generated file. DO NOT EDIT. +// Please see util/x86simdgen/generate.pl +#include <qglobal.h> + +static const char features_string[] = + " sse2\0" + " sse3\0" + " ssse3\0" + " fma\0" + " sse4.1\0" + " sse4.2\0" + " movbe\0" + " popcnt\0" + " aes\0" + " avx\0" + " f16c\0" + " rdrnd\0" + " bmi\0" + " hle\0" + " avx2\0" + " bmi2\0" + " rtm\0" + " avx512f\0" + " avx512dq\0" + " rdseed\0" + " avx512ifma\0" + " avx512pf\0" + " avx512er\0" + " avx512cd\0" + " sha\0" + " avx512bw\0" + " avx512vl\0" + " avx512vbmi\0" + " avx512vbmi2\0" + " gfni\0" + " vaes\0" + " avx512vnni\0" + " avx512bitalg\0" + " avx512vpopcntdq\0" + " avx5124nniw\0" + " avx5124fmaps\0" + "\0"; + +static const quint16 features_indices[] = { + 306, 0, 6, 12, 19, 24, 32, 40, + 47, 55, 60, 65, 71, 78, 83, 88, + 94, 100, 105, 114, 124, 132, 144, 154, + 164, 174, 179, 189, 199, 211, 224, 230, + 236, 248, 262, 279, 292 +}; + +enum X86CpuidLeaves { + Leaf1ECX, + Leaf1EDX, + Leaf7_0EBX, + Leaf7_0ECX, + Leaf7_0EDX, + X86CpuidMaxLeaf +}; + +static const quint8 x86_locators[] = { + Leaf1EDX*32 + 26, // sse2 + Leaf1ECX*32 + 0, // sse3 + Leaf1ECX*32 + 9, // ssse3 + Leaf1ECX*32 + 12, // fma + Leaf1ECX*32 + 19, // sse4.1 + Leaf1ECX*32 + 20, // sse4.2 + Leaf1ECX*32 + 22, // movbe + Leaf1ECX*32 + 23, // popcnt + Leaf1ECX*32 + 25, // aes + Leaf1ECX*32 + 28, // avx + Leaf1ECX*32 + 29, // f16c + Leaf1ECX*32 + 30, // rdrnd + Leaf7_0EBX*32 + 3, // bmi + Leaf7_0EBX*32 + 4, // hle + Leaf7_0EBX*32 + 5, // avx2 + Leaf7_0EBX*32 + 8, // bmi2 + Leaf7_0EBX*32 + 11, // rtm + Leaf7_0EBX*32 + 16, // avx512f + Leaf7_0EBX*32 + 17, // avx512dq + Leaf7_0EBX*32 + 18, // rdseed + Leaf7_0EBX*32 + 21, // avx512ifma + Leaf7_0EBX*32 + 26, // avx512pf + Leaf7_0EBX*32 + 27, // avx512er + Leaf7_0EBX*32 + 28, // avx512cd + Leaf7_0EBX*32 + 29, // sha + Leaf7_0EBX*32 + 30, // avx512bw + Leaf7_0EBX*32 + 31, // avx512vl + Leaf7_0ECX*32 + 1, // avx512vbmi + Leaf7_0ECX*32 + 6, // avx512vbmi2 + Leaf7_0ECX*32 + 8, // gfni + Leaf7_0ECX*32 + 9, // vaes + Leaf7_0ECX*32 + 11, // avx512vnni + Leaf7_0ECX*32 + 12, // avx512bitalg + Leaf7_0ECX*32 + 14, // avx512vpopcntdq + Leaf7_0EDX*32 + 2, // avx5124nniw + Leaf7_0EDX*32 + 3 // avx5124fmaps +}; diff --git a/src/corelib/tools/qsimd_x86_p.h b/src/corelib/tools/qsimd_x86_p.h new file mode 100644 index 0000000000..45d5f2895f --- /dev/null +++ b/src/corelib/tools/qsimd_x86_p.h @@ -0,0 +1,227 @@ +// This is a generated file. DO NOT EDIT. +// Please see util/x86simdgen/generate.pl +#ifndef QSIMD_P_H +# error "Please include <private/qsimd_p.h> instead" +#endif +#ifndef QSIMD_X86_P_H +#define QSIMD_X86_P_H + +#include "qsimd_p.h" + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. +// + +QT_BEGIN_NAMESPACE + +// Macros for QT_FUNCTION_TARGET (for Clang and GCC) +#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" +#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" +#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3" +#define QT_FUNCTION_TARGET_STRING_FMA "fma" +#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1" +#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2" +#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe" +#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt" +#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2" +#define QT_FUNCTION_TARGET_STRING_AVX "avx" +#define QT_FUNCTION_TARGET_STRING_F16C "f16c" +#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd" +#define QT_FUNCTION_TARGET_STRING_BMI "bmi" +#define QT_FUNCTION_TARGET_STRING_HLE "hle" +#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2" +#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2" +#define QT_FUNCTION_TARGET_STRING_RTM "rtm" +#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f" +#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq" +#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" +#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma" +#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf" +#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er" +#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd" +#define QT_FUNCTION_TARGET_STRING_SHA "sha" +#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw" +#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl" +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi" +#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2" +#define QT_FUNCTION_TARGET_STRING_GFNI "gfni" +#define QT_FUNCTION_TARGET_STRING_VAES "vaes" +#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni" +#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg" +#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq" +#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw" +#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps" + +enum CPUFeatures { + // in CPUID Leaf 1, EDX: + CpuFeatureSSE2 = 1, + + // in CPUID Leaf 1, ECX: + CpuFeatureSSE3 = 2, + CpuFeatureSSSE3 = 3, + CpuFeatureFMA = 4, + CpuFeatureSSE4_1 = 5, + CpuFeatureSSE4_2 = 6, + CpuFeatureMOVBE = 7, + CpuFeaturePOPCNT = 8, + CpuFeatureAES = 9, + CpuFeatureAVX = 10, + CpuFeatureF16C = 11, + CpuFeatureRDRND = 12, + + // in CPUID Leaf 7, Sub-leaf 0, EBX: + CpuFeatureBMI = 13, + CpuFeatureHLE = 14, + CpuFeatureAVX2 = 15, + CpuFeatureBMI2 = 16, + CpuFeatureRTM = 17, + CpuFeatureAVX512F = 18, + CpuFeatureAVX512DQ = 19, + CpuFeatureRDSEED = 20, + CpuFeatureAVX512IFMA = 21, + CpuFeatureAVX512PF = 22, + CpuFeatureAVX512ER = 23, + CpuFeatureAVX512CD = 24, + CpuFeatureSHA = 25, + CpuFeatureAVX512BW = 26, + CpuFeatureAVX512VL = 27, + + // in CPUID Leaf 7, Sub-leaf 0, ECX: + CpuFeatureAVX512VBMI = 28, + CpuFeatureAVX512VBMI2 = 29, + CpuFeatureGFNI = 30, + CpuFeatureVAES = 31, + CpuFeatureAVX512VNNI = 32, + CpuFeatureAVX512BITALG = 33, + CpuFeatureAVX512VPOPCNTDQ = 34, + + // in CPUID Leaf 7, Sub-leaf 0, EDX: + CpuFeatureAVX5124NNIW = 35, + CpuFeatureAVX5124FMAPS = 36, + + // used only to indicate that the CPU detection was initialized + QSimdInitialized = 1 +}; + +static const quint64 qCompilerCpuFeatures = 0 +#ifdef __SSE2__ + | (Q_UINT64_C(1) << CpuFeatureSSE2) +#endif +#ifdef __SSE3__ + | (Q_UINT64_C(1) << CpuFeatureSSE3) +#endif +#ifdef __SSSE3__ + | (Q_UINT64_C(1) << CpuFeatureSSSE3) +#endif +#ifdef __FMA__ + | (Q_UINT64_C(1) << CpuFeatureFMA) +#endif +#ifdef __SSE4_1__ + | (Q_UINT64_C(1) << CpuFeatureSSE4_1) +#endif +#ifdef __SSE4_2__ + | (Q_UINT64_C(1) << CpuFeatureSSE4_2) +#endif +#ifdef __MOVBE__ + | (Q_UINT64_C(1) << CpuFeatureMOVBE) +#endif +#ifdef __POPCNT__ + | (Q_UINT64_C(1) << CpuFeaturePOPCNT) +#endif +#ifdef __AES__ + | (Q_UINT64_C(1) << CpuFeatureAES) +#endif +#ifdef __AVX__ + | (Q_UINT64_C(1) << CpuFeatureAVX) +#endif +#ifdef __F16C__ + | (Q_UINT64_C(1) << CpuFeatureF16C) +#endif +#ifdef __RDRND__ + | (Q_UINT64_C(1) << CpuFeatureRDRND) +#endif +#ifdef __BMI__ + | (Q_UINT64_C(1) << CpuFeatureBMI) +#endif +#ifdef __HLE__ + | (Q_UINT64_C(1) << CpuFeatureHLE) +#endif +#ifdef __AVX2__ + | (Q_UINT64_C(1) << CpuFeatureAVX2) +#endif +#ifdef __BMI2__ + | (Q_UINT64_C(1) << CpuFeatureBMI2) +#endif +#ifdef __RTM__ + | (Q_UINT64_C(1) << CpuFeatureRTM) +#endif +#ifdef __AVX512F__ + | (Q_UINT64_C(1) << CpuFeatureAVX512F) +#endif +#ifdef __AVX512DQ__ + | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) +#endif +#ifdef __RDSEED__ + | (Q_UINT64_C(1) << CpuFeatureRDSEED) +#endif +#ifdef __AVX512IFMA__ + | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) +#endif +#ifdef __AVX512PF__ + | (Q_UINT64_C(1) << CpuFeatureAVX512PF) +#endif +#ifdef __AVX512ER__ + | (Q_UINT64_C(1) << CpuFeatureAVX512ER) +#endif +#ifdef __AVX512CD__ + | (Q_UINT64_C(1) << CpuFeatureAVX512CD) +#endif +#ifdef __SHA__ + | (Q_UINT64_C(1) << CpuFeatureSHA) +#endif +#ifdef __AVX512BW__ + | (Q_UINT64_C(1) << CpuFeatureAVX512BW) +#endif +#ifdef __AVX512VL__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VL) +#endif +#ifdef __AVX512VBMI__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) +#endif +#ifdef __AVX512VBMI2__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) +#endif +#ifdef __GFNI__ + | (Q_UINT64_C(1) << CpuFeatureGFNI) +#endif +#ifdef __VAES__ + | (Q_UINT64_C(1) << CpuFeatureVAES) +#endif +#ifdef __AVX512VNNI__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) +#endif +#ifdef __AVX512BITALG__ + | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) +#endif +#ifdef __AVX512VPOPCNTDQ__ + | (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) +#endif +#ifdef __AVX5124NNIW__ + | (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) +#endif +#ifdef __AVX5124FMAPS__ + | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS) +#endif + ; + +QT_END_NAMESPACE + +#endif // QSIMD_X86_P_H + |