summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/tools/qsimd.cpp 252
-rw-r--r-- src/corelib/tools/qsimd_p.h 144
-rw-r--r-- src/corelib/tools/qsimd_x86.cpp 98
-rw-r--r-- src/corelib/tools/qsimd_x86_p.h 227
4 files changed, 393 insertions, 328 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index c4d7117449..fd9c6a7079 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -80,6 +80,43 @@
QT_BEGIN_NAMESPACE
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * we remove the terminating -1 that the script adds.
+ */
+
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
+ neon
+ crc32
+ */
+static const char features_string[] =
+ " neon\0"
+ " crc32\0"
+ "\0";
+static const int features_indices[] = { 0, 6 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+# include "qsimd_x86.cpp" // generated by util/x86simdgen
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
+// end generated
+
#if defined (Q_OS_NACL)
static inline uint detectProcessorFeatures()
{
@@ -222,29 +259,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx, uint &ecx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0;
+ qregisteruint rdx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx), "+&c" (rcx)
- : "a" (7)
- : "%edx");
+ : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
+ : "a" (7));
ebx = rbx;
ecx = rcx;
+ edx = rdx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUIDEX(7, 0, info);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#endif
}
@@ -285,8 +325,11 @@ static quint64 detectProcessorFeatures()
static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
(Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
(Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
- (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
- (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ (Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) |
+ (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS);
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
@@ -299,52 +342,33 @@ static quint64 detectProcessorFeatures()
Q_ASSERT(cpuidLevel >= 1);
#endif
- uint cpuid01ECX = 0, cpuid01EDX = 0;
- cpuidFeatures01(cpuid01ECX, cpuid01EDX);
-
- // the low 32-bits of features is cpuid01ECX
- // note: we need to check OS support for saving the AVX register state
- features = cpuid01ECX;
-
-#if defined(Q_PROCESSOR_X86_32)
- // x86 might not have SSE2 support
- if (cpuid01EDX & (1u << 26))
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
- // we should verify that the OS enabled saving of the SSE state...
-#else
- // x86-64 or x32
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
-#endif
+ uint results[X86CpuidMaxLeaf] = {};
+ cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+
+ // populate our feature list
+ for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ uint word = x86_locators[i] / 32;
+ uint bit = 1U << (x86_locators[i] % 32);
+ quint64 feature = Q_UINT64_C(1) << (i + 1);
+ if (results[word] & bit)
+ features |= feature;
+ }
+ // now check the AVX state
uint xgetbvA = 0, xgetbvD = 0;
- if (cpuid01ECX & (1u << 27)) {
+ if (results[Leaf1ECX] & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
- uint cpuid0700EBX = 0;
- uint cpuid0700ECX = 0;
- if (cpuidLevel >= 7) {
- cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
-
- // the high 32-bits of features is cpuid0700EBX
- features |= quint64(cpuid0700EBX) << 32;
- }
-
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xgetbvA & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
- } else {
- // this feature is out of order
- if (cpuid0700ECX & (1u << 1))
- features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@@ -493,152 +517,6 @@ static inline uint detectProcessorFeatures()
}
#endif
-/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
- * that the x86 version has a lot of blanks that must be kept and that the
- * offset table's type is changed to make the table smaller. We also remove the
- * terminating -1 that the script adds.
- */
-
-// begin generated
-#if defined(Q_PROCESSOR_ARM)
-/* Data:
- neon
- crc32
- */
-static const char features_string[] =
- " neon\0"
- " crc32\0"
- "\0";
-static const int features_indices[] = { 0, 6 };
-#elif defined(Q_PROCESSOR_MIPS)
-/* Data:
- dsp
- dspr2
-*/
-static const char features_string[] =
- " dsp\0"
- " dspr2\0"
- "\0";
-
-static const int features_indices[] = {
- 0, 5
-};
-#elif defined(Q_PROCESSOR_X86)
-/* Data:
- sse3
- sse2
- avx512vbmi
-
-
-
-
-
-
- ssse3
-
-
- fma
- cmpxchg16b
-
-
-
-
-
- sse4.1
- sse4.2
-
- movbe
- popcnt
-
- aes
-
-
- avx
- f16c
- rdrand
-
-
-
-
- bmi
- hle
- avx2
-
-
- bmi2
-
-
- rtm
-
-
-
-
- avx512f
- avx512dq
- rdseed
-
-
- avx512ifma
-
-
-
-
- avx512pf
- avx512er
- avx512cd
- sha
- avx512bw
- avx512vl
- */
-static const char features_string[] =
- " sse3\0"
- " sse2\0"
- " avx512vbmi\0"
- " ssse3\0"
- " fma\0"
- " cmpxchg16b\0"
- " sse4.1\0"
- " sse4.2\0"
- " movbe\0"
- " popcnt\0"
- " aes\0"
- " avx\0"
- " f16c\0"
- " rdrand\0"
- " bmi\0"
- " hle\0"
- " avx2\0"
- " bmi2\0"
- " rtm\0"
- " avx512f\0"
- " avx512dq\0"
- " rdseed\0"
- " avx512ifma\0"
- " avx512pf\0"
- " avx512er\0"
- " avx512cd\0"
- " sha\0"
- " avx512bw\0"
- " avx512vl\0"
- "\0";
-
-static const quint8 features_indices[] = {
- 0, 6, 12, 5, 5, 5, 5, 5,
- 5, 24, 5, 5, 31, 36, 5, 5,
- 5, 5, 5, 48, 56, 5, 64, 71,
- 5, 79, 5, 5, 84, 89, 95, 5,
- 5, 5, 5, 103, 108, 113, 5, 5,
- 119, 5, 5, 125, 5, 5, 5, 5,
- 130, 139, 149, 5, 5, 157, 5, 5,
- 5, 5, 169, 179, 189, 199, 204, 214
-};
-#else
-static const char features_string[] = "";
-static const int features_indices[] = { };
-#endif
-// end generated
-
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 3161ee7412..1b7ed57fa8 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -179,6 +179,7 @@
#ifdef Q_PROCESSOR_X86
/* -- x86 intrinsic support -- */
+# include "qsimd_x86_p.h"
# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
// MSVC doesn't define __SSE2__, so do it ourselves
@@ -232,33 +233,6 @@
# define __RDRND__ 1
# endif
-#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
-#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
-#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
-#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
-#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
-#define QT_FUNCTION_TARGET_STRING_AVX "avx"
-#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
-#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
-#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
-#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
-#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
-#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
-#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
-#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
-#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
-#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
-
-#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_PCLMUL "pclmul,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
-#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
-#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
-#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
-#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
-#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
-#define QT_FUNCTION_TARGET_STRING_SHA "sha"
-
#endif /* Q_PROCESSOR_X86 */
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
@@ -292,6 +266,7 @@
QT_BEGIN_NAMESPACE
+#ifndef Q_PROCESSOR_X86
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
CpuFeatureNEON = 0,
@@ -300,42 +275,6 @@ enum CPUFeatures {
#elif defined(Q_PROCESSOR_MIPS)
CpuFeatureDSP = 0,
CpuFeatureDSPR2 = 1,
-#elif defined(Q_PROCESSOR_X86)
- // The order of the flags is jumbled so it matches most closely the bits in CPUID
- // Out of order:
- CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ
- // in level 1, ECX
- CpuFeatureSSE3 = (0 + 0),
- CpuFeatureSSSE3 = (0 + 9),
- CpuFeatureSSE4_1 = (0 + 19),
- CpuFeatureSSE4_2 = (0 + 20),
- CpuFeatureMOVBE = (0 + 22),
- CpuFeaturePOPCNT = (0 + 23),
- CpuFeatureAES = (0 + 25),
- CpuFeatureAVX = (0 + 28),
- CpuFeatureF16C = (0 + 29),
- CpuFeatureRDRND = (0 + 30),
- // 31 is always zero and we've used it for the QSimdInitialized
-
- // in level 7, leaf 0, EBX
- CpuFeatureBMI = (32 + 3),
- CpuFeatureHLE = (32 + 4),
- CpuFeatureAVX2 = (32 + 5),
- CpuFeatureBMI2 = (32 + 8),
- CpuFeatureRTM = (32 + 11),
- CpuFeatureAVX512F = (32 + 16),
- CpuFeatureAVX512DQ = (32 + 17),
- CpuFeatureRDSEED = (32 + 18),
- CpuFeatureAVX512IFMA = (32 + 21),
- CpuFeatureAVX512PF = (32 + 26),
- CpuFeatureAVX512ER = (32 + 27),
- CpuFeatureAVX512CD = (32 + 28),
- CpuFeatureSHA = (32 + 29),
- CpuFeatureAVX512BW = (32 + 30),
- CpuFeatureAVX512VL = (32 + 31),
-
- // in level 7, leaf 0, ECX (out of order, for now)
- CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
#endif
// used only to indicate that the CPU detection was initialised
@@ -343,84 +282,6 @@ enum CPUFeatures {
};
static const quint64 qCompilerCpuFeatures = 0
-#if defined __SHA__
- | (Q_UINT64_C(1) << CpuFeatureSHA)
-#endif
-#if defined __AES__
- | (Q_UINT64_C(1) << CpuFeatureAES)
-#endif
-#if defined __RTM__
- | (Q_UINT64_C(1) << CpuFeatureRTM)
-#endif
-#ifdef __RDRND__
- | (Q_UINT64_C(1) << CpuFeatureRDRND)
-#endif
-#ifdef __RDSEED__
- | (Q_UINT64_C(1) << CpuFeatureRDSEED)
-#endif
-#if defined __BMI__
- | (Q_UINT64_C(1) << CpuFeatureBMI)
-#endif
-#if defined __BMI2__
- | (Q_UINT64_C(1) << CpuFeatureBMI2)
-#endif
-#if defined __F16C__
- | (Q_UINT64_C(1) << CpuFeatureF16C)
-#endif
-#if defined __POPCNT__
- | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
-#endif
-#if defined __MOVBE__ // GCC and Clang don't seem to define this
- | (Q_UINT64_C(1) << CpuFeatureMOVBE)
-#endif
-#if defined __AVX512F__
- | (Q_UINT64_C(1) << CpuFeatureAVX512F)
-#endif
-#if defined __AVX512CD__
- | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
-#endif
-#if defined __AVX512ER__
- | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
-#endif
-#if defined __AVX512PF__
- | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
-#endif
-#if defined __AVX512BW__
- | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
-#endif
-#if defined __AVX512DQ__
- | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
-#endif
-#if defined __AVX512VL__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
-#endif
-#if defined __AVX512IFMA__
- | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
-#endif
-#if defined __AVX512VBMI__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
-#endif
-#if defined __AVX2__
- | (Q_UINT64_C(1) << CpuFeatureAVX2)
-#endif
-#if defined __AVX__
- | (Q_UINT64_C(1) << CpuFeatureAVX)
-#endif
-#if defined __SSE4_2__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
-#endif
-#if defined __SSE4_1__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
-#endif
-#if defined __SSSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSSE3)
-#endif
-#if defined __SSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSE3)
-#endif
-#if defined __SSE2__
- | (Q_UINT64_C(1) << CpuFeatureSSE2)
-#endif
#if defined __ARM_NEON__
| (Q_UINT64_C(1) << CpuFeatureNEON)
#endif
@@ -434,6 +295,7 @@ static const quint64 qCompilerCpuFeatures = 0
| (Q_UINT64_C(1) << CpuFeatureDSPR2)
#endif
;
+#endif
#ifdef QT_BOOTSTRAPPED
static inline quint64 qCpuFeatures()
diff --git a/src/corelib/tools/qsimd_x86.cpp b/src/corelib/tools/qsimd_x86.cpp
new file mode 100644
index 0000000000..8275f964d8
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86.cpp
@@ -0,0 +1,98 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#include <qglobal.h>
+
+static const char features_string[] = // feature names packed back to back; each entry is a space, the name, then a NUL
+ " sse2\0"
+ " sse3\0"
+ " ssse3\0"
+ " fma\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
+ " avx\0"
+ " f16c\0"
+ " rdrnd\0"
+ " bmi\0"
+ " hle\0"
+ " avx2\0"
+ " bmi2\0"
+ " rtm\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
+ " avx512vbmi\0"
+ " avx512vbmi2\0"
+ " gfni\0"
+ " vaes\0"
+ " avx512vnni\0"
+ " avx512bitalg\0"
+ " avx512vpopcntdq\0"
+ " avx5124nniw\0"
+ " avx5124fmaps\0"
+ "\0"; // trailing empty entry at byte offset 306, referenced by features_indices[0]
+
+static const quint16 features_indices[] = { // entry k = byte offset in features_string of the name whose CpuFeature* value is k
+ 306, 0, 6, 12, 19, 24, 32, 40, // entry 0 (306) is the trailing empty string: no CpuFeature* constant has value 0
+ 47, 55, 60, 65, 71, 78, 83, 88,
+ 94, 100, 105, 114, 124, 132, 144, 154,
+ 164, 174, 179, 189, 199, 211, 224, 230,
+ 236, 248, 262, 279, 292 // 292 = " avx5124fmaps", the last named entry (CpuFeatureAVX5124FMAPS = 36)
+};
+
+enum X86CpuidLeaves { // slot numbers in the CPUID results array; x86_locators stores (slot * 32 + bit)
+ Leaf1ECX, // ECX output of cpuidFeatures01()
+ Leaf1EDX, // EDX output of cpuidFeatures01()
+ Leaf7_0EBX, // EBX output of cpuidFeatures07_00(); stays zero when cpuidLevel < 7
+ Leaf7_0ECX, // ECX output of cpuidFeatures07_00(); stays zero when cpuidLevel < 7
+ Leaf7_0EDX, // EDX output of cpuidFeatures07_00(); stays zero when cpuidLevel < 7
+ X86CpuidMaxLeaf // number of slots; used as the size of the results array
+};
+
+static const quint8 x86_locators[] = { // entry i locates the CPUID bit for feature bit (i + 1): value / 32 = X86CpuidLeaves slot, value % 32 = bit in that register
+ Leaf1EDX*32 + 26, // sse2
+ Leaf1ECX*32 + 0, // sse3
+ Leaf1ECX*32 + 9, // ssse3
+ Leaf1ECX*32 + 12, // fma
+ Leaf1ECX*32 + 19, // sse4.1
+ Leaf1ECX*32 + 20, // sse4.2
+ Leaf1ECX*32 + 22, // movbe
+ Leaf1ECX*32 + 23, // popcnt
+ Leaf1ECX*32 + 25, // aes
+ Leaf1ECX*32 + 28, // avx
+ Leaf1ECX*32 + 29, // f16c
+ Leaf1ECX*32 + 30, // rdrnd
+ Leaf7_0EBX*32 + 3, // bmi
+ Leaf7_0EBX*32 + 4, // hle
+ Leaf7_0EBX*32 + 5, // avx2
+ Leaf7_0EBX*32 + 8, // bmi2
+ Leaf7_0EBX*32 + 11, // rtm
+ Leaf7_0EBX*32 + 16, // avx512f
+ Leaf7_0EBX*32 + 17, // avx512dq
+ Leaf7_0EBX*32 + 18, // rdseed
+ Leaf7_0EBX*32 + 21, // avx512ifma
+ Leaf7_0EBX*32 + 26, // avx512pf
+ Leaf7_0EBX*32 + 27, // avx512er
+ Leaf7_0EBX*32 + 28, // avx512cd
+ Leaf7_0EBX*32 + 29, // sha
+ Leaf7_0EBX*32 + 30, // avx512bw
+ Leaf7_0EBX*32 + 31, // avx512vl
+ Leaf7_0ECX*32 + 1, // avx512vbmi
+ Leaf7_0ECX*32 + 6, // avx512vbmi2
+ Leaf7_0ECX*32 + 8, // gfni
+ Leaf7_0ECX*32 + 9, // vaes
+ Leaf7_0ECX*32 + 11, // avx512vnni
+ Leaf7_0ECX*32 + 12, // avx512bitalg
+ Leaf7_0ECX*32 + 14, // avx512vpopcntdq
+ Leaf7_0EDX*32 + 2, // avx5124nniw
+ Leaf7_0EDX*32 + 3 // avx5124fmaps (largest value, 4*32 + 3 = 131, still fits in quint8)
+};
diff --git a/src/corelib/tools/qsimd_x86_p.h b/src/corelib/tools/qsimd_x86_p.h
new file mode 100644
index 0000000000..45d5f2895f
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86_p.h
@@ -0,0 +1,227 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// Macros for QT_FUNCTION_TARGET (for Clang and GCC)
+#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
+#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
+#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
+#define QT_FUNCTION_TARGET_STRING_FMA "fma"
+#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
+#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
+#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
+#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
+#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
+#define QT_FUNCTION_TARGET_STRING_AVX "avx"
+#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
+#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
+#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
+#define QT_FUNCTION_TARGET_STRING_HLE "hle"
+#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
+#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
+#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
+#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
+#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
+#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
+#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
+#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
+#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
+#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
+#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
+#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
+#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
+#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
+#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
+#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
+#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
+#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
+#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
+
+enum CPUFeatures { // CpuFeature* values are bit positions: they are used as shift counts when building 64-bit feature masks
+ // in CPUID Leaf 1, EDX:
+ CpuFeatureSSE2 = 1,
+
+ // in CPUID Leaf 1, ECX:
+ CpuFeatureSSE3 = 2,
+ CpuFeatureSSSE3 = 3,
+ CpuFeatureFMA = 4,
+ CpuFeatureSSE4_1 = 5,
+ CpuFeatureSSE4_2 = 6,
+ CpuFeatureMOVBE = 7,
+ CpuFeaturePOPCNT = 8,
+ CpuFeatureAES = 9,
+ CpuFeatureAVX = 10,
+ CpuFeatureF16C = 11,
+ CpuFeatureRDRND = 12,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EBX:
+ CpuFeatureBMI = 13,
+ CpuFeatureHLE = 14,
+ CpuFeatureAVX2 = 15,
+ CpuFeatureBMI2 = 16,
+ CpuFeatureRTM = 17,
+ CpuFeatureAVX512F = 18,
+ CpuFeatureAVX512DQ = 19,
+ CpuFeatureRDSEED = 20,
+ CpuFeatureAVX512IFMA = 21,
+ CpuFeatureAVX512PF = 22,
+ CpuFeatureAVX512ER = 23,
+ CpuFeatureAVX512CD = 24,
+ CpuFeatureSHA = 25,
+ CpuFeatureAVX512BW = 26,
+ CpuFeatureAVX512VL = 27,
+
+ // in CPUID Leaf 7, Sub-leaf 0, ECX:
+ CpuFeatureAVX512VBMI = 28,
+ CpuFeatureAVX512VBMI2 = 29,
+ CpuFeatureGFNI = 30,
+ CpuFeatureVAES = 31,
+ CpuFeatureAVX512VNNI = 32, // from here on the bit no longer fits in 32 bits; masks must be built with Q_UINT64_C(1)
+ CpuFeatureAVX512BITALG = 33,
+ CpuFeatureAVX512VPOPCNTDQ = 34,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EDX:
+ CpuFeatureAVX5124NNIW = 35,
+ CpuFeatureAVX5124FMAPS = 36,
+
+ // used only to indicate that the CPU detection was initialized
+ QSimdInitialized = 1 // NOTE(review): numerically equal to CpuFeatureSSE2; presumably a mask value (bit 0, which no feature uses) rather than a shift count - confirm at its use sites
+};
+
+static const quint64 qCompilerCpuFeatures = 0 // mask with bit CpuFeatureX set for every feature whose compiler macro (__X__) is defined for this build
+#ifdef __SSE2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE2)
+#endif
+#ifdef __SSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSE3)
+#endif
+#ifdef __SSSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSSE3)
+#endif
+#ifdef __FMA__
+ | (Q_UINT64_C(1) << CpuFeatureFMA)
+#endif
+#ifdef __SSE4_1__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
+#endif
+#ifdef __SSE4_2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
+#endif
+#ifdef __MOVBE__
+ | (Q_UINT64_C(1) << CpuFeatureMOVBE)
+#endif
+#ifdef __POPCNT__
+ | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
+#endif
+#ifdef __AES__
+ | (Q_UINT64_C(1) << CpuFeatureAES)
+#endif
+#ifdef __AVX__
+ | (Q_UINT64_C(1) << CpuFeatureAVX)
+#endif
+#ifdef __F16C__
+ | (Q_UINT64_C(1) << CpuFeatureF16C)
+#endif
+#ifdef __RDRND__
+ | (Q_UINT64_C(1) << CpuFeatureRDRND)
+#endif
+#ifdef __BMI__
+ | (Q_UINT64_C(1) << CpuFeatureBMI)
+#endif
+#ifdef __HLE__
+ | (Q_UINT64_C(1) << CpuFeatureHLE)
+#endif
+#ifdef __AVX2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX2)
+#endif
+#ifdef __BMI2__
+ | (Q_UINT64_C(1) << CpuFeatureBMI2)
+#endif
+#ifdef __RTM__
+ | (Q_UINT64_C(1) << CpuFeatureRTM)
+#endif
+#ifdef __AVX512F__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512F)
+#endif
+#ifdef __AVX512DQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
+#endif
+#ifdef __RDSEED__
+ | (Q_UINT64_C(1) << CpuFeatureRDSEED)
+#endif
+#ifdef __AVX512IFMA__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
+#endif
+#ifdef __AVX512PF__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
+#endif
+#ifdef __AVX512ER__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
+#endif
+#ifdef __AVX512CD__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
+#endif
+#ifdef __SHA__
+ | (Q_UINT64_C(1) << CpuFeatureSHA)
+#endif
+#ifdef __AVX512BW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
+#endif
+#ifdef __AVX512VL__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
+#endif
+#ifdef __AVX512VBMI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
+#endif
+#ifdef __AVX512VBMI2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2)
+#endif
+#ifdef __GFNI__
+ | (Q_UINT64_C(1) << CpuFeatureGFNI)
+#endif
+#ifdef __VAES__
+ | (Q_UINT64_C(1) << CpuFeatureVAES)
+#endif
+#ifdef __AVX512VNNI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VNNI)
+#endif
+#ifdef __AVX512BITALG__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG)
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ)
+#endif
+#ifdef __AVX5124NNIW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW)
+#endif
+#ifdef __AVX5124FMAPS__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS)
+#endif
+ ; // terminates the initializer expression started on the first line
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H
+