summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2017-09-17 12:39:35 -0700
committerThiago Macieira <thiago.macieira@intel.com>2018-05-05 06:20:07 +0000
commitcf63b0e1dfc0bf3d11a92c5bf82840ddb6bb22ac (patch)
treeb4aa1443bc4202f12dc1804606d3125851af8bc8
parent6e1190053d28877b2e90375056735429525e2ee7 (diff)
qsimd: add support for new x86 CPU features
This adds detection for: VAES, GFNI, AVX512VBMI2, AVX512VNNI, AVX512BITALG, AVX512VPOPCNTDQ, AVX512_4NNIW, AVX512_4FMAPS. These features were found in the "Intel® Architecture Instruction Set Extensions and Future Features" manual, revision 30. This commit also adds support for RDPID (already in the main manual) and the Control-flow Enforcement Technology, which appears in a separate Intel paper. This new support was done by adding a new generator script so we don't have to maintain two tables in sync, one in qsimd.cpp with the feature names, and the other in qsimd_p.h. Since we now need a lot more bits, it's no longer worth keeping the two halves of the qt_cpu_features variable mostly similar to the main two CPUID results. This commit goes back to keeping things in order, like we used to prior to commit 6a8251a89b6a61258498f4af1ba7b3d5b7f7096c (Qt 5.6). At the time of this commit, GCC 8 has macros for AVX512VPOPCNTDQ, AVX512_4NNIW, AVX512_4FMAPS, AVX512VBMI2 and GFNI. Change-Id: I938b024e38bf4aac9154fffd14f7afae50faaa96 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Lars Knoll <lars.knoll@qt.io>
-rw-r--r--config.tests/arch/arch.cpp34
-rw-r--r--header.MIT28
-rw-r--r--src/corelib/tools/qsimd.cpp252
-rw-r--r--src/corelib/tools/qsimd_p.h144
-rw-r--r--src/corelib/tools/qsimd_x86.cpp98
-rw-r--r--src/corelib/tools/qsimd_x86_p.h227
-rwxr-xr-xutil/x86simdgen/generate.pl192
-rw-r--r--util/x86simdgen/simd.txt37
8 files changed, 683 insertions, 329 deletions
diff --git a/config.tests/arch/arch.cpp b/config.tests/arch/arch.cpp
index bb3efec177..2be3630af8 100644
--- a/config.tests/arch/arch.cpp
+++ b/config.tests/arch/arch.cpp
@@ -115,6 +115,22 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// AVX512 Vector Byte Manipulation Instructions, Intel processor codename "Cannonlake"
" avx512vbmi"
#endif
+#ifdef __AVX512VBMI2__
+// AVX512 Vector Byte Manipulation Instructions #2, Intel processor codename "Ice Lake"
+" avx512vbmi2"
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+// AVX512 Vector Population Count Double & Quad, Future Intel Xeon Phi processor codename "Knights Mill", Intel processor codename "Ice Lake"
+" avx512vpopcntdq"
+#endif
+#ifdef __AVX5124FMAPS__
+// AVX512 4-iteration Fused Multiply Accumulation Packed Single, Future Intel Xeon Phi processor codename "Knights Mill"
+" avx5124fmaps"
+#endif
+#ifdef __AVX5124VNNIW__
+// AVX512 4-iteration Vector Neural Network Instructions Word, Future Intel Xeon Phi processor codename "Knights Mill"
+" avx5124vnniw"
+#endif
#ifdef __BMI__
// Bit Manipulation Instructions 1, Intel Core 4th Generation ("Haswell"), AMD "Bulldozer 2"
" bmi"
@@ -145,6 +161,14 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// rdfsgsbase, wrfsgsbase, Intel Core 3rd Generation ("Ivy Bridge")
" fsgsbase"
#endif
+#ifdef __GFNI__
+// Galois Field new instructions, Intel processor codename "Ice Lake"
+" gfni"
+#endif
+#ifdef __IBT__
+// Indirect Branch Tracking, Intel processor TBA
+" ibt"
+#endif
#ifdef __LWP__
// LWP instructions, AMD "Bulldozer"
" lwp"
@@ -186,6 +210,10 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// Prefetch data for writing, Intel Core 5th Generation ("Broadwell")
" prfchw"
#endif
+#ifdef __RDPID__
+// Read Processor ID, Intel processors codename "Ice Lake" and "Goldmont Plus"
+" rdpid"
+#endif
#ifdef __RDRND__
// Random number generator, Intel Core 3rd Generation ("Ivy Bridge")
" rdrnd"
@@ -199,9 +227,13 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
" rtm"
#endif
#ifdef __SHA__
-// SHA-1 and SHA-256 instructions, Intel processor TBA
+// SHA-1 and SHA-256 instructions, Intel processors codename "Cannon Lake" and "Goldmont"
" sha"
#endif
+#ifdef __SHSTK__
+// Shadow stack, Intel processor TBA
+" shstk"
+#endif
#if defined(__SSE__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) || defined(_M_X64)
// Streaming SIMD Extensions, Intel Pentium III, AMD Athlon
" sse"
diff --git a/header.MIT b/header.MIT
new file mode 100644
index 0000000000..df431dda02
--- /dev/null
+++ b/header.MIT
@@ -0,0 +1,28 @@
+/****************************************************************************
+**
+** Copyright (C) YYYY Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the FOO module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:MIT$
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and associated documentation files (the "Software"), to deal
+** in the Software without restriction, including without limitation the rights
+** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+** copies of the Software, and to permit persons to whom the Software is
+** furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+** THE SOFTWARE.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index c4d7117449..fd9c6a7079 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -80,6 +80,43 @@
QT_BEGIN_NAMESPACE
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * we remove the terminating -1 that the script adds.
+ */
+
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
+ neon
+ crc32
+ */
+static const char features_string[] =
+ " neon\0"
+ " crc32\0"
+ "\0";
+static const int features_indices[] = { 0, 6 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+# include "qsimd_x86.cpp" // generated by util/x86simdgen
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
+// end generated
+
#if defined (Q_OS_NACL)
static inline uint detectProcessorFeatures()
{
@@ -222,29 +259,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx, uint &ecx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0;
+ qregisteruint rdx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx), "+&c" (rcx)
- : "a" (7)
- : "%edx");
+ : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
+ : "a" (7));
ebx = rbx;
ecx = rcx;
+ edx = rdx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUIDEX(7, 0, info);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#endif
}
@@ -285,8 +325,11 @@ static quint64 detectProcessorFeatures()
static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
(Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
(Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
- (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
- (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ (Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) |
+ (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS);
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
@@ -299,52 +342,33 @@ static quint64 detectProcessorFeatures()
Q_ASSERT(cpuidLevel >= 1);
#endif
- uint cpuid01ECX = 0, cpuid01EDX = 0;
- cpuidFeatures01(cpuid01ECX, cpuid01EDX);
-
- // the low 32-bits of features is cpuid01ECX
- // note: we need to check OS support for saving the AVX register state
- features = cpuid01ECX;
-
-#if defined(Q_PROCESSOR_X86_32)
- // x86 might not have SSE2 support
- if (cpuid01EDX & (1u << 26))
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
- // we should verify that the OS enabled saving of the SSE state...
-#else
- // x86-64 or x32
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
-#endif
+ uint results[X86CpuidMaxLeaf] = {};
+ cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+
+ // populate our feature list
+ for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ uint word = x86_locators[i] / 32;
+ uint bit = 1U << (x86_locators[i] % 32);
+ quint64 feature = Q_UINT64_C(1) << (i + 1);
+ if (results[word] & bit)
+ features |= feature;
+ }
+ // now check the AVX state
uint xgetbvA = 0, xgetbvD = 0;
- if (cpuid01ECX & (1u << 27)) {
+ if (results[Leaf1ECX] & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
- uint cpuid0700EBX = 0;
- uint cpuid0700ECX = 0;
- if (cpuidLevel >= 7) {
- cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
-
- // the high 32-bits of features is cpuid0700EBX
- features |= quint64(cpuid0700EBX) << 32;
- }
-
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xgetbvA & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
- } else {
- // this feature is out of order
- if (cpuid0700ECX & (1u << 1))
- features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@@ -493,152 +517,6 @@ static inline uint detectProcessorFeatures()
}
#endif
-/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
- * that the x86 version has a lot of blanks that must be kept and that the
- * offset table's type is changed to make the table smaller. We also remove the
- * terminating -1 that the script adds.
- */
-
-// begin generated
-#if defined(Q_PROCESSOR_ARM)
-/* Data:
- neon
- crc32
- */
-static const char features_string[] =
- " neon\0"
- " crc32\0"
- "\0";
-static const int features_indices[] = { 0, 6 };
-#elif defined(Q_PROCESSOR_MIPS)
-/* Data:
- dsp
- dspr2
-*/
-static const char features_string[] =
- " dsp\0"
- " dspr2\0"
- "\0";
-
-static const int features_indices[] = {
- 0, 5
-};
-#elif defined(Q_PROCESSOR_X86)
-/* Data:
- sse3
- sse2
- avx512vbmi
-
-
-
-
-
-
- ssse3
-
-
- fma
- cmpxchg16b
-
-
-
-
-
- sse4.1
- sse4.2
-
- movbe
- popcnt
-
- aes
-
-
- avx
- f16c
- rdrand
-
-
-
-
- bmi
- hle
- avx2
-
-
- bmi2
-
-
- rtm
-
-
-
-
- avx512f
- avx512dq
- rdseed
-
-
- avx512ifma
-
-
-
-
- avx512pf
- avx512er
- avx512cd
- sha
- avx512bw
- avx512vl
- */
-static const char features_string[] =
- " sse3\0"
- " sse2\0"
- " avx512vbmi\0"
- " ssse3\0"
- " fma\0"
- " cmpxchg16b\0"
- " sse4.1\0"
- " sse4.2\0"
- " movbe\0"
- " popcnt\0"
- " aes\0"
- " avx\0"
- " f16c\0"
- " rdrand\0"
- " bmi\0"
- " hle\0"
- " avx2\0"
- " bmi2\0"
- " rtm\0"
- " avx512f\0"
- " avx512dq\0"
- " rdseed\0"
- " avx512ifma\0"
- " avx512pf\0"
- " avx512er\0"
- " avx512cd\0"
- " sha\0"
- " avx512bw\0"
- " avx512vl\0"
- "\0";
-
-static const quint8 features_indices[] = {
- 0, 6, 12, 5, 5, 5, 5, 5,
- 5, 24, 5, 5, 31, 36, 5, 5,
- 5, 5, 5, 48, 56, 5, 64, 71,
- 5, 79, 5, 5, 84, 89, 95, 5,
- 5, 5, 5, 103, 108, 113, 5, 5,
- 119, 5, 5, 125, 5, 5, 5, 5,
- 130, 139, 149, 5, 5, 157, 5, 5,
- 5, 5, 169, 179, 189, 199, 204, 214
-};
-#else
-static const char features_string[] = "";
-static const int features_indices[] = { };
-#endif
-// end generated
-
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 3161ee7412..1b7ed57fa8 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -179,6 +179,7 @@
#ifdef Q_PROCESSOR_X86
/* -- x86 intrinsic support -- */
+# include "qsimd_x86_p.h"
# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
// MSVC doesn't define __SSE2__, so do it ourselves
@@ -232,33 +233,6 @@
# define __RDRND__ 1
# endif
-#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
-#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
-#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
-#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
-#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
-#define QT_FUNCTION_TARGET_STRING_AVX "avx"
-#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
-#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
-#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
-#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
-#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
-#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
-#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
-#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
-#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
-#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
-
-#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_PCLMUL "pclmul,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
-#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
-#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
-#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
-#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
-#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
-#define QT_FUNCTION_TARGET_STRING_SHA "sha"
-
#endif /* Q_PROCESSOR_X86 */
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
@@ -292,6 +266,7 @@
QT_BEGIN_NAMESPACE
+#ifndef Q_PROCESSOR_X86
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
CpuFeatureNEON = 0,
@@ -300,42 +275,6 @@ enum CPUFeatures {
#elif defined(Q_PROCESSOR_MIPS)
CpuFeatureDSP = 0,
CpuFeatureDSPR2 = 1,
-#elif defined(Q_PROCESSOR_X86)
- // The order of the flags is jumbled so it matches most closely the bits in CPUID
- // Out of order:
- CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ
- // in level 1, ECX
- CpuFeatureSSE3 = (0 + 0),
- CpuFeatureSSSE3 = (0 + 9),
- CpuFeatureSSE4_1 = (0 + 19),
- CpuFeatureSSE4_2 = (0 + 20),
- CpuFeatureMOVBE = (0 + 22),
- CpuFeaturePOPCNT = (0 + 23),
- CpuFeatureAES = (0 + 25),
- CpuFeatureAVX = (0 + 28),
- CpuFeatureF16C = (0 + 29),
- CpuFeatureRDRND = (0 + 30),
- // 31 is always zero and we've used it for the QSimdInitialized
-
- // in level 7, leaf 0, EBX
- CpuFeatureBMI = (32 + 3),
- CpuFeatureHLE = (32 + 4),
- CpuFeatureAVX2 = (32 + 5),
- CpuFeatureBMI2 = (32 + 8),
- CpuFeatureRTM = (32 + 11),
- CpuFeatureAVX512F = (32 + 16),
- CpuFeatureAVX512DQ = (32 + 17),
- CpuFeatureRDSEED = (32 + 18),
- CpuFeatureAVX512IFMA = (32 + 21),
- CpuFeatureAVX512PF = (32 + 26),
- CpuFeatureAVX512ER = (32 + 27),
- CpuFeatureAVX512CD = (32 + 28),
- CpuFeatureSHA = (32 + 29),
- CpuFeatureAVX512BW = (32 + 30),
- CpuFeatureAVX512VL = (32 + 31),
-
- // in level 7, leaf 0, ECX (out of order, for now)
- CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
#endif
// used only to indicate that the CPU detection was initialised
@@ -343,84 +282,6 @@ enum CPUFeatures {
};
static const quint64 qCompilerCpuFeatures = 0
-#if defined __SHA__
- | (Q_UINT64_C(1) << CpuFeatureSHA)
-#endif
-#if defined __AES__
- | (Q_UINT64_C(1) << CpuFeatureAES)
-#endif
-#if defined __RTM__
- | (Q_UINT64_C(1) << CpuFeatureRTM)
-#endif
-#ifdef __RDRND__
- | (Q_UINT64_C(1) << CpuFeatureRDRND)
-#endif
-#ifdef __RDSEED__
- | (Q_UINT64_C(1) << CpuFeatureRDSEED)
-#endif
-#if defined __BMI__
- | (Q_UINT64_C(1) << CpuFeatureBMI)
-#endif
-#if defined __BMI2__
- | (Q_UINT64_C(1) << CpuFeatureBMI2)
-#endif
-#if defined __F16C__
- | (Q_UINT64_C(1) << CpuFeatureF16C)
-#endif
-#if defined __POPCNT__
- | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
-#endif
-#if defined __MOVBE__ // GCC and Clang don't seem to define this
- | (Q_UINT64_C(1) << CpuFeatureMOVBE)
-#endif
-#if defined __AVX512F__
- | (Q_UINT64_C(1) << CpuFeatureAVX512F)
-#endif
-#if defined __AVX512CD__
- | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
-#endif
-#if defined __AVX512ER__
- | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
-#endif
-#if defined __AVX512PF__
- | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
-#endif
-#if defined __AVX512BW__
- | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
-#endif
-#if defined __AVX512DQ__
- | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
-#endif
-#if defined __AVX512VL__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
-#endif
-#if defined __AVX512IFMA__
- | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
-#endif
-#if defined __AVX512VBMI__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
-#endif
-#if defined __AVX2__
- | (Q_UINT64_C(1) << CpuFeatureAVX2)
-#endif
-#if defined __AVX__
- | (Q_UINT64_C(1) << CpuFeatureAVX)
-#endif
-#if defined __SSE4_2__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
-#endif
-#if defined __SSE4_1__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
-#endif
-#if defined __SSSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSSE3)
-#endif
-#if defined __SSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSE3)
-#endif
-#if defined __SSE2__
- | (Q_UINT64_C(1) << CpuFeatureSSE2)
-#endif
#if defined __ARM_NEON__
| (Q_UINT64_C(1) << CpuFeatureNEON)
#endif
@@ -434,6 +295,7 @@ static const quint64 qCompilerCpuFeatures = 0
| (Q_UINT64_C(1) << CpuFeatureDSPR2)
#endif
;
+#endif
#ifdef QT_BOOTSTRAPPED
static inline quint64 qCpuFeatures()
diff --git a/src/corelib/tools/qsimd_x86.cpp b/src/corelib/tools/qsimd_x86.cpp
new file mode 100644
index 0000000000..8275f964d8
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86.cpp
@@ -0,0 +1,98 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#include <qglobal.h>
+
+static const char features_string[] =
+ " sse2\0"
+ " sse3\0"
+ " ssse3\0"
+ " fma\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
+ " avx\0"
+ " f16c\0"
+ " rdrnd\0"
+ " bmi\0"
+ " hle\0"
+ " avx2\0"
+ " bmi2\0"
+ " rtm\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
+ " avx512vbmi\0"
+ " avx512vbmi2\0"
+ " gfni\0"
+ " vaes\0"
+ " avx512vnni\0"
+ " avx512bitalg\0"
+ " avx512vpopcntdq\0"
+ " avx5124nniw\0"
+ " avx5124fmaps\0"
+ "\0";
+
+static const quint16 features_indices[] = {
+ 306, 0, 6, 12, 19, 24, 32, 40,
+ 47, 55, 60, 65, 71, 78, 83, 88,
+ 94, 100, 105, 114, 124, 132, 144, 154,
+ 164, 174, 179, 189, 199, 211, 224, 230,
+ 236, 248, 262, 279, 292
+};
+
+enum X86CpuidLeaves {
+ Leaf1ECX,
+ Leaf1EDX,
+ Leaf7_0EBX,
+ Leaf7_0ECX,
+ Leaf7_0EDX,
+ X86CpuidMaxLeaf
+};
+
+static const quint8 x86_locators[] = {
+ Leaf1EDX*32 + 26, // sse2
+ Leaf1ECX*32 + 0, // sse3
+ Leaf1ECX*32 + 9, // ssse3
+ Leaf1ECX*32 + 12, // fma
+ Leaf1ECX*32 + 19, // sse4.1
+ Leaf1ECX*32 + 20, // sse4.2
+ Leaf1ECX*32 + 22, // movbe
+ Leaf1ECX*32 + 23, // popcnt
+ Leaf1ECX*32 + 25, // aes
+ Leaf1ECX*32 + 28, // avx
+ Leaf1ECX*32 + 29, // f16c
+ Leaf1ECX*32 + 30, // rdrnd
+ Leaf7_0EBX*32 + 3, // bmi
+ Leaf7_0EBX*32 + 4, // hle
+ Leaf7_0EBX*32 + 5, // avx2
+ Leaf7_0EBX*32 + 8, // bmi2
+ Leaf7_0EBX*32 + 11, // rtm
+ Leaf7_0EBX*32 + 16, // avx512f
+ Leaf7_0EBX*32 + 17, // avx512dq
+ Leaf7_0EBX*32 + 18, // rdseed
+ Leaf7_0EBX*32 + 21, // avx512ifma
+ Leaf7_0EBX*32 + 26, // avx512pf
+ Leaf7_0EBX*32 + 27, // avx512er
+ Leaf7_0EBX*32 + 28, // avx512cd
+ Leaf7_0EBX*32 + 29, // sha
+ Leaf7_0EBX*32 + 30, // avx512bw
+ Leaf7_0EBX*32 + 31, // avx512vl
+ Leaf7_0ECX*32 + 1, // avx512vbmi
+ Leaf7_0ECX*32 + 6, // avx512vbmi2
+ Leaf7_0ECX*32 + 8, // gfni
+ Leaf7_0ECX*32 + 9, // vaes
+ Leaf7_0ECX*32 + 11, // avx512vnni
+ Leaf7_0ECX*32 + 12, // avx512bitalg
+ Leaf7_0ECX*32 + 14, // avx512vpopcntdq
+ Leaf7_0EDX*32 + 2, // avx5124nniw
+ Leaf7_0EDX*32 + 3 // avx5124fmaps
+};
diff --git a/src/corelib/tools/qsimd_x86_p.h b/src/corelib/tools/qsimd_x86_p.h
new file mode 100644
index 0000000000..45d5f2895f
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86_p.h
@@ -0,0 +1,227 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// Macros for QT_FUNCTION_TARGET (for Clang and GCC)
+#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
+#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
+#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
+#define QT_FUNCTION_TARGET_STRING_FMA "fma"
+#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
+#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
+#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
+#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
+#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
+#define QT_FUNCTION_TARGET_STRING_AVX "avx"
+#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
+#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
+#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
+#define QT_FUNCTION_TARGET_STRING_HLE "hle"
+#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
+#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
+#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
+#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
+#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
+#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
+#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
+#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
+#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
+#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
+#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
+#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
+#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
+#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
+#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
+#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
+#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
+#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
+#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
+
+enum CPUFeatures {
+ // in CPUID Leaf 1, EDX:
+ CpuFeatureSSE2 = 1,
+
+ // in CPUID Leaf 1, ECX:
+ CpuFeatureSSE3 = 2,
+ CpuFeatureSSSE3 = 3,
+ CpuFeatureFMA = 4,
+ CpuFeatureSSE4_1 = 5,
+ CpuFeatureSSE4_2 = 6,
+ CpuFeatureMOVBE = 7,
+ CpuFeaturePOPCNT = 8,
+ CpuFeatureAES = 9,
+ CpuFeatureAVX = 10,
+ CpuFeatureF16C = 11,
+ CpuFeatureRDRND = 12,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EBX:
+ CpuFeatureBMI = 13,
+ CpuFeatureHLE = 14,
+ CpuFeatureAVX2 = 15,
+ CpuFeatureBMI2 = 16,
+ CpuFeatureRTM = 17,
+ CpuFeatureAVX512F = 18,
+ CpuFeatureAVX512DQ = 19,
+ CpuFeatureRDSEED = 20,
+ CpuFeatureAVX512IFMA = 21,
+ CpuFeatureAVX512PF = 22,
+ CpuFeatureAVX512ER = 23,
+ CpuFeatureAVX512CD = 24,
+ CpuFeatureSHA = 25,
+ CpuFeatureAVX512BW = 26,
+ CpuFeatureAVX512VL = 27,
+
+ // in CPUID Leaf 7, Sub-leaf 0, ECX:
+ CpuFeatureAVX512VBMI = 28,
+ CpuFeatureAVX512VBMI2 = 29,
+ CpuFeatureGFNI = 30,
+ CpuFeatureVAES = 31,
+ CpuFeatureAVX512VNNI = 32,
+ CpuFeatureAVX512BITALG = 33,
+ CpuFeatureAVX512VPOPCNTDQ = 34,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EDX:
+ CpuFeatureAVX5124NNIW = 35,
+ CpuFeatureAVX5124FMAPS = 36,
+
+ // used only to indicate that the CPU detection was initialized
+ QSimdInitialized = 1
+};
+
+static const quint64 qCompilerCpuFeatures = 0
+#ifdef __SSE2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE2)
+#endif
+#ifdef __SSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSE3)
+#endif
+#ifdef __SSSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSSE3)
+#endif
+#ifdef __FMA__
+ | (Q_UINT64_C(1) << CpuFeatureFMA)
+#endif
+#ifdef __SSE4_1__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
+#endif
+#ifdef __SSE4_2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
+#endif
+#ifdef __MOVBE__
+ | (Q_UINT64_C(1) << CpuFeatureMOVBE)
+#endif
+#ifdef __POPCNT__
+ | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
+#endif
+#ifdef __AES__
+ | (Q_UINT64_C(1) << CpuFeatureAES)
+#endif
+#ifdef __AVX__
+ | (Q_UINT64_C(1) << CpuFeatureAVX)
+#endif
+#ifdef __F16C__
+ | (Q_UINT64_C(1) << CpuFeatureF16C)
+#endif
+#ifdef __RDRND__
+ | (Q_UINT64_C(1) << CpuFeatureRDRND)
+#endif
+#ifdef __BMI__
+ | (Q_UINT64_C(1) << CpuFeatureBMI)
+#endif
+#ifdef __HLE__
+ | (Q_UINT64_C(1) << CpuFeatureHLE)
+#endif
+#ifdef __AVX2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX2)
+#endif
+#ifdef __BMI2__
+ | (Q_UINT64_C(1) << CpuFeatureBMI2)
+#endif
+#ifdef __RTM__
+ | (Q_UINT64_C(1) << CpuFeatureRTM)
+#endif
+#ifdef __AVX512F__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512F)
+#endif
+#ifdef __AVX512DQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
+#endif
+#ifdef __RDSEED__
+ | (Q_UINT64_C(1) << CpuFeatureRDSEED)
+#endif
+#ifdef __AVX512IFMA__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
+#endif
+#ifdef __AVX512PF__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
+#endif
+#ifdef __AVX512ER__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
+#endif
+#ifdef __AVX512CD__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
+#endif
+#ifdef __SHA__
+ | (Q_UINT64_C(1) << CpuFeatureSHA)
+#endif
+#ifdef __AVX512BW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
+#endif
+#ifdef __AVX512VL__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
+#endif
+#ifdef __AVX512VBMI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
+#endif
+#ifdef __AVX512VBMI2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2)
+#endif
+#ifdef __GFNI__
+ | (Q_UINT64_C(1) << CpuFeatureGFNI)
+#endif
+#ifdef __VAES__
+ | (Q_UINT64_C(1) << CpuFeatureVAES)
+#endif
+#ifdef __AVX512VNNI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VNNI)
+#endif
+#ifdef __AVX512BITALG__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG)
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ)
+#endif
+#ifdef __AVX5124NNIW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW)
+#endif
+#ifdef __AVX5124FMAPS__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS)
+#endif
+ ;
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H
+
diff --git a/util/x86simdgen/generate.pl b/util/x86simdgen/generate.pl
new file mode 100755
index 0000000000..572891d483
--- /dev/null
+++ b/util/x86simdgen/generate.pl
@@ -0,0 +1,192 @@
+#!/usr/bin/env perl
+#############################################################################
+##
+## Copyright (C) 2018 Intel Corporation.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the build configuration tools of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:MIT$
+## Permission is hereby granted, free of charge, to any person obtaining a copy
+## of this software and associated documentation files (the "Software"), to deal
+## in the Software without restriction, including without limitation the rights
+## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+## copies of the Software, and to permit persons to whom the Software is
+## furnished to do so, subject to the following conditions:
+##
+## The above copyright notice and this permission notice shall be included in
+## all copies or substantial portions of the Software.
+##
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+## THE SOFTWARE.
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+use strict;
+# Output record separator: every print() below appends a newline
+# (printf is not affected, so its format strings carry explicit "\n").
+$\ = "\n";
+# Input record separator: read stdin one line at a time.
+$/ = "\n";
+# Known CPUID result registers. The key is the identifier written in the
+# input table and emitted as a C++ enumerator; the value is the description
+# used in the generated enum's "// in ...:" comments.
+my %leaves = (
+ Leaf1EDX => "CPUID Leaf 1, EDX",
+ Leaf1ECX => "CPUID Leaf 1, ECX",
+ Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
+ Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
+ Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
+);
+# Sorted so that the generated X86CpuidLeaves enum has a stable order.
+my @leafNames = sort keys %leaves;
+
+# Read the feature table from stdin. After "#" comments are stripped, each
+# non-blank line holds whitespace-separated fields:
+#     name  CPUID-leaf  bit  [required-feature]
+# One hash per feature is appended to @features, in input order.
+my @features;
+while (<STDIN>) {
+    s/#.*$//;
+    chomp;
+    next unless /\S/;       # skip blank, whitespace-only and comment-only lines
+
+    # split ' ' also ignores leading whitespace, so an indented line does
+    # not produce an empty feature name.
+    my ($name, $function, $bit, $depends) = split ' ';
+
+    # The leaf name is emitted verbatim as a C++ enumerator and is used as a
+    # key into %leaves, so it must be one of the known names. The block form
+    # of grep is required: "grep $function, @leafNames" only tests whether
+    # $function is a true value and therefore never rejects anything.
+    die("Unknown CPUID function \"$function\"")
+        unless defined($function) && grep { $_ eq $function } @leafNames;
+
+    # Locators are encoded as leaf*32 + bit, so the bit must be in 0..31.
+    die("Invalid bit number for feature \"$name\"")
+        unless defined($bit) && $bit =~ /^\d+$/ && $bit < 32;
+
+    # Derive the identifier suffix from the name, e.g. "sse4.1" -> "SSE4_1".
+    my $id = uc($name);
+    $id =~ s/[^A-Z0-9_]/_/g;
+    push @features,
+        { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function };
+}
+
+# If a first argument is given, the header is written to that file instead
+# of stdout. Fail loudly when the file cannot be created: otherwise the
+# generated header would be discarded without any diagnostic.
+if (my $h = shift @ARGV) {
+    open HEADER, ">", $h
+        or die("Cannot open \"$h\" for writing: $!");
+    select HEADER;
+}
+
+# Print the fixed preamble of qsimd_x86_p.h. Everything inside q{} is
+# emitted verbatim (no interpolation), so it must contain exactly the text
+# wanted in the generated header: the second comment line must not carry
+# a trailing '";'.
+print q{// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// Macros for QT_FUNCTION_TARGET (for Clang and GCC)};
+
+# Emit one QT_FUNCTION_TARGET_STRING_<ID> macro per feature. Its value is the
+# feature name, followed by ",<required feature>" when the input table
+# lists one.
+for my $entry (@features) {
+    my @targets = ($entry->{name});
+    push @targets, $entry->{depends}
+        if defined($entry->{depends});
+    printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
+        $entry->{id}, join(",", @targets);
+}
+
+# Print the CPUFeatures enumerators. Features are numbered from 1 in input
+# order, and a "// in <leaf description>:" comment is emitted each time the
+# CPUID leaf differs from that of the previous feature.
+printf "\nenum CPUFeatures {";
+my $lastleaf = "";
+for my $index (0 .. $#features) {
+    my $feature = $features[$index];
+    my $leaf = $feature->{leaf};
+
+    # Leaf header:
+    if ($leaf ne $lastleaf) {
+        printf "\n // in %s:\n", $leaves{$leaf};
+    }
+    $lastleaf = $leaf;
+
+    # Feature
+    printf " CpuFeature%-13s = %d,\n", $feature->{id}, $index + 1;
+}
+
+# Close the enum with the QSimdInitialized enumerator and open the
+# qCompilerCpuFeatures initializer. "\}" yields a literal "}" inside q{}.
+print q{
+ // used only to indicate that the CPU detection was initialized
+ QSimdInitialized = 1
+\};
+
+static const quint64 qCompilerCpuFeatures = 0};
+
+# Add one term per feature, guarded by the compiler's __<ID>__ macro.
+for my $entry (@features) {
+    my $macro = $entry->{id};
+    printf "#ifdef __%s__\n | (Q_UINT64_C(1) << CpuFeature%s)\n#endif\n",
+        $macro, $macro;
+}
+
+# Terminate the initializer and close the namespace and the include guard.
+print q{ ;
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H
+};
+
+# If a second argument is given, the .cpp part goes to that file. Otherwise
+# a cut marker is printed and the .cpp part follows on the current output.
+# Fail loudly when the file cannot be created instead of silently dropping
+# the generated tables.
+if (my $cpp = shift @ARGV) {
+    open CPP, ">", $cpp
+        or die("Cannot open \"$cpp\" for writing: $!");
+    select CPP;
+} else {
+    print q{
+
+---- cut here, paste the rest into qsimd_x86.cpp ---
+
+
+};
+}
+
+# Fixed preamble of qsimd_x86.cpp; each print appends a newline via $\.
+print $_ for (
+    "// This is a generated file. DO NOT EDIT.",
+    "// Please see util/x86simdgen/generate.pl",
+    "#include <qglobal.h>",
+    "",
+);
+
+# Emit the feature names as one string of NUL-terminated " <name>" entries,
+# recording the offset at which each entry starts.
+my $offset = 0;
+my @offsets;
+print "static const char features_string[] =";
+for my $feature (@features) {
+    my $name = $feature->{name};
+    push @offsets, $offset;
+    print " \" $name\\0\"";
+    # each entry occupies a leading space, the name and the NUL
+    $offset += length($name) + 2;
+}
+print " \"\\0\";";
+
+# Emit the offset table. The first entry is the offset of the final empty
+# string; the element type is widened when an offset does not fit 8 bits.
+my $index_type = $offset > 255 ? "quint16" : "quint8";
+printf "\nstatic const %s features_indices[] = {\n %3d", $index_type, $offset;
+for my $j (0 .. $#offsets) {
+    # break the line after every eighth value, counting the first entry
+    my $separator = ($j + 1) % 8 ? " " : "\n ";
+    printf ",%s%3d", $separator, $offsets[$j];
+}
+print "\n};";
+
+# Print the X86CpuidLeaves enum: one enumerator per known leaf, in sorted
+# order, followed by X86CpuidMaxLeaf as the element count.
+print "\nenum X86CpuidLeaves {";
+print " $_," for @leafNames;
+print " X86CpuidMaxLeaf\n};";
+
+# Each locator is encoded as leaf*32 + bit, so up to 8 leaves fit in 8 bits.
+# Count the keys explicitly: before Perl 5.26, a hash in scalar context
+# yields a "used/allocated buckets" string rather than the number of keys.
+my $type = scalar(keys %leaves) > 8 ? "quint16" : "quint8";
+printf "\nstatic const %s x86_locators[] = {", $type;
+
+# The comma and trailing "// name" comment of each entry are printed when
+# the next entry starts, so that the last entry gets no trailing comma.
+my $lastname;
+for my $feature (@features) {
+    printf ", // %s", $lastname
+        if defined($lastname);
+    printf "\n %s*32 + %2d",
+        $feature->{leaf}, $feature->{bit};
+    $lastname = $feature->{name};
+}
+print " // $lastname\n};";
diff --git a/util/x86simdgen/simd.txt b/util/x86simdgen/simd.txt
new file mode 100644
index 0000000000..1fce7b9497
--- /dev/null
+++ b/util/x86simdgen/simd.txt
@@ -0,0 +1,37 @@
+# Feature CPUID function Bit Required feature
+sse2 Leaf1EDX 26
+sse3 Leaf1ECX 0
+ssse3 Leaf1ECX 9
+fma Leaf1ECX 12
+sse4.1 Leaf1ECX 19
+sse4.2 Leaf1ECX 20
+movbe Leaf1ECX 22
+popcnt Leaf1ECX 23
+aes Leaf1ECX 25 sse4.2
+avx Leaf1ECX 28
+f16c Leaf1ECX 29
+rdrnd Leaf1ECX 30
+bmi Leaf7_0EBX 3
+hle Leaf7_0EBX 4
+avx2 Leaf7_0EBX 5
+bmi2 Leaf7_0EBX 8
+rtm Leaf7_0EBX 11
+avx512f Leaf7_0EBX 16
+avx512dq Leaf7_0EBX 17
+rdseed Leaf7_0EBX 18
+avx512ifma Leaf7_0EBX 21
+avx512pf Leaf7_0EBX 26
+avx512er Leaf7_0EBX 27
+avx512cd Leaf7_0EBX 28
+sha Leaf7_0EBX 29
+avx512bw Leaf7_0EBX 30
+avx512vl Leaf7_0EBX 31
+avx512vbmi Leaf7_0ECX 1
+avx512vbmi2 Leaf7_0ECX 6
+gfni Leaf7_0ECX 8
+vaes Leaf7_0ECX 9
+avx512vnni Leaf7_0ECX 11
+avx512bitalg Leaf7_0ECX 12
+avx512vpopcntdq Leaf7_0ECX 14
+avx5124nniw Leaf7_0EDX 2
+avx5124fmaps Leaf7_0EDX 3