summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config.tests/arch/arch.cpp34
-rw-r--r--header.MIT28
-rw-r--r--src/corelib/tools/qsimd.cpp252
-rw-r--r--src/corelib/tools/qsimd_p.h144
-rw-r--r--src/corelib/tools/qsimd_x86.cpp98
-rw-r--r--src/corelib/tools/qsimd_x86_p.h227
-rwxr-xr-xutil/x86simdgen/generate.pl192
-rw-r--r--util/x86simdgen/simd.txt37
8 files changed, 683 insertions, 329 deletions
diff --git a/config.tests/arch/arch.cpp b/config.tests/arch/arch.cpp
index bb3efec177..2be3630af8 100644
--- a/config.tests/arch/arch.cpp
+++ b/config.tests/arch/arch.cpp
@@ -115,6 +115,22 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// AVX512 Vector Byte Manipulation Instructions, Intel processor codename "Cannonlake"
" avx512vbmi"
#endif
+#ifdef __AVX512VBMI2__
+// AVX512 Vector Byte Manipulation Instructions #2, Intel processor codename "Ice Lake"
+" avx512vbmi2"
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+// AVX512 Vector Population Count Double & Quad, Future Intel Xeon Phi processor codename "Knights Mill", Intel processor codename "Ice Lake"
+" avx512vpopcntdq"
+#endif
+#ifdef __AVX5124FMAPS__
+// AVX512 4-iteration Fused Multiply Accumulation Packed Single, Future Intel Xeon Phi processor codename "Knights Mill"
+" avx5124fmaps"
+#endif
+#ifdef __AVX5124VNNIW__
+// AVX512 4-iteration Vector Neural Network Instructions Word, Future Intel Xeon Phi processor codename "Knights Mill"
+" avx5124vnniw"
+#endif
#ifdef __BMI__
// Bit Manipulation Instructions 1, Intel Core 4th Generation ("Haswell"), AMD "Bulldozer 2"
" bmi"
@@ -145,6 +161,14 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// rdfsgsbase, wrfsgsbase, Intel Core 3rd Generation ("Ivy Bridge")
" fsgsbase"
#endif
+#ifdef __GFNI__
+// Galois Field new instructions, Intel processor codename "Ice Lake"
+" gfni"
+#endif
+#ifdef __IBT__
+// Indirect Branch Tracking, Intel processor TBA
+" ibt"
+#endif
#ifdef __LWP__
// LWP instructions, AMD "Bulldozer"
" lwp"
@@ -186,6 +210,10 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
// Prefetch data for writing, Intel Core 5th Generation ("Broadwell")
" prfchw"
#endif
+#ifdef __RDPID__
+// Read Processor ID, Intel processors codename "Ice Lake" and "Goldmont Plus"
+" rdpid"
+#endif
#ifdef __RDRND__
// Random number generator, Intel Core 3rd Generation ("Ivy Bridge")
" rdrnd"
@@ -199,9 +227,13 @@ const char msg2[] = "==Qt=magic=Qt== Sub-architecture:"
" rtm"
#endif
#ifdef __SHA__
-// SHA-1 and SHA-256 instructions, Intel processor TBA
+// SHA-1 and SHA-256 instructions, Intel processors codename "Cannon Lake" and "Goldmont"
" sha"
#endif
+#ifdef __SHSTK__
+// Shadow stack, Intel processor TBA
+" shstk"
+#endif
#if defined(__SSE__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) || defined(_M_X64)
// Streaming SIMD Extensions, Intel Pentium III, AMD Athlon
" sse"
diff --git a/header.MIT b/header.MIT
new file mode 100644
index 0000000000..df431dda02
--- /dev/null
+++ b/header.MIT
@@ -0,0 +1,28 @@
+/****************************************************************************
+**
+** Copyright (C) YYYY Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the FOO module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:MIT$
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and associated documentation files (the "Software"), to deal
+** in the Software without restriction, including without limitation the rights
+** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+** copies of the Software, and to permit persons to whom the Software is
+** furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+** THE SOFTWARE.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index c4d7117449..fd9c6a7079 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -80,6 +80,43 @@
QT_BEGIN_NAMESPACE
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * we remove the terminating -1 that the script adds.
+ */
+
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
+ neon
+ crc32
+ */
+static const char features_string[] =
+ " neon\0"
+ " crc32\0"
+ "\0";
+static const int features_indices[] = { 0, 6 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+# include "qsimd_x86.cpp" // generated by util/x86simdgen
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
+// end generated
+
#if defined (Q_OS_NACL)
static inline uint detectProcessorFeatures()
{
@@ -222,29 +259,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx, uint &ecx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0;
+ qregisteruint rdx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx), "+&c" (rcx)
- : "a" (7)
- : "%edx");
+ : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
+ : "a" (7));
ebx = rbx;
ecx = rcx;
+ edx = rdx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUIDEX(7, 0, info);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#endif
}
@@ -285,8 +325,11 @@ static quint64 detectProcessorFeatures()
static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
(Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
(Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
- (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
- (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ (Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) |
+ (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS);
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
@@ -299,52 +342,33 @@ static quint64 detectProcessorFeatures()
Q_ASSERT(cpuidLevel >= 1);
#endif
- uint cpuid01ECX = 0, cpuid01EDX = 0;
- cpuidFeatures01(cpuid01ECX, cpuid01EDX);
-
- // the low 32-bits of features is cpuid01ECX
- // note: we need to check OS support for saving the AVX register state
- features = cpuid01ECX;
-
-#if defined(Q_PROCESSOR_X86_32)
- // x86 might not have SSE2 support
- if (cpuid01EDX & (1u << 26))
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
- // we should verify that the OS enabled saving of the SSE state...
-#else
- // x86-64 or x32
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
-#endif
+ uint results[X86CpuidMaxLeaf] = {};
+ cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+
+ // populate our feature list
+ for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ uint word = x86_locators[i] / 32;
+ uint bit = 1U << (x86_locators[i] % 32);
+ quint64 feature = Q_UINT64_C(1) << (i + 1);
+ if (results[word] & bit)
+ features |= feature;
+ }
+ // now check the AVX state
uint xgetbvA = 0, xgetbvD = 0;
- if (cpuid01ECX & (1u << 27)) {
+ if (results[Leaf1ECX] & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
- uint cpuid0700EBX = 0;
- uint cpuid0700ECX = 0;
- if (cpuidLevel >= 7) {
- cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
-
- // the high 32-bits of features is cpuid0700EBX
- features |= quint64(cpuid0700EBX) << 32;
- }
-
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xgetbvA & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
- } else {
- // this feature is out of order
- if (cpuid0700ECX & (1u << 1))
- features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@@ -493,152 +517,6 @@ static inline uint detectProcessorFeatures()
}
#endif
-/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
- * that the x86 version has a lot of blanks that must be kept and that the
- * offset table's type is changed to make the table smaller. We also remove the
- * terminating -1 that the script adds.
- */
-
-// begin generated
-#if defined(Q_PROCESSOR_ARM)
-/* Data:
- neon
- crc32
- */
-static const char features_string[] =
- " neon\0"
- " crc32\0"
- "\0";
-static const int features_indices[] = { 0, 6 };
-#elif defined(Q_PROCESSOR_MIPS)
-/* Data:
- dsp
- dspr2
-*/
-static const char features_string[] =
- " dsp\0"
- " dspr2\0"
- "\0";
-
-static const int features_indices[] = {
- 0, 5
-};
-#elif defined(Q_PROCESSOR_X86)
-/* Data:
- sse3
- sse2
- avx512vbmi
-
-
-
-
-
-
- ssse3
-
-
- fma
- cmpxchg16b
-
-
-
-
-
- sse4.1
- sse4.2
-
- movbe
- popcnt
-
- aes
-
-
- avx
- f16c
- rdrand
-
-
-
-
- bmi
- hle
- avx2
-
-
- bmi2
-
-
- rtm
-
-
-
-
- avx512f
- avx512dq
- rdseed
-
-
- avx512ifma
-
-
-
-
- avx512pf
- avx512er
- avx512cd
- sha
- avx512bw
- avx512vl
- */
-static const char features_string[] =
- " sse3\0"
- " sse2\0"
- " avx512vbmi\0"
- " ssse3\0"
- " fma\0"
- " cmpxchg16b\0"
- " sse4.1\0"
- " sse4.2\0"
- " movbe\0"
- " popcnt\0"
- " aes\0"
- " avx\0"
- " f16c\0"
- " rdrand\0"
- " bmi\0"
- " hle\0"
- " avx2\0"
- " bmi2\0"
- " rtm\0"
- " avx512f\0"
- " avx512dq\0"
- " rdseed\0"
- " avx512ifma\0"
- " avx512pf\0"
- " avx512er\0"
- " avx512cd\0"
- " sha\0"
- " avx512bw\0"
- " avx512vl\0"
- "\0";
-
-static const quint8 features_indices[] = {
- 0, 6, 12, 5, 5, 5, 5, 5,
- 5, 24, 5, 5, 31, 36, 5, 5,
- 5, 5, 5, 48, 56, 5, 64, 71,
- 5, 79, 5, 5, 84, 89, 95, 5,
- 5, 5, 5, 103, 108, 113, 5, 5,
- 119, 5, 5, 125, 5, 5, 5, 5,
- 130, 139, 149, 5, 5, 157, 5, 5,
- 5, 5, 169, 179, 189, 199, 204, 214
-};
-#else
-static const char features_string[] = "";
-static const int features_indices[] = { };
-#endif
-// end generated
-
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 3161ee7412..1b7ed57fa8 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -179,6 +179,7 @@
#ifdef Q_PROCESSOR_X86
/* -- x86 intrinsic support -- */
+# include "qsimd_x86_p.h"
# if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2)
// MSVC doesn't define __SSE2__, so do it ourselves
@@ -232,33 +233,6 @@
# define __RDRND__ 1
# endif
-#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
-#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
-#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
-#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
-#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
-#define QT_FUNCTION_TARGET_STRING_AVX "avx"
-#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
-#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
-#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
-#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
-#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
-#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
-#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
-#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
-#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
-#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
-
-#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_PCLMUL "pclmul,sse4.2"
-#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
-#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
-#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
-#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
-#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
-#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
-#define QT_FUNCTION_TARGET_STRING_SHA "sha"
-
#endif /* Q_PROCESSOR_X86 */
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
@@ -292,6 +266,7 @@
QT_BEGIN_NAMESPACE
+#ifndef Q_PROCESSOR_X86
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
CpuFeatureNEON = 0,
@@ -300,42 +275,6 @@ enum CPUFeatures {
#elif defined(Q_PROCESSOR_MIPS)
CpuFeatureDSP = 0,
CpuFeatureDSPR2 = 1,
-#elif defined(Q_PROCESSOR_X86)
- // The order of the flags is jumbled so it matches most closely the bits in CPUID
- // Out of order:
- CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ
- // in level 1, ECX
- CpuFeatureSSE3 = (0 + 0),
- CpuFeatureSSSE3 = (0 + 9),
- CpuFeatureSSE4_1 = (0 + 19),
- CpuFeatureSSE4_2 = (0 + 20),
- CpuFeatureMOVBE = (0 + 22),
- CpuFeaturePOPCNT = (0 + 23),
- CpuFeatureAES = (0 + 25),
- CpuFeatureAVX = (0 + 28),
- CpuFeatureF16C = (0 + 29),
- CpuFeatureRDRND = (0 + 30),
- // 31 is always zero and we've used it for the QSimdInitialized
-
- // in level 7, leaf 0, EBX
- CpuFeatureBMI = (32 + 3),
- CpuFeatureHLE = (32 + 4),
- CpuFeatureAVX2 = (32 + 5),
- CpuFeatureBMI2 = (32 + 8),
- CpuFeatureRTM = (32 + 11),
- CpuFeatureAVX512F = (32 + 16),
- CpuFeatureAVX512DQ = (32 + 17),
- CpuFeatureRDSEED = (32 + 18),
- CpuFeatureAVX512IFMA = (32 + 21),
- CpuFeatureAVX512PF = (32 + 26),
- CpuFeatureAVX512ER = (32 + 27),
- CpuFeatureAVX512CD = (32 + 28),
- CpuFeatureSHA = (32 + 29),
- CpuFeatureAVX512BW = (32 + 30),
- CpuFeatureAVX512VL = (32 + 31),
-
- // in level 7, leaf 0, ECX (out of order, for now)
- CpuFeatureAVX512VBMI = 2, // uses the bit for DTES64
#endif
// used only to indicate that the CPU detection was initialised
@@ -343,84 +282,6 @@ enum CPUFeatures {
};
static const quint64 qCompilerCpuFeatures = 0
-#if defined __SHA__
- | (Q_UINT64_C(1) << CpuFeatureSHA)
-#endif
-#if defined __AES__
- | (Q_UINT64_C(1) << CpuFeatureAES)
-#endif
-#if defined __RTM__
- | (Q_UINT64_C(1) << CpuFeatureRTM)
-#endif
-#ifdef __RDRND__
- | (Q_UINT64_C(1) << CpuFeatureRDRND)
-#endif
-#ifdef __RDSEED__
- | (Q_UINT64_C(1) << CpuFeatureRDSEED)
-#endif
-#if defined __BMI__
- | (Q_UINT64_C(1) << CpuFeatureBMI)
-#endif
-#if defined __BMI2__
- | (Q_UINT64_C(1) << CpuFeatureBMI2)
-#endif
-#if defined __F16C__
- | (Q_UINT64_C(1) << CpuFeatureF16C)
-#endif
-#if defined __POPCNT__
- | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
-#endif
-#if defined __MOVBE__ // GCC and Clang don't seem to define this
- | (Q_UINT64_C(1) << CpuFeatureMOVBE)
-#endif
-#if defined __AVX512F__
- | (Q_UINT64_C(1) << CpuFeatureAVX512F)
-#endif
-#if defined __AVX512CD__
- | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
-#endif
-#if defined __AVX512ER__
- | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
-#endif
-#if defined __AVX512PF__
- | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
-#endif
-#if defined __AVX512BW__
- | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
-#endif
-#if defined __AVX512DQ__
- | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
-#endif
-#if defined __AVX512VL__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
-#endif
-#if defined __AVX512IFMA__
- | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
-#endif
-#if defined __AVX512VBMI__
- | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
-#endif
-#if defined __AVX2__
- | (Q_UINT64_C(1) << CpuFeatureAVX2)
-#endif
-#if defined __AVX__
- | (Q_UINT64_C(1) << CpuFeatureAVX)
-#endif
-#if defined __SSE4_2__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
-#endif
-#if defined __SSE4_1__
- | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
-#endif
-#if defined __SSSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSSE3)
-#endif
-#if defined __SSE3__
- | (Q_UINT64_C(1) << CpuFeatureSSE3)
-#endif
-#if defined __SSE2__
- | (Q_UINT64_C(1) << CpuFeatureSSE2)
-#endif
#if defined __ARM_NEON__
| (Q_UINT64_C(1) << CpuFeatureNEON)
#endif
@@ -434,6 +295,7 @@ static const quint64 qCompilerCpuFeatures = 0
| (Q_UINT64_C(1) << CpuFeatureDSPR2)
#endif
;
+#endif
#ifdef QT_BOOTSTRAPPED
static inline quint64 qCpuFeatures()
diff --git a/src/corelib/tools/qsimd_x86.cpp b/src/corelib/tools/qsimd_x86.cpp
new file mode 100644
index 0000000000..8275f964d8
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86.cpp
@@ -0,0 +1,98 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#include <qglobal.h>
+
+static const char features_string[] =
+ " sse2\0"
+ " sse3\0"
+ " ssse3\0"
+ " fma\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
+ " avx\0"
+ " f16c\0"
+ " rdrnd\0"
+ " bmi\0"
+ " hle\0"
+ " avx2\0"
+ " bmi2\0"
+ " rtm\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
+ " avx512vbmi\0"
+ " avx512vbmi2\0"
+ " gfni\0"
+ " vaes\0"
+ " avx512vnni\0"
+ " avx512bitalg\0"
+ " avx512vpopcntdq\0"
+ " avx5124nniw\0"
+ " avx5124fmaps\0"
+ "\0";
+
+static const quint16 features_indices[] = {
+ 306, 0, 6, 12, 19, 24, 32, 40,
+ 47, 55, 60, 65, 71, 78, 83, 88,
+ 94, 100, 105, 114, 124, 132, 144, 154,
+ 164, 174, 179, 189, 199, 211, 224, 230,
+ 236, 248, 262, 279, 292
+};
+
+enum X86CpuidLeaves {
+ Leaf1ECX,
+ Leaf1EDX,
+ Leaf7_0EBX,
+ Leaf7_0ECX,
+ Leaf7_0EDX,
+ X86CpuidMaxLeaf
+};
+
+static const quint8 x86_locators[] = {
+ Leaf1EDX*32 + 26, // sse2
+ Leaf1ECX*32 + 0, // sse3
+ Leaf1ECX*32 + 9, // ssse3
+ Leaf1ECX*32 + 12, // fma
+ Leaf1ECX*32 + 19, // sse4.1
+ Leaf1ECX*32 + 20, // sse4.2
+ Leaf1ECX*32 + 22, // movbe
+ Leaf1ECX*32 + 23, // popcnt
+ Leaf1ECX*32 + 25, // aes
+ Leaf1ECX*32 + 28, // avx
+ Leaf1ECX*32 + 29, // f16c
+ Leaf1ECX*32 + 30, // rdrnd
+ Leaf7_0EBX*32 + 3, // bmi
+ Leaf7_0EBX*32 + 4, // hle
+ Leaf7_0EBX*32 + 5, // avx2
+ Leaf7_0EBX*32 + 8, // bmi2
+ Leaf7_0EBX*32 + 11, // rtm
+ Leaf7_0EBX*32 + 16, // avx512f
+ Leaf7_0EBX*32 + 17, // avx512dq
+ Leaf7_0EBX*32 + 18, // rdseed
+ Leaf7_0EBX*32 + 21, // avx512ifma
+ Leaf7_0EBX*32 + 26, // avx512pf
+ Leaf7_0EBX*32 + 27, // avx512er
+ Leaf7_0EBX*32 + 28, // avx512cd
+ Leaf7_0EBX*32 + 29, // sha
+ Leaf7_0EBX*32 + 30, // avx512bw
+ Leaf7_0EBX*32 + 31, // avx512vl
+ Leaf7_0ECX*32 + 1, // avx512vbmi
+ Leaf7_0ECX*32 + 6, // avx512vbmi2
+ Leaf7_0ECX*32 + 8, // gfni
+ Leaf7_0ECX*32 + 9, // vaes
+ Leaf7_0ECX*32 + 11, // avx512vnni
+ Leaf7_0ECX*32 + 12, // avx512bitalg
+ Leaf7_0ECX*32 + 14, // avx512vpopcntdq
+ Leaf7_0EDX*32 + 2, // avx5124nniw
+ Leaf7_0EDX*32 + 3 // avx5124fmaps
+};
diff --git a/src/corelib/tools/qsimd_x86_p.h b/src/corelib/tools/qsimd_x86_p.h
new file mode 100644
index 0000000000..45d5f2895f
--- /dev/null
+++ b/src/corelib/tools/qsimd_x86_p.h
@@ -0,0 +1,227 @@
+// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// Macros for QT_FUNCTION_TARGET (for Clang and GCC)
+#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
+#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
+#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
+#define QT_FUNCTION_TARGET_STRING_FMA "fma"
+#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
+#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
+#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
+#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
+#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
+#define QT_FUNCTION_TARGET_STRING_AVX "avx"
+#define QT_FUNCTION_TARGET_STRING_F16C "f16c"
+#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
+#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
+#define QT_FUNCTION_TARGET_STRING_HLE "hle"
+#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2"
+#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
+#define QT_FUNCTION_TARGET_STRING_RTM "rtm"
+#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f"
+#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq"
+#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
+#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma"
+#define QT_FUNCTION_TARGET_STRING_AVX512PF "avx512pf"
+#define QT_FUNCTION_TARGET_STRING_AVX512ER "avx512er"
+#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd"
+#define QT_FUNCTION_TARGET_STRING_SHA "sha"
+#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw"
+#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi"
+#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2"
+#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
+#define QT_FUNCTION_TARGET_STRING_VAES "vaes"
+#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni"
+#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg"
+#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq"
+#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
+#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
+
+enum CPUFeatures {
+ // in CPUID Leaf 1, EDX:
+ CpuFeatureSSE2 = 1,
+
+ // in CPUID Leaf 1, ECX:
+ CpuFeatureSSE3 = 2,
+ CpuFeatureSSSE3 = 3,
+ CpuFeatureFMA = 4,
+ CpuFeatureSSE4_1 = 5,
+ CpuFeatureSSE4_2 = 6,
+ CpuFeatureMOVBE = 7,
+ CpuFeaturePOPCNT = 8,
+ CpuFeatureAES = 9,
+ CpuFeatureAVX = 10,
+ CpuFeatureF16C = 11,
+ CpuFeatureRDRND = 12,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EBX:
+ CpuFeatureBMI = 13,
+ CpuFeatureHLE = 14,
+ CpuFeatureAVX2 = 15,
+ CpuFeatureBMI2 = 16,
+ CpuFeatureRTM = 17,
+ CpuFeatureAVX512F = 18,
+ CpuFeatureAVX512DQ = 19,
+ CpuFeatureRDSEED = 20,
+ CpuFeatureAVX512IFMA = 21,
+ CpuFeatureAVX512PF = 22,
+ CpuFeatureAVX512ER = 23,
+ CpuFeatureAVX512CD = 24,
+ CpuFeatureSHA = 25,
+ CpuFeatureAVX512BW = 26,
+ CpuFeatureAVX512VL = 27,
+
+ // in CPUID Leaf 7, Sub-leaf 0, ECX:
+ CpuFeatureAVX512VBMI = 28,
+ CpuFeatureAVX512VBMI2 = 29,
+ CpuFeatureGFNI = 30,
+ CpuFeatureVAES = 31,
+ CpuFeatureAVX512VNNI = 32,
+ CpuFeatureAVX512BITALG = 33,
+ CpuFeatureAVX512VPOPCNTDQ = 34,
+
+ // in CPUID Leaf 7, Sub-leaf 0, EDX:
+ CpuFeatureAVX5124NNIW = 35,
+ CpuFeatureAVX5124FMAPS = 36,
+
+ // used only to indicate that the CPU detection was initialized
+ QSimdInitialized = 1
+};
+
+static const quint64 qCompilerCpuFeatures = 0
+#ifdef __SSE2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE2)
+#endif
+#ifdef __SSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSE3)
+#endif
+#ifdef __SSSE3__
+ | (Q_UINT64_C(1) << CpuFeatureSSSE3)
+#endif
+#ifdef __FMA__
+ | (Q_UINT64_C(1) << CpuFeatureFMA)
+#endif
+#ifdef __SSE4_1__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_1)
+#endif
+#ifdef __SSE4_2__
+ | (Q_UINT64_C(1) << CpuFeatureSSE4_2)
+#endif
+#ifdef __MOVBE__
+ | (Q_UINT64_C(1) << CpuFeatureMOVBE)
+#endif
+#ifdef __POPCNT__
+ | (Q_UINT64_C(1) << CpuFeaturePOPCNT)
+#endif
+#ifdef __AES__
+ | (Q_UINT64_C(1) << CpuFeatureAES)
+#endif
+#ifdef __AVX__
+ | (Q_UINT64_C(1) << CpuFeatureAVX)
+#endif
+#ifdef __F16C__
+ | (Q_UINT64_C(1) << CpuFeatureF16C)
+#endif
+#ifdef __RDRND__
+ | (Q_UINT64_C(1) << CpuFeatureRDRND)
+#endif
+#ifdef __BMI__
+ | (Q_UINT64_C(1) << CpuFeatureBMI)
+#endif
+#ifdef __HLE__
+ | (Q_UINT64_C(1) << CpuFeatureHLE)
+#endif
+#ifdef __AVX2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX2)
+#endif
+#ifdef __BMI2__
+ | (Q_UINT64_C(1) << CpuFeatureBMI2)
+#endif
+#ifdef __RTM__
+ | (Q_UINT64_C(1) << CpuFeatureRTM)
+#endif
+#ifdef __AVX512F__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512F)
+#endif
+#ifdef __AVX512DQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
+#endif
+#ifdef __RDSEED__
+ | (Q_UINT64_C(1) << CpuFeatureRDSEED)
+#endif
+#ifdef __AVX512IFMA__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
+#endif
+#ifdef __AVX512PF__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512PF)
+#endif
+#ifdef __AVX512ER__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512ER)
+#endif
+#ifdef __AVX512CD__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512CD)
+#endif
+#ifdef __SHA__
+ | (Q_UINT64_C(1) << CpuFeatureSHA)
+#endif
+#ifdef __AVX512BW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BW)
+#endif
+#ifdef __AVX512VL__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VL)
+#endif
+#ifdef __AVX512VBMI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
+#endif
+#ifdef __AVX512VBMI2__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2)
+#endif
+#ifdef __GFNI__
+ | (Q_UINT64_C(1) << CpuFeatureGFNI)
+#endif
+#ifdef __VAES__
+ | (Q_UINT64_C(1) << CpuFeatureVAES)
+#endif
+#ifdef __AVX512VNNI__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VNNI)
+#endif
+#ifdef __AVX512BITALG__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG)
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+ | (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ)
+#endif
+#ifdef __AVX5124NNIW__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW)
+#endif
+#ifdef __AVX5124FMAPS__
+ | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS)
+#endif
+ ;
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H
+
diff --git a/util/x86simdgen/generate.pl b/util/x86simdgen/generate.pl
new file mode 100755
index 0000000000..572891d483
--- /dev/null
+++ b/util/x86simdgen/generate.pl
@@ -0,0 +1,192 @@
+#!/usr/bin/env perl
+#############################################################################
+##
+## Copyright (C) 2018 Intel Corporation.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the build configuration tools of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:MIT$
+## Permission is hereby granted, free of charge, to any person obtaining a copy
+## of this software and associated documentation files (the "Software"), to deal
+## in the Software without restriction, including without limitation the rights
+## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+## copies of the Software, and to permit persons to whom the Software is
+## furnished to do so, subject to the following conditions:
+##
+## The above copyright notice and this permission notice shall be included in
+## all copies or substantial portions of the Software.
+##
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+## THE SOFTWARE.
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+use strict;
+$\ = "\n";
+$/ = "\n";
+my %leaves = (
+ Leaf1EDX => "CPUID Leaf 1, EDX",
+ Leaf1ECX => "CPUID Leaf 1, ECX",
+ Leaf7_0EBX => "CPUID Leaf 7, Sub-leaf 0, EBX",
+ Leaf7_0ECX => "CPUID Leaf 7, Sub-leaf 0, ECX",
+ Leaf7_0EDX => "CPUID Leaf 7, Sub-leaf 0, EDX",
+);
+my @leafNames = sort keys %leaves;
+
+# Read the feature table from stdin: "<name> <leaf> <bit> [<required feature>]"
+my $i = 1;
+my @features;
+while (<STDIN>) {
+ s/#.*$//; # strip comments
+ chomp;
+ next if /^\s*$/; # skip blank and whitespace-only lines
+
+ my ($name, $function, $bit, $depends) = split /\s+/;
+ die("Unknown CPUID function \"$function\"")
+ unless defined($function) && exists $leaves{$function}; # leaf must be a key of %leaves
+
+ my $id = uc($name); # identifier form of the name, e.g. "sse4.1" becomes "SSE4_1"
+ $id =~ s/[^A-Z0-9_]/_/g;
+ push @features,
+ { name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function };
+ ++$i;
+}
+
+if (my $h = shift @ARGV) { # first argument, if given, is the header output file
+ open(HEADER, ">", $h) or die("Cannot open \"$h\" for writing: $!");
+ select HEADER; # subsequent print/printf calls go to the header file
+}
+
+# Print the qsimd_x86_p.h output (fixed preamble; the q{} text is emitted verbatim)
+print q{// This is a generated file. DO NOT EDIT.
+// Please see util/x86simdgen/generate.pl
+#ifndef QSIMD_P_H
+# error "Please include <private/qsimd_p.h> instead"
+#endif
+#ifndef QSIMD_X86_P_H
+#define QSIMD_X86_P_H
+
+#include "qsimd_p.h"
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+// Macros for QT_FUNCTION_TARGET (for Clang and GCC)};
+
+# Define the feature string names for Clang and GCC
+for my $feature (@features) {
+ my $str = $feature->{name};
+ $str .= ",$feature->{depends}" if defined($feature->{depends});
+ printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
+ $feature->{id}, $str;
+}
+
+# Print the enum
+printf "\nenum CPUFeatures {";
+my $lastleaf;
+for (my $i = 0; $i < scalar @features; ++$i) {
+ my $feature = $features[$i];
+ # Leaf header:
+ printf "\n // in %s:\n", $leaves{$feature->{leaf}}
+ if $feature->{leaf} ne $lastleaf;
+ $lastleaf = $feature->{leaf};
+
+ # Feature
+ printf " CpuFeature%-13s = %d,\n", $feature->{id}, $i + 1;
+}
+
+print q{
+ // used only to indicate that the CPU detection was initialized
+ QSimdInitialized = 1
+\};
+
+static const quint64 qCompilerCpuFeatures = 0};
+
+# And print the compiler-enabled features part:
+for my $feature (@features) {
+ printf
+ "#ifdef __%s__\n" .
+ " | (Q_UINT64_C(1) << CpuFeature%s)\n" .
+ "#endif\n",
+ $feature->{id}, $feature->{id};
+}
+
+print q{ ;
+
+QT_END_NAMESPACE
+
+#endif // QSIMD_X86_P_H
+};
+
+if (my $cpp = shift @ARGV) { # second argument, if given, is the .cpp output file
+ open(CPP, ">", $cpp) or die("Cannot open \"$cpp\" for writing: $!");
+ select CPP; # subsequent print/printf calls go to the .cpp file
+} else {
+ print q{
+
+---- cut here, paste the rest into qsimd_x86.cpp ---
+
+
+};
+};
+
+print "// This is a generated file. DO NOT EDIT.";
+print "// Please see util/x86simdgen/generate.pl";
+print "#include <qglobal.h>";
+print "";
+
+# Now generate the string table and bit-location array
+my $offset = 0;
+my @offsets;
+print "static const char features_string[] =";
+for my $feature (@features) {
+ print " \" $feature->{name}\\0\"";
+ push @offsets, $offset;
+ $offset += 2 + length($feature->{name});
+}
+print " \"\\0\";";
+
+# Print the string offset table
+printf "\nstatic const %s features_indices[] = {\n %3d",
+ $offset > 255 ? "quint16" : "quint8", $offset;
+for (my $j = 0; $j < scalar @offsets; ++$j) {
+ printf ",%s%3d",
+ ($j + 1) % 8 ? " " : "\n ", $offsets[$j];
+}
+print "\n};";
+
+# Print the locator enum and table (each locator is leaf index * 32 + bit number)
+print "\nenum X86CpuidLeaves {";
+print " $_," for @leafNames;
+print " X86CpuidMaxLeaf\n};";
+
+my $type = scalar(keys %leaves) > 8 ? "quint16" : "quint8"; # 8 leaves * 32 bits = 256 values fit in quint8
+printf "\nstatic const %s x86_locators[] = {",
+ $type;
+my $lastname;
+for (my $j = 0; $j < scalar @features; ++$j) {
+ my $feature = $features[$j];
+ printf ", // %s", $lastname # close the previous entry with its name as a trailing comment
+ if defined($lastname);
+ printf "\n %s*32 + %2d",
+ $feature->{leaf}, $feature->{bit};
+ $lastname = $feature->{name};
+}
+print " // $lastname\n};";
diff --git a/util/x86simdgen/simd.txt b/util/x86simdgen/simd.txt
new file mode 100644
index 0000000000..1fce7b9497
--- /dev/null
+++ b/util/x86simdgen/simd.txt
@@ -0,0 +1,37 @@
+# Feature CPUID function Bit Required feature
+sse2 Leaf1EDX 26
+sse3 Leaf1ECX 0
+ssse3 Leaf1ECX 9
+fma Leaf1ECX 12
+sse4.1 Leaf1ECX 19
+sse4.2 Leaf1ECX 20
+movbe Leaf1ECX 22
+popcnt Leaf1ECX 23
+aes Leaf1ECX 25 sse4.2
+avx Leaf1ECX 28
+f16c Leaf1ECX 29
+rdrnd Leaf1ECX 30
+bmi Leaf7_0EBX 3
+hle Leaf7_0EBX 4
+avx2 Leaf7_0EBX 5
+bmi2 Leaf7_0EBX 8
+rtm Leaf7_0EBX 11
+avx512f Leaf7_0EBX 16
+avx512dq Leaf7_0EBX 17
+rdseed Leaf7_0EBX 18
+avx512ifma Leaf7_0EBX 21
+avx512pf Leaf7_0EBX 26
+avx512er Leaf7_0EBX 27
+avx512cd Leaf7_0EBX 28
+sha Leaf7_0EBX 29
+avx512bw Leaf7_0EBX 30
+avx512vl Leaf7_0EBX 31
+avx512vbmi Leaf7_0ECX 1
+avx512vbmi2 Leaf7_0ECX 6
+gfni Leaf7_0ECX 8
+vaes Leaf7_0ECX 9
+avx512vnni Leaf7_0ECX 11
+avx512bitalg Leaf7_0ECX 12
+avx512vpopcntdq Leaf7_0ECX 14
+avx5124nniw Leaf7_0EDX 2
+avx5124fmaps Leaf7_0EDX 3