summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r--src/corelib/tools/qsimd.cpp271
1 files changed, 75 insertions, 196 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 25340f2d02..07a8b022bc 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -80,6 +80,43 @@
QT_BEGIN_NAMESPACE
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * we remove the terminating -1 that the script adds.
+ */
+
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
+ neon
+ crc32
+ */
+static const char features_string[] =
+ " neon\0"
+ " crc32\0"
+ "\0";
+static const int features_indices[] = { 0, 6 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+# include "qsimd_x86.cpp" // generated by util/x86simdgen
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
+// end generated
+
#if defined (Q_OS_NACL)
static inline uint detectProcessorFeatures()
{
@@ -153,7 +190,9 @@ static inline quint64 detectProcessorFeatures()
static int maxBasicCpuidSupported()
{
-#if defined(Q_CC_GNU)
+#if defined(Q_CC_EMSCRIPTEN)
+ return 6; // All features supported by Emscripten
+#elif defined(Q_CC_GNU)
qregisterint tmp1;
# if Q_PROCESSOR_X86 < 5
@@ -198,7 +237,7 @@ static int maxBasicCpuidSupported()
static void cpuidFeatures01(uint &ecx, uint &edx)
{
-#if defined(Q_CC_GNU)
+#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
qregisterint tmp1;
asm ("xchg " PICreg", %2\n"
"cpuid\n"
@@ -215,6 +254,9 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
__CPUID(1, info);
ecx = info[2];
edx = info[3];
+#else
+ Q_UNUSED(ecx);
+ Q_UNUSED(edx);
#endif
}
@@ -222,29 +264,32 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx, uint &ecx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
-#if defined(Q_CC_GNU)
+#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
qregisteruint rbx; // in case it's 64-bit
qregisteruint rcx = 0;
+ qregisteruint rdx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx), "+&c" (rcx)
- : "a" (7)
- : "%edx");
+ : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
+ : "a" (7));
ebx = rbx;
ecx = rcx;
+ edx = rdx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#elif defined(Q_CC_GHS)
unsigned int info[4];
__CPUIDEX(7, 0, info);
ebx = info[1];
ecx = info[2];
+ edx = info[3];
#endif
}
@@ -254,7 +299,7 @@ inline quint64 _xgetbv(__int64) { return 0; }
#endif
static void xgetbv(uint in, uint &eax, uint &edx)
{
-#if defined(Q_CC_GNU) || defined(Q_CC_GHS)
+#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
: "=a" (eax), "=d" (edx)
: "c" (in));
@@ -262,6 +307,10 @@ static void xgetbv(uint in, uint &eax, uint &edx)
quint64 result = _xgetbv(in);
eax = result;
edx = result >> 32;
+#else
+ Q_UNUSED(in);
+ Q_UNUSED(eax);
+ Q_UNUSED(edx);
#endif
}
@@ -282,13 +331,8 @@ static quint64 detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
- static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
- (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
- (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
- (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
- (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
- static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
- static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
+ static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
+ static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();
@@ -299,52 +343,33 @@ static quint64 detectProcessorFeatures()
Q_ASSERT(cpuidLevel >= 1);
#endif
- uint cpuid01ECX = 0, cpuid01EDX = 0;
- cpuidFeatures01(cpuid01ECX, cpuid01EDX);
-
- // the low 32-bits of features is cpuid01ECX
- // note: we need to check OS support for saving the AVX register state
- features = cpuid01ECX;
-
-#if defined(Q_PROCESSOR_X86_32)
- // x86 might not have SSE2 support
- if (cpuid01EDX & (1u << 26))
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
- // we should verify that the OS enabled saving of the SSE state...
-#else
- // x86-64 or x32
- features |= Q_UINT64_C(1) << CpuFeatureSSE2;
-#endif
+ uint results[X86CpuidMaxLeaf] = {};
+ cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
+ if (cpuidLevel >= 7)
+ cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
+
+ // populate our feature list
+ for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
+ uint word = x86_locators[i] / 32;
+ uint bit = 1U << (x86_locators[i] % 32);
+ quint64 feature = Q_UINT64_C(1) << (i + 1);
+ if (results[word] & bit)
+ features |= feature;
+ }
+ // now check the AVX state
uint xgetbvA = 0, xgetbvD = 0;
- if (cpuid01ECX & (1u << 27)) {
+ if (results[Leaf1ECX] & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
- uint cpuid0700EBX = 0;
- uint cpuid0700ECX = 0;
- if (cpuidLevel >= 7) {
- cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
-
- // the high 32-bits of features is cpuid0700EBX
- features |= quint64(cpuid0700EBX) << 32;
- }
-
if ((xgetbvA & AVXState) != AVXState) {
// support for YMM registers is disabled, disable all AVX
features &= ~AllAVX;
} else if ((xgetbvA & AVX512State) != AVX512State) {
// support for ZMM registers or mask registers is disabled, disable all AVX512
features &= ~AllAVX512;
- } else {
- // this feature is out of order
- if (cpuid0700ECX & (1u << 1))
- features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
- else
- features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
}
return features;
@@ -493,152 +518,6 @@ static inline uint detectProcessorFeatures()
}
#endif
-/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
- * that the x86 version has a lot of blanks that must be kept and that the
- * offset table's type is changed to make the table smaller. We also remove the
- * terminating -1 that the script adds.
- */
-
-// begin generated
-#if defined(Q_PROCESSOR_ARM)
-/* Data:
- neon
- crc32
- */
-static const char features_string[] =
- " neon\0"
- " crc32\0"
- "\0";
-static const int features_indices[] = { 0, 6 };
-#elif defined(Q_PROCESSOR_MIPS)
-/* Data:
- dsp
- dspr2
-*/
-static const char features_string[] =
- " dsp\0"
- " dspr2\0"
- "\0";
-
-static const int features_indices[] = {
- 0, 5
-};
-#elif defined(Q_PROCESSOR_X86)
-/* Data:
- sse3
- sse2
- avx512vbmi
-
-
-
-
-
-
- ssse3
-
-
- fma
- cmpxchg16b
-
-
-
-
-
- sse4.1
- sse4.2
-
- movbe
- popcnt
-
- aes
-
-
- avx
- f16c
- rdrand
-
-
-
-
- bmi
- hle
- avx2
-
-
- bmi2
-
-
- rtm
-
-
-
-
- avx512f
- avx512dq
- rdseed
-
-
- avx512ifma
-
-
-
-
- avx512pf
- avx512er
- avx512cd
- sha
- avx512bw
- avx512vl
- */
-static const char features_string[] =
- " sse3\0"
- " sse2\0"
- " avx512vbmi\0"
- " ssse3\0"
- " fma\0"
- " cmpxchg16b\0"
- " sse4.1\0"
- " sse4.2\0"
- " movbe\0"
- " popcnt\0"
- " aes\0"
- " avx\0"
- " f16c\0"
- " rdrand\0"
- " bmi\0"
- " hle\0"
- " avx2\0"
- " bmi2\0"
- " rtm\0"
- " avx512f\0"
- " avx512dq\0"
- " rdseed\0"
- " avx512ifma\0"
- " avx512pf\0"
- " avx512er\0"
- " avx512cd\0"
- " sha\0"
- " avx512bw\0"
- " avx512vl\0"
- "\0";
-
-static const quint8 features_indices[] = {
- 0, 6, 12, 5, 5, 5, 5, 5,
- 5, 24, 5, 5, 31, 36, 5, 5,
- 5, 5, 5, 48, 56, 5, 64, 71,
- 5, 79, 5, 5, 84, 89, 95, 5,
- 5, 5, 5, 103, 108, 113, 5, 5,
- 119, 5, 5, 125, 5, 5, 5, 5,
- 130, 139, 149, 5, 5, 157, 5, 5,
- 5, 5, 169, 179, 189, 199, 204, 214
-};
-#else
-static const char features_string[] = "";
-static const int features_indices[] = { };
-#endif
-// end generated
-
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build