summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r--src/corelib/tools/qsimd.cpp182
1 files changed, 121 insertions, 61 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 91700c053d..825028037f 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -78,28 +78,28 @@ static inline uint detectProcessorFeatures()
return 0;
}
#elif defined (Q_OS_WINCE)
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
- uint features = 0;
+ quint64 features = 0;
#if defined (ARM)
# ifdef PF_ARM_NEON
if (IsProcessorFeaturePresent(PF_ARM_NEON))
- features |= ARM_NEON;
+ features |= Q_UINT64_C(1) << CpuFeatureNEON;
# endif
#elif defined(_X86_)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
- features |= SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
- features |= SSE3;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE3;
#endif
return features;
}
#elif defined(Q_PROCESSOR_ARM)
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
- uint features = 0;
+ quint64 features = 0;
#if defined(Q_OS_LINUX)
int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
@@ -117,7 +117,7 @@ static inline uint detectProcessorFeatures()
for (int i = 0; i < max; i += 2)
if (vector[i] == AT_HWCAP) {
if (vector[i+1] & HWCAP_NEON)
- features |= NEON;
+ features |= Q_UINT64_C(1) << CpuFeatureNEON;
break;
}
}
@@ -129,7 +129,7 @@ static inline uint detectProcessorFeatures()
#endif
#if defined(__ARM_NEON__)
- features = NEON;
+ features = Q_UINT64_C(1) << CpuFeatureNEON;
#endif
return features;
@@ -257,6 +257,8 @@ static quint64 detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
+ static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2);
+ static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();
@@ -269,28 +271,23 @@ static quint64 detectProcessorFeatures()
uint cpuid01ECX = 0, cpuid01EDX = 0;
cpuidFeatures01(cpuid01ECX, cpuid01EDX);
+
+ // the low 32-bits of features is cpuid01ECX
+ // note: we need to check OS support for saving the AVX register state
+ features = cpuid01ECX;
+
#if defined(Q_PROCESSOR_X86_32)
// x86 might not have SSE2 support
if (cpuid01EDX & (1u << 26))
- features |= SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
+ else
+ features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
// we should verify that the OS enabled saving of the SSE state...
#else
// x86-64 or x32
- features = SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
#endif
- // common part between 32- and 64-bit
- if (cpuid01ECX & (1u))
- features |= SSE3;
- if (cpuid01ECX & (1u << 9))
- features |= SSSE3;
- if (cpuid01ECX & (1u << 19))
- features |= SSE4_1;
- if (cpuid01ECX & (1u << 20))
- features |= SSE4_2;
- if (cpuid01ECX & (1u << 25))
- features |= 0; // AES, enable if needed
-
uint xgetbvA = 0, xgetbvD = 0;
if (cpuid01ECX & (1u << 27)) {
// XGETBV enabled
@@ -298,22 +295,17 @@ static quint64 detectProcessorFeatures()
}
uint cpuid0700EBX = 0;
- if (cpuidLevel >= 7)
+ if (cpuidLevel >= 7) {
cpuidFeatures07_00(cpuid0700EBX);
- if ((xgetbvA & AVXState) == AVXState) {
- // support for YMM and XMM registers is enabled
- if (cpuid01ECX & (1u << 28))
- features |= AVX;
-
- if (cpuid0700EBX & (1u << 5))
- features |= AVX2;
+ // the high 32-bits of features is cpuid0700EBX
+ features |= quint64(cpuid0700EBX) << 32;
}
- if (cpuid0700EBX & (1u << 4))
- features |= HLE; // Hardware Lock Ellision
- if (cpuid0700EBX & (1u << 11))
- features |= RTM; // Restricted Transactional Memory
+ if ((xgetbvA & AVXState) != AVXState) {
+ // support for YMM registers is disabled, disable all AVX
+ features &= ~AllAVX;
+ }
return features;
}
@@ -430,24 +422,24 @@ static bool procCpuinfoContains(const char *prefix, const char *string)
}
#endif
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
// NOTE: MIPS 74K cores are the only ones supporting DSPr2.
- uint flags = 0;
+ quint64 flags = 0;
#if defined __mips_dsp
- flags |= DSP;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSP;
# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
# elif defined(Q_OS_LINUX)
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
# endif
#elif defined(Q_OS_LINUX)
if (procCpuinfoContains("ASEs implemented", "dsp")) {
- flags |= DSP;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSP;
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
}
#endif
@@ -462,47 +454,115 @@ static inline uint detectProcessorFeatures()
#endif
/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below.
- * Here's the data (don't forget the ONE leading space):
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * that the x86 version has a lot of blanks that must be kept and that the
+ * offset table's type is changed to make the table smaller. We also remove the
+ * terminating -1 that the script adds.
+ */
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
neon
- sse2
+ */
+static const char features_string[] = " neon\0";
+static const int features_indices[] = { 0 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+/* Data:
sse3
+ sse2
+
+
+
+
+
+
+
ssse3
+
+
+ fma
+ cmpxchg16b
+
+
+
+
+
sse4.1
sse4.2
+
+ movbe
+ popcnt
+
+ aes
+
+
avx
- avx2
+ f16c
+ rdrand
+
+
+
+
+ bmi
hle
- rtm
- dsp
- dspr2
- */
+ avx2
-// begin generated
+
+ bmi2
+
+
+ rtm
+ */
static const char features_string[] =
- "\0"
- " neon\0"
- " sse2\0"
" sse3\0"
+ " sse2\0"
" ssse3\0"
+ " fma\0"
+ " cmpxchg16b\0"
" sse4.1\0"
" sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
" avx\0"
- " avx2\0"
+ " f16c\0"
+ " rdrand\0"
+ " bmi\0"
" hle\0"
+ " avx2\0"
+ " bmi2\0"
" rtm\0"
- " dsp\0"
- " dspr2\0"
"\0";
-static const int features_indices[] = {
- 0, 1, 7, 13, 19, 26, 34, 42,
- 47, 53, 58, 63, 68, -1
+static const quint8 features_indices[] = {
+ 0, 6, 5, 5, 5, 5, 5, 5,
+ 5, 12, 5, 5, 19, 24, 5, 5,
+ 5, 5, 5, 36, 44, 5, 52, 59,
+ 5, 67, 5, 5, 72, 77, 83, 5,
+ 5, 5, 5, 91, 96, 101, 5, 5,
+ 107, 5, 5, 113
};
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
// end generated
-static const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
+static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;