summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qsimd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r--src/corelib/tools/qsimd.cpp298
1 files changed, 211 insertions, 87 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 4459d89e07..f07eb098f2 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -43,7 +43,9 @@
#if defined(Q_OS_WIN)
# if defined(Q_OS_WINCE)
# include <qt_windows.h>
-# include <cmnintrin.h>
+# if _WIN32_WCE < 0x800
+# include <cmnintrin.h>
+# endif
# endif
# if !defined(Q_CC_GNU)
# ifndef Q_OS_WINCE
@@ -76,28 +78,28 @@ static inline uint detectProcessorFeatures()
return 0;
}
#elif defined (Q_OS_WINCE)
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
- uint features = 0;
+ quint64 features = 0;
#if defined (ARM)
# ifdef PF_ARM_NEON
if (IsProcessorFeaturePresent(PF_ARM_NEON))
- features |= ARM_NEON;
+ features |= Q_UINT64_C(1) << CpuFeatureNEON;
# endif
#elif defined(_X86_)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
- features |= SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
- features |= SSE3;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE3;
#endif
return features;
}
#elif defined(Q_PROCESSOR_ARM)
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
- uint features = 0;
+ quint64 features = 0;
#if defined(Q_OS_LINUX)
int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
@@ -115,7 +117,7 @@ static inline uint detectProcessorFeatures()
for (int i = 0; i < max; i += 2)
if (vector[i] == AT_HWCAP) {
if (vector[i+1] & HWCAP_NEON)
- features |= NEON;
+ features |= Q_UINT64_C(1) << CpuFeatureNEON;
break;
}
}
@@ -127,7 +129,7 @@ static inline uint detectProcessorFeatures()
#endif
#if defined(__ARM_NEON__)
- features = NEON;
+ features = Q_UINT64_C(1) << CpuFeatureNEON;
#endif
return features;
@@ -203,21 +205,24 @@ static void cpuidFeatures01(uint &ecx, uint &edx)
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
-static void cpuidFeatures07_00(uint &ebx)
+static void cpuidFeatures07_00(uint &ebx, uint &ecx)
{
#if defined(Q_CC_GNU)
qregisteruint rbx; // in case it's 64-bit
+ qregisteruint rcx = 0;
asm ("xchg " PICreg", %0\n"
"cpuid\n"
"xchg " PICreg", %0\n"
- : "=&r" (rbx)
- : "a" (7), "c" (0)
+ : "=&r" (rbx), "+&c" (rcx)
+ : "a" (7)
: "%edx");
ebx = rbx;
+ ecx = rcx;
#elif defined(Q_OS_WIN)
int info[4];
__cpuidex(info, 7, 0);
ebx = info[1];
+ ecx = info[2];
#endif
}
@@ -238,7 +243,7 @@ static void xgetbv(uint in, uint &eax, uint &edx)
#endif
}
-static inline uint detectProcessorFeatures()
+static quint64 detectProcessorFeatures()
{
// Flags from the CR0 / XCR0 state register
enum XCR0Flags {
@@ -255,8 +260,15 @@ static inline uint detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
-
- uint features = 0;
+ static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512VL) |
+ (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
+ static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
+
+ quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();
#if Q_PROCESSOR_X86 < 5
if (cpuidLevel < 1)
@@ -267,28 +279,23 @@ static inline uint detectProcessorFeatures()
uint cpuid01ECX = 0, cpuid01EDX = 0;
cpuidFeatures01(cpuid01ECX, cpuid01EDX);
+
+ // the low 32-bits of features is cpuid01ECX
+ // note: we need to check OS support for saving the AVX register state
+ features = cpuid01ECX;
+
#if defined(Q_PROCESSOR_X86_32)
// x86 might not have SSE2 support
if (cpuid01EDX & (1u << 26))
- features |= SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
+ else
+ features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
// we should verify that the OS enabled saving of the SSE state...
#else
// x86-64 or x32
- features = SSE2;
+ features |= Q_UINT64_C(1) << CpuFeatureSSE2;
#endif
- // common part between 32- and 64-bit
- if (cpuid01ECX & (1u))
- features |= SSE3;
- if (cpuid01ECX & (1u << 9))
- features |= SSSE3;
- if (cpuid01ECX & (1u << 19))
- features |= SSE4_1;
- if (cpuid01ECX & (1u << 20))
- features |= SSE4_2;
- if (cpuid01ECX & (1u << 25))
- features |= 0; // AES, enable if needed
-
uint xgetbvA = 0, xgetbvD = 0;
if (cpuid01ECX & (1u << 27)) {
// XGETBV enabled
@@ -296,22 +303,27 @@ static inline uint detectProcessorFeatures()
}
uint cpuid0700EBX = 0;
- if (cpuidLevel >= 7)
- cpuidFeatures07_00(cpuid0700EBX);
-
- if ((xgetbvA & AVXState) == AVXState) {
- // support for YMM and XMM registers is enabled
- if (cpuid01ECX & (1u << 28))
- features |= AVX;
+ uint cpuid0700ECX = 0;
+ if (cpuidLevel >= 7) {
+ cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX);
- if (cpuid0700EBX & (1u << 5))
- features |= AVX2;
+ // the high 32-bits of features is cpuid0700EBX
+ features |= quint64(cpuid0700EBX) << 32;
}
- if (cpuid0700EBX & (1u << 4))
- features |= HLE; // Hardware Lock Ellision
- if (cpuid0700EBX & (1u << 11))
- features |= RTM; // Restricted Transactional Memory
+ if ((xgetbvA & AVXState) != AVXState) {
+ // support for YMM registers is disabled, disable all AVX
+ features &= ~AllAVX;
+ } else if ((xgetbvA & AVX512State) != AVX512State) {
+ // support for ZMM registers or mask registers is disabled, disable all AVX512
+ features &= ~AllAVX512;
+ } else {
+ // this feature is out of order
+ if (cpuid0700ECX & (1u << 1))
+ features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI;
+ else
+ features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI);
+ }
return features;
}
@@ -428,24 +440,24 @@ static bool procCpuinfoContains(const char *prefix, const char *string)
}
#endif
-static inline uint detectProcessorFeatures()
+static inline quint64 detectProcessorFeatures()
{
// NOTE: MIPS 74K cores are the only ones supporting DSPr2.
- uint flags = 0;
+ quint64 flags = 0;
#if defined __mips_dsp
- flags |= DSP;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSP;
# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
# elif defined(Q_OS_LINUX)
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
# endif
#elif defined(Q_OS_LINUX)
if (procCpuinfoContains("ASEs implemented", "dsp")) {
- flags |= DSP;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSP;
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
- flags |= DSPR2;
+ flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
}
#endif
@@ -460,70 +472,179 @@ static inline uint detectProcessorFeatures()
#endif
/*
- * Use kdesdk/scripts/generate_string_table.pl to update the table below.
- * Here's the data (don't forget the ONE leading space):
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
+ * that the x86 version has a lot of blanks that must be kept and that the
+ * offset table's type is changed to make the table smaller. We also remove the
+ * terminating -1 that the script adds.
+ */
+// begin generated
+#if defined(Q_PROCESSOR_ARM)
+/* Data:
neon
- sse2
+ */
+static const char features_string[] = " neon\0";
+static const int features_indices[] = { 0 };
+#elif defined(Q_PROCESSOR_MIPS)
+/* Data:
+ dsp
+ dspr2
+*/
+static const char features_string[] =
+ " dsp\0"
+ " dspr2\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5
+};
+#elif defined(Q_PROCESSOR_X86)
+/* Data:
sse3
+ sse2
+ avx512vbmi
+
+
+
+
+
+
ssse3
+
+
+ fma
+ cmpxchg16b
+
+
+
+
+
sse4.1
sse4.2
+
+ movbe
+ popcnt
+
+ aes
+
+
avx
- avx2
+ f16c
+ rdrand
+
+
+
+
+ bmi
hle
+ avx2
+
+
+ bmi2
+
+
rtm
- dsp
- dspr2
- */
-// begin generated
+
+
+
+ avx512f
+ avx512dq
+ rdseed
+
+
+ avx512ifma
+
+
+
+
+ avx512pf
+ avx512er
+ avx512cd
+ sha
+ avx512bw
+ avx512vl
+ */
static const char features_string[] =
- "\0"
- " neon\0"
- " sse2\0"
" sse3\0"
+ " sse2\0"
+ " avx512vbmi\0"
" ssse3\0"
+ " fma\0"
+ " cmpxchg16b\0"
" sse4.1\0"
" sse4.2\0"
+ " movbe\0"
+ " popcnt\0"
+ " aes\0"
" avx\0"
- " avx2\0"
+ " f16c\0"
+ " rdrand\0"
+ " bmi\0"
" hle\0"
+ " avx2\0"
+ " bmi2\0"
" rtm\0"
- " dsp\0"
- " dspr2\0"
+ " avx512f\0"
+ " avx512dq\0"
+ " rdseed\0"
+ " avx512ifma\0"
+ " avx512pf\0"
+ " avx512er\0"
+ " avx512cd\0"
+ " sha\0"
+ " avx512bw\0"
+ " avx512vl\0"
"\0";
-static const int features_indices[] = {
- 0, 1, 7, 13, 19, 26, 34, 42,
- 47, 53, 58, 63, 68, -1
+static const quint8 features_indices[] = {
+ 0, 6, 12, 5, 5, 5, 5, 5,
+ 5, 24, 5, 5, 31, 36, 5, 5,
+ 5, 5, 5, 48, 56, 5, 64, 71,
+ 5, 79, 5, 5, 84, 89, 95, 5,
+ 5, 5, 5, 103, 108, 113, 5, 5,
+ 119, 5, 5, 125, 5, 5, 5, 5,
+ 130, 139, 149, 5, 5, 157, 5, 5,
+ 5, 5, 169, 179, 189, 199, 204, 214
};
+#else
+static const char features_string[] = "";
+static const int features_indices[] = { };
+#endif
// end generated
-static const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
+static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
// record what CPU features were enabled by default in this Qt build
-static const uint minFeature = qCompilerCpuFeatures;
+static const quint64 minFeature = qCompilerCpuFeatures;
#ifdef Q_OS_WIN
#if defined(Q_CC_GNU)
-# define ffs __builtin_ffs
+# define ffsll __builtin_ffsll
#else
-int ffs(int i)
+int ffsll(quint64 i)
{
-#ifndef Q_OS_WINCE
+#if defined(Q_OS_WIN64)
unsigned long result;
- return _BitScanForward(&result, i) ? result : 0;
+ return _BitScanForward64(&result, i) ? result : 0;
+#elif !defined(Q_OS_WINCE)
+ unsigned long result;
+ return _BitScanForward(&result, i) ? result :
+ _BitScanForward(&result, i >> 32) ? result + 32 : 0;
#else
return 0;
#endif
}
#endif
-#elif defined(Q_OS_ANDROID)
-# define ffs __builtin_ffs
+#elif defined(Q_OS_ANDROID) || defined(Q_OS_QNX) || defined(Q_OS_OSX) || defined(Q_OS_HAIKU)
+# define ffsll __builtin_ffsll
#endif
-QBasicAtomicInt qt_cpu_features = Q_BASIC_ATOMIC_INITIALIZER(0);
+#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
+Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
+#else
+Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
+#endif
void qDetectCpuFeatures()
{
@@ -545,17 +666,17 @@ void qDetectCpuFeatures()
// contains all the features that the code required. Qt 4 ran for years
// like that, so it shouldn't be a problem.
- qt_cpu_features.store(minFeature | QSimdInitialized);
+ qt_cpu_features.store(minFeature | quint32(QSimdInitialized));
return;
# endif
#endif
- uint f = detectProcessorFeatures();
+ quint64 f = detectProcessorFeatures();
QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
if (!disable.isEmpty()) {
disable.prepend(' ');
for (int i = 0; i < features_count; ++i) {
if (disable.contains(features_string + features_indices[i]))
- f &= ~(1 << i);
+ f &= ~(Q_UINT64_C(1) << i);
}
}
@@ -565,29 +686,32 @@ void qDetectCpuFeatures()
bool runningOnValgrind = false;
#endif
if (!runningOnValgrind && (minFeature != 0 && (f & minFeature) != minFeature)) {
- uint missing = minFeature & ~f;
+ quint64 missing = minFeature & ~f;
fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
for (int i = 0; i < features_count; ++i) {
- if (missing & (1 << i))
+ if (missing & (Q_UINT64_C(1) << i))
fprintf(stderr, "%s", features_string + features_indices[i]);
}
fprintf(stderr, "\n");
fflush(stderr);
- qFatal("Aborted. Incompatible processor: missing feature 0x%x -%s.", missing,
- features_string + features_indices[ffs(missing) - 1]);
+ qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
+ features_string + features_indices[ffsll(missing) - 1]);
}
- qt_cpu_features.store(f | QSimdInitialized);
+ qt_cpu_features[0].store(f | quint32(QSimdInitialized));
+#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
+ qt_cpu_features[1].store(f >> 32);
+#endif
}
void qDumpCPUFeatures()
{
- uint features = qCpuFeatures();
+ quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
printf("Processor features: ");
for (int i = 0; i < features_count; ++i) {
- if (features & (1 << i))
+ if (features & (Q_UINT64_C(1) << i))
printf("%s%s", features_string + features_indices[i],
- minFeature & (1 << i) ? "[required]" : "");
+ minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
}
puts("");
}