summary refs log tree commit diff stats
path: root/src/corelib/tools/qsimd_p.h
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2012-04-02 20:52:28 -0300
committer Qt by Nokia <qt-info@nokia.com> 2012-07-02 10:09:31 +0200
commit 978937ab4a44179224a3021f89555bfa988fe687 (patch)
tree 2dbe36298a7218566549a7fe3dc7c059a3b0ac8c /src/corelib/tools/qsimd_p.h
parent f58390e0f495e229d9f2f1301c3a9dec978af9c2 (diff)
Make the CPU detection much more efficient in user code
First, check that the option in question hasn't been already enabled by the compiler, via compiler switches. If it has been, then we don't need to verify anything, and we can assume that it's safe to use such instructions. For example, on an x86-64 build, qCpuHasFeature(SSE2) is always a constant true. If the compile-time check fails, then we proceed to try and detect the processor features at runtime. But instead of insisting on a call to qDetectCPUFeatures, allow the code using the detection to read from a variable and simply test it for values. Only if the variable isn't initialised should it make a function call. The Q_ASSUME allows this code to be very efficient even with multiple uses of qCpuHasFeature. Change the uninitialised value from -1 to 0 so that simpler instructions can be used to check for non-initialisation. The qDetectCPUFeatures function is renamed to qDetectCpuFeatures to match the Qt coding style and also to catch uses of this code that need to be adapted. Change-Id: I24ca5a6ad21075e2e249e1a4f8f5057b8f68ce7c Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- src/corelib/tools/qsimd_p.h 63
1 files changed, 60 insertions, 3 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 0cc6bf4b76..ce9c7b789f 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -43,6 +43,7 @@
#define QSIMD_P_H
#include <qglobal.h>
+#include <qatomic.h>
QT_BEGIN_HEADER
@@ -172,7 +173,6 @@ QT_BEGIN_NAMESPACE
enum CPUFeatures {
- None = 0,
IWMMXT = 0x1,
NEON = 0x2,
SSE2 = 0x4,
@@ -183,10 +183,67 @@ enum CPUFeatures {
AVX = 0x80,
AVX2 = 0x100,
HLE = 0x200,
- RTM = 0x400
+ RTM = 0x400,
+
+ // used only to indicate that the CPU detection was initialised
+ QSimdInitialized = 0x80000000
};
-Q_CORE_EXPORT uint qDetectCPUFeatures();
+static const uint qCompilerCpuFeatures = 0
+#if defined __RTM__
+ | RTM
+#endif
+#if defined __HLE__
+ | HLE
+#endif
+#if defined __AVX2__
+ | AVX2
+#endif
+#if defined __AVX__
+ | AVX
+#endif
+#if defined __SSE4_2__
+ | SSE4_2
+#endif
+#if defined __SSE4_1__
+ | SSE4_1
+#endif
+#if defined __SSSE3__
+ | SSSE3
+#endif
+#if defined __SSE3__
+ | SSE3
+#endif
+#if defined __SSE2__
+ | SSE2
+#endif
+#if defined __ARM_NEON__
+ | NEON
+#endif
+#if defined __IWMMXT__
+ | IWMMXT
+#endif
+ ;
+
+
+extern Q_CORE_EXPORT QBasicAtomicInt qt_cpu_features;
+Q_CORE_EXPORT void qDetectCpuFeatures();
+
+inline uint qCpuFeatures()
+{
+ int features = qt_cpu_features.load();
+ if (Q_UNLIKELY(features == 0)) {
+ qDetectCpuFeatures();
+ features = qt_cpu_features.load();
+ Q_ASSUME(features != 0);
+ }
+ return uint(features);
+}
+
+inline uint qCpuHasFeature(CPUFeatures feature)
+{
+ return qCompilerCpuFeatures & feature || qCpuFeatures() & feature;
+}
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \