summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2012-04-02 20:52:28 -0300
committerQt by Nokia <qt-info@nokia.com>2012-07-02 10:09:31 +0200
commit978937ab4a44179224a3021f89555bfa988fe687 (patch)
tree2dbe36298a7218566549a7fe3dc7c059a3b0ac8c
parentf58390e0f495e229d9f2f1301c3a9dec978af9c2 (diff)
Make the CPU detection much more efficient in user code
First, check that the option in question hasn't already been enabled by the compiler, via compiler switches. If it has been, then we don't need to verify anything, and we can assume that it's safe to use such instructions. For example, on an x86-64 build, qCpuHasFeature(SSE2) is always a constant true. If the compile-time check fails, then we proceed to try and detect the processor features at runtime. But instead of insisting on a call to qDetectCPUFeatures, allow the code using the detection to read from a variable and simply test it for values. Only if the variable isn't initialised should it make a function call. The Q_ASSUME allows this code to be very efficient even with multiple uses of qCpuHasFeature. Change the uninitialised value from -1 to 0 so that simpler instructions can be used to check for non-initialisation. The qDetectCPUFeatures function is renamed to qDetectCpuFeatures to match the Qt coding style and also to catch uses of this code that need to be adapted. Change-Id: I24ca5a6ad21075e2e249e1a4f8f5057b8f68ce7c Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/tools/qsimd.cpp47
-rw-r--r--src/corelib/tools/qsimd_p.h63
-rw-r--r--src/gui/image/qimage.cpp34
-rw-r--r--src/gui/image/qjpeghandler.cpp6
-rw-r--r--src/gui/painting/qdrawhelper.cpp2
5 files changed, 87 insertions, 65 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index c399a5a527..df41e6bd6d 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -355,39 +355,9 @@ static const int features_indices[] = {
static const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
-static const uint minFeature = None
-#if defined __RTM__
- | RTM
-#endif
+// record what CPU features were enabled by default in this Qt build
// don't define for HLE, since the HLE prefix can be run on older CPUs
-#if defined __AVX2__
- | AVX2
-#endif
-#if defined __AVX__
- | AVX
-#endif
-#if defined __SSE4_2__
- | SSE4_2
-#endif
-#if defined __SSE4_1__
- | SSE4_1
-#endif
-#if defined __SSSE3__
- | SSSE3
-#endif
-#if defined __SSE3__
- | SSE3
-#endif
-#if defined __SSE2__
- | SSE2
-#endif
-#if defined __ARM_NEON__
- | NEON
-#endif
-#if defined __IWMMXT__
- | IWMMXT
-#endif
- ;
+static const uint minFeature = qCompilerCpuFeatures & ~HLE;
#ifdef Q_OS_WIN
#if defined(Q_CC_GNU)
@@ -405,12 +375,10 @@ int ffs(int i)
#endif
#endif // Q_OS_WIN
-uint qDetectCPUFeatures()
-{
- static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
- if (features.load() != -1)
- return features.load();
+QBasicAtomicInt qt_cpu_features = Q_BASIC_ATOMIC_INITIALIZER(0);
+void qDetectCpuFeatures()
+{
uint f = detectProcessorFeatures();
QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
if (!disable.isEmpty()) {
@@ -434,13 +402,12 @@ uint qDetectCPUFeatures()
features_string + features_indices[ffs(missing) - 1]);
}
- features.store(f);
- return f;
+ qt_cpu_features.store(f | QSimdInitialized);
}
void qDumpCPUFeatures()
{
- uint features = qDetectCPUFeatures();
+ uint features = qCpuFeatures();
printf("Processor features: ");
for (int i = 0; i < features_count; ++i) {
if (features & (1 << i))
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 0cc6bf4b76..ce9c7b789f 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -43,6 +43,7 @@
#define QSIMD_P_H
#include <qglobal.h>
+#include <qatomic.h>
QT_BEGIN_HEADER
@@ -172,7 +173,6 @@ QT_BEGIN_NAMESPACE
enum CPUFeatures {
- None = 0,
IWMMXT = 0x1,
NEON = 0x2,
SSE2 = 0x4,
@@ -183,10 +183,67 @@ enum CPUFeatures {
AVX = 0x80,
AVX2 = 0x100,
HLE = 0x200,
- RTM = 0x400
+ RTM = 0x400,
+
+ // used only to indicate that the CPU detection was initialised
+ QSimdInitialized = 0x80000000
};
-Q_CORE_EXPORT uint qDetectCPUFeatures();
+static const uint qCompilerCpuFeatures = 0
+#if defined __RTM__
+ | RTM
+#endif
+#if defined __HLE__
+ | HLE
+#endif
+#if defined __AVX2__
+ | AVX2
+#endif
+#if defined __AVX__
+ | AVX
+#endif
+#if defined __SSE4_2__
+ | SSE4_2
+#endif
+#if defined __SSE4_1__
+ | SSE4_1
+#endif
+#if defined __SSSE3__
+ | SSSE3
+#endif
+#if defined __SSE3__
+ | SSE3
+#endif
+#if defined __SSE2__
+ | SSE2
+#endif
+#if defined __ARM_NEON__
+ | NEON
+#endif
+#if defined __IWMMXT__
+ | IWMMXT
+#endif
+ ;
+
+
+extern Q_CORE_EXPORT QBasicAtomicInt qt_cpu_features;
+Q_CORE_EXPORT void qDetectCpuFeatures();
+
+inline uint qCpuFeatures()
+{
+ int features = qt_cpu_features.load();
+ if (Q_UNLIKELY(features == 0)) {
+ qDetectCpuFeatures();
+ features = qt_cpu_features.load();
+ Q_ASSUME(features != 0);
+ }
+ return uint(features);
+}
+
+inline uint qCpuHasFeature(CPUFeatures feature)
+{
+ return qCompilerCpuFeatures & feature || qCpuFeatures() & feature;
+}
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 8536e59440..b16ae2ffce 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -3270,12 +3270,8 @@ static InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QIma
void qInitImageConversions()
{
- const uint features = qDetectCPUFeatures();
- Q_UNUSED(features);
-
-#ifdef QT_COMPILER_SUPPORTS_SSE2
#ifdef QT_COMPILER_SUPPORTS_AVX
- if (features & AVX) {
+ if (qCpuHasFeature(AVX)) {
extern bool convert_ARGB_to_ARGB_PM_inplace_avx(QImageData *data, Qt::ImageConversionFlags);
inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_avx;
@@ -3283,29 +3279,33 @@ void qInitImageConversions()
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_avx;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_avx;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_avx;
- } else
+ return;
+ }
#endif
- if (features & SSE2) {
- extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
- inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2;
+#if defined(QT_COMPILER_SUPPORTS_SSE2) && !defined(__AVX__)
+ if (qCpuHasFeature(SSE2)) {
+ extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
+ inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2;
#ifdef QT_COMPILER_SUPPORTS_SSSE3
- if (features & SSSE3) {
- extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
- converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3;
- converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3;
- converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3;
- }
-#endif
+ if (qCpuHasFeature(SSSE3)) {
+ extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
+ converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3;
+ converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3;
+ converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3;
}
+#endif
+ return;
+ }
#endif // SSE2
#ifdef QT_COMPILER_SUPPORTS_NEON
- if (features & NEON) {
+ if (qCpuHasFeature(NEON)) {
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_neon;
+ return;
}
#endif
}
diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp
index eff106a486..057bfc2592 100644
--- a/src/gui/image/qjpeghandler.cpp
+++ b/src/gui/image/qjpeghandler.cpp
@@ -873,20 +873,18 @@ bool QJpegHandlerPrivate::read(QImage *image)
QJpegHandler::QJpegHandler()
: d(new QJpegHandlerPrivate(this))
{
- const uint features = qDetectCPUFeatures();
- Q_UNUSED(features);
#if defined(QT_COMPILER_SUPPORTS_NEON)
// from qimage_neon.cpp
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len);
- if (features & NEON)
+ if (qCpuHasFeature(NEON))
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon;
#endif // QT_COMPILER_SUPPORTS_NEON
#if defined(QT_COMPILER_SUPPORTS_SSSE3)
// from qimage_ssse3.cpp
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len);
- if (features & SSSE3)
+ if (qCpuHasFeature(SSSE3))
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
#endif // QT_COMPILER_SUPPORTS_SSSE3
}
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 2ca304b53b..08975daf71 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -5803,7 +5803,7 @@ void qInitDrawhelperAsm()
CompositionFunction *functionForModeAsm = 0;
CompositionFunctionSolid *functionForModeSolidAsm = 0;
- const uint features = qDetectCPUFeatures();
+ const uint features = qCpuFeatures();
if (false) {
#ifdef QT_COMPILER_SUPPORTS_AVX
} else if (features & AVX) {