diff options
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 47 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 63 | ||||
-rw-r--r-- | src/gui/image/qimage.cpp | 34 | ||||
-rw-r--r-- | src/gui/image/qjpeghandler.cpp | 6 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 2 |
5 files changed, 87 insertions, 65 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index c399a5a527..df41e6bd6d 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -355,39 +355,9 @@ static const int features_indices[] = { static const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]); -static const uint minFeature = None -#if defined __RTM__ - | RTM -#endif +// record what CPU features were enabled by default in this Qt build // don't define for HLE, since the HLE prefix can be run on older CPUs -#if defined __AVX2__ - | AVX2 -#endif -#if defined __AVX__ - | AVX -#endif -#if defined __SSE4_2__ - | SSE4_2 -#endif -#if defined __SSE4_1__ - | SSE4_1 -#endif -#if defined __SSSE3__ - | SSSE3 -#endif -#if defined __SSE3__ - | SSE3 -#endif -#if defined __SSE2__ - | SSE2 -#endif -#if defined __ARM_NEON__ - | NEON -#endif -#if defined __IWMMXT__ - | IWMMXT -#endif - ; +static const uint minFeature = qCompilerCpuFeatures & ~HLE; #ifdef Q_OS_WIN #if defined(Q_CC_GNU) @@ -405,12 +375,10 @@ int ffs(int i) #endif #endif // Q_OS_WIN -uint qDetectCPUFeatures() -{ - static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); - if (features.load() != -1) - return features.load(); +QBasicAtomicInt qt_cpu_features = Q_BASIC_ATOMIC_INITIALIZER(0); +void qDetectCpuFeatures() +{ uint f = detectProcessorFeatures(); QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); if (!disable.isEmpty()) { @@ -434,13 +402,12 @@ uint qDetectCPUFeatures() features_string + features_indices[ffs(missing) - 1]); } - features.store(f); - return f; + qt_cpu_features.store(f | QSimdInitialized); } void qDumpCPUFeatures() { - uint features = qDetectCPUFeatures(); + uint features = qCpuFeatures(); printf("Processor features: "); for (int i = 0; i < features_count; ++i) { if (features & (1 << i)) diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 0cc6bf4b76..ce9c7b789f 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -43,6 +43,7 @@ #define QSIMD_P_H #include <qglobal.h> +#include <qatomic.h> QT_BEGIN_HEADER @@ -172,7 +173,6 @@ QT_BEGIN_NAMESPACE enum CPUFeatures { - None = 0, IWMMXT = 0x1, NEON = 0x2, SSE2 = 0x4, @@ -183,10 +183,67 @@ enum CPUFeatures { AVX = 0x80, AVX2 = 0x100, HLE = 0x200, - RTM = 0x400 + RTM = 0x400, + + // used only to indicate that the CPU detection was initialised + QSimdInitialized = 0x80000000 }; -Q_CORE_EXPORT uint qDetectCPUFeatures(); +static const uint qCompilerCpuFeatures = 0 +#if defined __RTM__ + | RTM +#endif +#if defined __HLE__ + | HLE +#endif +#if defined __AVX2__ + | AVX2 +#endif +#if defined __AVX__ + | AVX +#endif +#if defined __SSE4_2__ + | SSE4_2 +#endif +#if defined __SSE4_1__ + | SSE4_1 +#endif +#if defined __SSSE3__ + | SSSE3 +#endif +#if defined __SSE3__ + | SSE3 +#endif +#if defined __SSE2__ + | SSE2 +#endif +#if defined __ARM_NEON__ + | NEON +#endif +#if defined __IWMMXT__ + | IWMMXT +#endif + ; + + +extern Q_CORE_EXPORT QBasicAtomicInt qt_cpu_features; +Q_CORE_EXPORT void qDetectCpuFeatures(); + +inline uint qCpuFeatures() +{ + int features = qt_cpu_features.load(); + if (Q_UNLIKELY(features == 0)) { + qDetectCpuFeatures(); + features = qt_cpu_features.load(); + Q_ASSUME(features != 0); + } + return uint(features); +} + +inline uint qCpuHasFeature(CPUFeatures feature) +{ + return qCompilerCpuFeatures & feature || qCpuFeatures() & feature; +} #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp index 8536e59440..b16ae2ffce 100644 --- a/src/gui/image/qimage.cpp +++ b/src/gui/image/qimage.cpp @@ -3270,12 +3270,8 @@ static InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QIma void qInitImageConversions() { - const uint features = qDetectCPUFeatures(); - Q_UNUSED(features); - -#ifdef QT_COMPILER_SUPPORTS_SSE2 #ifdef QT_COMPILER_SUPPORTS_AVX - if (features & AVX) { + if (qCpuHasFeature(AVX)) { extern bool convert_ARGB_to_ARGB_PM_inplace_avx(QImageData *data, Qt::ImageConversionFlags); inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_avx; @@ -3283,29 +3279,33 @@ void qInitImageConversions() converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_avx; converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_avx; converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_avx; - } else + return; + } #endif - if (features & SSE2) { - extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); - inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2; +#if defined(QT_COMPILER_SUPPORTS_SSE2) && !defined(__AVX__) + if (qCpuHasFeature(SSE2)) { + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); + inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2; #ifdef QT_COMPILER_SUPPORTS_SSSE3 - if (features & SSSE3) { - extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); - converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3; - converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3; - converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3; - } -#endif + if (qCpuHasFeature(SSSE3)) { + extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); + converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3; + converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3; + converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3; } +#endif + return; + } #endif // SSE2 #ifdef QT_COMPILER_SUPPORTS_NEON - if (features & NEON) { + if (qCpuHasFeature(NEON)) { extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon; converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_neon; + return; } #endif } diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp index eff106a486..057bfc2592 100644 --- a/src/gui/image/qjpeghandler.cpp +++ b/src/gui/image/qjpeghandler.cpp @@ -873,20 +873,18 @@ bool QJpegHandlerPrivate::read(QImage *image) QJpegHandler::QJpegHandler() : d(new QJpegHandlerPrivate(this)) { - const uint features = qDetectCPUFeatures(); - Q_UNUSED(features); #if defined(QT_COMPILER_SUPPORTS_NEON) // from qimage_neon.cpp Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len); - if (features & NEON) + if (qCpuHasFeature(NEON)) rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon; #endif // QT_COMPILER_SUPPORTS_NEON #if defined(QT_COMPILER_SUPPORTS_SSSE3) // from qimage_ssse3.cpp Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len); - if (features & SSSE3) + if (qCpuHasFeature(SSSE3)) rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3; #endif // QT_COMPILER_SUPPORTS_SSSE3 } diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 2ca304b53b..08975daf71 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -5803,7 +5803,7 @@ void qInitDrawhelperAsm() CompositionFunction *functionForModeAsm = 0; CompositionFunctionSolid *functionForModeSolidAsm = 0; - const uint features = qDetectCPUFeatures(); + const uint features = qCpuFeatures(); if (false) { #ifdef QT_COMPILER_SUPPORTS_AVX } else if (features & AVX) { |