diff options
-rw-r--r-- | src/gui/image/qimage_conversions.cpp | 32 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 36 | ||||
-rw-r--r-- | tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp | 1 |
3 files changed, 62 insertions, 7 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index 2e8fc1963d..696f95b565 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -195,7 +195,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla return true; } -static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -219,6 +219,15 @@ static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + convert_ARGB_to_ARGB_PM(dest, src, flags); +} +#endif + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); #ifndef __SSE2__ @@ -232,7 +241,7 @@ static bool convert_ARGB_to_ARGB_PM_inplace(QImageData *data, Qt::ImageConversio for (int i = 0; i < data->height; ++i) { const QRgb *end = rgb_data + data->width; while (rgb_data < end) { - *rgb_data = PREMUL(*rgb_data); + *rgb_data = qPremultiply(*rgb_data); ++rgb_data; } rgb_data += pad; @@ -312,7 +321,7 @@ static bool convert_ARGB_to_RGBA_inplace(QImageData *data, Qt::ImageConversionFl return true; } -static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32); Q_ASSERT(dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -336,6 +345,15 @@ static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_RGBA_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + convert_ARGB_to_RGBA_PM(dest, src, flags); +} +#endif + static void convert_RGBA_to_ARGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_RGBX8888 || src->format == QImage::Format_RGBA8888 || src->format == QImage::Format_RGBA8888_Premultiplied); @@ -2945,6 +2963,14 @@ void qInitImageConversions() } #endif +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) + if (qCpuHasFeature(SSE4_1)) { + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_RGBA_PM_sse4; + } +#endif + #if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index de4be7177b..ac73d78afe 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -503,14 +503,25 @@ static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int, return src; } -static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) +static inline const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) { for (int i = 0; i < count; ++i) buffer[i] = qPremultiply(src[i]); return buffer; } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *layout, const QRgb *clut) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + return convertARGB32ToARGB32PM(buffer, src, count, layout, clut); +} +#endif + + static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -519,14 +530,24 @@ static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const u return buffer; } -static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) +static inline const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) { for (int i = 0; i < count; ++i) buffer[i] = qPremultiply(RGBA2ARGB(src[i])); return buffer; } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *layout, const QRgb *clut) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. + return convertRGBA8888ToARGB32PM(buffer, src, count, layout, clut); +} +#endif + static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -6858,6 +6879,13 @@ void qInitDrawhelperAsm() } #endif // SSSE3 +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) + if (qCpuHasFeature(SSE4_1)) { + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; + } +#endif + functionForModeAsm = qt_functionForMode_SSE2; functionForModeSolidAsm = qt_functionForModeSolid_SSE2; #endif // SSE2 diff --git a/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp b/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp index 9d6bdb28fa..44d7854eb8 100644 --- a/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp +++ b/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp @@ -148,6 +148,7 @@ void tst_QImageConversion::convertGeneric_data() QTest::newRow("argb32 -> argb8565pm") << argb32 << QImage::Format_ARGB8565_Premultiplied; QTest::newRow("argb32 -> argb4444pm") << argb32 << QImage::Format_ARGB4444_Premultiplied; QTest::newRow("argb32 -> argb32pm") << argb32 << QImage::Format_ARGB32_Premultiplied; + QTest::newRow("argb32 -> rgba8888pm") << argb32 << QImage::Format_RGBA8888_Premultiplied; } void tst_QImageConversion::convertGeneric() |