From 89edf43c44294888781c308d9b1f1d9bab63645b Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 28 Jan 2015 11:07:14 +0100 Subject: Generate SSE4.1 versions of premultiplying methods where convenient The autovectorized versions of premultiplying conversions are almost twice as fast with SSE4.1 as with SSE2. Therefore this patch lets compilers that can conveniently generate those versions without duplicating code do so, and lets us use them at runtime when the CPU supports SSE4.1. Change-Id: I699035963abe55a38b9ef8ba7b4a8c961c8dfcdd Reviewed-by: Gunnar Sletta --- src/gui/image/qimage_conversions.cpp | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'src/gui/image') diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index 2e8fc1963d..696f95b565 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -195,7 +195,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla return true; } -static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -219,6 +219,15 @@ static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. 
+ convert_ARGB_to_ARGB_PM(dest, src, flags); +} +#endif + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); #ifndef __SSE2__ @@ -232,7 +241,7 @@ static bool convert_ARGB_to_ARGB_PM_inplace(QImageData *data, Qt::ImageConversio for (int i = 0; i < data->height; ++i) { const QRgb *end = rgb_data + data->width; while (rgb_data < end) { - *rgb_data = PREMUL(*rgb_data); + *rgb_data = qPremultiply(*rgb_data); ++rgb_data; } rgb_data += pad; @@ -312,7 +321,7 @@ static bool convert_ARGB_to_RGBA_inplace(QImageData *data, Qt::ImageConversionFl return true; } -static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static inline void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32); Q_ASSERT(dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -336,6 +345,15 @@ static void convert_ARGB_to_RGBA_PM(QImageData *dest, const QImageData *src, Qt: } } +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +QT_FUNCTION_TARGET(SSE4_1) +static void convert_ARGB_to_RGBA_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) +{ + // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. 
+ convert_ARGB_to_RGBA_PM(dest, src, flags); +} +#endif + static void convert_RGBA_to_ARGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_RGBX8888 || src->format == QImage::Format_RGBA8888 || src->format == QImage::Format_RGBA8888_Premultiplied); @@ -2945,6 +2963,14 @@ void qInitImageConversions() } #endif +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) + if (qCpuHasFeature(SSE4_1)) { + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_RGBA_PM_sse4; + } +#endif + #if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; -- cgit v1.2.3