From 8f6f9cbaa93e386dcbc447641bb6454e6899e067 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen
Date: Wed, 16 May 2018 17:50:16 +0200
Subject: Reapply SSE4 acceleration to ARGB32->ARGB32PM conversion

After the merger of fetch and convert, we were missing the hook to the
accelerated merged version of ARGB32->ARGB32PM conversion, causing a
minor performance regression.

Change-Id: I3965d1a95f2305306005db09640f2775aa645d2e
Reviewed-by: Thiago Macieira
---
 src/gui/painting/qdrawhelper.cpp      |  6 ++++++
 src/gui/painting/qdrawhelper_sse4.cpp | 18 ++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index c6d7ffd835..3be1af026c 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6283,13 +6283,19 @@ static void qInitDrawhelperFunctions()
     if (qCpuHasFeature(SSE4_1)) {
         extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
         extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
+        extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
+                                                                  const QVector<QRgb> *, QDitherInfo *);
+        extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
+                                                                    const QVector<QRgb> *, QDitherInfo *);
         extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                              const QVector<QRgb> *, QDitherInfo *);
         extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                                const QVector<QRgb> *, QDitherInfo *);
         extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                            const QVector<QRgb> *, QDitherInfo *);
+        qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
         qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+        qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
         qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
         qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
         qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index ce0d45bf27..4696493715 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -45,7 +45,7 @@ QT_BEGIN_NAMESPACE


 template<bool RGBA>
-static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
+static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
 {
     int i = 0;
     const __m128i alphaMask = _mm_set1_epi32(0xff000000);
@@ -83,7 +83,7 @@ static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int
                 _mm_storeu_si128((__m128i *)&buffer[i], srcVector);
             }
         } else {
-            _mm_storeu_si128((__m128i *)&buffer[i], _mm_setzero_si128());
+            _mm_storeu_si128((__m128i *)&buffer[i], zero);
         }
     }

@@ -103,6 +103,20 @@ void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const Q
     convertARGBToARGB32PM_sse4<true>(buffer, buffer, count);
 }

+const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
+                                                   const QVector<QRgb> *, QDitherInfo *)
+{
+    convertARGBToARGB32PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count);
+    return buffer;
+}
+
+const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
+                                                     const QVector<QRgb> *, QDitherInfo *)
+{
+    convertARGBToARGB32PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count);
+    return buffer;
+}
+
 void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
 {
--
cgit v1.2.3