From 63d5a42b59149bfdebc9a5a123e7115a813bcaa4 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 22 Apr 2015 19:22:26 +0000 Subject: Revert "Remove separate SSE4 unpremultiply function" Could causedSSE4 instructions to be used on non SSE4 machines in cases when qUnpremultiplywas not inlined. This reverts commit 964ccc58534aac436529007000d1c38d76c88834. Change-Id: Ic676ade8f75129e8d37c4d96cbfb2bdb5b794919 Task-number: QTBUG-45741 Reviewed-by: Thiago Macieira --- src/gui/painting/qdrawhelper.cpp | 10 ++++---- src/gui/painting/qdrawhelper_sse4.cpp | 7 +++--- src/gui/painting/qdrawingprimitive_sse2_p.h | 21 ++++++++++++++++ src/gui/painting/qrgb.h | 37 +++-------------------------- 4 files changed, 34 insertions(+), 41 deletions(-) (limited to 'src/gui/painting') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 2817a4a145..37f2de98b6 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -6704,22 +6704,24 @@ void qInitDrawhelperAsm() } #endif // SSSE3 -#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__) +#if QT_COMPILER_SUPPORTS_SSE4_1 if (qCpuHasFeature(SSE4_1)) { +#if !defined(__SSE4_1__) extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; +#endif extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); - qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; } #endif -#if defined(QT_COMPILER_SUPPORTS_AVX2) && !defined(__AVX2__) +#if QT_COMPILER_SUPPORTS_AVX2 && !defined(__AVX2__) if (qCpuHasFeature(AVX2)) { extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index 8e0b2cbb18..43a3958997 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -32,6 +32,7 @@ ****************************************************************************/ #include +#include #if defined(QT_COMPILER_SUPPORTS_SSE4_1) @@ -53,7 +54,7 @@ const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint const QPixelLayout *, const QRgb *) { for (int i = 0; i < count; ++i) - buffer[i] = qUnpremultiply(src[i]); + buffer[i] = qUnpremultiply_sse4(src[i]); return buffer; } @@ -61,7 +62,7 @@ const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uin const QPixelLayout *, const QRgb *) { for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(qUnpremultiply(src[i])); + buffer[i] = ARGB2RGBA(qUnpremultiply_sse4(src[i])); return buffer; } @@ -69,7 +70,7 @@ const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *s const QPixelLayout *, const QRgb *) { for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply(src[i])); + buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply_sse4(src[i])); return buffer; } diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index fbee06e157..4d0790a502 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -236,4 +236,25 @@ QT_END_NAMESPACE #endif // __SSE2__ +QT_BEGIN_NAMESPACE +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) +QT_FUNCTION_TARGET(SSE4_1) +inline QRgb qUnpremultiply_sse4(QRgb p) +{ + const uint alpha = qAlpha(p); + const uint invAlpha = qt_inv_premul_factor[alpha]; + const __m128i via = _mm_set1_epi32(invAlpha); + const __m128i vr = _mm_set1_epi32(0x8000); + __m128i vl = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(p)); + vl = _mm_mullo_epi32(vl, via); + vl = _mm_add_epi32(vl, vr); + vl = _mm_srai_epi32(vl, 16); + vl = _mm_insert_epi32(vl, alpha, 3); + vl = _mm_packus_epi32(vl, _mm_setzero_si128()); + vl = _mm_packus_epi16(vl, _mm_setzero_si128()); + return _mm_cvtsi128_si32(vl); +} +#endif +QT_END_NAMESPACE + #endif // QDRAWINGPRIMITIVE_SSE2_P_H diff --git a/src/gui/painting/qrgb.h b/src/gui/painting/qrgb.h index 05b3a76bce..f7f2185bef 100644 --- a/src/gui/painting/qrgb.h +++ b/src/gui/painting/qrgb.h @@ -36,11 +36,6 @@ #include #include -#if defined(__SSE4_1__) -#include -#elif defined(__SSE2__) -#include -#endif QT_BEGIN_NAMESPACE @@ -92,45 +87,19 @@ inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x) Q_GUI_EXPORT extern const uint qt_inv_premul_factor[]; -#if defined(__SSE2__) -inline QRgb qUnpremultiply(QRgb p) -{ - const uint alpha = qAlpha(p); - if (alpha == 255 || alpha == 0) - return p; - const uint invAlpha = qt_inv_premul_factor[alpha]; - const __m128i via = _mm_set1_epi32(invAlpha); - const __m128i vr = _mm_set1_epi32(0x8000); -#ifdef __SSE4_1__ - __m128i vl = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(p)); - vl = _mm_mullo_epi32(vl, via); -#else - __m128i vl = _mm_unpacklo_epi8(_mm_cvtsi32_si128(p), _mm_setzero_si128()); - vl = _mm_unpacklo_epi16(vl, vl); - __m128i vll = _mm_mullo_epi16(vl, via); - __m128i vlh = _mm_mulhi_epu16(vl, via); - vl = _mm_add_epi32(vll, _mm_slli_epi32(vlh, 16)); -#endif - vl = _mm_add_epi32(vl, vr); - vl = _mm_srli_epi32(vl, 16); - vl = _mm_packs_epi32(vl, _mm_setzero_si128()); - vl = _mm_insert_epi16(vl, alpha, 3); - vl = _mm_packus_epi16(vl, _mm_setzero_si128()); - return _mm_cvtsi128_si32(vl); -} -#else inline QRgb qUnpremultiply(QRgb p) { const uint alpha = qAlpha(p); // Alpha 255 and 0 are the two most common values, which makes them beneficial to short-cut. - if (alpha == 255 || alpha == 0) + if (alpha == 255) return p; + if (alpha == 0) + return 0; // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256. const uint invAlpha = qt_inv_premul_factor[alpha]; // We add 0x8000 to get even rounding. The rounding also ensures that qPremultiply(qUnpremultiply(p)) == p for all p. return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha); } -#endif QT_END_NAMESPACE -- cgit v1.2.3