From 964ccc58534aac436529007000d1c38d76c88834 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Mon, 13 Apr 2015 16:06:57 +0200 Subject: Remove separate SSE4 unpremultiply function Merges the SSE4-specific unpremultiply with the normal version, and adds an SSE2 fallback. There was no reason to split the two since compile-time options will ensure the right version is inlined. Also adds a short-cut for 0 and 255 values. Change-Id: Ie5aa262f6964219fd3062d4a498f697cf79a4595 Reviewed-by: Thiago Macieira --- src/gui/painting/qrgb.h | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'src/gui/painting/qrgb.h') diff --git a/src/gui/painting/qrgb.h b/src/gui/painting/qrgb.h index f7f2185bef..05b3a76bce 100644 --- a/src/gui/painting/qrgb.h +++ b/src/gui/painting/qrgb.h @@ -36,6 +36,11 @@ #include #include +#if defined(__SSE4_1__) +#include <smmintrin.h> +#elif defined(__SSE2__) +#include <emmintrin.h> +#endif QT_BEGIN_NAMESPACE @@ -87,19 +92,45 @@ inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x) Q_GUI_EXPORT extern const uint qt_inv_premul_factor[]; +#if defined(__SSE2__) +inline QRgb qUnpremultiply(QRgb p) +{ + const uint alpha = qAlpha(p); + if (alpha == 255 || alpha == 0) + return p; + const uint invAlpha = qt_inv_premul_factor[alpha]; + const __m128i via = _mm_set1_epi32(invAlpha); + const __m128i vr = _mm_set1_epi32(0x8000); +#ifdef __SSE4_1__ + __m128i vl = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(p)); + vl = _mm_mullo_epi32(vl, via); +#else + __m128i vl = _mm_unpacklo_epi8(_mm_cvtsi32_si128(p), _mm_setzero_si128()); + vl = _mm_unpacklo_epi16(vl, vl); + __m128i vll = _mm_mullo_epi16(vl, via); + __m128i vlh = _mm_mulhi_epu16(vl, via); + vl = _mm_add_epi32(vll, _mm_slli_epi32(vlh, 16)); +#endif + vl = _mm_add_epi32(vl, vr); + vl = _mm_srli_epi32(vl, 16); + vl = _mm_packs_epi32(vl, _mm_setzero_si128()); + vl = _mm_insert_epi16(vl, alpha, 3); + vl = _mm_packus_epi16(vl, _mm_setzero_si128()); + return _mm_cvtsi128_si32(vl); +} +#else inline 
QRgb qUnpremultiply(QRgb p) { const uint alpha = qAlpha(p); // Alpha 255 and 0 are the two most common values, which makes them beneficial to short-cut. - if (alpha == 255) + if (alpha == 255 || alpha == 0) return p; - if (alpha == 0) - return 0; // (p*(0x00ff00ff/alpha)) >> 16 == (p*255)/alpha for all p and alpha <= 256. const uint invAlpha = qt_inv_premul_factor[alpha]; // We add 0x8000 to get even rounding. The rounding also ensures that qPremultiply(qUnpremultiply(p)) == p for all p. return qRgba((qRed(p)*invAlpha + 0x8000)>>16, (qGreen(p)*invAlpha + 0x8000)>>16, (qBlue(p)*invAlpha + 0x8000)>>16, alpha); } +#endif QT_END_NAMESPACE -- cgit v1.2.3