From bc162382e5aef0b796582340beb866fb7c55a289 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen
Date: Thu, 23 Apr 2015 15:31:28 +0200
Subject: Short-cut SSE4 unpremultiply

Even with SSE4 optimized unpremultiply it is still significantly faster
to skip the calculation on alpha values 0 and 255.

Change-Id: Iafe658fea8eacf35a857f292952b0c1ee056139c
Reviewed-by: Gunnar Sletta
---
 src/gui/painting/qdrawingprimitive_sse2_p.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 4d0790a502..1a7dddf0d5 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -242,6 +242,8 @@ QT_FUNCTION_TARGET(SSE4_1)
 inline QRgb qUnpremultiply_sse4(QRgb p)
 {
     const uint alpha = qAlpha(p);
+    if (alpha == 255 || alpha == 0)
+        return p;
     const uint invAlpha = qt_inv_premul_factor[alpha];
     const __m128i via = _mm_set1_epi32(invAlpha);
     const __m128i vr = _mm_set1_epi32(0x8000);
@@ -250,8 +252,8 @@ inline QRgb qUnpremultiply_sse4(QRgb p)
     vl = _mm_add_epi32(vl, vr);
     vl = _mm_srai_epi32(vl, 16);
     vl = _mm_insert_epi32(vl, alpha, 3);
-    vl = _mm_packus_epi32(vl, _mm_setzero_si128());
-    vl = _mm_packus_epi16(vl, _mm_setzero_si128());
+    vl = _mm_packus_epi32(vl, vl);
+    vl = _mm_packus_epi16(vl, vl);
     return _mm_cvtsi128_si32(vl);
 }
 #endif
-- 
cgit v1.2.3