summary | refs | log | tree | commit | diff | stats
path: root/src/gui/painting/qdrawingprimitive_sse2_p.h
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@digia.com> 2015-04-23 15:31:28 +0200
committer: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-04-25 00:15:52 +0000
commit: bc162382e5aef0b796582340beb866fb7c55a289 (patch)
tree: 88c7b12318127f28f64868b6ed38e5a0e36b4c1d /src/gui/painting/qdrawingprimitive_sse2_p.h
parent: ce302a53df8ecbbab4c30fea05753c3b1060fa56 (diff)
Short-cut SSE4 unpremultiply
Even with the SSE4-optimized unpremultiply, it is still significantly faster to skip the calculation for alpha values 0 and 255.

Change-Id: Iafe658fea8eacf35a857f292952b0c1ee056139c
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/painting/qdrawingprimitive_sse2_p.h')
-rw-r--r-- src/gui/painting/qdrawingprimitive_sse2_p.h | 6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 4d0790a502..1a7dddf0d5 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -242,6 +242,8 @@ QT_FUNCTION_TARGET(SSE4_1)
inline QRgb qUnpremultiply_sse4(QRgb p)
{
const uint alpha = qAlpha(p);
+ if (alpha == 255 || alpha == 0)
+ return p;
const uint invAlpha = qt_inv_premul_factor[alpha];
const __m128i via = _mm_set1_epi32(invAlpha);
const __m128i vr = _mm_set1_epi32(0x8000);
@@ -250,8 +252,8 @@ inline QRgb qUnpremultiply_sse4(QRgb p)
vl = _mm_add_epi32(vl, vr);
vl = _mm_srai_epi32(vl, 16);
vl = _mm_insert_epi32(vl, alpha, 3);
- vl = _mm_packus_epi32(vl, _mm_setzero_si128());
- vl = _mm_packus_epi16(vl, _mm_setzero_si128());
+ vl = _mm_packus_epi32(vl, vl);
+ vl = _mm_packus_epi16(vl, vl);
return _mm_cvtsi128_si32(vl);
}
#endif