diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-01-29 12:25:06 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-02-09 11:36:49 +0000 |
commit | 38aafe1a17c5d3be81edf798dd4dbe745727f4fd (patch) | |
tree | 7e742aae377ddd5aef9d53bdcbf500721d2f43f2 /src/gui/painting/qdrawingprimitive_sse2_p.h | |
parent | 868201155fd677dbc6d14346f5ea61e82ebce27b (diff) |
Optimize unpremultiply on SSE4.1
Adds an SSE4.1 optimized version of qUnpremultiply and uses it in the
most drawing conversions methods. This gives a speed-up of little over
2x.
Change-Id: Ieb858a94ada1eb86d7af715ac1a100f1587f360d
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/painting/qdrawingprimitive_sse2_p.h')
-rw-r--r-- | src/gui/painting/qdrawingprimitive_sse2_p.h | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 2b4cdc286e..deac31b797 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -236,4 +236,26 @@ QT_END_NAMESPACE #endif // __SSE2__ +QT_BEGIN_NAMESPACE +#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) +QT_FUNCTION_TARGET(SSE4_1) +inline QRgb qUnpremultiply_sse4(QRgb p) +{ + const uint alpha = qAlpha(p); + const uint invAlpha = qt_inv_premul_factor[alpha]; + const __m128i via = _mm_set1_epi32(invAlpha); + const __m128i vr = _mm_set1_epi32(0x8000); + __m128i vl = _mm_unpacklo_epi8(_mm_cvtsi32_si128(p), _mm_setzero_si128()); + vl = _mm_unpacklo_epi16(vl, _mm_setzero_si128()); + vl = _mm_mullo_epi32(vl, via); + vl = _mm_add_epi32(vl, vr); + vl = _mm_srai_epi32(vl, 16); + vl = _mm_insert_epi32(vl, alpha, 3); + vl = _mm_packus_epi32(vl, _mm_setzero_si128()); + vl = _mm_packus_epi16(vl, _mm_setzero_si128()); + return _mm_cvtsi128_si32(vl); +} +#endif +QT_END_NAMESPACE + #endif // QDRAWINGPRIMITIVE_SSE2_P_H |