Improve drawing scaled image with raster using SSE2

That codepath is taken in QML when an Image specifies a width and height, is not smooth, and the image contains alpha content. The changes in qdrawingprimitive_sse2_p.h just move some code from the BLEND_SOURCE_OVER_ARGB32_SSE2 macro into a sub-macro to allow its reuse. The code that is not SSE2 in qt_scale_image_argb32_on_argb32_sse2 comes from qt_scale_image_argb32_on_argb32 in qblendfunctions.cpp. Change-Id: I071a040af4514fb21777dead9f7c5baf16071d59 Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
author: Olivier Goffart <ogoffart@kde.org> 2011-10-24 08:02:10 +0200
committer: Qt by Nokia <qt-info@nokia.com> 2011-10-24 13:56:59 +0200
commit: 4bff8ea4d48851fbea078bd93226888bdd05d8dc (patch)
tree: 27f8c6fab968a356fc31e3d76148fec038ac2e4c /src/gui/painting/qdrawingprimitive_sse2_p.h
parent: cea8e41dc838bcdc2ec63eefac6441ddc608e390 (diff)
1 files changed, 28 insertions, 22 deletions
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 4c66d90bf1..dad8f6cb5d 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -128,6 +128,33 @@ QT_BEGIN_NAMESPACE
     result = _mm_or_si128(finalAG, finalRB); \
 }
 
+// same as BLEND_SOURCE_OVER_ARGB32_SSE2, but for one vector srcVector
+#define BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) { \
+        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
+        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
+            /* all opaque */ \
+            _mm_store_si128((__m128i *)&dst[x], srcVector); \
+        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
+            /* not fully transparent */ \
+            /* extract the alpha channel on 2 x 16 bits */ \
+            /* so we have room for the multiplication */ \
+            /* each 32 bits will be in the form 0x00AA00AA */ \
+            /* with A being the 1 - alpha */ \
+            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
+            __m128i destMultipliedByOneMinusAlpha; \
+            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+            /* result = s + d * (1-alpha) */\
+            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+            _mm_store_si128((__m128i *)&dst[x], result); \
+        } \
+    }
+
+
 // Basically blend src over dst with the const alpha defined as constAlphaVector.
 // nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
 //const __m128i nullVector = _mm_set1_epi32(0);
@@ -153,28 +180,7 @@ QT_BEGIN_NAMESPACE
 \
     for (; x < length-3; x += 4) { \
         const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
-        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
-        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
-            /* all opaque */ \
-            _mm_store_si128((__m128i *)&dst[x], srcVector); \
-        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
-            /* not fully transparent */ \
-            /* extract the alpha channel on 2 x 16 bits */ \
-            /* so we have room for the multiplication */ \
-            /* each 32 bits will be in the form 0x00AA00AA */ \
-            /* with A being the 1 - alpha */ \
-            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
-            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
-            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
- \
-            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
-            __m128i destMultipliedByOneMinusAlpha; \
-            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
- \
-            /* result = s + d * (1-alpha) */\
-            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_store_si128((__m128i *)&dst[x], result); \
-        } \
+        BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) \
     } \
     for (; x < length; ++x) { \
         uint s = src[x]; \
author	Olivier Goffart <ogoffart@kde.org>	2011-10-24 08:02:10 +0200
committer	Qt by Nokia <qt-info@nokia.com>	2011-10-24 13:56:59 +0200
commit	4bff8ea4d48851fbea078bd93226888bdd05d8dc (patch)
tree	27f8c6fab968a356fc31e3d76148fec038ac2e4c /src/gui/painting/qdrawingprimitive_sse2_p.h
parent	cea8e41dc838bcdc2ec63eefac6441ddc608e390 (diff)