From 4bff8ea4d48851fbea078bd93226888bdd05d8dc Mon Sep 17 00:00:00 2001
From: Olivier Goffart <ogoffart@kde.org>
Date: Mon, 24 Oct 2011 08:02:10 +0200
Subject: Improve drawing scaled image with raster using SSE2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That codepath is taken in QML when an Image specifies a width and height,
is not smooth, and the image contains alpha content.

The changes in qdrawingprimitive_sse2_p.h only move some code from the
BLEND_SOURCE_OVER_ARGB32_SSE2 macro into a helper macro so it can be reused.

The code that is not SSE2 in qt_scale_image_argb32_on_argb32_sse2 comes
from the qt_scale_image_argb32_on_argb32 in qblendfunctions.cpp

Change-Id: I071a040af4514fb21777dead9f7c5baf16071d59
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
---
 src/gui/painting/qdrawingprimitive_sse2_p.h | 50 ++++++++++++++++-------------
 1 file changed, 28 insertions(+), 22 deletions(-)

(limited to 'src/gui/painting/qdrawingprimitive_sse2_p.h')

diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 4c66d90bf1..dad8f6cb5d 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -128,6 +128,33 @@ QT_BEGIN_NAMESPACE
     result = _mm_or_si128(finalAG, finalRB); \
 }
 
+// Same per-vector logic as BLEND_SOURCE_OVER_ARGB32_SSE2: blends one already-loaded srcVector over dst[x] (x comes from the enclosing scope)
+#define BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) { \
+        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
+        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
+            /* all four pixels are opaque: the source replaces the destination */ \
+            _mm_store_si128((__m128i *)&dst[x], srcVector); \
+        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
+            /* at least one pixel is not fully transparent (otherwise dst is left untouched) */ \
+            /* spread each pixel's alpha over 2 x 16 bits */ \
+            /* so we have room for the multiplication */ \
+            /* each 32 bits will be in the form 0x00AA00AA */ \
+            /* with AA being (one - alpha) after the subtraction below */ \
+            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
+            __m128i destMultipliedByOneMinusAlpha; \
+            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+            /* result = s + d * (1-alpha) */\
+            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+            _mm_store_si128((__m128i *)&dst[x], result); \
+        } \
+    }
+
+
 // Basically blend src over dst with the const alpha defined as constAlphaVector.
 // nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
 //const __m128i nullVector = _mm_set1_epi32(0);
@@ -153,28 +180,7 @@ QT_BEGIN_NAMESPACE
 \
     for (; x < length-3; x += 4) { \
         const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
-        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
-        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
-            /* all opaque */ \
-            _mm_store_si128((__m128i *)&dst[x], srcVector); \
-        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
-            /* not fully transparent */ \
-            /* extract the alpha channel on 2 x 16 bits */ \
-            /* so we have room for the multiplication */ \
-            /* each 32 bits will be in the form 0x00AA00AA */ \
-            /* with A being the 1 - alpha */ \
-            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
-            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
-            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
- \
-            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
-            __m128i destMultipliedByOneMinusAlpha; \
-            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
- \
-            /* result = s + d * (1-alpha) */\
-            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_store_si128((__m128i *)&dst[x], result); \
-        } \
+        BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) \
     } \
     for (; x < length; ++x) { \
         uint s = src[x]; \
-- 
cgit v1.2.3