From 4bff8ea4d48851fbea078bd93226888bdd05d8dc Mon Sep 17 00:00:00 2001 From: Olivier Goffart Date: Mon, 24 Oct 2011 08:02:10 +0200 Subject: Improve drawing scaled image with raster using SSE2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That codepath is taken in QML when an Image specifies width and height and is not smooth, and the image contains alpha content. The changes in qdrawingprimitive_sse2_p.h just move some code from the BLEND_SOURCE_OVER_ARGB32_SSE2 macro into a sub-macro to allow its reuse. The code that is not SSE2 in qt_scale_image_argb32_on_argb32_sse2 comes from qt_scale_image_argb32_on_argb32 in qblendfunctions.cpp. Change-Id: I071a040af4514fb21777dead9f7c5baf16071d59 Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawingprimitive_sse2_p.h | 50 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'src/gui/painting/qdrawingprimitive_sse2_p.h') diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 4c66d90bf1..dad8f6cb5d 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -128,6 +128,33 @@ QT_BEGIN_NAMESPACE result = _mm_or_si128(finalAG, finalRB); \ } +// same as BLEND_SOURCE_OVER_ARGB32_SSE2, but for one vector srcVector +#define BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) { \ + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ + /* all opaque */ \ + _mm_store_si128((__m128i *)&dst[x], srcVector); \ + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ + /* not fully transparent */ \ + /* extract the alpha channel on 2 x 16 bits */ \ + /* so we have room for the multiplication */ \ + /* each 32 bits will be in the form 0x00AA00AA */ \ + /* with A being the 1 - alpha 
*/ \ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + /* result = s + d * (1-alpha) */\ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_store_si128((__m128i *)&dst[x], result); \ + } \ + } + + // Basically blend src over dst with the const alpha defined as constAlphaVector. // nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: //const __m128i nullVector = _mm_set1_epi32(0); @@ -153,28 +180,7 @@ QT_BEGIN_NAMESPACE \ for (; x < length-3; x += 4) { \ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ - /* all opaque */ \ - _mm_store_si128((__m128i *)&dst[x], srcVector); \ - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ - /* not fully transparent */ \ - /* extract the alpha channel on 2 x 16 bits */ \ - /* so we have room for the multiplication */ \ - /* each 32 bits will be in the form 0x00AA00AA */ \ - /* with A being the 1 - alpha */ \ - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ - alphaChannel = _mm_sub_epi16(one, alphaChannel); \ - \ - const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ - __m128i destMultipliedByOneMinusAlpha; \ - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ - \ - /* result = s + d * (1-alpha) */\ - const __m128i result = _mm_add_epi8(srcVector, 
destMultipliedByOneMinusAlpha); \ - _mm_store_si128((__m128i *)&dst[x], result); \ - } \ + BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) \ } \ for (; x < length; ++x) { \ uint s = src[x]; \ -- cgit v1.2.3