From 4bff8ea4d48851fbea078bd93226888bdd05d8dc Mon Sep 17 00:00:00 2001 From: Olivier Goffart Date: Mon, 24 Oct 2011 08:02:10 +0200 Subject: Improve drawing scaled image with raster using SSE2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That codepath is taken in qml when an Image specify with and hight and is not smooth, and the image contains alpha contents The changes in qdrawingprimitive_sse2_p.h just put some code from the BLEND_SOURCE_OVER_ARGB32_SSE2 macro into a sub macro to allow its reuse The code that is not SSE2 in qt_scale_image_argb32_on_argb32_sse2 comes from the qt_scale_image_argb32_on_argb32 in qblendfunctions.cpp Change-Id: I071a040af4514fb21777dead9f7c5baf16071d59 Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 9 +++ src/gui/painting/qdrawhelper_sse2.cpp | 116 ++++++++++++++++++++++++++++ src/gui/painting/qdrawingprimitive_sse2_p.h | 50 ++++++------ 3 files changed, 153 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index c4c5846ed9..88c237b59b 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7050,6 +7050,15 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2; qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2; qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2; + + extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + int const_alpha); + qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2; + qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2; #endif #ifdef QT_HAVE_SSE } else if (features & SSE) { diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 340cd7100b..3bbdae00bd 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -538,6 +538,122 @@ const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Opera return qt_fetch_radial_gradient_template >(buffer, op, data, y, x, length); } +void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + int const_alpha) +{ + if (const_alpha != 256) { + // from qblendfunctions.cpp + extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + int const_alpha); + return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); + } + + qreal sx = targetRect.width() / (qreal) sourceRect.width(); + qreal sy = targetRect.height() / (qreal) sourceRect.height(); + + int ix = 0x00010000 / sx; + int iy = 0x00010000 / sy; + + int cx1 = clip.x(); + int cx2 = clip.x() + clip.width(); + int cy1 = clip.top(); + int cy2 = clip.y() + clip.height(); + + int tx1 = qRound(targetRect.left()); + int tx2 = qRound(targetRect.right()); + int ty1 = qRound(targetRect.top()); + int ty2 = qRound(targetRect.bottom()); + + if (tx2 < tx1) + qSwap(tx2, tx1); + if (ty2 < ty1) + qSwap(ty2, ty1); + + if (tx1 < cx1) + tx1 = cx1; + if (tx2 >= cx2) + tx2 = cx2; + + if (tx1 >= tx2) + return; + + if (ty1 < cy1) + ty1 = cy1; + if (ty2 >= cy2) + ty2 = cy2; + if (ty1 >= ty2) + return; + + int h = ty2 - ty1; + int w = tx2 - tx1; + + quint32 basex; + quint32 srcy; + + if (sx < 0) { + int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; + basex = quint32(sourceRect.right() * 65536) + dstx; + } else { + int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; + basex = quint32(sourceRect.left() * 65536) + dstx; + } + if (sy < 0) { + int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; + srcy = quint32(sourceRect.bottom() * 65536) + dsty; + } else { + int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; + srcy = quint32(sourceRect.top() * 65536) + dsty; + } + + quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; + + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + const __m128i ixVector = _mm_set1_epi32(4*ix); + + + while (h--) { + const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); + int srcx = basex; + int x = 0; + + ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { + uint s = src[(srcx + x*ix) >> 16]; + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + + __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); + + for (; x> 16]; + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + dst = (quint32 *)(((uchar *) dst) + dbpl); + srcy += iy; + } +} + QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 4c66d90bf1..dad8f6cb5d 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -128,6 +128,33 @@ QT_BEGIN_NAMESPACE result = _mm_or_si128(finalAG, finalRB); \ } +// same as BLEND_SOURCE_OVER_ARGB32_SSE2, but for one vector srcVector +#define BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) { \ + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ + /* all opaque */ \ + _mm_store_si128((__m128i *)&dst[x], srcVector); \ + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ + /* not fully transparent */ \ + /* extract the alpha channel on 2 x 16 bits */ \ + /* so we have room for the multiplication */ \ + /* each 32 bits will be in the form 0x00AA00AA */ \ + /* with A being the 1 - alpha */ \ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + /* result = s + d * (1-alpha) */\ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_store_si128((__m128i *)&dst[x], result); \ + } \ + } + + // Basically blend src over dst with the const alpha defined as constAlphaVector. // nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: //const __m128i nullVector = _mm_set1_epi32(0); @@ -153,28 +180,7 @@ QT_BEGIN_NAMESPACE \ for (; x < length-3; x += 4) { \ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ - /* all opaque */ \ - _mm_store_si128((__m128i *)&dst[x], srcVector); \ - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ - /* not fully transparent */ \ - /* extract the alpha channel on 2 x 16 bits */ \ - /* so we have room for the multiplication */ \ - /* each 32 bits will be in the form 0x00AA00AA */ \ - /* with A being the 1 - alpha */ \ - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ - alphaChannel = _mm_sub_epi16(one, alphaChannel); \ - \ - const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ - __m128i destMultipliedByOneMinusAlpha; \ - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ - \ - /* result = s + d * (1-alpha) */\ - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ - _mm_store_si128((__m128i *)&dst[x], result); \ - } \ + BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask) \ } \ for (; x < length; ++x) { \ uint s = src[x]; \ -- cgit v1.2.3