diff options
author | Olivier Goffart <ogoffart@kde.org> | 2011-10-24 08:02:10 +0200 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2011-10-24 13:56:59 +0200 |
commit | 4bff8ea4d48851fbea078bd93226888bdd05d8dc (patch) | |
tree | 27f8c6fab968a356fc31e3d76148fec038ac2e4c /src/gui/painting/qdrawhelper_sse2.cpp | |
parent | cea8e41dc838bcdc2ec63eefac6441ddc608e390 (diff) |
Improve drawing scaled image with raster using SSE2
That code path is taken in QML when an Image specifies a width and height,
is not smooth, and the image contains alpha content.
The changes in qdrawingprimitive_sse2_p.h just put some code from the
BLEND_SOURCE_OVER_ARGB32_SSE2 macro into a sub macro to allow its reuse
The code that is not SSE2 in qt_scale_image_argb32_on_argb32_sse2 comes
from the qt_scale_image_argb32_on_argb32 in qblendfunctions.cpp
Change-Id: I071a040af4514fb21777dead9f7c5baf16071d59
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
Diffstat (limited to 'src/gui/painting/qdrawhelper_sse2.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 340cd7100b..3bbdae00bd 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -538,6 +538,122 @@ const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Opera return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); } +void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + int const_alpha) +{ + if (const_alpha != 256) { + // from qblendfunctions.cpp + extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + int const_alpha); + return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); + } + + qreal sx = targetRect.width() / (qreal) sourceRect.width(); + qreal sy = targetRect.height() / (qreal) sourceRect.height(); + + int ix = 0x00010000 / sx; + int iy = 0x00010000 / sy; + + int cx1 = clip.x(); + int cx2 = clip.x() + clip.width(); + int cy1 = clip.top(); + int cy2 = clip.y() + clip.height(); + + int tx1 = qRound(targetRect.left()); + int tx2 = qRound(targetRect.right()); + int ty1 = qRound(targetRect.top()); + int ty2 = qRound(targetRect.bottom()); + + if (tx2 < tx1) + qSwap(tx2, tx1); + if (ty2 < ty1) + qSwap(ty2, ty1); + + if (tx1 < cx1) + tx1 = cx1; + if (tx2 >= cx2) + tx2 = cx2; + + if (tx1 >= tx2) + return; + + if (ty1 < cy1) + ty1 = cy1; + if (ty2 >= cy2) + ty2 = cy2; + if (ty1 >= ty2) + return; + + int h = ty2 - ty1; + int w = tx2 - tx1; + + quint32 basex; + quint32 srcy; + + if (sx < 0) { + int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; + basex = quint32(sourceRect.right() * 65536) + dstx; + } else { + int dstx = 
qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; + basex = quint32(sourceRect.left() * 65536) + dstx; + } + if (sy < 0) { + int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; + srcy = quint32(sourceRect.bottom() * 65536) + dsty; + } else { + int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; + srcy = quint32(sourceRect.top() * 65536) + dsty; + } + + quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; + + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + const __m128i ixVector = _mm_set1_epi32(4*ix); + + + while (h--) { + const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); + int srcx = basex; + int x = 0; + + ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { + uint s = src[(srcx + x*ix) >> 16]; + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + + __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); + + for (; x<w - 3; x += 4) { + union Vect_buffer { __m128i vect; quint32 i[4]; }; + Vect_buffer addr; + addr.vect = _mm_srli_epi32(srcxVector, 16); + srcxVector = _mm_add_epi32(srcxVector, ixVector); + + const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); + BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask); + } + + for (; x<w; x++) { + uint s = src[(srcx + x*ix) >> 16]; + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + dst = (quint32 *)(((uchar *) dst) + dbpl); + srcy += iy; + } +} + QT_END_NAMESPACE |