From dc82a0f4f3a380edfe910a78f3bdd32210975b85 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Thu, 21 Jun 2018 13:18:30 +0200 Subject: Smooth image scaling for 64bit images Adds support for smooth scaling 64bit images. Task-number: QTBUG-45858 Change-Id: If46030fb8e7d684159f852a3b8266a74e5e6700c Reviewed-by: Eirik Aavitsland --- src/gui/painting/qdrawhelper.cpp | 37 ------ src/gui/painting/qdrawhelper_p.h | 71 ++++++++++++ src/gui/painting/qimagescale.cpp | 235 ++++++++++++++++++++++++++++++++++++--- src/gui/painting/qrgba64_p.h | 13 --- 4 files changed, 289 insertions(+), 67 deletions(-) (limited to 'src/gui/painting') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 4b68c22e95..98baffc740 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -2277,43 +2277,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u } #endif -#if defined(__SSE2__) -static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty) -{ - __m128i vt = _mm_loadu_si128((const __m128i*)t); - if (disty) { - __m128i vb = _mm_loadu_si128((const __m128i*)b); - vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty)); - vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); - vt = _mm_add_epi16(vt, vb); - } - if (distx) { - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); - } -#ifdef Q_PROCESSOR_X86_64 - return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); -#else - QRgba64 out; - _mm_storel_epi64((__m128i*)&out, vt); - return out; -#endif -} -#else -static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty) -{ - const uint dx = distx>>8; - const uint dy = disty>>8; - const uint idx = 256 - dx; - const uint idy = 256 - dy; - QRgba64 xtop = interpolate256(t[0], idx, t[1], dx); - QRgba64 xbot = interpolate256(b[0], idx, b[1], dx); - return interpolate256(xtop, idy, xbot, dy); -} -#endif - template void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2); diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 078ab62251..fb08261205 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -747,6 +747,77 @@ static constexpr inline bool hasFastInterpolate4() { return false; } #endif +static inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256) +{ + return QRgba64::fromRgba64((rgba64.red() * alpha256) >> 8, + (rgba64.green() * alpha256) >> 8, + (rgba64.blue() * alpha256) >> 8, + (rgba64.alpha() * alpha256) >> 8); +} +static inline QRgba64 interpolate256(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) +{ + return QRgba64::fromRgba64(multiplyAlpha256(x, alpha1) + multiplyAlpha256(y, alpha2)); +} + +#ifdef __SSE2__ +static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty) +{ + __m128i vt = _mm_loadu_si128((const __m128i*)t); + if (disty) { + __m128i vb = _mm_loadu_si128((const __m128i*)b); + vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty)); + vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); + vt = _mm_add_epi16(vt, vb); + } + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } +#ifdef Q_PROCESSOR_X86_64 + return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); +#else + QRgba64 out; + _mm_storel_epi64((__m128i*)&out, vt); + return out; +#endif // Q_PROCESSOR_X86_64 +} +#elif defined(__ARM_NEON__) +static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty) +{ + uint64x1x2_t vt = vld2_u64(reinterpret_cast(t)); + if (disty) { + uint64x1x2_t vb = vld2_u64(reinterpret_cast(b)); + uint32x4_t vt0 = vmull_n_u16(vreinterpret_u16_u64(vt.val[0]), 0x10000 - disty); + uint32x4_t vt1 = vmull_n_u16(vreinterpret_u16_u64(vt.val[1]), 0x10000 - disty); + vt0 = vmlal_n_u16(vt0, vreinterpret_u16_u64(vb.val[0]), disty); + vt1 = vmlal_n_u16(vt1, vreinterpret_u16_u64(vb.val[1]), disty); + vt.val[0] = vreinterpret_u64_u16(vshrn_n_u32(vt0, 16)); + vt.val[1] = vreinterpret_u64_u16(vshrn_n_u32(vt1, 16)); + } + if (distx) { + uint32x4_t vt0 = vmull_n_u16(vreinterpret_u16_u64(vt.val[0]), 0x10000 - distx); + vt0 = vmlal_n_u16(vt0, vreinterpret_u16_u64(vt.val[1]), distx); + vt.val[0] = vreinterpret_u64_u16(vshrn_n_u32(vt0, 16)); + } + QRgba64 out; + vst1_u64(reinterpret_cast(&out), vt.val[0]); + return out; +} +#else +static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty) +{ + const uint dx = distx>>8; + const uint dy = disty>>8; + const uint idx = 256 - dx; + const uint idy = 256 - dy; + QRgba64 xtop = interpolate256(t[0], idx, t[1], dx); + QRgba64 xbot = interpolate256(b[0], idx, b[1], dx); + return interpolate256(xtop, idy, xbot, dy); +} +#endif // __SSE2__ + #if Q_BYTE_ORDER == Q_BIG_ENDIAN static Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) { quint32 rgb = x >> 8; diff --git a/src/gui/painting/qimagescale.cpp b/src/gui/painting/qimagescale.cpp index 22787b91fe..ca7930500e 100644 --- a/src/gui/painting/qimagescale.cpp +++ b/src/gui/painting/qimagescale.cpp @@ -41,6 +41,7 @@ #include "qimage.h" #include "qcolor.h" +#include "qrgba64_p.h" QT_BEGIN_NAMESPACE @@ -85,7 +86,7 @@ QT_BEGIN_NAMESPACE * #ifdef'ed code, and removal of unneeded border calculation code. * Later the code has been refactored, an SSE4.1 optimizated path have been * added instead of the removed MMX assembler, and scaling of clipped area - * removed. + * removed, and an RGBA64 version written * * Imlib2 is (C) Carsten Haitzler and various contributors. The MMX code * is by Willem Monsuwe . All other modifications are @@ -94,12 +95,11 @@ QT_BEGIN_NAMESPACE namespace QImageScale { - const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh); - int* qimageCalcXPoints(int sw, int dw); - int* qimageCalcApoints(int s, int d, int up); - QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi); - QImageScaleInfo *qimageCalcScaleInfo(const QImage &img, int sw, int sh, - int dw, int dh, char aa); + static const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh); + static int* qimageCalcXPoints(int sw, int dw); + static int* qimageCalcApoints(int s, int d, int up); + static QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi); + static QImageScaleInfo *qimageCalcScaleInfo(const QImage &img, int sw, int sh, int dw, int dh, char aa); } using namespace QImageScale; @@ -108,8 +108,8 @@ using namespace QImageScale; // Code ported from Imlib... // -const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src, - int sw, int sh, int dh) +static const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src, + int sw, int sh, int dh) { const unsigned int **p; int j = 0, rv = 0; @@ -138,7 +138,7 @@ const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src, return(p); } -int* QImageScale::qimageCalcXPoints(int sw, int dw) +static int* QImageScale::qimageCalcXPoints(int sw, int dw) { int *p, j = 0, rv = 0; qint64 val, inc; @@ -167,7 +167,7 @@ int* QImageScale::qimageCalcXPoints(int sw, int dw) return p; } -int* QImageScale::qimageCalcApoints(int s, int d, int up) +static int* QImageScale::qimageCalcApoints(int s, int d, int up) { int *p, j = 0, rv = 0; @@ -214,7 +214,7 @@ int* QImageScale::qimageCalcApoints(int s, int d, int up) return p; } -QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi) +static QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi) { if (isi) { delete[] isi->xpoints; @@ -226,9 +226,9 @@ QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi) return 0; } -QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img, - int sw, int sh, - int dw, int dh, char aa) +static QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img, + int sw, int sh, + int dw, int dh, char aa) { QImageScaleInfo *isi; int scw, sch; @@ -333,7 +333,7 @@ static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest, } } -/* scale by area sampling */ +/* scale by area sampling - with alpha */ static void qt_qimageScaleAARGBA(QImageScaleInfo *isi, unsigned int *dest, int dw, int dh, int dow, int sow) { @@ -529,6 +529,204 @@ static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *des } } +static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow); + +static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow); + +static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow); + +static void qt_qimageScaleRgba64_up_xy(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow) +{ + const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + const QRgba64 *sptr = ypoints[y]; + QRgba64 *dptr = dest + (y * dow); + const int yap = yapoints[y]; + if (yap > 0) { + for (int x = 0; x < dw; x++) { + const QRgba64 *pix = sptr + xpoints[x]; + const int xap = xapoints[x]; + if (xap > 0) + *dptr = interpolate_4_pixels_rgb64(pix, pix + sow, xap * 256, yap * 256); + else + *dptr = interpolate256(pix[0], 256 - yap, pix[sow], yap); + dptr++; + } + } else { + for (int x = 0; x < dw; x++) { + const QRgba64 *pix = sptr + xpoints[x]; + const int xap = xapoints[x]; + *dptr = interpolate256(pix[0], 256 - xap, pix[1], xap); + dptr++; + } + } + } +} + +void qt_qimageScaleRgba64(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow) +{ + if (isi->xup_yup == 3) + qt_qimageScaleRgba64_up_xy(isi, dest, dw, dh, dow, sow); + else if (isi->xup_yup == 1) + qt_qimageScaleRgba64_up_x_down_y(isi, dest, dw, dh, dow, sow); + else if (isi->xup_yup == 2) + qt_qimageScaleRgba64_down_x_up_y(isi, dest, dw, dh, dow, sow); + else + qt_qimageScaleRgba64_down_xy(isi, dest, dw, dh, dow, sow); +} + +inline static void qt_qimageScaleRgba64_helper(const QRgba64 *pix, int xyap, int Cxy, int step, qint64 &r, qint64 &g, qint64 &b, qint64 &a) +{ + r = pix->red() * xyap; + g = pix->green() * xyap; + b = pix->blue() * xyap; + a = pix->alpha() * xyap; + int j; + for (j = (1 << 14) - xyap; j > Cxy; j -= Cxy ){ + pix += step; + r += pix->red() * Cxy; + g += pix->green() * Cxy; + b += pix->blue() * Cxy; + a += pix->alpha() * Cxy; + } + pix += step; + r += pix->red() * j; + g += pix->green() * j; + b += pix->blue() * j; + a += pix->alpha() * j; +} + +static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow) +{ + const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + int Cy = (yapoints[y]) >> 16; + int yap = (yapoints[y]) & 0xffff; + + QRgba64 *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const QRgba64 *sptr = ypoints[y] + xpoints[x]; + qint64 r, g, b, a; + qt_qimageScaleRgba64_helper(sptr, yap, Cy, sow, r, g, b, a); + + int xap = xapoints[x]; + if (xap > 0) { + qint64 rr, gg, bb, aa; + qt_qimageScaleRgba64_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa); + + r = r * (256 - xap); + g = g * (256 - xap); + b = b * (256 - xap); + a = a * (256 - xap); + r = (r + (rr * xap)) >> 8; + g = (g + (gg * xap)) >> 8; + b = (b + (bb * xap)) >> 8; + a = (a + (aa * xap)) >> 8; + } + *dptr++ = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14); + } + } +} + +static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow) +{ + const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + QRgba64 *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const QRgba64 *sptr = ypoints[y] + xpoints[x]; + qint64 r, g, b, a; + qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, r, g, b, a); + + int yap = yapoints[y]; + if (yap > 0) { + qint64 rr, gg, bb, aa; + qt_qimageScaleRgba64_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa); + + r = r * (256 - yap); + g = g * (256 - yap); + b = b * (256 - yap); + a = a * (256 - yap); + r = (r + (rr * yap)) >> 8; + g = (g + (gg * yap)) >> 8; + b = (b + (bb * yap)) >> 8; + a = (a + (aa * yap)) >> 8; + } + *dptr = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14); + dptr++; + } + } +} + +static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest, + int dw, int dh, int dow, int sow) +{ + const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + int Cy = (yapoints[y]) >> 16; + int yap = (yapoints[y]) & 0xffff; + + QRgba64 *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const QRgba64 *sptr = ypoints[y] + xpoints[x]; + qint64 rx, gx, bx, ax; + qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax); + + qint64 r = rx * yap; + qint64 g = gx * yap; + qint64 b = bx * yap; + qint64 a = ax * yap; + int j; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + sptr += sow; + qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax); + r += rx * Cy; + g += gx * Cy; + b += bx * Cy; + a += ax * Cy; + } + sptr += sow; + qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax); + r += rx * j; + g += gx * j; + b += bx * j; + a += ax * j; + + *dptr = qRgba64(r >> 28, g >> 28, b >> 28, a >> 28); + dptr++; + } + } +} + static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest, int dw, int dh, int dow, int sow); @@ -745,7 +943,10 @@ QImage qSmoothScaleImage(const QImage &src, int dw, int dh) return QImage(); } - if (src.hasAlphaChannel()) + if (src.depth() > 32) + qt_qimageScaleRgba64(scaleinfo, (QRgba64 *)buffer.scanLine(0), + dw, dh, dw, src.bytesPerLine() / 8); + else if (src.hasAlphaChannel()) qt_qimageScaleAARGBA(scaleinfo, (unsigned int *)buffer.scanLine(0), dw, dh, dw, src.bytesPerLine() / 4); else diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h index 1ed0e82182..b7e4d4d905 100644 --- a/src/gui/painting/qrgba64_p.h +++ b/src/gui/painting/qrgba64_p.h @@ -64,14 +64,6 @@ inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256) return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8); } -inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256) -{ - return QRgba64::fromRgba64((rgba64.red() * alpha256) >> 8, - (rgba64.green() * alpha256) >> 8, - (rgba64.blue() * alpha256) >> 8, - (rgba64.alpha() * alpha256) >> 8); -} - inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535) { return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535), @@ -126,11 +118,6 @@ inline T multiplyAlpha255(T rgba64, uint alpha255) #endif } -inline QRgba64 interpolate256(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) -{ - return QRgba64::fromRgba64(multiplyAlpha256(x, alpha1) + multiplyAlpha256(y, alpha2)); -} - inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) { return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2)); -- cgit v1.2.3