From dc82a0f4f3a380edfe910a78f3bdd32210975b85 Mon Sep 17 00:00:00 2001
From: Allan Sandfeld Jensen <allan.jensen@qt.io>
Date: Thu, 21 Jun 2018 13:18:30 +0200
Subject: Smooth image scaling for 64bit images

Adds support for smooth scaling of 64-bit images.

Task-number: QTBUG-45858
Change-Id: If46030fb8e7d684159f852a3b8266a74e5e6700c
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
---
 src/gui/painting/qdrawhelper.cpp |  37 ------
 src/gui/painting/qdrawhelper_p.h |  71 ++++++++++++
 src/gui/painting/qimagescale.cpp | 235 ++++++++++++++++++++++++++++++++++++---
 src/gui/painting/qrgba64_p.h     |  13 ---
 4 files changed, 289 insertions(+), 67 deletions(-)

(limited to 'src/gui/painting')
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 4b68c22e95..98baffc740 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -2277,43 +2277,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u
 }
 #endif
 
-#if defined(__SSE2__)
-static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
-{
-    __m128i vt = _mm_loadu_si128((const __m128i*)t);
-    if (disty) {
-       __m128i vb = _mm_loadu_si128((const __m128i*)b);
-        vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty));
-        vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty));
-        vt = _mm_add_epi16(vt, vb);
-    }
-    if (distx) {
-        const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
-        const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
-        vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
-        vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
-    }
-#ifdef Q_PROCESSOR_X86_64
-    return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt));
-#else
-    QRgba64 out;
-    _mm_storel_epi64((__m128i*)&out, vt);
-    return out;
-#endif
-}
-#else
-static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
-{
-    const uint dx = distx>>8;
-    const uint dy = disty>>8;
-    const uint idx = 256 - dx;
-    const uint idy = 256 - dy;
-    QRgba64 xtop = interpolate256(t[0], idx, t[1], dx);
-    QRgba64 xbot = interpolate256(b[0], idx, b[1], dx);
-    return interpolate256(xtop, idy, xbot, dy);
-}
-#endif
-
 template<TextureBlendType blendType>
 void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
 
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 078ab62251..fb08261205 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -747,6 +747,77 @@ static constexpr inline bool hasFastInterpolate4() { return false; }
 
 #endif
 
+// Scales each of the four channels of rgba64 by alpha256 / 256.
+static inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256)
+{
+    const auto scaled = [alpha256](uint channel) { return (channel * alpha256) >> 8; };
+    return QRgba64::fromRgba64(scaled(rgba64.red()), scaled(rgba64.green()),
+                               scaled(rgba64.blue()), scaled(rgba64.alpha()));
+}
+static inline QRgba64 interpolate256(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
+{   // Blend of x scaled by alpha1/256 and y scaled by alpha2/256; the two products are added, then repacked.
+    return QRgba64::fromRgba64(multiplyAlpha256(x, alpha1) + multiplyAlpha256(y, alpha2));
+}
+
+#ifdef __SSE2__
+static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
+{   // SSE2 variant: blends t[0], t[1] (upper pair) and b[0], b[1] (lower pair) with weights on a 0x10000 scale.
+    __m128i vt = _mm_loadu_si128((const __m128i*)t); // 128-bit load from t: t[0] in the low half, t[1] in the high half
+    if (disty) { // guard needed: for 0, the weight 0x10000 - disty would truncate to a zero 16-bit lane
+       __m128i vb = _mm_loadu_si128((const __m128i*)b);
+        vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty)); // upper pair * (0x10000 - disty), high 16 bits kept
+        vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); // lower pair * disty, high 16 bits kept
+        vt = _mm_add_epi16(vt, vb); // vertically blended pair
+    }
+    if (distx) { // same truncation concern as for disty
+        const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); // low 16 bits of distx in the four low lanes
+        const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+        vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); // low-half pixel * (0x10000 - distx), high-half pixel * distx
+        vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); // add the high-half pixel onto the low half
+    }
+#ifdef Q_PROCESSOR_X86_64
+    return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); // result is the low 64 bits
+#else
+    QRgba64 out;
+    _mm_storel_epi64((__m128i*)&out, vt); // store the low 64 bits
+    return out;
+#endif // Q_PROCESSOR_X86_64
+}
+#elif defined(__ARM_NEON__)
+static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
+{   // NEON variant: blends t[0], t[1] (upper pair) and b[0], b[1] (lower pair) with weights on a 0x10000 scale.
+    uint64x1x2_t vt = vld2_u64(reinterpret_cast<const uint64_t *>(t)); // val[0] = t[0], val[1] = t[1]
+    if (disty) { // vertical blend; skipped for 0 (NOTE(review): the 16-bit scalar 0x10000 - 0 would presumably truncate to 0)
+        uint64x1x2_t vb = vld2_u64(reinterpret_cast<const uint64_t *>(b));
+        uint32x4_t vt0 = vmull_n_u16(vreinterpret_u16_u64(vt.val[0]), 0x10000 - disty); // widening multiply: t[0] * (0x10000 - disty)
+        uint32x4_t vt1 = vmull_n_u16(vreinterpret_u16_u64(vt.val[1]), 0x10000 - disty); // same for t[1]
+        vt0 = vmlal_n_u16(vt0, vreinterpret_u16_u64(vb.val[0]), disty); // += b[0] * disty
+        vt1 = vmlal_n_u16(vt1, vreinterpret_u16_u64(vb.val[1]), disty); // += b[1] * disty
+        vt.val[0] = vreinterpret_u64_u16(vshrn_n_u32(vt0, 16)); // narrow back to 16 bits per channel
+        vt.val[1] = vreinterpret_u64_u16(vshrn_n_u32(vt1, 16));
+    }
+    if (distx) { // horizontal blend of the (possibly already vertically blended) left and right pixels
+        uint32x4_t vt0 = vmull_n_u16(vreinterpret_u16_u64(vt.val[0]), 0x10000 - distx);
+        vt0 = vmlal_n_u16(vt0, vreinterpret_u16_u64(vt.val[1]), distx);
+        vt.val[0] = vreinterpret_u64_u16(vshrn_n_u32(vt0, 16));
+    }
+    QRgba64 out;
+    vst1_u64(reinterpret_cast<uint64_t *>(&out), vt.val[0]); // val[0] holds the final pixel
+    return out;
+}
+#else
+// Portable fallback: bilinear blend of t[0], t[1] (upper pair) and b[0], b[1] (lower pair).
+// distx and disty are shifted down by 8 bits to give the 256-scale weights interpolate256() takes.
+static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
+{
+    const uint wRight = distx >> 8, wLeft = 256 - wRight;
+    const uint wBottom = disty >> 8, wTop = 256 - wBottom;
+    const QRgba64 topRow = interpolate256(t[0], wLeft, t[1], wRight);    // horizontal blend, upper pair
+    const QRgba64 bottomRow = interpolate256(b[0], wLeft, b[1], wRight); // horizontal blend, lower pair
+    return interpolate256(topRow, wTop, bottomRow, wBottom);            // vertical blend of both rows
+}
+#endif // __SSE2__
+
 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
 static Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) {
     quint32 rgb = x >> 8;
diff --git a/src/gui/painting/qimagescale.cpp b/src/gui/painting/qimagescale.cpp
index 22787b91fe..ca7930500e 100644
--- a/src/gui/painting/qimagescale.cpp
+++ b/src/gui/painting/qimagescale.cpp
@@ -41,6 +41,7 @@
 
 #include "qimage.h"
 #include "qcolor.h"
+#include "qrgba64_p.h"
 
 QT_BEGIN_NAMESPACE
 
@@ -85,7 +86,7 @@ QT_BEGIN_NAMESPACE
  * #ifdef'ed code, and removal of unneeded border calculation code.
  * Later the code has been refactored, an SSE4.1 optimizated path have been
  * added instead of the removed MMX assembler, and scaling of clipped area
- * removed.
+ * removed, and an RGBA64 version written.
  *
  * Imlib2 is (C) Carsten Haitzler and various contributors. The MMX code
  * is by Willem Monsuwe <willem@stack.nl>. All other modifications are
@@ -94,12 +95,11 @@ QT_BEGIN_NAMESPACE
 
 
 namespace QImageScale {
-    const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh);
-    int* qimageCalcXPoints(int sw, int dw);
-    int* qimageCalcApoints(int s, int d, int up);
-    QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi);
-    QImageScaleInfo *qimageCalcScaleInfo(const QImage &img, int sw, int sh,
-                                         int dw, int dh, char aa);
+    static const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh);
+    static int* qimageCalcXPoints(int sw, int dw);
+    static int* qimageCalcApoints(int s, int d, int up);
+    static QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi);
+    static QImageScaleInfo *qimageCalcScaleInfo(const QImage &img, int sw, int sh, int dw, int dh, char aa);
 }
 
 using namespace QImageScale;
@@ -108,8 +108,8 @@ using namespace QImageScale;
 // Code ported from Imlib...
 //
 
-const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
-                                                    int sw, int sh, int dh)
+static const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
+                                                           int sw, int sh, int dh)
 {
     const unsigned int **p;
     int j = 0, rv = 0;
@@ -138,7 +138,7 @@ const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
     return(p);
 }
 
-int* QImageScale::qimageCalcXPoints(int sw, int dw)
+static int* QImageScale::qimageCalcXPoints(int sw, int dw)
 {
     int *p, j = 0, rv = 0;
     qint64 val, inc;
@@ -167,7 +167,7 @@ int* QImageScale::qimageCalcXPoints(int sw, int dw)
    return p;
 }
 
-int* QImageScale::qimageCalcApoints(int s, int d, int up)
+static int* QImageScale::qimageCalcApoints(int s, int d, int up)
 {
     int *p, j = 0, rv = 0;
 
@@ -214,7 +214,7 @@ int* QImageScale::qimageCalcApoints(int s, int d, int up)
     return p;
 }
 
-QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi)
+static QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi)
 {
     if (isi) {
         delete[] isi->xpoints;
@@ -226,9 +226,9 @@ QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi)
     return 0;
 }
 
-QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img,
-                                                  int sw, int sh,
-                                                  int dw, int dh, char aa)
+static QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img,
+                                                         int sw, int sh,
+                                                         int dw, int dh, char aa)
 {
     QImageScaleInfo *isi;
     int scw, sch;
@@ -333,7 +333,7 @@ static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
     }
 }
 
-/* scale by area sampling */
+/* scale by area sampling - with alpha */
 static void qt_qimageScaleAARGBA(QImageScaleInfo *isi, unsigned int *dest,
                                  int dw, int dh, int dow, int sow)
 {
@@ -529,6 +529,204 @@ static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *des
     }
 }
 
+static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest,
+                                             int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest,
+                                             int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
+                                         int dw, int dh, int dow, int sow);
+
+// Smooth upscale in both directions: xapoints[x] / yapoints[y] weight the right / lower
+// source neighbour (out of 256); dow and sow are row strides counted in QRgba64 pixels.
+static void qt_qimageScaleRgba64_up_xy(QImageScaleInfo *isi, QRgba64 *dest,
+                                       int dw, int dh, int dow, int sow)
+{
+    const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints;
+    const int *xpoints = isi->xpoints, *xapoints = isi->xapoints, *yapoints = isi->yapoints;
+    for (int y = 0; y < dh; y++) {
+        const QRgba64 *sptr = ypoints[y];
+        QRgba64 *dptr = dest + (y * dow);
+        const int yap = yapoints[y];
+        if (yap > 0) {
+            for (int x = 0; x < dw; x++, dptr++) {
+                const QRgba64 *pix = sptr + xpoints[x];
+                const int xap = xapoints[x];
+                if (xap > 0)
+                    *dptr = interpolate_4_pixels_rgb64(pix, pix + sow, xap * 256, yap * 256);
+                else
+                    *dptr = interpolate256(pix[0], 256 - yap, pix[sow], yap);
+            }
+        } else {
+            for (int x = 0; x < dw; x++, dptr++) {
+                const QRgba64 *pix = sptr + xpoints[x];
+                const int xap = xapoints[x];
+                if (xap > 0)
+                    *dptr = interpolate256(pix[0], 256 - xap, pix[1], xap);
+                else // zero weight: plain copy, so pix[1] (possibly past the buffer end) is never read
+                    *dptr = pix[0];
+            }
+        }
+    }
+}
+
+// Entry point for smooth RGBA64 scaling. isi->xup_yup selects the variant:
+// 3 = up in both directions, 1 = up in x only, 2 = up in y only, anything else = down in both.
+void qt_qimageScaleRgba64(QImageScaleInfo *isi, QRgba64 *dest,
+                          int dw, int dh, int dow, int sow)
+{
+    switch (isi->xup_yup) {
+    case 3:  return qt_qimageScaleRgba64_up_xy(isi, dest, dw, dh, dow, sow);
+    case 1:  return qt_qimageScaleRgba64_up_x_down_y(isi, dest, dw, dh, dow, sow);
+    case 2:  return qt_qimageScaleRgba64_down_x_up_y(isi, dest, dw, dh, dow, sow);
+    default: return qt_qimageScaleRgba64_down_xy(isi, dest, dw, dh, dow, sow);
+    }
+}
+
+// Sums a run of pixels (pix, pix + step, ...) into r, g, b, a: the first is weighted
+// by xyap, later ones by Cxy, and the last by what remains of the total 1 << 14.
+inline static void qt_qimageScaleRgba64_helper(const QRgba64 *pix, int xyap, int Cxy, int step, qint64 &r, qint64 &g, qint64 &b, qint64 &a)
+{
+    r = g = b = a = 0;
+    const auto accumulate = [&](int weight) {
+        r += pix->red()   * weight;
+        g += pix->green() * weight;
+        b += pix->blue()  * weight;
+        a += pix->alpha() * weight;
+    };
+    accumulate(xyap);
+    int remaining = (1 << 14) - xyap;
+    for (; remaining > Cxy; remaining -= Cxy) {
+        pix += step;
+        accumulate(Cxy);
+    }
+    pix += step;
+    accumulate(remaining);
+}
+
+// Upscales horizontally while downscaling vertically. yapoints[y] packs two weights
+// for the vertical run (full-pixel weight in the bits from 16 upwards, first-pixel
+// weight in the low 16 bits); xapoints[x] weights the column to the right (out of 256).
+static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest,
+                                             int dw, int dh, int dow, int sow)
+{
+    const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints;
+    const int *xpoints = isi->xpoints, *xapoints = isi->xapoints, *yapoints = isi->yapoints;
+
+    for (int y = 0; y < dh; y++) {
+        const int fullWeight = yapoints[y] >> 16;
+        const int firstWeight = yapoints[y] & 0xffff;
+        QRgba64 *out = dest + (y * dow);
+
+        for (int x = 0; x < dw; x++, out++) {
+            // Weighted sum over the source column covered by this destination pixel.
+            const QRgba64 *column = ypoints[y] + xpoints[x];
+            qint64 r, g, b, a;
+            qt_qimageScaleRgba64_helper(column, firstWeight, fullWeight, sow, r, g, b, a);
+
+            const int rightWeight = xapoints[x];
+            if (rightWeight > 0) {
+                // Blend with the same sum taken one pixel to the right.
+                qint64 rr, gg, bb, aa;
+                qt_qimageScaleRgba64_helper(column + 1, firstWeight, fullWeight, sow, rr, gg, bb, aa);
+                const int leftWeight = 256 - rightWeight;
+                r = (r * leftWeight + rr * rightWeight) >> 8;
+                g = (g * leftWeight + gg * rightWeight) >> 8;
+                b = (b * leftWeight + bb * rightWeight) >> 8;
+                a = (a * leftWeight + aa * rightWeight) >> 8;
+            }
+            // The helper's weights total 1 << 14, hence the shift back to channel range.
+            *out = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
+        }
+    }
+}
+
+// Downscales horizontally while upscaling vertically. xapoints[x] packs two weights
+// for the horizontal run (full-pixel weight in the bits from 16 upwards, first-pixel
+// weight in the low 16 bits); yapoints[y] weights the row below (out of 256).
+static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest,
+                                             int dw, int dh, int dow, int sow)
+{
+    const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints;
+    const int *xpoints = isi->xpoints, *xapoints = isi->xapoints, *yapoints = isi->yapoints;
+
+    for (int y = 0; y < dh; y++) {
+        const int lowerWeight = yapoints[y];
+        const int upperWeight = 256 - lowerWeight;
+        QRgba64 *out = dest + (y * dow);
+
+        for (int x = 0; x < dw; x++, out++) {
+            const int fullWeight = xapoints[x] >> 16;
+            const int firstWeight = xapoints[x] & 0xffff;
+
+            // Weighted sum over the source row segment covered by this destination pixel.
+            const QRgba64 *segment = ypoints[y] + xpoints[x];
+            qint64 r, g, b, a;
+            qt_qimageScaleRgba64_helper(segment, firstWeight, fullWeight, 1, r, g, b, a);
+
+            if (lowerWeight > 0) {
+                // Blend with the same sum taken one source row further down.
+                qint64 rr, gg, bb, aa;
+                qt_qimageScaleRgba64_helper(segment + sow, firstWeight, fullWeight, 1, rr, gg, bb, aa);
+                r = (r * upperWeight + rr * lowerWeight) >> 8;
+                g = (g * upperWeight + gg * lowerWeight) >> 8;
+                b = (b * upperWeight + bb * lowerWeight) >> 8;
+                a = (a * upperWeight + aa * lowerWeight) >> 8;
+            }
+            // The helper's weights total 1 << 14, hence the shift back to channel range.
+            *out = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
+        }
+    }
+}
+
+// Downscales in both directions by summing weighted source pixels over a block.
+// xapoints[x] and yapoints[y] each pack a full-pixel weight (bits from 16 upwards)
+// and a first-pixel weight (low 16 bits); sow is the source row stride in pixels.
+static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
+                                         int dw, int dh, int dow, int sow)
+{
+    const QRgba64 **ypoints = (const QRgba64 **)isi->ypoints;
+    const int *xpoints = isi->xpoints, *xapoints = isi->xapoints, *yapoints = isi->yapoints;
+
+    for (int y = 0; y < dh; y++) {
+        const int Cy = yapoints[y] >> 16;
+        const int yap = yapoints[y] & 0xffff;
+        QRgba64 *out = dest + (y * dow);
+
+        for (int x = 0; x < dw; x++, out++) {
+            const int Cx = xapoints[x] >> 16;
+            const int xap = xapoints[x] & 0xffff;
+            const QRgba64 *row = ypoints[y] + xpoints[x];
+            qint64 r = 0, g = 0, b = 0, a = 0;
+
+            // Adds the horizontal sum of the current source row, scaled by rowWeight.
+            const auto addRow = [&](int rowWeight) {
+                qint64 rx, gx, bx, ax;
+                qt_qimageScaleRgba64_helper(row, xap, Cx, 1, rx, gx, bx, ax);
+                r += rx * rowWeight;
+                g += gx * rowWeight;
+                b += bx * rowWeight;
+                a += ax * rowWeight;
+            };
+
+            // Same weighting scheme as the helper, applied vertically: the first row
+            // counts yap, later rows Cy, and the last row the rest of 1 << 14.
+            addRow(yap);
+            int remaining = (1 << 14) - yap;
+            for (; remaining > Cy; remaining -= Cy) {
+                row += sow;
+                addRow(Cy);
+            }
+            row += sow;
+            addRow(remaining);
+
+            // Both passes carry a factor of 1 << 14, so shift by 28 in total.
+            *out = qRgba64(r >> 28, g >> 28, b >> 28, a >> 28);
+        }
+    }
+}
+
 static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
                                             int dw, int dh, int dow, int sow);
 
@@ -745,7 +943,10 @@ QImage qSmoothScaleImage(const QImage &src, int dw, int dh)
         return QImage();
     }
 
-    if (src.hasAlphaChannel())
+    if (src.depth() > 32)
+        qt_qimageScaleRgba64(scaleinfo, (QRgba64 *)buffer.scanLine(0),
+                             dw, dh, dw, src.bytesPerLine() / 8);
+    else if (src.hasAlphaChannel())
         qt_qimageScaleAARGBA(scaleinfo, (unsigned int *)buffer.scanLine(0),
                              dw, dh, dw, src.bytesPerLine() / 4);
     else
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 1ed0e82182..b7e4d4d905 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -64,14 +64,6 @@ inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256)
     return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8);
 }
 
-inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256)
-{
-    return QRgba64::fromRgba64((rgba64.red()   * alpha256) >> 8,
-                               (rgba64.green() * alpha256) >> 8,
-                               (rgba64.blue()  * alpha256) >> 8,
-                               (rgba64.alpha() * alpha256) >> 8);
-}
-
 inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
 {
     return QRgba64::fromRgba64(qt_div_65535(rgba64.red()   * alpha65535),
@@ -126,11 +118,6 @@ inline T multiplyAlpha255(T rgba64, uint alpha255)
 #endif
 }
 
-inline QRgba64 interpolate256(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
-{
-    return QRgba64::fromRgba64(multiplyAlpha256(x, alpha1) + multiplyAlpha256(y, alpha2));
-}
-
 inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
 {
     return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
-- 
cgit v1.2.3