1 files changed, 102 insertions, 148 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 387481a915..833ddd7b16 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1,41 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtGui module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
 
 #ifndef QDRAWHELPER_P_H
 #define QDRAWHELPER_P_H
@@ -65,13 +29,13 @@
 #include "private/qrasterdefs_p.h"
 #include <private/qsimd_p.h>
 
-#include <QtCore/qsharedpointer.h>
+#include <memory>
 
 QT_BEGIN_NAMESPACE
 
 #if defined(Q_CC_GNU)
 #  define Q_DECL_RESTRICT __restrict__
-#  if defined(Q_PROCESSOR_X86_32) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+#  if defined(Q_PROCESSOR_X86_32) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG)
 #    define Q_DECL_VECTORCALL __attribute__((sseregparm,regparm(3)))
 #  else
 #    define Q_DECL_VECTORCALL
@@ -89,13 +53,6 @@ static const uint RMASK = 0x00ff0000;
 static const uint GMASK = 0x0000ff00;
 static const uint BMASK = 0x000000ff;
 
-/*******************************************************************************
- * QSpan
- *
- * duplicate definition of FT_Span
- */
-typedef QT_FT_Span QSpan;
-
 struct QSolidData;
 struct QTextureData;
 struct QGradientData;
@@ -108,6 +65,9 @@ class QRasterBuffer;
 class QClipData;
 class QRasterPaintEngineState;
 
+template<typename F> class QRgbaFloat; // forward declaration only; the class is not defined in this header
+typedef QRgbaFloat<float> QRgbaFloat32; // float instantiation used by the *FP function-pointer typedefs in this header
+
 typedef QT_FT_SpanFunc ProcessSpans;
 typedef void (*BitmapBlitFunc)(QRasterBuffer *rasterBuffer,
                                int x, int y, const QRgba64 &color,
@@ -180,9 +140,9 @@ struct quint24 {
     uchar data[3];
 };
 
-void qBlendGradient(int count, const QSpan *spans, void *userData);
-void qBlendTexture(int count, const QSpan *spans, void *userData);
-#ifdef __SSE2__
+void qBlendGradient(int count, const QT_FT_Span *spans, void *userData);
+void qBlendTexture(int count, const QT_FT_Span *spans, void *userData);
+#ifdef Q_PROCESSOR_X86
 extern void (*qt_memfill64)(quint64 *dest, quint64 value, qsizetype count);
 extern void (*qt_memfill32)(quint32 *dest, quint32 value, qsizetype count);
 #else
@@ -194,8 +154,10 @@ extern void qt_memfill16(quint16 *dest, quint16 value, qsizetype count);
 
 typedef void (QT_FASTCALL *CompositionFunction)(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, uint const_alpha);
 typedef void (QT_FASTCALL *CompositionFunction64)(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, uint const_alpha);
+typedef void (QT_FASTCALL *CompositionFunctionFP)(QRgbaFloat32 *Q_DECL_RESTRICT dest, const QRgbaFloat32 *Q_DECL_RESTRICT src, int length, uint const_alpha); // QRgbaFloat32 counterpart of CompositionFunction64
 typedef void (QT_FASTCALL *CompositionFunctionSolid)(uint *dest, int length, uint color, uint const_alpha);
 typedef void (QT_FASTCALL *CompositionFunctionSolid64)(QRgba64 *dest, int length, QRgba64 color, uint const_alpha);
+typedef void (QT_FASTCALL *CompositionFunctionSolidFP)(QRgbaFloat32 *dest, int length, QRgbaFloat32 color, uint const_alpha); // QRgbaFloat32 counterpart of CompositionFunctionSolid64
 
 struct LinearGradientValues
 {
@@ -212,17 +174,19 @@ struct RadialGradientValues
     qreal dr;
     qreal sqrfr;
     qreal a;
-    qreal inv2a;
     bool extended;
 };
 
 struct Operator;
 typedef uint* (QT_FASTCALL *DestFetchProc)(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length);
 typedef QRgba64* (QT_FASTCALL *DestFetchProc64)(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length);
+typedef QRgbaFloat32* (QT_FASTCALL *DestFetchProcFP)(QRgbaFloat32 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length); // QRgbaFloat32 counterpart of DestFetchProc64
 typedef void (QT_FASTCALL *DestStoreProc)(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length);
 typedef void (QT_FASTCALL *DestStoreProc64)(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
+typedef void (QT_FASTCALL *DestStoreProcFP)(QRasterBuffer *rasterBuffer, int x, int y, const QRgbaFloat32 *buffer, int length); // QRgbaFloat32 counterpart of DestStoreProc64
 typedef const uint* (QT_FASTCALL *SourceFetchProc)(uint *buffer, const Operator *o, const QSpanData *data, int y, int x, int length);
 typedef const QRgba64* (QT_FASTCALL *SourceFetchProc64)(QRgba64 *buffer, const Operator *o, const QSpanData *data, int y, int x, int length);
+typedef const QRgbaFloat32* (QT_FASTCALL *SourceFetchProcFP)(QRgbaFloat32 *buffer, const Operator *o, const QSpanData *data, int y, int x, int length); // QRgbaFloat32 counterpart of SourceFetchProc64; note the (y, x) argument order
 
 struct Operator
 {
@@ -239,6 +203,12 @@ struct Operator
     CompositionFunctionSolid64 funcSolid64;
     CompositionFunction64 func64;
 
+    DestFetchProcFP destFetchFP;
+    DestStoreProcFP destStoreFP;
+    SourceFetchProcFP srcFetchFP;
+    CompositionFunctionSolidFP funcSolidFP;
+    CompositionFunctionFP funcFP;
+
     union {
         LinearGradientValues linear;
         RadialGradientValues radial;
@@ -295,7 +265,7 @@ struct QGradientData
 #define GRADIENT_STOPTABLE_SIZE 1024
 #define GRADIENT_STOPTABLE_SIZE_SHIFT 10
 
-#if QT_CONFIG(raster_64bit)
+#if QT_CONFIG(raster_64bit) || QT_CONFIG(raster_fp)
     const QRgba64 *colorTable64; //[GRADIENT_STOPTABLE_SIZE];
 #endif
     const QRgb *colorTable32; //[GRADIENT_STOPTABLE_SIZE];
@@ -317,11 +287,12 @@ struct QTextureData
     int y2;
     qsizetype bytesPerLine;
     QImage::Format format;
-    const QVector<QRgb> *colorTable;
+    const QList<QRgb> *colorTable;
     bool hasAlpha;
     enum Type {
         Plain,
-        Tiled
+        Tiled,
+        Pattern
     };
     Type type;
     int const_alpha;
@@ -349,24 +320,20 @@ struct QSpanData
         ConicalGradient,
         Texture
     } type : 8;
-    int txop : 8;
-    int fast_matrix : 1;
+    signed int txop : 8;
+    uint fast_matrix : 1;
     bool bilinear;
     QImage *tempImage;
-    QRgba64 solidColor;
+    QColor solidColor;
     union {
         QGradientData gradient;
         QTextureData texture;
     };
-    class Pinnable {
-    protected:
-        ~Pinnable() {}
-    }; // QSharedPointer<const void> is not supported
-    QSharedPointer<const Pinnable> cachedGradient;
+    std::shared_ptr<const void> cachedGradient;
 
 
     void init(QRasterBuffer *rb, const QRasterPaintEngine *pe);
-    void setup(const QBrush &brush, int alpha, QPainter::CompositionMode compositionMode);
+    void setup(const QBrush &brush, int alpha, QPainter::CompositionMode compositionMode, bool isCosmetic);
     void setupMatrix(const QTransform &matrix, int bilinear);
     void initTexture(const QImage *image, int alpha, QTextureData::Type = QTextureData::Plain, const QRect &sourceRect = QRect());
     void adjustSpanMethods();
@@ -434,12 +401,12 @@ const BlendType * QT_FASTCALL qt_fetch_radial_gradient_template(BlendType *buffe
     bool affine = !data->m13 && !data->m23;
 
     BlendType *end = buffer + length;
+    qreal inv_a = 1 / qreal(2 * op->radial.a);
+
     if (affine) {
         rx -= data->gradient.radial.focal.x;
         ry -= data->gradient.radial.focal.y;
 
-        qreal inv_a = 1 / qreal(2 * op->radial.a);
-
         const qreal delta_rx = data->m11;
         const qreal delta_ry = data->m12;
 
@@ -472,7 +439,7 @@ const BlendType * QT_FASTCALL qt_fetch_radial_gradient_template(BlendType *buffe
 
         while (buffer < end) {
             if (rw == 0) {
-                *buffer = 0;
+                *buffer = RadialFetchFunc::null();
             } else {
                 qreal invRw = 1 / rw;
                 qreal gx = rx * invRw - data->gradient.radial.focal.x;
@@ -484,8 +451,8 @@ const BlendType * QT_FASTCALL qt_fetch_radial_gradient_template(BlendType *buffe
                 if (det >= 0) {
                     qreal detSqrt = qSqrt(det);
 
-                    qreal s0 = (-b - detSqrt) * op->radial.inv2a;
-                    qreal s1 = (-b + detSqrt) * op->radial.inv2a;
+                    qreal s0 = (-b - detSqrt) * inv_a;
+                    qreal s1 = (-b + detSqrt) * inv_a;
 
                     qreal s = qMax(s0, s1);
 
@@ -605,7 +572,7 @@ public:
     }
 };
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
     uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
     t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
     t &= 0xff00ff;
@@ -619,7 +586,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b
 
 #if Q_PROCESSOR_WORDSIZE == 8 // 64-bit versions
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
     quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
     t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
     t >>= 8;
@@ -627,7 +594,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b
     return (uint(t)) | (uint(t >> 24));
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
+static inline uint BYTE_MUL(uint x, uint a) {
     quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
     t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
     t &= 0x00ff00ff00ff00ff;
@@ -636,7 +603,7 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 
 #else // 32-bit versions
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
     uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
     t >>= 8;
     t &= 0xff00ff;
@@ -647,7 +614,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b
     return x;
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
+static inline uint BYTE_MUL(uint x, uint a) {
     uint t = (x & 0xff00ff) * a;
     t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
     t &= 0xff00ff;
@@ -660,7 +627,7 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 }
 #endif
 
-static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src)
+static inline void blend_pixel(quint32 &dst, const quint32 src)
 {
     if (src >= 0xff000000)
         dst = src;
@@ -668,7 +635,7 @@ static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src)
         dst = src + BYTE_MUL(dst, qAlpha(~src));
 }
 
-static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src, const int const_alpha)
+static inline void blend_pixel(quint32 &dst, const quint32 src, const int const_alpha)
 {
     if (const_alpha == 255)
         return blend_pixel(dst, src);
@@ -679,7 +646,7 @@ static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src, const i
 }
 
 #if defined(__SSE2__)
-static Q_ALWAYS_INLINE uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
+static inline uint Q_DECL_VECTORCALL interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
 {
     // First interpolate top and bottom pixels in parallel.
     vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128());
@@ -720,7 +687,7 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis
 static constexpr inline bool hasFastInterpolate4() { return true; }
 
 #elif defined(__ARM_NEON__)
-static Q_ALWAYS_INLINE uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
+static inline uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
 {
     uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
     uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
@@ -844,24 +811,75 @@ static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba6
 }
 #endif // __SSE2__
 
-static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16(uint x, uint a) {
+#if QT_CONFIG(raster_fp)
+static inline QRgbaFloat32 multiplyAlpha_rgba32f(QRgbaFloat32 c, float a)
+{ // Returns c with every channel (r, g, b and the alpha channel itself) multiplied by the factor a.
+    return QRgbaFloat32 { c.r * a, c.g * a, c.b * a, c.a * a };
+}
+
+static inline QRgbaFloat32 interpolate_rgba32f(QRgbaFloat32 x, float alpha1, QRgbaFloat32 y, float alpha2)
+{ // Per-channel weighted sum: returns x * alpha1 + y * alpha2. The weights are applied as given, not normalized.
+    x = multiplyAlpha_rgba32f(x, alpha1); // x and y are by-value copies, so overwriting them does not affect the caller
+    y = multiplyAlpha_rgba32f(y, alpha2);
+    return QRgbaFloat32 { x.r + y.r, x.g + y.g, x.b + y.b, x.a + y.a };
+}
+#ifdef __SSE2__
+static inline __m128 Q_DECL_VECTORCALL interpolate_rgba32f(__m128 x, __m128 alpha1, __m128 y, __m128 alpha2)
+{ // SSE overload of the weighted sum: computes x * alpha1 + y * alpha2 on four packed floats at once.
+    return _mm_add_ps(_mm_mul_ps(x, alpha1), _mm_mul_ps(y, alpha2));
+}
+#endif
+
+static inline QRgbaFloat32 interpolate_4_pixels_rgba32f(const QRgbaFloat32 t[], const QRgbaFloat32 b[], uint distx, uint disty)
+{ // Blends four pixels: t[0]/t[1] and b[0]/b[1] are each mixed with weight dx, then the two results are mixed with weight dy.
+    constexpr float f = 1.0f / 65536.0f; // maps distx/disty so that 65536 corresponds to 1.0 (presumably 16-bit fixed-point fractions -- confirm with callers)
+    const float dx = distx * f;
+    const float dy = disty * f;
+    const float idx = 1.0f - dx; // complementary weights, so the two weights of each blend sum to 1
+    const float idy = 1.0f - dy;
+#ifdef __SSE2__
+    const __m128 vtl = _mm_load_ps((const float *)&t[0]); // NOTE(review): _mm_load_ps is the aligned load, so this relies on QRgbaFloat32 being 16-byte aligned -- confirm
+    const __m128 vtr = _mm_load_ps((const float *)&t[1]);
+    const __m128 vbl = _mm_load_ps((const float *)&b[0]);
+    const __m128 vbr = _mm_load_ps((const float *)&b[1]);
+
+    const __m128 vdx = _mm_set1_ps(dx); // broadcast the scalar weights to all four lanes
+    const __m128 vidx = _mm_set1_ps(idx);
+    __m128 vt = interpolate_rgba32f(vtl, vidx, vtr, vdx); // blend t[0] with t[1]
+    __m128 vb = interpolate_rgba32f(vbl, vidx, vbr, vdx); // blend b[0] with b[1]
+    const __m128 vdy = _mm_set1_ps(dy);
+    const __m128 vidy = _mm_set1_ps(idy);
+    vt = interpolate_rgba32f(vt, vidy, vb, vdy); // blend the two intermediate results
+    QRgbaFloat32 res;
+    _mm_store_ps((float*)&res, vt); // aligned store; same alignment assumption as the loads above
+    return res;
+#else // scalar path: same three blends using the QRgbaFloat32 overload
+    QRgbaFloat32 xtop = interpolate_rgba32f(t[0], idx, t[1], dx);
+    QRgbaFloat32 xbot = interpolate_rgba32f(b[0], idx, b[1], dx);
+    xtop = interpolate_rgba32f(xtop, idy, xbot, dy);
+    return xtop;
+#endif
+}
+#endif // QT_CONFIG(raster_fp)
+
+static inline uint BYTE_MUL_RGB16(uint x, uint a) { // scales the 0x07e0 field of x by (a + 1) / 256 and the 0xf81f fields by ((a + 1) >> 2) / 64; presumably x is an RGB565 pixel and a is 0..255 -- confirm
     a += 1;
     uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
     t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
     return t;
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) {
+static inline uint BYTE_MUL_RGB16_32(uint x, uint a) { // scales the bit fields of x selected by 0xf81f07e0 and 0x07e0f81f by a / 32; presumably two packed RGB565 pixels with a in 0..32 -- confirm
     uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
     t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
     return t;
 }
 
 // qt_div_255 is a fast rounded division by 255 using an approximation that is accurate for all positive 16-bit integers
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257_floor(uint x) { return  (x - (x >> 8)) >> 8; }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257(uint x) { return qt_div_257_floor(x + 128); }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; }
+static constexpr inline int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; } // rounded x / 255 using shifts and adds only; 0x80 is the rounding bias
+static constexpr inline uint qt_div_257_floor(uint x) { return  (x - (x >> 8)) >> 8; } // shift-based approximation of floor(x / 257); NOTE(review): valid input range is not stated here -- confirm
+static constexpr inline uint qt_div_257(uint x) { return qt_div_257_floor(x + 128); } // rounding variant: adds 128 (about half of 257) before the floor division
+static constexpr inline uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; } // same construction as qt_div_255 with 16-bit shifts; presumably a rounded x / 65535 -- confirm
 
 template <class T> inline void qt_memfill_template(T *dest, T color, qsizetype count)
 {
@@ -1008,7 +1026,7 @@ inline uint comp_func_Plus_one_pixel(uint d, const uint s)
 #undef AMIX
 
 // must be multiple of 4 for easier SIMD implementations
-static Q_CONSTEXPR int BufferSize = 2048;
+static constexpr int BufferSize = 2048;
 
 // A buffer of intermediate results used by simple bilinear scaling.
 struct IntermediateBuffer
@@ -1023,70 +1041,6 @@ struct IntermediateBuffer
     quint32 buffer_ag[BufferSize+2];
 };
 
-template <QPixelLayout::BPP bpp>
-inline uint QT_FASTCALL qFetchPixel(const uchar *, int)
-{
-    Q_UNREACHABLE();
-    return 0;
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
-{
-    return (src[index >> 3] >> (index & 7)) & 1;
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
-{
-    return (src[index >> 3] >> (~index & 7)) & 1;
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP8>(const uchar *src, int index)
-{
-    return src[index];
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP16>(const uchar *src, int index)
-{
-    return reinterpret_cast<const quint16 *>(src)[index];
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP24>(const uchar *src, int index)
-{
-    return reinterpret_cast<const quint24 *>(src)[index];
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP32>(const uchar *src, int index)
-{
-    return reinterpret_cast<const uint *>(src)[index];
-}
-
-template <>
-inline uint QT_FASTCALL qFetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
-{
-    // We have to do the conversion in fetch to fit into a 32bit uint
-    QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
-    return c.toArgb32();
-}
-
-typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
-
-constexpr FetchPixelFunc qFetchPixelTable[QPixelLayout::BPPCount] = {
-    nullptr, // BPPNone
-    qFetchPixel<QPixelLayout::BPP1MSB>,
-    qFetchPixel<QPixelLayout::BPP1LSB>,
-    qFetchPixel<QPixelLayout::BPP8>,
-    qFetchPixel<QPixelLayout::BPP16>,
-    qFetchPixel<QPixelLayout::BPP24>,
-    qFetchPixel<QPixelLayout::BPP32>,
-    qFetchPixel<QPixelLayout::BPP64>,
-};
-
 QT_END_NAMESPACE
 
 #endif // QDRAWHELPER_P_H