diff options
Diffstat (limited to 'src/gui/painting')
37 files changed, 1481 insertions, 658 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp index 2dd5144e40..348eceb47f 100644 --- a/src/gui/painting/qblendfunctions.cpp +++ b/src/gui/painting/qblendfunctions.cpp @@ -187,19 +187,11 @@ void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl, #endif if (const_alpha == 256) { - if (w <= 64) { - while (h--) { - QT_MEMCPY_USHORT(dst, src, w); - dst += dbpl; - src += sbpl; - } - } else { - int length = w << 1; - while (h--) { - memcpy(dst, src, length); - dst += dbpl; - src += sbpl; - } + int length = w << 1; + while (h--) { + memcpy(dst, src, length); + dst += dbpl; + src += sbpl; } } else if (const_alpha != 0) { quint16 *d = (quint16 *) dst; diff --git a/src/gui/painting/qcolor.cpp b/src/gui/painting/qcolor.cpp index 318d55c4c3..b3fa1eedde 100644 --- a/src/gui/painting/qcolor.cpp +++ b/src/gui/painting/qcolor.cpp @@ -2487,6 +2487,15 @@ QColor QColor::fromHslF(qreal h, qreal s, qreal l, qreal a) return color; } +/*! + \obsolete + + Use the \c const overload instead. +*/ +void QColor::getCmyk(int *c, int *m, int *y, int *k, int *a) +{ + const_cast<const QColor *>(this)->getCmyk(c, m, y, k, a); +} /*! Sets the contents pointed to by \a c, \a m, \a y, \a k, and \a a, to the @@ -2498,7 +2507,7 @@ QColor QColor::fromHslF(qreal h, qreal s, qreal l, qreal a) \sa setCmyk(), {QColor#The CMYK Color Model}{The CMYK Color Model} */ -void QColor::getCmyk(int *c, int *m, int *y, int *k, int *a) +void QColor::getCmyk(int *c, int *m, int *y, int *k, int *a) const { if (!c || !m || !y || !k) return; @@ -2518,6 +2527,16 @@ void QColor::getCmyk(int *c, int *m, int *y, int *k, int *a) } /*! + \obsolete + + Use the \c const overload instead. +*/ +void QColor::getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a) +{ + const_cast<const QColor *>(this)->getCmykF(c, m, y, k, a); +} + +/*! Sets the contents pointed to by \a c, \a m, \a y, \a k, and \a a, to the cyan, magenta, yellow, black, and alpha-channel (transparency) components of the color's CMYK value. @@ -2527,7 +2546,7 @@ void QColor::getCmyk(int *c, int *m, int *y, int *k, int *a) \sa setCmykF(), {QColor#The CMYK Color Model}{The CMYK Color Model} */ -void QColor::getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a) +void QColor::getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a) const { if (!c || !m || !y || !k) return; @@ -2676,18 +2695,13 @@ QColor QColor::fromCmykF(qreal c, qreal m, qreal y, qreal k, qreal a) recommend using the darker() function for this purpose. If the \a factor is 0 or negative, the return value is unspecified. - The function converts the current RGB color to HSV, multiplies the value - (V) component by \a factor and converts the color back to RGB. + The function converts the current color to HSV, multiplies the value + (V) component by \a factor and converts the color back to it's original + color spec. \sa darker(), isValid() */ - -/*! - \obsolete - - Use lighter(\a factor) instead. -*/ -QColor QColor::light(int factor) const Q_DECL_NOTHROW +QColor QColor::lighter(int factor) const Q_DECL_NOTHROW { if (factor <= 0) // invalid lightness factor return *this; @@ -2726,18 +2740,13 @@ QColor QColor::light(int factor) const Q_DECL_NOTHROW but we recommend using the lighter() function for this purpose. If the \a factor is 0 or negative, the return value is unspecified. - The function converts the current RGB color to HSV, divides the value (V) - component by \a factor and converts the color back to RGB. + The function converts the current color to HSV, divides the value (V) + component by \a factor and converts the color back to it's original + color spec. \sa lighter(), isValid() */ - -/*! - \obsolete - - Use darker(\a factor) instead. -*/ -QColor QColor::dark(int factor) const Q_DECL_NOTHROW +QColor QColor::darker(int factor) const Q_DECL_NOTHROW { if (factor <= 0) // invalid darkness factor return *this; @@ -2751,6 +2760,28 @@ QColor QColor::dark(int factor) const Q_DECL_NOTHROW return hsv.convertTo(cspec); } +#if QT_DEPRECATED_SINCE(5, 13) +/*! + \obsolete + + Use lighter(\a factor) instead. +*/ +QColor QColor::light(int factor) const Q_DECL_NOTHROW +{ + return lighter(factor); +} + +/*! + \obsolete + + Use darker(\a factor) instead. +*/ +QColor QColor::dark(int factor) const Q_DECL_NOTHROW +{ + return darker(factor); +} +#endif + #if QT_VERSION < QT_VERSION_CHECK(6,0,0) /*! Assigns a copy of \a color to this color, and returns a reference to it. diff --git a/src/gui/painting/qcolor.h b/src/gui/painting/qcolor.h index f7a9e9db59..4822612cf8 100644 --- a/src/gui/painting/qcolor.h +++ b/src/gui/painting/qcolor.h @@ -172,10 +172,12 @@ public: qreal yellowF() const Q_DECL_NOTHROW; qreal blackF() const Q_DECL_NOTHROW; - void getCmyk(int *c, int *m, int *y, int *k, int *a = nullptr); + void getCmyk(int *c, int *m, int *y, int *k, int *a = nullptr); // ### Qt 6: remove + void getCmyk(int *c, int *m, int *y, int *k, int *a = nullptr) const; void setCmyk(int c, int m, int y, int k, int a = 255); - void getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a = nullptr); + void getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a = nullptr); // ### Qt 6: remove + void getCmykF(qreal *c, qreal *m, qreal *y, qreal *k, qreal *a = nullptr) const; void setCmykF(qreal c, qreal m, qreal y, qreal k, qreal a = 1.0); int hslHue() const Q_DECL_NOTHROW; // 0 <= hue < 360 @@ -217,9 +219,13 @@ public: static QColor fromHsl(int h, int s, int l, int a = 255); static QColor fromHslF(qreal h, qreal s, qreal l, qreal a = 1.0); +#if QT_DEPRECATED_SINCE(5, 13) + QT_DEPRECATED_X("Use QColor::lighter() instead") Q_REQUIRED_RESULT QColor light(int f = 150) const Q_DECL_NOTHROW; - Q_REQUIRED_RESULT QColor lighter(int f = 150) const Q_DECL_NOTHROW; + QT_DEPRECATED_X("Use QColor::darker() instead") Q_REQUIRED_RESULT QColor dark(int f = 200) const Q_DECL_NOTHROW; +#endif + Q_REQUIRED_RESULT QColor lighter(int f = 150) const Q_DECL_NOTHROW; Q_REQUIRED_RESULT QColor darker(int f = 200) const Q_DECL_NOTHROW; bool operator==(const QColor &c) const Q_DECL_NOTHROW; @@ -306,12 +312,6 @@ inline QColor::QColor(const QColor &acolor) Q_DECL_NOTHROW inline bool QColor::isValid() const Q_DECL_NOTHROW { return cspec != Invalid; } -inline QColor QColor::lighter(int f) const Q_DECL_NOTHROW -{ return light(f); } - -inline QColor QColor::darker(int f) const Q_DECL_NOTHROW -{ return dark(f); } - QT_END_NAMESPACE #endif // QCOLOR_H diff --git a/src/gui/painting/qcompositionfunctions.cpp b/src/gui/painting/qcompositionfunctions.cpp index 027bf23115..5c1afe8425 100644 --- a/src/gui/painting/qcompositionfunctions.cpp +++ b/src/gui/painting/qcompositionfunctions.cpp @@ -1107,7 +1107,7 @@ static inline uint multiply_op_rgb64(uint dst, uint src, uint da, uint sa) } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Multiply_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Multiply_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1130,7 +1130,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Multiply_impl(uint *dest, } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Multiply_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Multiply_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1169,7 +1169,7 @@ void QT_FASTCALL comp_func_solid_Multiply_rgb64(QRgba64 *dest, int length, QRgba } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Multiply_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Multiply_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1190,7 +1190,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Multiply_impl(uint *Q_DECL_REST } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Multiply_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Multiply_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1231,7 +1231,7 @@ void QT_FASTCALL comp_func_Multiply_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const Q = Sca + Dca - Sca.Dca */ template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Screen_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Screen_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1254,7 +1254,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Screen_impl(uint *dest, i } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Screen_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Screen_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1293,7 +1293,7 @@ void QT_FASTCALL comp_func_solid_Screen_rgb64(QRgba64 *dest, int length, QRgba64 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Screen_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Screen_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1314,7 +1314,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Screen_impl(uint *Q_DECL_RESTRI } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Screen_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Screen_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1375,7 +1375,7 @@ static inline uint overlay_op_rgb64(uint dst, uint src, uint da, uint sa) } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Overlay_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Overlay_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1398,7 +1398,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Overlay_impl(uint *dest, } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Overlay_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Overlay_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1437,7 +1437,7 @@ void QT_FASTCALL comp_func_solid_Overlay_rgb64(QRgba64 *dest, int length, QRgba6 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Overlay_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Overlay_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1458,7 +1458,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Overlay_impl(uint *Q_DECL_RESTR } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Overlay_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Overlay_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1509,7 +1509,7 @@ static inline uint darken_op_rgb64(uint dst, uint src, uint da, uint sa) } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Darken_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Darken_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1532,7 +1532,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Darken_impl(uint *dest, i } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Darken_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Darken_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1571,7 +1571,7 @@ void QT_FASTCALL comp_func_solid_Darken_rgb64(QRgba64 *dest, int length, QRgba64 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Darken_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Darken_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1592,7 +1592,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Darken_impl(uint *Q_DECL_RESTRI } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Darken_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Darken_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1643,7 +1643,7 @@ static inline uint lighten_op_rgb64(uint dst, uint src, uint da, uint sa) } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Lighten_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Lighten_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1666,7 +1666,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Lighten_impl(uint *dest, } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Lighten_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Lighten_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1705,7 +1705,7 @@ void QT_FASTCALL comp_func_solid_Lighten_rgb64(QRgba64 *dest, int length, QRgba6 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Lighten_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Lighten_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1726,7 +1726,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Lighten_impl(uint *Q_DECL_RESTR } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Lighten_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Lighten_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1795,7 +1795,7 @@ static inline uint color_dodge_op_rgb64(qint64 dst, qint64 src, qint64 da, qint6 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorDodge_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_ColorDodge_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1818,7 +1818,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorDodge_impl(uint *des } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorDodge_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_ColorDodge_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -1857,7 +1857,7 @@ void QT_FASTCALL comp_func_solid_ColorDodge_rgb64(QRgba64 *dest, int length, QRg } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorDodge_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_ColorDodge_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -1878,7 +1878,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorDodge_impl(uint *Q_DECL_RE } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorDodge_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_ColorDodge_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -1947,7 +1947,7 @@ static inline uint color_burn_op_rgb64(qint64 dst, qint64 src, qint64 da, qint64 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorBurn_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_ColorBurn_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -1970,7 +1970,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorBurn_impl(uint *dest } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_ColorBurn_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_ColorBurn_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -2009,7 +2009,7 @@ void QT_FASTCALL comp_func_solid_ColorBurn_rgb64(QRgba64 *dest, int length, QRgb } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorBurn_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_ColorBurn_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -2030,7 +2030,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorBurn_impl(uint *Q_DECL_RES } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_ColorBurn_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_ColorBurn_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -2093,7 +2093,7 @@ static inline uint hardlight_op_rgb64(uint dst, uint src, uint da, uint sa) } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_HardLight_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_HardLight_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -2116,7 +2116,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_HardLight_impl(uint *dest } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_HardLight_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_HardLight_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -2155,7 +2155,7 @@ void QT_FASTCALL comp_func_solid_HardLight_rgb64(QRgba64 *dest, int length, QRgb } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_HardLight_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_HardLight_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -2176,7 +2176,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_HardLight_impl(uint *Q_DECL_RES } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_HardLight_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_HardLight_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -2252,7 +2252,7 @@ static inline uint soft_light_op_rgb64(qint64 dst, qint64 src, qint64 da, qint64 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_SoftLight_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_SoftLight_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -2275,7 +2275,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_SoftLight_impl(uint *dest } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_SoftLight_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_SoftLight_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -2314,7 +2314,7 @@ void QT_FASTCALL comp_func_solid_SoftLight_rgb64(QRgba64 *dest, int length, QRgb } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_SoftLight_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_SoftLight_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -2335,7 +2335,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_SoftLight_impl(uint *Q_DECL_RES } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_SoftLight_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_SoftLight_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -2386,7 +2386,7 @@ static inline uint difference_op_rgb64(qint64 dst, qint64 src, qint64 da, qint64 } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Difference_impl(uint *dest, int length, uint color, const T &coverage) +static inline void comp_func_solid_Difference_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -2409,7 +2409,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Difference_impl(uint *des } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Difference_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void comp_func_solid_Difference_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -2448,7 +2448,7 @@ void QT_FASTCALL comp_func_solid_Difference_rgb64(QRgba64 *dest, int length, QRg } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Difference_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Difference_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -2469,7 +2469,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Difference_impl(uint *Q_DECL_RE } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Difference_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Difference_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; @@ -2509,7 +2509,7 @@ void QT_FASTCALL comp_func_Difference_rgb64(QRgba64 *Q_DECL_RESTRICT dest, const Dca' = (Sca.Da + Dca.Sa - 2.Sca.Dca) + Sca.(1 - Da) + Dca.(1 - Sa) */ template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void QT_FASTCALL comp_func_solid_Exclusion_impl(uint *dest, int length, uint color, const T &coverage) +static inline void QT_FASTCALL comp_func_solid_Exclusion_impl(uint *dest, int length, uint color, const T &coverage) { int sa = qAlpha(color); int sr = qRed(color); @@ -2532,7 +2532,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void QT_FASTCALL comp_func_solid_Exclusion_imp } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void QT_FASTCALL comp_func_solid_Exclusion_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) +static inline void QT_FASTCALL comp_func_solid_Exclusion_impl(QRgba64 *dest, int length, QRgba64 color, const T &coverage) { uint sa = color.alpha(); uint sr = color.red(); @@ -2572,7 +2572,7 @@ void QT_FASTCALL comp_func_solid_Exclusion_rgb64(QRgba64 *dest, int length, QRgb } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Exclusion_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Exclusion_impl(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { uint d = dest[i]; @@ -2593,7 +2593,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Exclusion_impl(uint *Q_DECL_RES } template <typename T> -Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Exclusion_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) +static inline void comp_func_Exclusion_impl(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, const T &coverage) { for (int i = 0; i < length; ++i) { QRgba64 d = dest[i]; diff --git a/src/gui/painting/qcosmeticstroker.cpp b/src/gui/painting/qcosmeticstroker.cpp index 436a62d486..0fb89a75b5 100644 --- a/src/gui/painting/qcosmeticstroker.cpp +++ b/src/gui/painting/qcosmeticstroker.cpp @@ -289,7 +289,7 @@ void QCosmeticStroker::setup() drawCaps = state->lastPen.capStyle() != Qt::FlatCap; if (strokeSelection & FastDraw) { - color = multiplyAlpha256(state->penData.solid.color, opacity).toArgb32(); + color = multiplyAlpha256(state->penData.solidColor, opacity).toArgb32(); QRasterBuffer *buffer = state->penData.rasterBuffer; pixels = (uint *)buffer->buffer(); ppl = buffer->stride<quint32>(); diff --git a/src/gui/painting/qdatabuffer_p.h b/src/gui/painting/qdatabuffer_p.h index 7cac2ac358..28d5f6d6c5 100644 --- a/src/gui/painting/qdatabuffer_p.h +++ b/src/gui/painting/qdatabuffer_p.h @@ -60,7 +60,7 @@ QT_BEGIN_NAMESPACE template <typename Type> class QDataBuffer { - Q_DISABLE_COPY(QDataBuffer) + Q_DISABLE_COPY_MOVE(QDataBuffer) public: QDataBuffer(int res) { diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index ca3590e280..2dd18f6dfc 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2018 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. @@ -55,6 +55,7 @@ #endif #include <private/qguiapplication_p.h> #include <private/qrgba64_p.h> +#include <qendian.h> #include <qloggingcategory.h> #include <qmath.h> @@ -856,6 +857,44 @@ static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const return buffer; } +static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) { + const uint x = qt_div_257(buffer[i]); + buffer[i] = qRgb(x, x, x); + } +} + +static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index; + for (int i = 0; i < count; ++i) { + const uint x = qt_div_257(s[i]); + buffer[i] = qRgb(x, x, x); + } + return buffer; +} + +static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const unsigned short *s = reinterpret_cast<const unsigned short *>(src); + for (int i = 0; i < count; ++i) + buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535); + return buffer; +} + +static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index; + for (int i = 0; i < count; ++i) { + buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535); + } + return buffer; +} + static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { @@ -1047,18 +1086,8 @@ static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { -#ifdef __SSE2__ - qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count); - for (int i = 0; i < count; ++i) - buffer[i] = buffer[i].premultiplied(); -#elif defined(__ARM_NEON__) - qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count); - for (int i = 0; i < count; ++i) - buffer[i] = buffer[i].premultiplied(); -#else for (int i = 0; i < count; ++i) buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied(); -#endif return buffer; } @@ -1110,18 +1139,8 @@ static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const u static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { -#ifdef __SSE2__ - qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count); - for (int i = 0; i < count; ++i) - buffer[i] = buffer[i].premultiplied(); -#elif defined(__ARM_NEON__) - qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count); - for (int i = 0; i < count; ++i) - buffer[i] = buffer[i].premultiplied(); -#else for (int i = 0; i < count; ++i) buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied(); -#endif return buffer; } @@ -1361,6 +1380,22 @@ static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src dest[index + i] = qGray(qUnpremultiply(src[i])); } +static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index; + for (int i = 0; i < count; ++i) + d[i] = qGray(src[i]) * 257; +} + +static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index; + for (int i = 0; i < count; ++i) + d[i] = qGray(qUnpremultiply(src[i])) * 257; +} + static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { @@ -1487,7 +1522,11 @@ QPixelLayout qPixelLayouts[QImage::NImageFormats] = { { true, true, QPixelLayout::BPP64, nullptr, convertPassThrough, nullptr, fetchRGB64ToRGB32, fetchPassThrough64, - storeRGB64FromRGB32, storeRGB64FromRGB32 } // Format_RGBA64_Premultiplied + storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied + { false, false, QPixelLayout::BPP16, nullptr, + convertGrayscale16ToRGB32, convertGrayscale16ToRGBA64, + fetchGrayscale16ToRGB32, fetchGrayscale16ToRGBA64, + storeGrayscale16FromARGB32PM, storeGrayscale16FromRGB32 } // Format_Grayscale16 }; Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats); @@ -1563,6 +1602,16 @@ static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *sr memcpy(d, src, count * sizeof(QRgba64)); } +static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + quint16 *d = reinterpret_cast<quint16*>(dest) + index; + for (int i = 0; i < count; ++i) { + QRgba64 s = src[i].unpremultiplied(); + d[i] = qGray(s.red(), s.green(), s.blue()); + } +} + ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = { nullptr, nullptr, @@ -1591,7 +1640,8 @@ ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = { storeGenericFromRGBA64PM<QImage::Format_Grayscale8>, storeRGBX64FromRGBA64PM, storeRGBA64FromRGBA64PM, - storeRGBA64PMFromRGBA64PM + storeRGBA64PMFromRGBA64PM, + storeGray16FromRGBA64PM }; /* @@ -1695,6 +1745,7 @@ static DestFetchProc destFetchProc[QImage::NImageFormats] = destFetch, // Format_RGBX64 destFetch, // Format_RGBA64 destFetch, // Format_RGBA64_Premultiplied + destFetch, // Format_Grayscale16 }; static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = @@ -1727,6 +1778,7 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = destFetchRGB64, // Format_RGBX64 destFetch64, // Format_RGBA64 destFetchRGB64, // Format_RGBA64_Premultiplied + destFetch64, // Format_Grayscale16 }; /* @@ -1880,6 +1932,7 @@ static DestStoreProc destStoreProc[QImage::NImageFormats] = destStore, // Format_RGBX64 destStore, // Format_RGBA64 destStore, // Format_RGBA64_Premultiplied + destStore, // Format_Grayscale16 }; static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = @@ -1911,7 +1964,8 @@ static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = destStore64, // Format_Grayscale8 0, // Format_RGBX64 destStore64RGBA64, // Format_RGBA64 - 0 // Format_RGBA64_Premultiplied + 0, // Format_RGBA64_Premultiplied + destStore64, // Format_Grayscale16 }; /* @@ -3838,6 +3892,7 @@ static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = { fetchUntransformed, // RGBX64 fetchUntransformed, // RGBA64 fetchUntransformed, // RGBA64_Premultiplied + fetchUntransformed, // Grayscale16 }; static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = { @@ -4232,7 +4287,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in switch(data->type) { case QSpanData::Solid: - solidSource = data->solid.color.isOpaque(); + solidSource = data->solidColor.isOpaque(); op.srcFetch = 0; op.srcFetch64 = 0; break; @@ -4280,7 +4335,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in } ++spans; } - if (!alphaSpans) { + if (!alphaSpans && spanCount > 0) { // If all spans are opaque we do not need to fetch dest. // But don't clear passthrough destFetch as they are just as fast and save destStore. if (op.destFetch != destFetchARGB32P) @@ -4301,23 +4356,62 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in return op; } +static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length) +{ + switch (bpp) { + case QPixelLayout::BPP64: { + quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x; + qt_memfill_template(dest + 1, dest[0], length - 1); + break; + } + case QPixelLayout::BPP32: { + quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x; + qt_memfill_template(dest + 1, dest[0], length - 1); + break; + } + case QPixelLayout::BPP24: { + quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x; + qt_memfill_template(dest + 1, dest[0], length - 1); + break; + } + case QPixelLayout::BPP16: { + quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x; + qt_memfill_template(dest + 1, dest[0], length - 1); + break; + } + case QPixelLayout::BPP8: { + uchar *dest = rasterBuffer->scanLine(y) + x; + memset(dest + 1, dest[0], length - 1); + break; + } + default: + Q_UNREACHABLE(); + } +} // -------------------- blend methods --------------------- -#if !defined(Q_CC_SUN) -static -#endif -void blend_color_generic(int count, const QSpan *spans, void *userData) +static void blend_color_generic(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); uint buffer[BufferSize]; - Operator op = getOperator(data, spans, count); - const uint color = data->solid.color.toArgb32(); + Operator op = getOperator(data, nullptr, 0); + const uint color = data->solidColor.toArgb32(); + bool solidFill = data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source + || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver && qAlpha(color) == 255); + QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp; while (count--) { int x = spans->x; int length = spans->len; + if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + op.destStore(data->rasterBuffer, x, spans->y, &color, 1); + spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length); + length = 0; + } + while (length) { int l = qMin(BufferSize, length); uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); @@ -4335,15 +4429,15 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - const Operator op = getOperator(data, spans, count); - const uint color = data->solid.color.toArgb32(); + const Operator op = getOperator(data, nullptr, 0); + const uint color = data->solidColor.toArgb32(); if (op.mode == QPainter::CompositionMode_Source) { // inline for performance while (count--) { uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x; if (spans->coverage == 255) { - QT_MEMFILL_UINT(target, spans->len, color); + qt_memfill(target, color, spans->len); } else { uint c = BYTE_MUL(color, spans->coverage); int ialpha = 255 - spans->coverage; @@ -4365,29 +4459,26 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData) void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + Operator op = getOperator(data, nullptr, 0); if (!op.funcSolid64) { qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit"); return blend_color_generic(count, spans, userData); } alignas(8) QRgba64 buffer[BufferSize]; - const QRgba64 color = data->solid.color; + const QRgba64 color = data->solidColor; bool solidFill = data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver && color.isOpaque()); - bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; + QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp; while (count--) { int x = spans->x; int length = spans->len; - if (solidFill && isBpp32 && spans->coverage == 255) { + if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) { // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels - if (length > 0) { - op.destStore64(data->rasterBuffer, x, spans->y, &color, 1); - uint *dest = (uint*)data->rasterBuffer->scanLine(spans->y) + x; - qt_memfill32(dest + 1, dest[0], length - 1); - length = 0; - } + op.destStore64(data->rasterBuffer, x, spans->y, &color, 1); + spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length); + length = 0; } while (length) { @@ -4413,16 +4504,16 @@ static void blend_color_rgb16(int count, const QSpan *spans, void *userData) from qt_gradient_quint16 with minimal overhead. */ QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; - if (mode == QPainter::CompositionMode_SourceOver && data->solid.color.isOpaque()) + if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque()) mode = QPainter::CompositionMode_Source; if (mode == QPainter::CompositionMode_Source) { // inline for performance - ushort c = data->solid.color.toRgb16(); + ushort c = data->solidColor.toRgb16(); while (count--) { ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x; if (spans->coverage == 255) { - QT_MEMFILL_USHORT(target, spans->len, c); + qt_memfill(target, c, spans->len); } else { ushort color = BYTE_MUL_RGB16(c, spans->coverage); int ialpha = 255 - spans->coverage; @@ -4439,7 +4530,7 @@ static void blend_color_rgb16(int count, const QSpan *spans, void *userData) if (mode == QPainter::CompositionMode_SourceOver) { while (count--) { - uint color = BYTE_MUL(data->solid.color.toArgb32(), spans->coverage); + uint color = BYTE_MUL(data->solidColor.toArgb32(), spans->coverage); int ialpha = qAlpha(~color); ushort c = qConvertRgb32To16(color); ushort *target = ((ushort *)data->rasterBuffer->scanLine(spans->y)) + spans->x; @@ -5216,6 +5307,7 @@ void qBlendTexture(int count, const QSpan *spans, void *userData) case QImage::Format_RGBX64: case QImage::Format_RGBA64: case QImage::Format_RGBA64_Premultiplied: + case QImage::Format_Grayscale16: proc = processTextureSpansGeneric64[blendType]; break; case QImage::Format_Invalid: @@ -5228,7 +5320,111 @@ void qBlendTexture(int count, const QSpan *spans, void *userData) proc(count, spans, userData); } -template <class DST> Q_STATIC_TEMPLATE_FUNCTION +static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *userData) +{ + QSpanData *data = reinterpret_cast<QSpanData *>(userData); + + LinearGradientValues linear; + getLinearGradientValues(&linear, data); + + CompositionFunctionSolid funcSolid = + functionForModeSolid[data->rasterBuffer->compositionMode]; + + /* + The logic for vertical gradient calculations is a mathematically + reduced copy of that in fetchLinearGradient() - which is basically: + + qreal ry = data->m22 * (y + 0.5) + data->dy; + qreal t = linear.dy*ry + linear.off; + t *= (GRADIENT_STOPTABLE_SIZE - 1); + quint32 color = + qt_gradient_pixel_fixed(&data->gradient, + int(t * FIXPT_SIZE)); + + This has then been converted to fixed point to improve performance. + */ + const int gss = GRADIENT_STOPTABLE_SIZE - 1; + int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); + int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); + + while (count--) { + int y = spans->y; + int x = spans->x; + + quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x; + quint32 color = + qt_gradient_pixel_fixed(&data->gradient, yinc * y + off); + + funcSolid(dst, spans->len, color, spans->coverage); + ++spans; + } +} + +template<ProcessSpans blend_color> +static void blend_vertical_gradient(int count, const QSpan *spans, void *userData) +{ + QSpanData *data = reinterpret_cast<QSpanData *>(userData); + + LinearGradientValues linear; + getLinearGradientValues(&linear, data); + + // Based on the same logic as blend_vertical_gradient_argb. + + const int gss = GRADIENT_STOPTABLE_SIZE - 1; + int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); + int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); + + while (count--) { + int y = spans->y; + + data->solidColor = qt_gradient_pixel64_fixed(&data->gradient, yinc * y + off); + blend_color(1, spans, userData); + ++spans; + } +} + +void qBlendGradient(int count, const QSpan *spans, void *userData) +{ + QSpanData *data = reinterpret_cast<QSpanData *>(userData); + bool isVerticalGradient = + data->txop <= QTransform::TxScale && + data->type == QSpanData::LinearGradient && + data->gradient.linear.end.x == data->gradient.linear.origin.x; + switch (data->rasterBuffer->format) { + case QImage::Format_RGB16: + if (isVerticalGradient) + return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData); + return blend_src_generic(count, spans, userData); + case QImage::Format_RGB32: + case QImage::Format_ARGB32_Premultiplied: + if (isVerticalGradient) + return blend_vertical_gradient_argb(count, spans, userData); + return blend_src_generic(count, spans, userData); +#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8) + case QImage::Format_ARGB32: + case QImage::Format_RGBA8888: +#endif + case QImage::Format_BGR30: + case QImage::Format_A2BGR30_Premultiplied: + case QImage::Format_RGB30: + case QImage::Format_A2RGB30_Premultiplied: + case QImage::Format_RGBX64: + case QImage::Format_RGBA64: + case QImage::Format_RGBA64_Premultiplied: + if (isVerticalGradient) + return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData); + return blend_src_generic_rgb64(count, spans, userData); + case QImage::Format_Invalid: + break; + default: + if (isVerticalGradient) + return blend_vertical_gradient<blend_color_generic>(count, spans, userData); + return blend_src_generic(count, spans, userData); + } + Q_UNREACHABLE(); +} + +template <class DST> static inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer, int x, int y, DST color, const uchar *map, @@ -5290,103 +5486,6 @@ inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer, } } -static void qt_gradient_argb32(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData *>(userData); - - bool isVerticalGradient = - data->txop <= QTransform::TxScale && - data->type == QSpanData::LinearGradient && - data->gradient.linear.end.x == data->gradient.linear.origin.x; - - if (isVerticalGradient) { - LinearGradientValues linear; - getLinearGradientValues(&linear, data); - - CompositionFunctionSolid funcSolid = - functionForModeSolid[data->rasterBuffer->compositionMode]; - - /* - The logic for vertical gradient calculations is a mathematically - reduced copy of that in fetchLinearGradient() - which is basically: - - qreal ry = data->m22 * (y + 0.5) + data->dy; - qreal t = linear.dy*ry + linear.off; - t *= (GRADIENT_STOPTABLE_SIZE - 1); - quint32 color = - qt_gradient_pixel_fixed(&data->gradient, - int(t * FIXPT_SIZE)); - - This has then been converted to fixed point to improve performance. - */ - const int gss = GRADIENT_STOPTABLE_SIZE - 1; - int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); - int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); - - while (count--) { - int y = spans->y; - int x = spans->x; - - quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x; - quint32 color = - qt_gradient_pixel_fixed(&data->gradient, yinc * y + off); - - funcSolid(dst, spans->len, color, spans->coverage); - ++spans; - } - - } else { - blend_src_generic(count, spans, userData); - } -} - -static void qt_gradient_quint16(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData *>(userData); - - bool isVerticalGradient = - data->txop <= QTransform::TxScale && - data->type == QSpanData::LinearGradient && - data->gradient.linear.end.x == data->gradient.linear.origin.x; - - if (isVerticalGradient) { - - LinearGradientValues linear; - getLinearGradientValues(&linear, data); - - /* - The logic for vertical gradient calculations is a mathematically - reduced copy of that in fetchLinearGradient() - which is basically: - - qreal ry = data->m22 * (y + 0.5) + data->dy; - qreal t = linear.dy*ry + linear.off; - t *= (GRADIENT_STOPTABLE_SIZE - 1); - quint32 color = - qt_gradient_pixel_fixed(&data->gradient, - int(t * FIXPT_SIZE)); - - This has then been converted to fixed point to improve performance. - */ - const int gss = GRADIENT_STOPTABLE_SIZE - 1; - int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); - int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); - - // Save the fillData since we overwrite it when setting solid.color. - QGradientData gradient = data->gradient; - while (count--) { - int y = spans->y; - - data->solid.color = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&gradient, yinc * y + off)); - blend_color_rgb16(1, spans, userData); - ++spans; - } - data->gradient = gradient; - - } else { - blend_src_generic(count, spans, userData); - } -} - inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *map, @@ -6007,29 +6106,25 @@ static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer, DrawHelper qDrawHelper[QImage::NImageFormats] = { // Format_Invalid, - { 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0 }, // Format_Mono, { blend_color_generic, - blend_src_generic, 0, 0, 0, 0 }, // Format_MonoLSB, { blend_color_generic, - blend_src_generic, 0, 0, 0, 0 }, // Format_Indexed8, { blend_color_generic, - blend_src_generic, 0, 0, 0, 0 }, // Format_RGB32, { blend_color_argb, - qt_gradient_argb32, qt_bitmapblit_argb32, qt_alphamapblit_argb32, qt_alphargbblit_argb32, @@ -6038,7 +6133,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB32, { blend_color_generic, - blend_src_generic, qt_bitmapblit_argb32, qt_alphamapblit_argb32, qt_alphargbblit_argb32, @@ -6047,7 +6141,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB32_Premultiplied { blend_color_argb, - qt_gradient_argb32, qt_bitmapblit_argb32, qt_alphamapblit_argb32, qt_alphargbblit_argb32, @@ -6056,7 +6149,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB16 { blend_color_rgb16, - qt_gradient_quint16, qt_bitmapblit_quint16, qt_alphamapblit_quint16, qt_alphargbblit_generic, @@ -6065,7 +6157,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB8565_Premultiplied { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6074,7 +6165,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB666 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6083,7 +6173,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB6666_Premultiplied { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6092,7 +6181,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB555 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6101,7 +6189,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB8555_Premultiplied { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6110,7 +6197,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB888 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6119,7 +6205,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB444 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6128,7 +6213,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_ARGB4444_Premultiplied { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6137,7 +6221,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGBX8888 { blend_color_generic, - blend_src_generic, qt_bitmapblit_rgba8888, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6146,7 +6229,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGBA8888 { blend_color_generic, - blend_src_generic, qt_bitmapblit_rgba8888, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6155,7 +6237,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB8888_Premultiplied { blend_color_generic, - blend_src_generic, qt_bitmapblit_rgba8888, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6164,7 +6245,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_BGR30 { blend_color_generic_rgb64, - blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderBGR>, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6173,7 +6253,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_A2BGR30_Premultiplied { blend_color_generic_rgb64, - blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderBGR>, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6182,7 +6261,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGB30 { blend_color_generic_rgb64, - blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderRGB>, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6191,7 +6269,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_A2RGB30_Premultiplied { blend_color_generic_rgb64, - blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderRGB>, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6200,7 +6277,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_Alpha8 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6209,7 +6285,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_Grayscale8 { blend_color_generic, - blend_src_generic, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6218,7 +6293,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGBX64 { blend_color_generic_rgb64, - blend_src_generic_rgb64, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6227,7 +6301,6 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGBA64 { blend_color_generic_rgb64, - blend_src_generic_rgb64, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, @@ -6236,82 +6309,98 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = // Format_RGBA64_Premultiplied { blend_color_generic_rgb64, - blend_src_generic_rgb64, 0, qt_alphamapblit_generic, qt_alphargbblit_generic, qt_rectfill_quint64 }, + // Format_Grayscale16 + { + blend_color_generic_rgb64, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + qt_rectfill_quint16 + }, }; -#if defined(Q_CC_MSVC) && !defined(_MIPS_) -template <class T> -inline void qt_memfill_template(T *dest, T color, int count) +#if !defined(__SSE2__) +void qt_memfill64(quint64 *dest, quint64 color, qsizetype count) { - while (count--) - *dest++ = color; + qt_memfill_template<quint64>(dest, color, count); } +#endif -#else - -template <class T> -inline void qt_memfill_template(T *dest, T color, int count) +#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG) +__attribute__((optimize("no-tree-vectorize"))) +#endif +void qt_memfill24(quint24 *dest, quint24 color, qsizetype count) { - int n = (count + 7) / 8; - switch (count & 0x07) - { - case 0: do { *dest++ = color; Q_FALLTHROUGH(); - case 7: *dest++ = color; Q_FALLTHROUGH(); - case 6: *dest++ = color; Q_FALLTHROUGH(); - case 5: *dest++ = color; Q_FALLTHROUGH(); - case 4: *dest++ = color; Q_FALLTHROUGH(); - case 3: *dest++ = color; Q_FALLTHROUGH(); - case 2: *dest++ = color; Q_FALLTHROUGH(); - case 1: *dest++ = color; - } while (--n > 0); - } -} +# ifdef QT_COMPILER_SUPPORTS_SSSE3 + extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype); + if (qCpuHasFeature(SSSE3)) + return qt_memfill24_ssse3(dest, color, count); +# endif -template <> -inline void qt_memfill_template(quint16 *dest, quint16 value, int count) -{ - if (count < 3) { - switch (count) { - case 2: *dest++ = value; Q_FALLTHROUGH(); - case 1: *dest = value; - } + const quint32 v = color; + quint24 *end = dest + count; + + // prolog: align dest to 32bit + while ((quintptr(dest) & 0x3) && dest < end) { + *dest++ = v; + } + if (dest >= end) return; + + const uint val1 = qFromBigEndian((v << 8) | (v >> 16)); + const uint val2 = qFromBigEndian((v << 16) | (v >> 8)); + const uint val3 = qFromBigEndian((v << 24) | (v >> 0)); + + for ( ; dest <= (end - 4); dest += 4) { + quint32 *dst = reinterpret_cast<quint32 *>(dest); + dst[0] = val1; + dst[1] = val2; + dst[2] = val3; } - const int align = (quintptr)(dest) & 0x3; - switch (align) { - case 2: *dest++ = value; --count; + // less than 4px left + switch (end - dest) { + case 3: + *dest++ = v; + Q_FALLTHROUGH(); + case 2: + *dest++ = v; + Q_FALLTHROUGH(); + case 1: + *dest++ = v; + } +} + +void qt_memfill16(quint16 *dest, quint16 value, qsizetype count) +{ + const int align = quintptr(dest) & 0x3; + if (align) { + *dest++ = value; + --count; } - const quint32 value32 = (value << 16) | value; - qt_memfill(reinterpret_cast<quint32*>(dest), value32, count / 2); if (count & 0x1) dest[count - 1] = value; -} -#endif -void qt_memfill64(quint64 *dest, quint64 color, int count) -{ - qt_memfill_template<quint64>(dest, color, count); + const quint32 value32 = (value << 16) | value; + qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2); } -#if !defined(__SSE2__) -void qt_memfill16(quint16 *dest, quint16 color, int count) -{ - qt_memfill_template<quint16>(dest, color, count); -} -#endif #if !defined(__SSE2__) && !defined(__ARM_NEON__) && !defined(__MIPS_DSP__) -void qt_memfill32(quint32 *dest, quint32 color, int count) +void qt_memfill32(quint32 *dest, quint32 color, qsizetype count) { qt_memfill_template<quint32>(dest, color, count); } #endif +#ifdef __SSE2__ +decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr; +decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr; +#endif #ifdef QT_COMPILER_SUPPORTS_SSE4_1 template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); @@ -6325,6 +6414,10 @@ static void qInitDrawhelperFunctions() qInitBlendFunctions(); #ifdef __SSE2__ +# ifndef __AVX2__ + qt_memfill32 = qt_memfill32_sse2; + qt_memfill64 = qt_memfill64_sse2; +# endif qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2; qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2; qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2; @@ -6401,6 +6494,14 @@ static void qInitDrawhelperFunctions() const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, @@ -6413,10 +6514,18 @@ static void qInitDrawhelperFunctions() const QVector<QRgb> *, QDitherInfo *); extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); +# ifndef __AVX2__ qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4; + qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4; + qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4; + qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4; +# endif qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4; @@ -6431,6 +6540,8 @@ static void qInitDrawhelperFunctions() #if defined(QT_COMPILER_SUPPORTS_AVX2) if (qCpuHasFeature(ArchHaswell)) { + qt_memfill32 = qt_memfill32_avx2; + qt_memfill64 = qt_memfill64_avx2; extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha); @@ -6470,6 +6581,26 @@ static void qInitDrawhelperFunctions() bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2; + + extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *); + extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2; + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2; + qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2; + + extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2; + qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2; + qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2; + qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2; } #endif @@ -6505,6 +6636,14 @@ static void qInitDrawhelperFunctions() const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, @@ -6514,10 +6653,16 @@ static void qInitDrawhelperFunctions() qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon; qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon; qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon; + qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon; + qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon; qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon; qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon; qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon; + qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon; #endif #if defined(ENABLE_PIXMAN_DRAWHELPERS) diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp index ec6643deed..8c69e21569 100644 --- a/src/gui/painting/qdrawhelper_avx2.cpp +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. @@ -38,6 +39,7 @@ ****************************************************************************/ #include "qdrawhelper_p.h" +#include "qdrawhelper_x86_p.h" #include "qdrawingprimitive_sse2_p.h" #include "qrgba64_p.h" @@ -53,7 +55,8 @@ enum { // Vectorized blend functions: // See BYTE_MUL_SSE2 for details. -inline static void BYTE_MUL_AVX2(__m256i &pixelVector, const __m256i &alphaChannel, const __m256i &colorMask, const __m256i &half) +inline static void Q_DECL_VECTORCALL +BYTE_MUL_AVX2(__m256i &pixelVector, __m256i alphaChannel, __m256i colorMask, __m256i half) { __m256i pixelVectorAG = _mm256_srli_epi16(pixelVector, 8); __m256i pixelVectorRB = _mm256_and_si256(pixelVector, colorMask); @@ -72,7 +75,8 @@ inline static void BYTE_MUL_AVX2(__m256i &pixelVector, const __m256i &alphaChann pixelVector = _mm256_or_si256(pixelVectorAG, pixelVectorRB); } -inline static void BYTE_MUL_RGB64_AVX2(__m256i &pixelVector, const __m256i &alphaChannel, const __m256i &colorMask, const __m256i &half) +inline static void Q_DECL_VECTORCALL +BYTE_MUL_RGB64_AVX2(__m256i &pixelVector, __m256i alphaChannel, __m256i colorMask, __m256i half) { __m256i pixelVectorAG = _mm256_srli_epi32(pixelVector, 16); __m256i pixelVectorRB = _mm256_and_si256(pixelVector, colorMask); @@ -92,7 +96,8 @@ inline static void BYTE_MUL_RGB64_AVX2(__m256i &pixelVector, const __m256i &alph } // See INTERPOLATE_PIXEL_255_SSE2 for details. -inline static void INTERPOLATE_PIXEL_255_AVX2(const __m256i &srcVector, __m256i &dstVector, const __m256i &alphaChannel, const __m256i &oneMinusAlphaChannel, const __m256i &colorMask, const __m256i &half) +inline static void Q_DECL_VECTORCALL +INTERPOLATE_PIXEL_255_AVX2(__m256i srcVector, __m256i &dstVector, __m256i alphaChannel, __m256i oneMinusAlphaChannel, __m256i colorMask, __m256i half) { const __m256i srcVectorAG = _mm256_srli_epi16(srcVector, 8); const __m256i dstVectorAG = _mm256_srli_epi16(dstVector, 8); @@ -114,7 +119,8 @@ inline static void INTERPOLATE_PIXEL_255_AVX2(const __m256i &srcVector, __m256i dstVector = _mm256_or_si256(finalAG, finalRB); } -inline static void INTERPOLATE_PIXEL_RGB64_AVX2(const __m256i &srcVector, __m256i &dstVector, const __m256i &alphaChannel, const __m256i &oneMinusAlphaChannel, const __m256i &colorMask, const __m256i &half) +inline static void Q_DECL_VECTORCALL +INTERPOLATE_PIXEL_RGB64_AVX2(__m256i srcVector, __m256i &dstVector, __m256i alphaChannel, __m256i oneMinusAlphaChannel, __m256i colorMask, __m256i half) { const __m256i srcVectorAG = _mm256_srli_epi32(srcVector, 16); const __m256i dstVectorAG = _mm256_srli_epi32(dstVector, 16); @@ -138,7 +144,7 @@ inline static void INTERPOLATE_PIXEL_RGB64_AVX2(const __m256i &srcVector, __m256 // See BLEND_SOURCE_OVER_ARGB32_SSE2 for details. -inline static void BLEND_SOURCE_OVER_ARGB32_AVX2(quint32 *dst, const quint32 *src, const int length) +inline static void Q_DECL_VECTORCALL BLEND_SOURCE_OVER_ARGB32_AVX2(quint32 *dst, const quint32 *src, const int length) { const __m256i half = _mm256_set1_epi16(0x80); const __m256i one = _mm256_set1_epi16(0xff); @@ -209,7 +215,8 @@ inline static void BLEND_SOURCE_OVER_ARGB32_AVX2(quint32 *dst, const quint32 *sr // See BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2 for details. -inline static void BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_AVX2(quint32 *dst, const quint32 *src, const int length, const int const_alpha) +inline static void Q_DECL_VECTORCALL +BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_AVX2(quint32 *dst, const quint32 *src, const int length, const int const_alpha) { int x = 0; @@ -316,6 +323,66 @@ void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, } } +static Q_NEVER_INLINE +void Q_DECL_VECTORCALL qt_memfillXX_avx2(uchar *dest, __m256i value256, qsizetype bytes) +{ + __m128i value128 = _mm256_castsi256_si128(value256); + + // main body + __m256i *dst256 = reinterpret_cast<__m256i *>(dest); + uchar *end = dest + bytes; + while (reinterpret_cast<uchar *>(dst256 + 4) <= end) { + _mm256_storeu_si256(dst256 + 0, value256); + _mm256_storeu_si256(dst256 + 1, value256); + _mm256_storeu_si256(dst256 + 2, value256); + _mm256_storeu_si256(dst256 + 3, value256); + dst256 += 4; + } + + // first epilogue: fewer than 128 bytes / 32 entries + bytes = end - reinterpret_cast<uchar *>(dst256); + switch (bytes / sizeof(value256)) { + case 3: _mm256_storeu_si256(dst256++, value256); Q_FALLTHROUGH(); + case 2: _mm256_storeu_si256(dst256++, value256); Q_FALLTHROUGH(); + case 1: _mm256_storeu_si256(dst256++, value256); + } + + // second epilogue: fewer than 32 bytes + __m128i *dst128 = reinterpret_cast<__m128i *>(dst256); + if (bytes & sizeof(value128)) + _mm_storeu_si128(dst128++, value128); + + // third epilogue: fewer than 16 bytes + if (bytes & 8) + _mm_storel_epi64(reinterpret_cast<__m128i *>(end - 8), value128); +} + +void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count) +{ +#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) + // work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820 + __m128i value64 = _mm_set_epi64x(0, value); // _mm_cvtsi64_si128(value); +# ifdef Q_PROCESSOR_X86_64 + asm ("" : "+x" (value64)); +# endif + __m256i value256 = _mm256_broadcastq_epi64(value64); +#else + __m256i value256 = _mm256_set1_epi64x(value); +#endif + + qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), value256, count * sizeof(quint64)); +} + +void qt_memfill32_avx2(quint32 *dest, quint32 value, qsizetype count) +{ + if (count % 2) { + // odd number of pixels, round to even + *dest++ = value; + --count; + } + qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), _mm256_set1_epi32(value), count * sizeof(quint32)); +} + void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) { Q_ASSERT(const_alpha < 256); @@ -928,6 +995,237 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint * } } +static inline __m128i maskFromCount(qsizetype count) +{ + Q_ASSERT(count > 0); + static const qint64 data[] = { -1, -1, 0, 0 }; + auto ptr = reinterpret_cast<const quint8 *>(data) + sizeof(__m128i); + + if (count > int(sizeof(__m128i))) + return _mm_set1_epi8(-1); + + return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr - count)); +} + +template<bool RGBA> +static void convertARGBToARGB32PM_avx2(uint *buffer, const uint *src, qsizetype count) +{ + qsizetype i = 0; + const __m256i alphaMask = _mm256_set1_epi32(0xff000000); + const __m256i rgbaMask = _mm256_broadcastsi128_si256(_mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15)); + const __m256i shuffleMask = _mm256_broadcastsi128_si256(_mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15)); + const __m256i half = _mm256_set1_epi16(0x0080); + const __m256i zero = _mm256_setzero_si256(); + + for (; i < count - 7; i += 8) { + __m256i srcVector = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i)); + if (!_mm256_testz_si256(srcVector, alphaMask)) { + // keep the two _mm_test[zc]_siXXX next to each other + bool cf = _mm256_testc_si256(srcVector, alphaMask); + if (RGBA) + srcVector = _mm256_shuffle_epi8(srcVector, rgbaMask); + if (!cf) { + __m256i src1 = _mm256_unpacklo_epi8(srcVector, zero); + __m256i src2 = _mm256_unpackhi_epi8(srcVector, zero); + __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + src1 = _mm256_mullo_epi16(src1, alpha1); + src2 = _mm256_mullo_epi16(src2, alpha2); + src1 = _mm256_add_epi16(src1, _mm256_srli_epi16(src1, 8)); + src2 = _mm256_add_epi16(src2, _mm256_srli_epi16(src2, 8)); + src1 = _mm256_add_epi16(src1, half); + src2 = _mm256_add_epi16(src2, half); + src1 = _mm256_srli_epi16(src1, 8); + src2 = _mm256_srli_epi16(src2, 8); + src1 = _mm256_blend_epi16(src1, alpha1, 0x88); + src2 = _mm256_blend_epi16(src2, alpha2, 0x88); + srcVector = _mm256_packus_epi16(src1, src2); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + i), srcVector); + } else { + if (buffer != src || RGBA) + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + i), srcVector); + } + } else { + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + i), zero); + } + } + + for ( ; i < count; i += 4) { + __m128i maskedAlphaMask = _mm256_castsi256_si128(alphaMask); + __m128i mask = maskFromCount((count - i) * sizeof(*src)); + maskedAlphaMask = _mm_and_si128(mask, maskedAlphaMask); + __m128i srcVector = _mm_maskload_epi32(reinterpret_cast<const int *>(src), mask); + + if (!_mm_testz_si128(srcVector, maskedAlphaMask)) { + // keep the two _mm_test[zc]_siXXX next to each other + bool cf = _mm_testc_si128(srcVector, maskedAlphaMask); + if (RGBA) + srcVector = _mm_shuffle_epi8(srcVector, _mm256_castsi256_si128(rgbaMask)); + if (!cf) { + __m128i src1 = _mm_unpacklo_epi8(srcVector, _mm256_castsi256_si128(zero)); + __m128i src2 = _mm_unpackhi_epi8(srcVector, _mm256_castsi256_si128(zero)); + __m128i alpha1 = _mm_shuffle_epi8(src1, _mm256_castsi256_si128(shuffleMask)); + __m128i alpha2 = _mm_shuffle_epi8(src2, _mm256_castsi256_si128(shuffleMask)); + src1 = _mm_mullo_epi16(src1, alpha1); + src2 = _mm_mullo_epi16(src2, alpha2); + src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8)); + src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8)); + src1 = _mm_add_epi16(src1, _mm256_castsi256_si128(half)); + src2 = _mm_add_epi16(src2, _mm256_castsi256_si128(half)); + src1 = _mm_srli_epi16(src1, 8); + src2 = _mm_srli_epi16(src2, 8); + src1 = _mm_blend_epi16(src1, alpha1, 0x88); + src2 = _mm_blend_epi16(src2, alpha2, 0x88); + srcVector = _mm_packus_epi16(src1, src2); + _mm_maskstore_epi32(reinterpret_cast<int *>(buffer + i), mask, srcVector); + } else { + if (buffer != src || RGBA) + _mm_maskstore_epi32(reinterpret_cast<int *>(buffer + i), mask, srcVector); + } + } else { + _mm_maskstore_epi32(reinterpret_cast<int *>(buffer + i), mask, _mm256_castsi256_si128(zero)); + } + } +} + +void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *) +{ + convertARGBToARGB32PM_avx2<false>(buffer, buffer, count); +} + +void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *) +{ + convertARGBToARGB32PM_avx2<true>(buffer, buffer, count); +} + +const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToARGB32PM_avx2<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + +const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToARGB32PM_avx2<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + +template<bool RGBA> +static void convertARGBToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, qsizetype count) +{ + qsizetype i = 0; + const __m256i alphaMask = _mm256_broadcastsi128_si256(_mm_set1_epi32(0xff000000)); + const __m256i rgbaMask = _mm256_broadcastsi128_si256(_mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15)); + const __m256i shuffleMask = _mm256_broadcastsi128_si256(_mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15)); + const __m256i zero = _mm256_setzero_si256(); + + for (; i < count - 7; i += 8) { + __m256i src1, src2; + __m256i srcVector = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i)); + if (!_mm256_testz_si256(srcVector, alphaMask)) { + // keep the two _mm_test[zc]_siXXX next to each other + bool cf = _mm256_testc_si256(srcVector, alphaMask); + if (!RGBA) + srcVector = _mm256_shuffle_epi8(srcVector, rgbaMask); + + // The two unpack instructions unpack the low and upper halves of + // each 128-bit half of the 256-bit register. Here's the tracking + // of what's where: (p is 32-bit, P is 64-bit) + // as loaded: [ p1, p2, p3, p4; p5, p6, p7, p8 ] + // after permute4x64 [ p1, p2, p5, p6; p3, p4, p7, p8 ] + // after unpacklo/hi [ P1, P2; P3, P4 ] [ P5, P6; P7, P8 ] + srcVector = _mm256_permute4x64_epi64(srcVector, _MM_SHUFFLE(3, 1, 2, 0)); + + if (!cf) { + src1 = _mm256_unpacklo_epi8(srcVector, zero); + src2 = _mm256_unpackhi_epi8(srcVector, zero); + __m256i alpha1 = _mm256_shuffle_epi8(src1, shuffleMask); + __m256i alpha2 = _mm256_shuffle_epi8(src2, shuffleMask); + src1 = _mm256_mullo_epi16(src1, alpha1); + src2 = _mm256_mullo_epi16(src2, alpha2); + alpha1 = _mm256_unpacklo_epi8(srcVector, srcVector); + alpha2 = _mm256_unpackhi_epi8(srcVector, srcVector); + src1 = _mm256_add_epi16(src1, _mm256_srli_epi16(src1, 7)); + src2 = _mm256_add_epi16(src2, _mm256_srli_epi16(src2, 7)); + src1 = _mm256_blend_epi16(src1, alpha1, 0x88); + src2 = _mm256_blend_epi16(src2, alpha2, 0x88); + } else { + src1 = _mm256_unpacklo_epi8(srcVector, srcVector); + src2 = _mm256_unpackhi_epi8(srcVector, srcVector); + } + } else { + src1 = src2 = zero; + } + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + i), src1); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + i) + 1, src2); + } + + for ( ; i < count; i += 4) { + __m128i maskedAlphaMask = _mm256_castsi256_si128(alphaMask); + __m128i mask = maskFromCount((count - i) * sizeof(*src)); + maskedAlphaMask = _mm_and_si128(mask, maskedAlphaMask); + __m128i srcVector = _mm_maskload_epi32(reinterpret_cast<const int *>(src), mask); + __m256i src; + + if (!_mm_testz_si128(srcVector, maskedAlphaMask)) { + // keep the two _mm_test[zc]_siXXX next to each other + bool cf = _mm_testc_si128(srcVector, maskedAlphaMask); + if (!RGBA) + srcVector = _mm_shuffle_epi8(srcVector, _mm256_castsi256_si128(rgbaMask)); + if (!cf) { + src = _mm256_cvtepu8_epi16(srcVector); + __m256i alpha = _mm256_shuffle_epi8(src, shuffleMask); + src = _mm256_mullo_epi16(src, alpha); + + __m128i alpha1 = _mm_unpacklo_epi8(srcVector, srcVector); + __m128i alpha2 = _mm_unpackhi_epi8(srcVector, srcVector); + alpha = _mm256_inserti128_si256(_mm256_castsi128_si256(alpha1), alpha2, 1); + src = _mm256_add_epi16(src, _mm256_srli_epi16(src, 7)); + src = _mm256_blend_epi16(src, alpha, 0x88); + } else { + const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); + const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); + src = _mm256_castsi128_si256(src1); + src = _mm256_inserti128_si256(src, src2, 1); + } + } else { + src = zero; + } + __m256i xmask = _mm256_cvtepi32_epi64(mask); + _mm256_maskstore_epi64(reinterpret_cast<qint64 *>(buffer + i), xmask, src); + }; +} + +const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_avx2<false>(buffer, src, count); + return buffer; +} + +const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_avx2<true>(buffer, src, count); + return buffer; +} + +const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_avx2<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + +const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_avx2<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + QT_END_NAMESPACE #endif diff --git a/src/gui/painting/qdrawhelper_mips_dsp.cpp b/src/gui/painting/qdrawhelper_mips_dsp.cpp index e92a6606de..17597deb1d 100644 --- a/src/gui/painting/qdrawhelper_mips_dsp.cpp +++ b/src/gui/painting/qdrawhelper_mips_dsp.cpp @@ -43,7 +43,7 @@ QT_BEGIN_NAMESPACE -void qt_memfill32(quint32 *dest, quint32 color, int count) +void qt_memfill32(quint32 *dest, quint32 color, qsizetype count) { qt_memfill32_asm_mips_dsp(dest, color, count); } diff --git a/src/gui/painting/qdrawhelper_mips_dsp_p.h b/src/gui/painting/qdrawhelper_mips_dsp_p.h index 36c4af2732..a3d0410274 100644 --- a/src/gui/painting/qdrawhelper_mips_dsp_p.h +++ b/src/gui/painting/qdrawhelper_mips_dsp_p.h @@ -57,7 +57,7 @@ QT_BEGIN_NAMESPACE #if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) -extern "C" void qt_memfill32_asm_mips_dsp(quint32 *dest, quint32 value, int count); +extern "C" void qt_memfill32_asm_mips_dsp(quint32 *dest, quint32 value, qsizetype count); extern "C" void comp_func_SourceOver_asm_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha); diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index 629dfe2358..3fcbcfd053 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -47,7 +47,7 @@ QT_BEGIN_NAMESPACE -void qt_memfill32(quint32 *dest, quint32 value, int count) +void qt_memfill32(quint32 *dest, quint32 value, qsizetype count) { const int epilogueSize = count % 16; #if !defined(Q_PROCESSOR_ARM_64) @@ -84,20 +84,20 @@ void qt_memfill32(quint32 *dest, quint32 value, int count) switch (epilogueSize) { - case 15: *dest++ = value; - case 14: *dest++ = value; - case 13: *dest++ = value; - case 12: *dest++ = value; - case 11: *dest++ = value; - case 10: *dest++ = value; - case 9: *dest++ = value; - case 8: *dest++ = value; - case 7: *dest++ = value; - case 6: *dest++ = value; - case 5: *dest++ = value; - case 4: *dest++ = value; - case 3: *dest++ = value; - case 2: *dest++ = value; + case 15: *dest++ = value; Q_FALLTHROUGH(); + case 14: *dest++ = value; Q_FALLTHROUGH(); + case 13: *dest++ = value; Q_FALLTHROUGH(); + case 12: *dest++ = value; Q_FALLTHROUGH(); + case 11: *dest++ = value; Q_FALLTHROUGH(); + case 10: *dest++ = value; Q_FALLTHROUGH(); + case 9: *dest++ = value; Q_FALLTHROUGH(); + case 8: *dest++ = value; Q_FALLTHROUGH(); + case 7: *dest++ = value; Q_FALLTHROUGH(); + case 6: *dest++ = value; Q_FALLTHROUGH(); + case 5: *dest++ = value; Q_FALLTHROUGH(); + case 4: *dest++ = value; Q_FALLTHROUGH(); + case 3: *dest++ = value; Q_FALLTHROUGH(); + case 2: *dest++ = value; Q_FALLTHROUGH(); case 1: *dest++ = value; } } @@ -791,7 +791,7 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha) { if ((const_alpha & qAlpha(color)) == 255) { - QT_MEMFILL_UINT(destPixels, length, color); + qt_memfill32(destPixels, color, length); } else { if (const_alpha != 255) color = BYTE_MUL(color, const_alpha); @@ -1149,6 +1149,72 @@ static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int } } +template<bool RGBA> +static inline void convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count) +{ + if (count <= 0) + return; + + const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7}; + const uint64x2_t blendMask = vdupq_n_u64(Q_UINT64_C(0xffff000000000000)); + + int i = 0; + for (; i < count-3; i += 4) { + uint32x4_t vs32 = vld1q_u32(src + i); + uint32x4_t alphaVector = vshrq_n_u32(vs32, 24); +#if defined(Q_PROCESSOR_ARM_64) + uint32_t alphaSum = vaddvq_u32(alphaVector); +#else + // no vaddvq_u32 + uint32x2_t tmp = vpadd_u32(vget_low_u32(alphaVector), vget_high_u32(alphaVector)); + uint32_t alphaSum = vget_lane_u32(vpadd_u32(tmp, tmp), 0); +#endif + if (alphaSum) { + if (!RGBA) + vs32 = vrgba2argb(vs32); + const uint8x16_t vs8 = vreinterpretq_u8_u32(vs32); + const uint8x16x2_t v = vzipq_u8(vs8, vs8); + if (alphaSum != 255 * 4) { + const uint8x8_t s1 = vreinterpret_u8_u32(vget_low_u32(vs32)); + const uint8x8_t s2 = vreinterpret_u8_u32(vget_high_u32(vs32)); + const uint8x8_t alpha1 = vtbl1_u8(s1, shuffleMask); + const uint8x8_t alpha2 = vtbl1_u8(s2, shuffleMask); + uint16x8_t src1 = vmull_u8(s1, alpha1); + uint16x8_t src2 = vmull_u8(s2, alpha2); + // convert from 0->(255x255) to 0->(255x257) + src1 = vsraq_n_u16(src1, src1, 7); + src2 = vsraq_n_u16(src2, src2, 7); + + // now restore alpha from the trivial conversion + const uint64x2_t d1 = vbslq_u64(blendMask, vreinterpretq_u64_u8(v.val[0]), vreinterpretq_u64_u16(src1)); + const uint64x2_t d2 = vbslq_u64(blendMask, vreinterpretq_u64_u8(v.val[1]), vreinterpretq_u64_u16(src2)); + + vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u64(d1)); + buffer += 2; + vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u64(d2)); + buffer += 2; + } else { + vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0])); + buffer += 2; + vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1])); + buffer += 2; + } + } else { + vst1q_u16((uint16_t *)buffer, vdupq_n_u16(0)); + buffer += 2; + vst1q_u16((uint16_t *)buffer, vdupq_n_u16(0)); + buffer += 2; + } + } + + SIMD_EPILOGUE(i, count, 3) { + uint s = src[i]; + if (RGBA) + s = RGBA2ARGB(s); + *buffer++ = QRgba64::fromArgb32(s).premultiplied(); + } +} + static inline float32x4_t reciprocal_mul_ps(float32x4_t a, float mul) { float32x4_t ia = vrecpeq_f32(a); // estimate 1/a @@ -1258,6 +1324,34 @@ const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar * return buffer; } +const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGB32ToRGBA64PM_neon<false>(buffer, src, count); + return buffer; +} + +const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGB32ToRGBA64PM_neon<true>(buffer, src, count); + return buffer; +} + +const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGB32ToRGBA64PM_neon<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + +const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGB32ToRGBA64PM_neon<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + void QT_FASTCALL storeRGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 40475a9bde..19e1f21a3b 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -123,7 +123,7 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); -void qt_memfill32_neon(quint32 *dest, quint32 value, int count); +void qt_memfill32_neon(quint32 *dest, quint32 value, qsizetype count); void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index fb08261205..37108949d6 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -69,14 +69,18 @@ QT_BEGIN_NAMESPACE #if defined(Q_CC_GNU) -# define Q_STATIC_TEMPLATE_FUNCTION static # define Q_DECL_RESTRICT __restrict__ +# if defined(Q_PROCESSOR_X86_32) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) +# define Q_DECL_VECTORCALL __attribute__((sseregparm,regparm(3))) +# else +# define Q_DECL_VECTORCALL +# endif #elif defined(Q_CC_MSVC) -# define Q_STATIC_TEMPLATE_FUNCTION static # define Q_DECL_RESTRICT __restrict +# define Q_DECL_VECTORCALL __vectorcall #else -# define Q_STATIC_TEMPLATE_FUNCTION static # define Q_DECL_RESTRICT +# define Q_DECL_VECTORCALL #endif static const uint AMASK = 0xff000000; @@ -149,7 +153,6 @@ typedef void (*MemRotateFunc)(const uchar *srcPixels, int w, int h, int sbpl, uc struct DrawHelper { ProcessSpans blendColor; - ProcessSpans blendGradient; BitmapBlitFunc bitmapBlit; AlphamapBlitFunc alphamapBlit; AlphaRGBBlitFunc alphaRGBBlit; @@ -162,10 +165,33 @@ extern SrcOverTransformFunc qTransformFunctions[QImage::NImageFormats][QImage::N extern DrawHelper qDrawHelper[QImage::NImageFormats]; +struct quint24 { + quint24() = default; + quint24(uint value) + { + data[0] = uchar(value >> 16); + data[1] = uchar(value >> 8); + data[2] = uchar(value); + } + operator uint() const + { + return data[2] | (data[1] << 8) | (data[0] << 16); + } + + uchar data[3]; +}; + +void qBlendGradient(int count, const QSpan *spans, void *userData); void qBlendTexture(int count, const QSpan *spans, void *userData); -extern void qt_memfill64(quint64 *dest, quint64 value, int count); -extern void qt_memfill32(quint32 *dest, quint32 value, int count); -extern void qt_memfill16(quint16 *dest, quint16 value, int count); +#ifdef __SSE2__ +extern void (*qt_memfill64)(quint64 *dest, quint64 value, qsizetype count); +extern void (*qt_memfill32)(quint32 *dest, quint32 value, qsizetype count); +#else +extern void qt_memfill64(quint64 *dest, quint64 value, qsizetype count); +extern void qt_memfill32(quint32 *dest, quint32 value, qsizetype count); +#endif +extern void qt_memfill24(quint24 *dest, quint24 value, qsizetype count); +extern void qt_memfill16(quint16 *dest, quint16 value, qsizetype count); typedef void (QT_FASTCALL *CompositionFunction)(uint *Q_DECL_RESTRICT dest, const uint *Q_DECL_RESTRICT src, int length, uint const_alpha); typedef void (QT_FASTCALL *CompositionFunction64)(QRgba64 *Q_DECL_RESTRICT dest, const QRgba64 *Q_DECL_RESTRICT src, int length, uint const_alpha); @@ -222,11 +248,6 @@ struct Operator class QRasterPaintEngine; -struct QSolidData -{ - QRgba64 color; -}; - struct QLinearGradientData { struct { @@ -331,8 +352,8 @@ struct QSpanData int fast_matrix : 1; bool bilinear; QImage *tempImage; + QRgba64 solidColor; union { - QSolidData solid; QGradientData gradient; QTextureData texture; }; @@ -858,6 +879,8 @@ static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) { // qt_div_255 is a fast rounded division by 255 using an approximation that is accurate for all positive 16-bit integers static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; } +static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257_floor(uint x) { return (x - (x >> 8)) >> 8; } +static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257(uint x) { return qt_div_257_floor(x + 128); } static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; } static Q_ALWAYS_INLINE uint qAlphaRgb30(uint c) @@ -868,76 +891,63 @@ static Q_ALWAYS_INLINE uint qAlphaRgb30(uint c) return a; } -struct quint24 { - quint24() = default; - quint24(uint value); - operator uint() const; - uchar data[3]; -}; - -inline quint24::quint24(uint value) +template <class T> inline void qt_memfill_template(T *dest, T color, qsizetype count) { - data[0] = uchar(value >> 16); - data[1] = uchar(value >> 8); - data[2] = uchar(value); + if (!count) + return; + + qsizetype n = (count + 7) / 8; + switch (count & 0x07) + { + case 0: do { *dest++ = color; Q_FALLTHROUGH(); + case 7: *dest++ = color; Q_FALLTHROUGH(); + case 6: *dest++ = color; Q_FALLTHROUGH(); + case 5: *dest++ = color; Q_FALLTHROUGH(); + case 4: *dest++ = color; Q_FALLTHROUGH(); + case 3: *dest++ = color; Q_FALLTHROUGH(); + case 2: *dest++ = color; Q_FALLTHROUGH(); + case 1: *dest++ = color; + } while (--n > 0); + } } -inline quint24::operator uint() const +template <class T> inline void qt_memfill(T *dest, T value, qsizetype count) { - return data[2] | (data[1] << 8) | (data[0] << 16); + qt_memfill_template(dest, value, count); } -template <class T> Q_STATIC_TEMPLATE_FUNCTION -void qt_memfill(T *dest, T value, int count); - -template<> inline void qt_memfill(quint64 *dest, quint64 color, int count) +template<> inline void qt_memfill(quint64 *dest, quint64 color, qsizetype count) { qt_memfill64(dest, color, count); } -template<> inline void qt_memfill(quint32 *dest, quint32 color, int count) +template<> inline void qt_memfill(quint32 *dest, quint32 color, qsizetype count) { qt_memfill32(dest, color, count); } -template<> inline void qt_memfill(quint16 *dest, quint16 color, int count) +template<> inline void qt_memfill(quint24 *dest, quint24 color, qsizetype count) { - qt_memfill16(dest, color, count); + qt_memfill24(dest, color, count); } -template<> inline void qt_memfill(quint8 *dest, quint8 color, int count) +template<> inline void qt_memfill(quint16 *dest, quint16 color, qsizetype count) { - memset(dest, color, count); + qt_memfill16(dest, color, count); } -template <class T> -inline void qt_memfill(T *dest, T value, int count) +template<> inline void qt_memfill(quint8 *dest, quint8 color, qsizetype count) { - if (!count) - return; - - int n = (count + 7) / 8; - switch (count & 0x07) - { - case 0: do { *dest++ = value; Q_FALLTHROUGH(); - case 7: *dest++ = value; Q_FALLTHROUGH(); - case 6: *dest++ = value; Q_FALLTHROUGH(); - case 5: *dest++ = value; Q_FALLTHROUGH(); - case 4: *dest++ = value; Q_FALLTHROUGH(); - case 3: *dest++ = value; Q_FALLTHROUGH(); - case 2: *dest++ = value; Q_FALLTHROUGH(); - case 1: *dest++ = value; - } while (--n > 0); - } + memset(dest, color, count); } -template <class T> Q_STATIC_TEMPLATE_FUNCTION +template <class T> static inline void qt_rectfill(T *dest, T value, int x, int y, int width, int height, qsizetype stride) { char *d = reinterpret_cast<char*>(dest + x) + y * stride; if (uint(stride) == (width * sizeof(T))) { - qt_memfill(reinterpret_cast<T*>(d), value, width * height); + qt_memfill(reinterpret_cast<T*>(d), value, qsizetype(width) * height); } else { for (int j = 0; j < height; ++j) { dest = reinterpret_cast<T*>(d); @@ -947,52 +957,6 @@ inline void qt_rectfill(T *dest, T value, } } -#define QT_MEMFILL_UINT(dest, length, color) \ - qt_memfill<quint32>(dest, color, length); - -#define QT_MEMFILL_USHORT(dest, length, color) \ - qt_memfill<quint16>(dest, color, length); - -#define QT_MEMCPY_REV_UINT(dest, src, length) \ -do { \ - /* Duff's device */ \ - uint *_d = (uint*)(dest) + length; \ - const uint *_s = (uint*)(src) + length; \ - int n = ((length) + 7) / 8; \ - switch ((length) & 0x07) \ - { \ - case 0: do { *--_d = *--_s; Q_FALLTHROUGH(); \ - case 7: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 6: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 5: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 4: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 3: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 2: *--_d = *--_s; Q_FALLTHROUGH(); \ - case 1: *--_d = *--_s; \ - } while (--n > 0); \ - } \ -} while (false) - -#define QT_MEMCPY_USHORT(dest, src, length) \ -do { \ - /* Duff's device */ \ - ushort *_d = (ushort*)(dest); \ - const ushort *_s = (const ushort*)(src); \ - int n = ((length) + 7) / 8; \ - switch ((length) & 0x07) \ - { \ - case 0: do { *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 7: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 6: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 5: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 4: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 3: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 2: *_d++ = *_s++; Q_FALLTHROUGH(); \ - case 1: *_d++ = *_s++; \ - } while (--n > 0); \ - } \ -} while (false) - inline ushort qConvertRgb32To16(uint c) { return (((c) >> 3) & 0x001f) diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 3212ffdd2d..c82f41ec88 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -233,19 +233,71 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u } } -void qt_memfill32(quint32 *dest, quint32 value, int count) +#ifndef __AVX2__ +static Q_NEVER_INLINE +void Q_DECL_VECTORCALL qt_memfillXX_aligned(void *dest, __m128i value128, quintptr bytecount) { - if (count < 7) { + __m128i *dst128 = reinterpret_cast<__m128i *>(dest); + __m128i *end128 = reinterpret_cast<__m128i *>(static_cast<uchar *>(dest) + bytecount); + + while (dst128 + 4 <= end128) { + _mm_store_si128(dst128 + 0, value128); + _mm_store_si128(dst128 + 1, value128); + _mm_store_si128(dst128 + 2, value128); + _mm_store_si128(dst128 + 3, value128); + dst128 += 4; + } + + bytecount %= 4 * sizeof(__m128i); + switch (bytecount / sizeof(__m128i)) { + case 3: _mm_store_si128(dst128++, value128); Q_FALLTHROUGH(); + case 2: _mm_store_si128(dst128++, value128); Q_FALLTHROUGH(); + case 1: _mm_store_si128(dst128++, value128); + } +} + +void qt_memfill64_sse2(quint64 *dest, quint64 value, qsizetype count) +{ + quintptr misaligned = quintptr(dest) % sizeof(__m128i); + if (misaligned && count) { +#if defined(Q_PROCESSOR_X86_32) + // Before SSE came out, the alignment of the stack used to be only 4 + // bytes and some OS/ABIs (notably, code generated by MSVC) still only + // align to that. In any case, we cannot count on the alignment of + // quint64 to be 8 -- see QtPrivate::AlignOf_WorkaroundForI386Abi in + // qglobal.h. + // + // If the pointer is not aligned to at least 8 bytes, then we'll never + // in turn hit a multiple of 16 for the qt_memfillXX_aligned call + // below. + if (Q_UNLIKELY(misaligned % sizeof(quint64))) + return qt_memfill_template(dest, value, count); +#endif + + *dest++ = value; + --count; + } + + if (count % 2) { + dest[count - 1] = value; + --count; + } + + qt_memfillXX_aligned(dest, _mm_set1_epi64x(value), count * sizeof(quint64)); +} + +void qt_memfill32_sse2(quint32 *dest, quint32 value, qsizetype count) +{ + if (count < 4) { + // this simplifies the code below: the first switch can fall through + // without checking the value of count switch (count) { - case 6: *dest++ = value; Q_FALLTHROUGH(); - case 5: *dest++ = value; Q_FALLTHROUGH(); - case 4: *dest++ = value; Q_FALLTHROUGH(); case 3: *dest++ = value; Q_FALLTHROUGH(); case 2: *dest++ = value; Q_FALLTHROUGH(); case 1: *dest = value; } return; - }; + } const int align = (quintptr)(dest) & 0xf; switch (align) { @@ -263,25 +315,9 @@ void qt_memfill32(quint32 *dest, quint32 value, int count) } } - int count128 = count / 4; - __m128i *dst128 = reinterpret_cast<__m128i*>(dest); - __m128i *end128 = dst128 + count128; - const __m128i value128 = _mm_set_epi32(value, value, value, value); - - while (dst128 + 3 < end128) { - _mm_stream_si128(dst128 + 0, value128); - _mm_stream_si128(dst128 + 1, value128); - _mm_stream_si128(dst128 + 2, value128); - _mm_stream_si128(dst128 + 3, value128); - dst128 += 4; - } - - switch (count128 & 0x3) { - case 3: _mm_stream_si128(dst128++, value128); Q_FALLTHROUGH(); - case 2: _mm_stream_si128(dst128++, value128); Q_FALLTHROUGH(); - case 1: _mm_stream_si128(dst128++, value128); - } + qt_memfillXX_aligned(dest, _mm_set1_epi32(value), count * sizeof(quint32)); } +#endif // !__AVX2__ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) { @@ -314,28 +350,6 @@ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, u } } -void qt_memfill16(quint16 *dest, quint16 value, int count) -{ - if (count < 3) { - switch (count) { - case 2: *dest++ = value; Q_FALLTHROUGH(); - case 1: *dest = value; - } - return; - } - - const int align = (quintptr)(dest) & 0x3; - switch (align) { - case 2: *dest++ = value; --count; - } - - const quint32 value32 = (value << 16) | value; - qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2); - - if (count & 0x1) - dest[count - 1] = value; -} - void qt_bitmapblit32_sse2_base(QRasterBuffer *rasterBuffer, int x, int y, quint32 color, const uchar *src, int width, int height, int stride) @@ -440,30 +454,30 @@ public: union Vect_buffer_i { Int32x4 v; int i[4]; }; union Vect_buffer_f { Float32x4 v; float f[4]; }; - static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); } - static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); } - static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); } - static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); } + static inline Float32x4 Q_DECL_VECTORCALL v_dup(float x) { return _mm_set1_ps(x); } + static inline Float32x4 Q_DECL_VECTORCALL v_dup(double x) { return _mm_set1_ps(x); } + static inline Int32x4 Q_DECL_VECTORCALL v_dup(int x) { return _mm_set1_epi32(x); } + static inline Int32x4 Q_DECL_VECTORCALL v_dup(uint x) { return _mm_set1_epi32(x); } - static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); } - static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); } + static inline Float32x4 Q_DECL_VECTORCALL v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); } + static inline Int32x4 Q_DECL_VECTORCALL v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); } - static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); } - static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); } - static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); } + static inline Float32x4 Q_DECL_VECTORCALL v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); } + static inline Float32x4 Q_DECL_VECTORCALL v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); } + static inline Int32x4 Q_DECL_VECTORCALL v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); } - static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); } + static inline Int32x4 Q_DECL_VECTORCALL v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); } - static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); } - static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); } + static inline Float32x4 Q_DECL_VECTORCALL v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); } + static inline Int32x4 Q_DECL_VECTORCALL v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); } - static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); } + static inline Float32x4 Q_DECL_VECTORCALL v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); } - static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); } + static inline Float32x4 Q_DECL_VECTORCALL v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); } - static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); } + static inline Int32x4 Q_DECL_VECTORCALL v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); } - static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpgt_ps(a, b)); } + static inline Int32x4 Q_DECL_VECTORCALL v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpgt_ps(a, b)); } }; const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index f6c2f11eaf..1da3b75ade 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -45,6 +45,7 @@ QT_BEGIN_NAMESPACE +#ifndef __AVX2__ template<bool RGBA> static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count) { @@ -94,7 +95,57 @@ static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count) } } -static inline __m128 reciprocal_mul_ps(__m128 a, float mul) +template<bool RGBA> +static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count) +{ + int i = 0; + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); + const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15); + const __m128i zero = _mm_setzero_si128(); + + for (; i < count - 3; i += 4) { + __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); + if (!_mm_testz_si128(srcVector, alphaMask)) { + if (!_mm_testc_si128(srcVector, alphaMask)) { + if (!RGBA) + srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); + __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); + __m128i src2 = _mm_unpackhi_epi8(srcVector, zero); + __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); + __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); + src1 = _mm_mullo_epi16(src1, alpha1); + src2 = _mm_mullo_epi16(src2, alpha2); + alpha1 = _mm_unpacklo_epi8(srcVector, srcVector); + alpha2 = _mm_unpackhi_epi8(srcVector, srcVector); + src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 7)); + src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 7)); + src1 = _mm_blend_epi16(src1, alpha1, 0x88); + src2 = _mm_blend_epi16(src2, alpha2, 0x88); + _mm_storeu_si128((__m128i *)&buffer[i], src1); + _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); + } else { + if (!RGBA) + srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); + const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); + const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); + _mm_storeu_si128((__m128i *)&buffer[i], src1); + _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); + } + } else { + _mm_storeu_si128((__m128i *)&buffer[i], zero); + _mm_storeu_si128((__m128i *)&buffer[i + 2], zero); + } + } + + SIMD_EPILOGUE(i, count, 3) { + const uint s = RGBA ? RGBA2ARGB(src[i]) : src[i]; + buffer[i] = QRgba64::fromArgb32(s).premultiplied(); + } +} +#endif // __AVX2__ + +static inline __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(__m128 a, float mul) { __m128 ia = _mm_rcp_ps(a); // Approximate 1/a // Improve precision of ia using Newton-Raphson @@ -259,6 +310,7 @@ static inline void convertARGBFromRGBA64PM_sse4(uint *buffer, const QRgba64 *src } } +#ifndef __AVX2__ void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *) { convertARGBToARGB32PM_sse4<false>(buffer, buffer, count); @@ -269,6 +321,20 @@ void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const Q convertARGBToARGB32PM_sse4<true>(buffer, buffer, count); } +const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_sse4<false>(buffer, src, count); + return buffer; +} + +const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_sse4<true>(buffer, src, count); + return buffer; +} + const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { @@ -283,6 +349,21 @@ const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar * return buffer; } +const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} + +const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToRGBA64PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); + return buffer; +} +#endif // __AVX2__ + void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { diff --git a/src/gui/painting/qdrawhelper_ssse3.cpp b/src/gui/painting/qdrawhelper_ssse3.cpp index 42d760d5cc..35d61c3e6c 100644 --- a/src/gui/painting/qdrawhelper_ssse3.cpp +++ b/src/gui/painting/qdrawhelper_ssse3.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. @@ -79,55 +80,58 @@ QT_BEGIN_NAMESPACE // The computation being done is: // result = s + d * (1-alpha) // with shortcuts if fully opaque or fully transparent. -#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ - int x = 0; \ -\ - /* First, get dst aligned. */ \ - ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ - blend_pixel(dst[x], src[x]); \ - } \ -\ - const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -\ - if (!minusOffsetToAlignSrcOn16Bytes) {\ - /* src is aligned, usual algorithm but with aligned operations.\ - See the SSE2 version for more documentation on the algorithm itself. */\ - const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ - for (; x < length-3; x += 4) { \ - const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); \ - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ - _mm_store_si128((__m128i *)&dst[x], srcVector); \ - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ - __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ - alphaChannel = _mm_sub_epi16(one, alphaChannel); \ - const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ - __m128i destMultipliedByOneMinusAlpha; \ - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ - _mm_store_si128((__m128i *)&dst[x], result); \ - } \ - } /* end for() */\ - } else if ((length - x) >= 8) {\ - /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ - __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ - const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -\ - const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ - switch (palignrOffset) {\ - case 4:\ - BLENDING_LOOP(4, length)\ - break;\ - case 8:\ - BLENDING_LOOP(8, length)\ - break;\ - case 12:\ - BLENDING_LOOP(12, length)\ - break;\ - }\ - }\ - for (; x < length; ++x) \ - blend_pixel(dst[x], src[x]); \ +static inline void Q_DECL_VECTORCALL +BLEND_SOURCE_OVER_ARGB32_SSSE3(quint32 *dst, const quint32 *src, int length, + __m128i nullVector, __m128i half, __m128i one, __m128i colorMask, __m128i alphaMask) +{ + int x = 0; + + /* First, get dst aligned. */ + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { + blend_pixel(dst[x], src[x]); + } + + const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3; + + if (!minusOffsetToAlignSrcOn16Bytes) { + /* src is aligned, usual algorithm but with aligned operations. + See the SSE2 version for more documentation on the algorithm itself. */ + const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3); + for (; x < length-3; x += 4) { + const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { + _mm_store_si128((__m128i *)&dst[x], srcVector); + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { + __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); + alphaChannel = _mm_sub_epi16(one, alphaChannel); + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); + __m128i destMultipliedByOneMinusAlpha; + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); + _mm_store_si128((__m128i *)&dst[x], result); + } + } /* end for() */ + } else if ((length - x) >= 8) { + /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */ + __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]); + const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2; + + const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3); + switch (palignrOffset) { + case 4: + BLENDING_LOOP(4, length) + break; + case 8: + BLENDING_LOOP(8, length) + break; + case 12: + BLENDING_LOOP(12, length) + break; + } + } + for (; x < length; ++x) + blend_pixel(dst[x], src[x]); } void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, @@ -185,6 +189,71 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Ope return buffer; } +void qt_memfill24_ssse3(quint24 *dest, quint24 color, qsizetype count) +{ + // LCM of 12 and 16 bytes is 48 bytes (16 px) + quint32 v = color; + __m128i m = _mm_cvtsi32_si128(v); + quint24 *end = dest + count; + + constexpr uchar x = 2, y = 1, z = 0; + Q_DECL_ALIGN(__m128i) static const uchar + shuffleMask[16 + 1] = { x, y, z, x, y, z, x, y, z, x, y, z, x, y, z, x, y }; + + __m128i mval1 = _mm_shuffle_epi8(m, _mm_load_si128(reinterpret_cast<const __m128i *>(shuffleMask))); + __m128i mval2 = _mm_shuffle_epi8(m, _mm_loadu_si128(reinterpret_cast<const __m128i *>(shuffleMask + 1))); + __m128i mval3 = _mm_alignr_epi8(mval2, mval1, 2); + + for ( ; dest + 16 <= end; dest += 16) { +#ifdef __AVX__ + // Store using 32-byte AVX instruction + __m256 mval12 = _mm256_castps128_ps256(_mm_castsi128_ps(mval1)); + mval12 = _mm256_insertf128_ps(mval12, _mm_castsi128_ps(mval2), 1); + _mm256_storeu_ps(reinterpret_cast<float *>(dest), mval12); +#else + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 0, mval1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1, mval2); +#endif + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2, mval3); + } + + if (count < 3) { + if (count > 1) + end[-2] = v; + if (count) + end[-1] = v; + return; + } + + // less than 16px/48B left + uchar *ptr = reinterpret_cast<uchar *>(dest); + uchar *ptr_end = reinterpret_cast<uchar *>(end); + qptrdiff left = ptr_end - ptr; + if (left >= 24) { + // 8px/24B or more left + _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr) + 0, mval1); + _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr) + 1, mval2); + ptr += 24; + left -= 24; + } + + // less than 8px/24B left + + if (left >= 16) { + // but more than 5px/15B left + _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr) , mval1); + } else if (left >= 8) { + // but more than 2px/6B left + _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr), mval1); + } + + if (left) { + // 1 or 2px left + // store 8 bytes ending with the right values (will overwrite a bit) + _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr_end - 8), mval2); + } +} + QT_END_NAMESPACE #endif // QT_COMPILER_SUPPORTS_SSSE3 diff --git a/src/gui/painting/qdrawhelper_x86_p.h b/src/gui/painting/qdrawhelper_x86_p.h index cefc213999..5749d8c9fb 100644 --- a/src/gui/painting/qdrawhelper_x86_p.h +++ b/src/gui/painting/qdrawhelper_x86_p.h @@ -57,8 +57,8 @@ QT_BEGIN_NAMESPACE #ifdef __SSE2__ -void qt_memfill32(quint32 *dest, quint32 value, int count); -void qt_memfill16(quint16 *dest, quint16 value, int count); +void qt_memfill64_sse2(quint64 *dest, quint64 value, qsizetype count); +void qt_memfill32_sse2(quint32 *dest, quint32 value, qsizetype count); void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *src, int width, int height, int stride); @@ -79,6 +79,9 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, extern CompositionFunction qt_functionForMode_SSE2[]; extern CompositionFunctionSolid qt_functionForModeSolid_SSE2[]; + +void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count); +void qt_memfill32_avx2(quint32 *dest, quint32 value, qsizetype count); #endif // __SSE2__ static const int numCompositionFunctions = 38; diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index b237ea1611..cc8d230fa8 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -42,7 +42,7 @@ #include <QtGui/private/qtguiglobal_p.h> #include <private/qsimd_p.h> -#include "qdrawhelper_p.h" +#include "qdrawhelper_x86_p.h" #include "qrgba64_p.h" #ifdef __SSE2__ @@ -232,7 +232,7 @@ QT_END_NAMESPACE QT_BEGIN_NAMESPACE #if QT_COMPILER_SUPPORTS_HERE(SSE4_1) QT_FUNCTION_TARGET(SSE2) -Q_ALWAYS_INLINE void reciprocal_mul_ss(__m128 &ia, const __m128 a, float mul) +Q_ALWAYS_INLINE void Q_DECL_VECTORCALL reciprocal_mul_ss(__m128 &ia, const __m128 a, float mul) { ia = _mm_rcp_ss(a); // Approximate 1/a // Improve precision of ia using Newton-Raphson diff --git a/src/gui/painting/qimagescale_sse4.cpp b/src/gui/painting/qimagescale_sse4.cpp index 34d6b3882e..5861a2e2ff 100644 --- a/src/gui/painting/qimagescale_sse4.cpp +++ b/src/gui/painting/qimagescale_sse4.cpp @@ -39,6 +39,7 @@ #include "qimagescale_p.h" #include "qimage.h" +#include <private/qdrawhelper_x86_p.h> #include <private/qsimd_p.h> #if defined(QT_COMPILER_SUPPORTS_SSE4_1) @@ -47,7 +48,8 @@ QT_BEGIN_NAMESPACE using namespace QImageScale; -inline static __m128i qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy) +inline static __m128i Q_DECL_VECTORCALL +qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy) { __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); __m128i vx = _mm_mullo_epi32(vpix, vxyap); diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp index 43aeff3268..9cb787fb2c 100644 --- a/src/gui/painting/qmemrotate.cpp +++ b/src/gui/painting/qmemrotate.cpp @@ -44,7 +44,7 @@ QT_BEGIN_NAMESPACE static const int tileSize = 32; template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride) { sstride /= sizeof(T); @@ -103,7 +103,7 @@ inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *des } template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate90_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest, int dstride) { @@ -131,7 +131,7 @@ inline void qt_memrotate90_tiled_unpacked(const T *src, int w, int h, int sstrid } template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride) { sstride /= sizeof(T); @@ -190,7 +190,7 @@ inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *de } template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest, int dstride) { @@ -219,7 +219,7 @@ inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstri template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride, T *dest, int dstStride) { @@ -246,7 +246,7 @@ inline void qt_memrotate90_template<quint64>(const quint64 *src, int w, int h, i } template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride) { const char *s = (const char*)(src) + (h - 1) * sstride; @@ -261,7 +261,7 @@ inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T } template <class T> -Q_STATIC_TEMPLATE_FUNCTION +static inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride, T *dest, int dstStride) { diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index 90b6d16551..0f5c7756ad 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -873,7 +873,7 @@ void QRasterPaintEngine::updateRasterState() && s->intOpacity == 256 && (mode == QPainter::CompositionMode_Source || (mode == QPainter::CompositionMode_SourceOver - && s->penData.solid.color.isOpaque())); + && s->penData.solidColor.isOpaque())); } s->dirty = 0; @@ -1536,9 +1536,9 @@ static void fillRect_normalized(const QRect &r, QSpanData *data, if (data->fillRect && (mode == QPainter::CompositionMode_Source || (mode == QPainter::CompositionMode_SourceOver - && data->solid.color.isOpaque()))) + && data->solidColor.isOpaque()))) { - data->fillRect(data->rasterBuffer, x1, y1, width, height, data->solid.color); + data->fillRect(data->rasterBuffer, x1, y1, width, height, data->solidColor); return; } } @@ -1900,9 +1900,9 @@ void QRasterPaintEngine::fillRect(const QRectF &r, const QColor &color) Q_D(QRasterPaintEngine); QRasterPaintEngineState *s = state(); - d->solid_color_filler.solid.color = qPremultiply(combineAlpha256(color.rgba64(), s->intOpacity)); + d->solid_color_filler.solidColor = qPremultiply(combineAlpha256(color.rgba64(), s->intOpacity)); - if (d->solid_color_filler.solid.color.isTransparent() + if (d->solid_color_filler.solidColor.isTransparent() && s->composition_mode == QPainter::CompositionMode_SourceOver) { return; } @@ -2356,14 +2356,14 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe case QImage::Format_A2BGR30_Premultiplied: case QImage::Format_A2RGB30_Premultiplied: // Combine premultiplied color with the opacity set on the painter. - d->solid_color_filler.solid.color = multiplyAlpha256(QRgba64::fromArgb32(color), s->intOpacity); + d->solid_color_filler.solidColor = multiplyAlpha256(QRgba64::fromArgb32(color), s->intOpacity); break; default: - d->solid_color_filler.solid.color = qPremultiply(combineAlpha256(QRgba64::fromArgb32(color), s->intOpacity)); + d->solid_color_filler.solidColor = qPremultiply(combineAlpha256(QRgba64::fromArgb32(color), s->intOpacity)); break; } - if (d->solid_color_filler.solid.color.isTransparent() && s->composition_mode == QPainter::CompositionMode_SourceOver) + if (d->solid_color_filler.solidColor.isTransparent() && s->composition_mode == QPainter::CompositionMode_SourceOver) return; d->solid_color_filler.clip = d->clip(); @@ -2713,20 +2713,20 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx if (unclipped) { if (depth == 1) { if (s->penData.bitmapBlit) { - s->penData.bitmapBlit(rb, rx, ry, s->penData.solid.color, + s->penData.bitmapBlit(rb, rx, ry, s->penData.solidColor, scanline, w, h, bpl); return; } } else if (depth == 8) { if (s->penData.alphamapBlit) { - s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color, + s->penData.alphamapBlit(rb, rx, ry, s->penData.solidColor, scanline, w, h, bpl, 0, useGammaCorrection); return; } } else if (depth == 32) { // (A)RGB Alpha mask where the alpha component is not used. if (s->penData.alphaRGBBlit) { - s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color, + s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solidColor, (const uint *) scanline, w, h, bpl / 4, 0, useGammaCorrection); return; } @@ -2755,10 +2755,10 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx ry = ny; } if (depth == 8) - s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color, + s->penData.alphamapBlit(rb, rx, ry, s->penData.solidColor, scanline, w, h, bpl, clip, useGammaCorrection); else if (depth == 32) - s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color, + s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solidColor, (const uint *) scanline, w, h, bpl / 4, clip, useGammaCorrection); return; } @@ -3835,7 +3835,6 @@ QImage::Format QRasterBuffer::prepare(QImage *image) bytes_per_line = image->bytesPerLine(); format = image->format(); - drawHelper = qDrawHelper + format; if (image->depth() == 1 && image->colorTable().size() == 2) { monoDestinationWithClut = true; const QVector<QRgb> colorTable = image->colorTable(); @@ -3883,51 +3882,15 @@ void QClipData::initialize() Q_CHECK_PTR(m_clipLines); QT_TRY { - m_spans = (QSpan *)malloc(clipSpanHeight*sizeof(QSpan)); allocated = clipSpanHeight; - Q_CHECK_PTR(m_spans); - QT_TRY { - if (hasRectClip) { - int y = 0; - while (y < ymin) { - m_clipLines[y].spans = 0; - m_clipLines[y].count = 0; - ++y; - } - - const int len = clipRect.width(); - count = 0; - while (y < ymax) { - QSpan *span = m_spans + count; - span->x = xmin; - span->len = len; - span->y = y; - span->coverage = 255; - ++count; - - m_clipLines[y].spans = span; - m_clipLines[y].count = 1; - ++y; - } - - while (y < clipSpanHeight) { - m_clipLines[y].spans = 0; - m_clipLines[y].count = 0; - ++y; - } - } else if (hasRegionClip) { - + if (hasRegionClip) { const auto rects = clipRegion.begin(); const int numRects = clipRegion.rectCount(); - - { // resize - const int maxSpans = (ymax - ymin) * numRects; - if (maxSpans > allocated) { - m_spans = q_check_ptr((QSpan *)realloc(m_spans, maxSpans * sizeof(QSpan))); - allocated = maxSpans; - } - } + const int maxSpans = (ymax - ymin) * numRects; + allocated = qMax(allocated, maxSpans); + m_spans = (QSpan *)malloc(allocated * sizeof(QSpan)); + Q_CHECK_PTR(m_spans); int y = 0; int firstInBand = 0; @@ -3974,6 +3937,40 @@ void QClipData::initialize() ++y; } + return; + } + + m_spans = (QSpan *)malloc(allocated * sizeof(QSpan)); + Q_CHECK_PTR(m_spans); + + if (hasRectClip) { + int y = 0; + while (y < ymin) { + m_clipLines[y].spans = 0; + m_clipLines[y].count = 0; + ++y; + } + + const int len = clipRect.width(); + count = 0; + while (y < ymax) { + QSpan *span = m_spans + count; + span->x = xmin; + span->len = len; + span->y = y; + span->coverage = 255; + ++count; + + m_clipLines[y].spans = span; + m_clipLines[y].count = 1; + ++y; + } + + while (y < clipSpanHeight) { + m_clipLines[y].spans = 0; + m_clipLines[y].count = 0; + ++y; + } } } QT_CATCH(...) { free(m_spans); // have to free m_spans again or someone might think that we were successfully initialized. @@ -4596,8 +4593,8 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode case Qt::SolidPattern: { type = Solid; QColor c = qbrush_color(brush); - solid.color = qPremultiply(combineAlpha256(c.rgba64(), alpha)); - if (solid.color.isTransparent() && compositionMode == QPainter::CompositionMode_SourceOver) + solidColor = qPremultiply(combineAlpha256(c.rgba64(), alpha)); + if (solidColor.isTransparent() && compositionMode == QPainter::CompositionMode_SourceOver) type = None; break; } @@ -4723,17 +4720,19 @@ void QSpanData::adjustSpanMethods() case None: unclipped_blend = 0; break; - case Solid: - unclipped_blend = rasterBuffer->drawHelper->blendColor; - bitmapBlit = rasterBuffer->drawHelper->bitmapBlit; - alphamapBlit = rasterBuffer->drawHelper->alphamapBlit; - alphaRGBBlit = rasterBuffer->drawHelper->alphaRGBBlit; - fillRect = rasterBuffer->drawHelper->fillRect; + case Solid: { + const DrawHelper &drawHelper = qDrawHelper[rasterBuffer->format]; + unclipped_blend = drawHelper.blendColor; + bitmapBlit = drawHelper.bitmapBlit; + alphamapBlit = drawHelper.alphamapBlit; + alphaRGBBlit = drawHelper.alphaRGBBlit; + fillRect = drawHelper.fillRect; break; + } case LinearGradient: case RadialGradient: case ConicalGradient: - unclipped_blend = rasterBuffer->drawHelper->blendGradient; + unclipped_blend = qBlendGradient; break; case Texture: unclipped_blend = qBlendTexture; diff --git a/src/gui/painting/qpaintengine_raster_p.h b/src/gui/painting/qpaintengine_raster_p.h index 14eddf07b1..881144d1c2 100644 --- a/src/gui/painting/qpaintengine_raster_p.h +++ b/src/gui/painting/qpaintengine_raster_p.h @@ -471,7 +471,6 @@ public: QPainter::CompositionMode compositionMode; QImage::Format format; - DrawHelper *drawHelper; QImage colorizeBitmap(const QImage &image, const QColor &color); private: diff --git a/src/gui/painting/qpaintengineex.cpp b/src/gui/painting/qpaintengineex.cpp index f4cbf15fc7..22d3fb3001 100644 --- a/src/gui/painting/qpaintengineex.cpp +++ b/src/gui/painting/qpaintengineex.cpp @@ -439,6 +439,9 @@ void QPaintEngineEx::stroke(const QVectorPath &path, const QPen &pen) } } + if (d->activeStroker == &d->stroker) + d->stroker.setForceOpen(path.hasExplicitOpen()); + const QPainterPath::ElementType *types = path.elements(); const qreal *points = path.points(); int pointCount = path.elementCount(); @@ -1097,7 +1100,7 @@ bool QPaintEngineEx::shouldDrawCachedGlyphs(QFontEngine *fontEngine, const QTran }(), 2); qreal pixelSize = fontEngine->fontDef.pixelSize; - return (pixelSize * pixelSize * qAbs(m.determinant())) < maxCachedGlyphSizeSquared; + return (pixelSize * pixelSize * qAbs(m.determinant())) <= maxCachedGlyphSizeSquared; } QT_END_NAMESPACE diff --git a/src/gui/painting/qpainter.cpp b/src/gui/painting/qpainter.cpp index b70b29e54e..df2f5e11d3 100644 --- a/src/gui/painting/qpainter.cpp +++ b/src/gui/painting/qpainter.cpp @@ -1437,6 +1437,13 @@ void QPainterPrivate::updateState(QPainterState *newState) by slightly less than half a pixel. Also will treat default constructed pens as cosmetic. Potentially useful when porting a Qt 4 application to Qt 5. + \value LosslessImageRendering Use a lossless image rendering, whenever possible. + Currently, this hint is only used when QPainter is employed to output a PDF + file through QPrinter or QPdfWriter, where drawImage()/drawPixmap() calls + will encode images using a lossless compression algorithm instead of lossy + JPEG compression. + This value was added in Qt 5.13. + \sa renderHints(), setRenderHint(), {QPainter#Rendering Quality}{Rendering Quality}, {Concentric Circles Example} diff --git a/src/gui/painting/qpainter.h b/src/gui/painting/qpainter.h index 482f5fb63d..87c4899f0b 100644 --- a/src/gui/painting/qpainter.h +++ b/src/gui/painting/qpainter.h @@ -91,7 +91,8 @@ public: SmoothPixmapTransform = 0x04, HighQualityAntialiasing = 0x08, NonCosmeticDefaultPen = 0x10, - Qt4CompatiblePainting = 0x20 + Qt4CompatiblePainting = 0x20, + LosslessImageRendering = 0x40, }; Q_FLAG(RenderHint) diff --git a/src/gui/painting/qpainter_p.h b/src/gui/painting/qpainter_p.h index 2d44577310..930180e9fa 100644 --- a/src/gui/painting/qpainter_p.h +++ b/src/gui/painting/qpainter_p.h @@ -51,6 +51,7 @@ // We mean it. // +#include <QtCore/qvarlengtharray.h> #include <QtGui/private/qtguiglobal_p.h> #include "QtGui/qbrush.h" #include "QtGui/qfont.h" @@ -202,7 +203,7 @@ public: QPainterPrivate **d_ptrs; QPainterState *state; - QVector<QPainterState*> states; + QVarLengthArray<QPainterState *, 8> states; mutable QPainterDummyState *dummyState; diff --git a/src/gui/painting/qpainterpath.cpp b/src/gui/painting/qpainterpath.cpp index c5ccf0003d..3687bcf7d0 100644 --- a/src/gui/painting/qpainterpath.cpp +++ b/src/gui/painting/qpainterpath.cpp @@ -629,6 +629,55 @@ QPainterPath::~QPainterPath() } /*! + Clears the path elements stored. + + This allows the path to reuse previous memory allocations. + + \sa reserve(), capacity() + \since 5.13 +*/ +void QPainterPath::clear() +{ + if (!d_ptr) + return; + + detach(); + d_func()->clear(); +} + +/*! + Reserves a given amount of elements in QPainterPath's internal memory. + + Attempts to allocate memory for at least \a size elements. + + \sa clear(), capacity(), QVector::reserve() + \since 5.13 +*/ +void QPainterPath::reserve(int size) +{ + Q_D(QPainterPath); + if ((!d && size > 0) || (d && d->elements.capacity() < size)) { + detach(); + d->elements.reserve(size); + } +} + +/*! + Returns the number of elements allocated by the QPainterPath. + + \sa clear(), reserve() + \since 5.13 +*/ +int QPainterPath::capacity() const +{ + Q_D(QPainterPath); + if (d) + return d->elements.capacity(); + + return 0; +} + +/*! Closes the current subpath by drawing a line to the beginning of the subpath, automatically starting a new path. The current point of the new path is (0, 0). @@ -2271,13 +2320,19 @@ static inline bool epsilonCompare(const QPointF &a, const QPointF &b, const QSiz bool QPainterPath::operator==(const QPainterPath &path) const { QPainterPathData *d = reinterpret_cast<QPainterPathData *>(d_func()); - if (path.d_func() == d) + QPainterPathData *other_d = path.d_func(); + if (other_d == d) return true; - else if (!d || !path.d_func()) + else if (!d || !other_d) { + if (!d && other_d->elements.empty() && other_d->fillRule == Qt::OddEvenFill) + return true; + if (!other_d && d && d->elements.empty() && d->fillRule == Qt::OddEvenFill) + return true; return false; - else if (d->fillRule != path.d_func()->fillRule) + } + else if (d->fillRule != other_d->fillRule) return false; - else if (d->elements.size() != path.d_func()->elements.size()) + else if (d->elements.size() != other_d->elements.size()) return false; const qreal qt_epsilon = sizeof(qreal) == sizeof(double) ? 1e-12 : qreal(1e-5); @@ -2287,8 +2342,8 @@ bool QPainterPath::operator==(const QPainterPath &path) const epsilon.rheight() *= qt_epsilon; for (int i = 0; i < d->elements.size(); ++i) - if (d->elements.at(i).type != path.d_func()->elements.at(i).type - || !epsilonCompare(d->elements.at(i), path.d_func()->elements.at(i), epsilon)) + if (d->elements.at(i).type != other_d->elements.at(i).type + || !epsilonCompare(d->elements.at(i), other_d->elements.at(i), epsilon)) return false; return true; diff --git a/src/gui/painting/qpainterpath.h b/src/gui/painting/qpainterpath.h index db39c1c5a0..770b8f48d0 100644 --- a/src/gui/painting/qpainterpath.h +++ b/src/gui/painting/qpainterpath.h @@ -97,8 +97,13 @@ public: { qSwap(d_ptr, other.d_ptr); return *this; } #endif ~QPainterPath(); + inline void swap(QPainterPath &other) Q_DECL_NOEXCEPT { d_ptr.swap(other.d_ptr); } + void clear(); + void reserve(int size); + int capacity() const; + void closeSubpath(); void moveTo(const QPointF &p); diff --git a/src/gui/painting/qpainterpath_p.h b/src/gui/painting/qpainterpath_p.h index 92d9a4ea66..a36c8005bc 100644 --- a/src/gui/painting/qpainterpath_p.h +++ b/src/gui/painting/qpainterpath_p.h @@ -157,7 +157,7 @@ public: QVectorPath path; private: - Q_DISABLE_COPY(QVectorPathConverter) + Q_DISABLE_COPY_MOVE(QVectorPathConverter) }; class QPainterPathData : public QPainterPathPrivate @@ -194,6 +194,7 @@ public: inline bool isClosed() const; inline void close(); inline void maybeMoveTo(); + inline void clear(); const QVectorPath &vectorPath() { if (!pathConverter) @@ -290,6 +291,25 @@ inline void QPainterPathData::maybeMoveTo() } } +inline void QPainterPathData::clear() +{ + Q_ASSERT(ref.load() == 1); + + elements.clear(); + + cStart = 0; + + bounds = {}; + controlBounds = {}; + + require_moveTo = false; + dirtyBounds = false; + dirtyControlBounds = false; + convex = false; + + delete pathConverter; + pathConverter = nullptr; +} #define KAPPA qreal(0.5522847498) diff --git a/src/gui/painting/qpathclipper_p.h b/src/gui/painting/qpathclipper_p.h index 64e684e1ad..c25a479807 100644 --- a/src/gui/painting/qpathclipper_p.h +++ b/src/gui/painting/qpathclipper_p.h @@ -86,7 +86,7 @@ public: static QPainterPath intersect(const QPainterPath &path, const QRectF &rect); private: - Q_DISABLE_COPY(QPathClipper) + Q_DISABLE_COPY_MOVE(QPathClipper) enum ClipperMode { ClipMode, // do the full clip diff --git a/src/gui/painting/qpdf.cpp b/src/gui/painting/qpdf.cpp index ae3df6f9ec..b23cc903b1 100644 --- a/src/gui/painting/qpdf.cpp +++ b/src/gui/painting/qpdf.cpp @@ -856,7 +856,6 @@ void QPdfEngine::drawRects (const QRectF *rects, int rectCount) if (!d->hasPen && !d->hasBrush) return; - QBrush penBrush = d->pen.brush(); if (d->simplePen || !d->hasPen) { // draw strokes natively in this case for better output if(!d->simplePen && !d->stroker.matrix.isIdentity()) @@ -949,7 +948,8 @@ void QPdfEngine::drawPixmap (const QRectF &rectangle, const QPixmap &pixmap, con QPixmap pm = sourceRect != pixmap.rect() ? pixmap.copy(sourceRect) : pixmap; QImage image = pm.toImage(); bool bitmap = true; - const int object = d->addImage(image, &bitmap, pm.cacheKey()); + const bool lossless = painter()->testRenderHint(QPainter::LosslessImageRendering); + const int object = d->addImage(image, &bitmap, lossless, pm.cacheKey()); if (object < 0) return; @@ -988,7 +988,8 @@ void QPdfEngine::drawImage(const QRectF &rectangle, const QImage &image, const Q QRect sourceRect = sr.toRect(); QImage im = sourceRect != image.rect() ? image.copy(sourceRect) : image; bool bitmap = true; - const int object = d->addImage(im, &bitmap, im.cacheKey()); + const bool lossless = painter()->testRenderHint(QPainter::LosslessImageRendering); + const int object = d->addImage(im, &bitmap, lossless, im.cacheKey()); if (object < 0) return; @@ -2633,6 +2634,8 @@ int QPdfEnginePrivate::addConstantAlphaObject(int brushAlpha, int penAlpha) int QPdfEnginePrivate::addBrushPattern(const QTransform &m, bool *specifyColor, int *gStateObject) { + Q_Q(QPdfEngine); + int paintType = 2; // Uncolored tiling int w = 8; int h = 8; @@ -2662,7 +2665,8 @@ int QPdfEnginePrivate::addBrushPattern(const QTransform &m, bool *specifyColor, return 0; QImage image = brush.textureImage(); bool bitmap = true; - imageObject = addImage(image, &bitmap, image.cacheKey()); + const bool lossless = q->painter()->testRenderHint(QPainter::LosslessImageRendering); + imageObject = addImage(image, &bitmap, lossless, image.cacheKey()); if (imageObject != -1) { QImage::Format f = image.format(); if (f != QImage::Format_MonoLSB && f != QImage::Format_Mono) { @@ -2724,7 +2728,7 @@ static inline bool is_monochrome(const QVector<QRgb> &colorTable) /*! * Adds an image to the pdf and return the pdf-object id. Returns -1 if adding the image failed. */ -int QPdfEnginePrivate::addImage(const QImage &img, bool *bitmap, qint64 serial_no) +int QPdfEnginePrivate::addImage(const QImage &img, bool *bitmap, bool lossless, qint64 serial_no) { if (img.isNull()) return -1; @@ -2785,7 +2789,7 @@ int QPdfEnginePrivate::addImage(const QImage &img, bool *bitmap, qint64 serial_n bool hasAlpha = false; bool hasMask = false; - if (QImageWriter::supportedImageFormats().contains("jpeg") && !grayscale) { + if (QImageWriter::supportedImageFormats().contains("jpeg") && !grayscale && !lossless) { QBuffer buffer(&imageData); QImageWriter writer(&buffer, "jpeg"); writer.setQuality(94); diff --git a/src/gui/painting/qpdf_p.h b/src/gui/painting/qpdf_p.h index e2526de67d..e337c61f64 100644 --- a/src/gui/painting/qpdf_p.h +++ b/src/gui/painting/qpdf_p.h @@ -242,7 +242,7 @@ public: void writeHeader(); void writeTail(); - int addImage(const QImage &image, bool *bitmap, qint64 serial_no); + int addImage(const QImage &image, bool *bitmap, bool lossless, qint64 serial_no); int addConstantAlphaObject(int brushAlpha, int penAlpha = 255); int addBrushPattern(const QTransform &matrix, bool *specifyColor, int *gStateObject); diff --git a/src/gui/painting/qpdfwriter.cpp b/src/gui/painting/qpdfwriter.cpp index 6c85d65538..258939a763 100644 --- a/src/gui/painting/qpdfwriter.cpp +++ b/src/gui/painting/qpdfwriter.cpp @@ -76,7 +76,7 @@ public: : QPagedPaintDevicePrivate(), pd(d) {} - virtual ~QPdfPagedPaintDevicePrivate() + ~QPdfPagedPaintDevicePrivate() {} bool setPageLayout(const QPageLayout &newPageLayout) override diff --git a/src/gui/painting/qrgba64.h b/src/gui/painting/qrgba64.h index 0e5344cacb..0e0b567890 100644 --- a/src/gui/painting/qrgba64.h +++ b/src/gui/painting/qrgba64.h @@ -118,7 +118,27 @@ public: Q_DECL_CONSTEXPR quint8 alpha8() const { return div_257(alpha()); } Q_DECL_CONSTEXPR uint toArgb32() const { +#if defined(__cpp_constexpr) && __cpp_constexpr-0 >= 201304 + quint64 br = rgba & Q_UINT64_C(0xffff0000ffff); + quint64 ag = (rgba >> 16) & Q_UINT64_C(0xffff0000ffff); + br += Q_UINT64_C(0x8000000080); + ag += Q_UINT64_C(0x8000000080); + br = (br - ((br >> 8) & Q_UINT64_C(0xffff0000ffff))) >> 8; + ag = (ag - ((ag >> 8) & Q_UINT64_C(0xffff0000ffff))); +#if Q_BYTE_ORDER == Q_BIG_ENDIAN + return ((br << 24) & 0xff000000) + | ((ag >> 24) & 0xff0000) + | ((br >> 24) & 0xff00) + | ((ag >> 8) & 0xff); +#else + return ((ag >> 16) & 0xff000000) + | ((br << 16) & 0xff0000) + | (ag & 0xff00) + | ((br >> 32) & 0xff); +#endif +#else return uint((alpha8() << 24) | (red8() << 16) | (green8() << 8) | blue8()); +#endif } Q_DECL_CONSTEXPR ushort toRgb16() const { @@ -131,11 +151,20 @@ public: return *this; if (isTransparent()) return QRgba64::fromRgba64(0); - const quint32 a = alpha(); - const quint16 r = div_65535(red() * a); - const quint16 g = div_65535(green() * a); - const quint16 b = div_65535(blue() * a); - return fromRgba64(r, g, b, quint16(a)); + const quint64 a = alpha(); + quint64 br = (rgba & Q_UINT64_C(0xffff0000ffff)) * a; + quint64 ag = ((rgba >> 16) & Q_UINT64_C(0xffff0000ffff)) * a; + br = (br + ((br >> 16) & Q_UINT64_C(0xffff0000ffff)) + Q_UINT64_C(0x800000008000)); + ag = (ag + ((ag >> 16) & Q_UINT64_C(0xffff0000ffff)) + Q_UINT64_C(0x800000008000)); +#if Q_BYTE_ORDER == Q_BIG_ENDIAN + ag = ag & Q_UINT64_C(0xffff0000ffff0000); + br = (br >> 16) & Q_UINT64_C(0xffff00000000); + return fromRgba64(a | br | ag); +#else + br = (br >> 16) & Q_UINT64_C(0xffff0000ffff); + ag = ag & Q_UINT64_C(0xffff0000); + return fromRgba64((a << 48) | br | ag); +#endif } Q_DECL_RELAXED_CONSTEXPR QRgba64 unpremultiplied() const @@ -163,7 +192,6 @@ private: static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE quint8 div_257_floor(uint x) { return quint8((x - (x >> 8)) >> 8); } static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE quint8 div_257(quint16 x) { return div_257_floor(x + 128U); } - static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE quint16 div_65535(uint x) { return quint16((x + (x>>16) + 0x8000U) >> 16); } Q_DECL_RELAXED_CONSTEXPR Q_ALWAYS_INLINE QRgba64 unpremultiplied_32bit() const { if (isOpaque() || isTransparent()) diff --git a/src/gui/painting/qstroker.cpp b/src/gui/painting/qstroker.cpp index 4776545be6..0a3d802b21 100644 --- a/src/gui/painting/qstroker.cpp +++ b/src/gui/painting/qstroker.cpp @@ -369,7 +369,8 @@ void QStrokerOps::strokeEllipse(const QRectF &rect, void *data, const QTransform QStroker::QStroker() : m_capStyle(SquareJoin), m_joinStyle(FlatJoin), m_back1X(0), m_back1Y(0), - m_back2X(0), m_back2Y(0) + m_back2X(0), m_back2Y(0), + m_forceOpen(false) { m_strokeWidth = qt_real_to_fixed(1); m_miterLimit = qt_real_to_fixed(2); @@ -748,7 +749,7 @@ template <class Iterator> bool qt_stroke_side(Iterator *it, } } - if (start == prev) { + if (start == prev && !stroker->forceOpen()) { // closed subpath, join first and last point #ifdef QPP_STROKE_DEBUG qDebug("\n ---> (side) closed subpath"); diff --git a/src/gui/painting/qstroker_p.h b/src/gui/painting/qstroker_p.h index 1a7c184e1a..722a0904f3 100644 --- a/src/gui/painting/qstroker_p.h +++ b/src/gui/painting/qstroker_p.h @@ -222,6 +222,9 @@ public: void setMiterLimit(qfixed length) { m_miterLimit = length; } qfixed miterLimit() const { return m_miterLimit; } + void setForceOpen(bool state) { m_forceOpen = state; } + bool forceOpen() { return m_forceOpen; } + void joinPoints(qfixed x, qfixed y, const QLineF &nextLine, LineJoinMode join); inline void emitMoveTo(qfixed x, qfixed y); inline void emitLineTo(qfixed x, qfixed y); @@ -247,6 +250,8 @@ protected: qfixed m_back2X; qfixed m_back2Y; + + bool m_forceOpen; }; class Q_GUI_EXPORT QDashStroker : public QStrokerOps diff --git a/src/gui/painting/qvectorpath_p.h b/src/gui/painting/qvectorpath_p.h index d1b08ed423..1b649a5d2a 100644 --- a/src/gui/painting/qvectorpath_p.h +++ b/src/gui/painting/qvectorpath_p.h @@ -99,7 +99,8 @@ public: // Shape rendering specifiers... OddEvenFill = 0x1000, WindingFill = 0x2000, - ImplicitClose = 0x4000 + ImplicitClose = 0x4000, + ExplicitOpen = 0x8000 }; // ### Falcon: introduca a struct XY for points so lars is not so confused... @@ -124,6 +125,7 @@ public: inline bool isCacheable() const { return m_hints & ShouldUseCacheHint; } inline bool hasImplicitClose() const { return m_hints & ImplicitClose; } + inline bool hasExplicitOpen() const { return m_hints & ExplicitOpen; } inline bool hasWindingFill() const { return m_hints & WindingFill; } inline void makeCacheable() const { m_hints |= ShouldUseCacheHint; m_cache = 0; } @@ -142,7 +144,7 @@ public: case QPaintEngine::ConvexMode: return ConvexPolygonHint | ImplicitClose; case QPaintEngine::OddEvenMode: return PolygonHint | OddEvenFill | ImplicitClose; case QPaintEngine::WindingMode: return PolygonHint | WindingFill | ImplicitClose; - case QPaintEngine::PolylineMode: return PolygonHint; + case QPaintEngine::PolylineMode: return PolygonHint | ExplicitOpen; default: return 0; } } @@ -194,7 +196,7 @@ public: private: - Q_DISABLE_COPY(QVectorPath) + Q_DISABLE_COPY_MOVE(QVectorPath) const QPainterPath::ElementType *m_elements; const qreal *m_points; |