diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 3519 |
1 files changed, 1744 insertions, 1775 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 0f2d163840..bbeb9fd9ea 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. ** Copyright (C) 2016 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** @@ -73,9 +73,6 @@ enum { half_point = 1 << 15 }; -// must be multiple of 4 for easier SIMD implementations -static const int buffer_size = 2048; - template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth(); template<QImage::Format> Q_DECL_CONSTEXPR uint redShift(); template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth(); @@ -94,6 +91,10 @@ template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied> template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; } @@ -103,6 +104,15 @@ template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied> template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } +#else +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } +#endif template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; } @@ -112,6 +122,10 @@ template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; } @@ -121,6 +135,15 @@ template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } +#else +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } +#endif template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; } @@ -130,6 +153,10 @@ template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; } @@ -139,6 +166,15 @@ template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } +#else +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } +#endif template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; } @@ -148,6 +184,10 @@ template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; } @@ -157,138 +197,200 @@ template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } +#else +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } +#endif -template<QImage::Format> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel(); -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; } +template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel(); +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; } -template<QImage::Format Format> -static const uint *QT_FASTCALL convertToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count); + +template <QPixelLayout::BPP bpp> static +uint QT_FASTCALL fetchPixel(const uchar *, int) { - auto conversion = [](uint s) { - // MSVC needs these constexpr defined in here otherwise it will create a capture. - Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); - Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); - Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); + Q_UNREACHABLE(); + return 0; +} - Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); - Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); - Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index) +{ + return (src[index >> 3] >> (index & 7)) & 1; +} - Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; - Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; - Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index) +{ + return (src[index >> 3] >> (~index & 7)) & 1; +} - uint red = (s >> redShift<Format>()) & redMask; - uint green = (s >> greenShift<Format>()) & greenMask; - uint blue = (s >> blueShift<Format>()) & blueMask; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index) +{ + return src[index]; +} - red = ((red << redLeftShift) | (red >> redRightShift)) << 16; - green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - return 0xff000000 | red | green | blue; - }; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index) +{ + return reinterpret_cast<const quint16 *>(src)[index]; +} - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); - return buffer; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index) +{ + return reinterpret_cast<const quint24 *>(src)[index]; } -template<QImage::Format Format> -static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index) { - Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); - Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); - Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); + return reinterpret_cast<const uint *>(src)[index]; +} - Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); - Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); - Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index) +{ + // We have to do the conversion in fetch to fit into a 32bit uint + QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index]; + return c.toArgb32(); +} - Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; - Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; - Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; +template <QPixelLayout::BPP bpp> +static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index) +{ + Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64); + return fetchPixel<bpp>(src, index); +} - for (int i = 0; i < count; ++i) { - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; +template <QPixelLayout::BPP width> static +void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel); - red = ((red << redLeftShift) | (red >> redRightShift)); - green = ((green << greenLeftShift) | (green >> greenRightShift)); - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = QRgba64::fromRgba(red, green, blue, 255); - } +template <> +inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel) +{ + reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel); +} - return buffer; +template <> +inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel) +{ + reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel); } +typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index); + +static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = { + 0, // BPPNone + fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB + fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB + fetchPixel<QPixelLayout::BPP8>, // BPP8 + fetchPixel<QPixelLayout::BPP16>, // BPP16 + fetchPixel<QPixelLayout::BPP24>, // BPP24 + fetchPixel<QPixelLayout::BPP32>, // BPP32 + fetchPixel<QPixelLayout::BPP64> // BPP64 +}; + template<QImage::Format Format> -static const uint *QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s) { - Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1); Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); - Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>(); Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); - Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8; Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; - Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) || - (alphaWidth<Format>() != greenWidth<Format>()) || - (alphaWidth<Format>() != blueWidth<Format>()); + uint red = (s >> redShift<Format>()) & redMask; + uint green = (s >> greenShift<Format>()) & greenMask; + uint blue = (s >> blueShift<Format>()) & blueMask; - if (mustMin) { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = qMin(alpha, (red << redLeftShift) | (red >> redRightShift)); - green = qMin(alpha, (green << greenLeftShift) | (green >> greenRightShift)); - blue = qMin(alpha, (blue << blueLeftShift) | (blue >> blueRightShift)); - buffer[i] = (alpha << 24) | (red << 16) | (green << 8) | blue; - } - } else { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = ((alpha << alphaLeftShift) | (alpha >> alphaRightShift)) << 24; - red = ((red << redLeftShift) | (red >> redRightShift)) << 16; - green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = alpha | red | green | blue; - } + red = ((red << redLeftShift) | (red >> redRightShift)) << 16; + green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; + blue = (blue << blueLeftShift) | (blue >> blueRightShift); + return 0xff000000 | red | green | blue; +} + +template<QImage::Format Format> +static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB32<Format>(buffer[i]); +} + +#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3 +extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count); +#endif + +template<QImage::Format Format> +static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); +#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3 + if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) { + // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 + // to vectorize the deforested version below. + fetchPixelsBPP24_ssse3(buffer, src, index, count); + convertToRGB32<Format>(buffer, count, nullptr); + return buffer; } +#endif + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i)); + return buffer; +} +template<QImage::Format Format> +static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s) +{ + return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s)); +} + +template<QImage::Format Format> +static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB64<Format>(src[i]); return buffer; } template<QImage::Format Format> -static const QRgba64 *QT_FASTCALL convertARGBPMToARGB64PM(QRgba64 *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i)); + return buffer; +} + +template<QImage::Format Format> +static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s) { Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1); Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); @@ -309,62 +411,101 @@ static const QRgba64 *QT_FASTCALL convertARGBPMToARGB64PM(QRgba64 *buffer, const (alphaWidth<Format>() != greenWidth<Format>()) || (alphaWidth<Format>() != blueWidth<Format>()); + uint alpha = (s >> alphaShift<Format>()) & alphaMask; + uint red = (s >> redShift<Format>()) & redMask; + uint green = (s >> greenShift<Format>()) & greenMask; + uint blue = (s >> blueShift<Format>()) & blueMask; + + alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); + red = (red << redLeftShift) | (red >> redRightShift); + green = (green << greenLeftShift) | (green >> greenRightShift); + blue = (blue << blueLeftShift) | (blue >> blueRightShift); + if (mustMin) { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = qMin(alpha, (red << redLeftShift) | (red >> redRightShift)); - green = qMin(alpha, (green << greenLeftShift) | (green >> greenRightShift)); - blue = qMin(alpha, (blue << blueLeftShift) | (blue >> blueRightShift)); - buffer[i] = QRgba64::fromRgba(red, green, blue, alpha); - } - } else { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = (red << redLeftShift) | (red >> redRightShift); - green = (green << greenLeftShift) | (green >> greenRightShift); - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = QRgba64::fromRgba(red, green, blue, alpha); - } + red = qMin(alpha, red); + green = qMin(alpha, green); + blue = qMin(alpha, blue); } + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + +template<QImage::Format Format> +static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]); +} + +template<QImage::Format Format> +static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); +#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3 + if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) { + // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 + // to vectorize the deforested version below. + fetchPixelsBPP24_ssse3(buffer, src, index, count); + convertARGBPMToARGB32PM<Format>(buffer, count, nullptr); + return buffer; + } +#endif + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i)); + return buffer; +} + +template<QImage::Format Format> +static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s) +{ + return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s)); +} + +template<QImage::Format Format> +static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB64<Format>(src[i]); + return buffer; +} + +template<QImage::Format Format> +static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>(); + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i)); return buffer; } template<QImage::Format Format, bool fromRGB> -static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { Q_CONSTEXPR uchar rWidth = redWidth<Format>(); Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); // RGB32 -> RGB888 is not a precision loss. if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) { - auto conversion = [](uint s) { - const uint c = fromRGB ? s : qUnpremultiply(s); - Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; - Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; - Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; - Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); - Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); - Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; + Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; + Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; + Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); + Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); + Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + for (int i = 0; i < count; ++i) { + const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]); const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); - return r | g | b; + storePixel<BPP>(dest, index + i, r | g | b); }; - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); } else { // We do ordered dither by using a rounding conversion, but instead of // adding half of input precision, we add the adjusted result from the @@ -384,38 +525,39 @@ static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint * r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); - buffer[i] = (r << redShift<Format>()) - | (g << greenShift<Format>()) - | (b << blueShift<Format>()); + const uint s = (r << redShift<Format>()) + | (g << greenShift<Format>()) + | (b << blueShift<Format>()); + storePixel<BPP>(dest, index + i, s); } } - return buffer; } template<QImage::Format Format, bool fromRGB> -static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); if (!dither) { - auto conversion = [](uint c) { - Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1; - Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; - Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; - Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; - - Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>(); - Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); - Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); - Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); - - Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>(); + Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1; + Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; + Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; + Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; + + Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>(); + Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); + Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); + Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + + Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>(); + for (int i = 0; i < count; ++i) { + const uint c = src[i]; const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>()); const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); - return a | r | g | b; + storePixel<BPP>(dest, index + i, a | r | g | b); }; - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); } else { Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); Q_CONSTEXPR uchar rWidth = redWidth<Format>(); @@ -441,59 +583,147 @@ static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uin r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); - buffer[i] = (a << alphaShift<Format>()) - | (r << redShift<Format>()) - | (g << greenShift<Format>()) - | (b << blueShift<Format>()); + uint s = (a << alphaShift<Format>()) + | (r << redShift<Format>()) + | (g << greenShift<Format>()) + | (b << blueShift<Format>()); + storePixel<BPP>(dest, index + i, s); } } - return buffer; } +template<QImage::Format Format> +static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count) +{ + Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); + Q_CONSTEXPR uchar aShift = alphaShift<Format>(); + Q_CONSTEXPR uchar rWidth = redWidth<Format>(); + Q_CONSTEXPR uchar rShift = redShift<Format>(); + Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); + Q_CONSTEXPR uchar gShift = greenShift<Format>(); + Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); + Q_CONSTEXPR uchar bShift = blueShift<Format>(); #ifdef Q_COMPILER_CONSTEXPR + Q_STATIC_ASSERT(rWidth == bWidth); +#endif + Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1; + Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift) + | (((1 << gWidth) - 1) << gShift); + constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>(); + + for (int i = 0; i < count; ++i) { + const uint c = fetchPixel<bpp>(src, i); + const uint r = (c >> rShift) & redBlueMask; + const uint b = (c >> bShift) & redBlueMask; + const uint t = (c & alphaGreenMask) + | (r << bShift) + | (b << rShift); + storePixel<bpp>(dst, i, t); + } +} + +static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count) +{ + const uint *src = reinterpret_cast<const uint *>(s); + uint *dest = reinterpret_cast<uint *>(d); + for (int i = 0; i < count; ++i) { + const uint c = src[i]; + const uint ag = c & 0xff00ff00; + const uint rb = c & 0x00ff00ff; + dest[i] = ag | (rb << 16) | (rb >> 16); + } +} + +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN +template<> +void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count) +{ + return rbSwap_rgb32(d, s, count); +} +#else +template<> +void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count) +{ + const uint *src = reinterpret_cast<const uint *>(s); + uint *dest = reinterpret_cast<uint *>(d); + for (int i = 0; i < count; ++i) { + const uint c = src[i]; + const uint rb = c & 0xff00ff00; + const uint ga = c & 0x00ff00ff; + dest[i] = ga | (rb << 16) | (rb >> 16); + } +} +#endif + +static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count) +{ + const uint *src = reinterpret_cast<const uint *>(s); + uint *dest = reinterpret_cast<uint *>(d); + for (int i = 0; i < count; ++i) + dest[i] = qRgbSwapRgb30(src[i]); +} template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB() { return QPixelLayout{ - uchar(redWidth<Format>()), uchar(redShift<Format>()), - uchar(greenWidth<Format>()), uchar(greenShift<Format>()), - uchar(blueWidth<Format>()), uchar(blueShift<Format>()), - 0, 0, - false, bitsPerPixel<Format>(), + false, + false, + bitsPerPixel<Format>(), + rbSwap<Format>, convertToRGB32<Format>, - convertRGBFromARGB32PM<Format, false>, - convertRGBFromARGB32PM<Format, true>, - convertToRGB64<Format> + convertToRGB64<Format>, + fetchRGBToRGB32<Format>, + fetchRGBToRGB64<Format>, + storeRGBFromARGB32PM<Format, false>, + storeRGBFromARGB32PM<Format, true> }; } template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM() { return QPixelLayout{ - uchar(redWidth<Format>()), uchar(redShift<Format>()), - uchar(greenWidth<Format>()), uchar(greenShift<Format>()), - uchar(blueWidth<Format>()), uchar(blueShift<Format>()), - uchar(alphaWidth<Format>()), uchar(alphaShift<Format>()), - true, bitsPerPixel<Format>(), + true, + true, + bitsPerPixel<Format>(), + rbSwap<Format>, convertARGBPMToARGB32PM<Format>, - convertARGBPMFromARGB32PM<Format, false>, - convertARGBPMFromARGB32PM<Format, true>, - convertARGBPMToARGB64PM<Format> + convertARGBPMToRGBA64PM<Format>, + fetchARGBPMToARGB32PM<Format>, + fetchARGBPMToRGBA64PM<Format>, + storeARGBPMFromARGB32PM<Format, false>, + storeARGBPMFromARGB32PM<Format, true> }; } -#endif - -// To convert in place, let 'dest' and 'src' be the same. -static const uint *QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *clut, QDitherInfo *) +static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut) { for (int i = 0; i < count; ++i) - buffer[i] = qPremultiply(clut->at(src[i])); + buffer[i] = qPremultiply(clut->at(buffer[i])); +} + +template<QPixelLayout::BPP BPP> +static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *clut, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) { + const uint s = fetchPixel<BPP>(src, index + i); + buffer[i] = qPremultiply(clut->at(s)); + } + return buffer; +} + +template<QPixelLayout::BPP BPP> +static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *clut, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) { + const uint s = fetchPixel<BPP>(src, index + i); + buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied(); + } return buffer; } -static const QRgba64 *QT_FASTCALL convertIndexedToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *clut, QDitherInfo *) { for (int i = 0; i < count; ++i) @@ -501,44 +731,77 @@ static const QRgba64 *QT_FASTCALL convertIndexedToARGB64PM(QRgba64 *buffer, cons return buffer; } -static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *) { - return src; } -static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int, + const QVector<QRgb> *, QDitherInfo *) { - return qt_convertARGB32ToARGB32PM(buffer, src, count); + return reinterpret_cast<const uint *>(src) + index; } -static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, RGBA2ARGB); - return buffer; + return reinterpret_cast<const QRgba64 *>(src) + index; } -static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return qt_convertRGBA8888ToARGB32PM(buffer, src, count); + uint *d = reinterpret_cast<uint *>(dest) + index; + if (d != src) + memcpy(d, src, count * sizeof(uint)); } -static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) +{ + qt_convertARGB32ToARGB32PM(buffer, buffer, count); +} + +static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); +} + +static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = qRgba(0, 0, 0, src[i]); + buffer[i] = RGBA2ARGB(buffer[i]); +} + +static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const uint *s = reinterpret_cast<const uint *>(src) + index; + UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB); return buffer; } -static const uint *QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) +{ + qt_convertRGBA8888ToARGB32PM(buffer, buffer, count); +} + +static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); +} + +static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = qRgb(src[i], src[i], src[i]); + buffer[i] = qRgba(0, 0, 0, buffer[i]); +} + +static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qRgba(0, 0, 0, src[index + i]); return buffer; } @@ -549,6 +812,31 @@ static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const ui buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]); return buffer; } +static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]); + return buffer; +} + +static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) { + const uint s = buffer[i]; + buffer[i] = qRgb(s, s, s); + } +} + +static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) { + const uint s = src[index + i]; + buffer[i] = qRgb(s, s, s); + } + return buffer; +} static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) @@ -558,24 +846,33 @@ static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, cons return buffer; } -static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qUnpremultiply(src[i]); + for (int i = 0; i < count; ++i) { + const uint s = src[index + i]; + buffer[i] = QRgba64::fromRgba(s, s, s, 255); + } return buffer; } -static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, ARGB2RGBA); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); }); +} + +static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA); } #ifdef __SSE2__ template<bool RGBA, bool maskAlpha> -static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count) +static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count) { if (count <= 0) return; @@ -618,6 +915,66 @@ static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint * *buffer++ = QRgba64::fromArgb32(s); } } + +template<QtPixelOrder PixelOrder> +static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count) +{ + const __m128i gmask = _mm_set1_epi32(0x000ffc00); + const __m128i cmask = _mm_set1_epi32(0x000003ff); + int i = 0; + __m128i vr, vg, vb, va; + for (; i < count && uintptr_t(buffer) & 0xF; ++i) { + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + } + + for (; i < count-15; i += 16) { + // Repremultiplying is really expensive and hard to do in SIMD without AVX2, + // so we try to avoid it by checking if it is needed 16 samples at a time. + __m128i vOr = _mm_set1_epi32(0); + __m128i vAnd = _mm_set1_epi32(0xffffffff); + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)(buffer + j)); + vOr = _mm_or_si128(vOr, vs); + vAnd = _mm_and_si128(vAnd, vs); + } + const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7)); + const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7)); + + if (andAlpha == 0xffff) { + for (int j = 0; j < 16; j += 2) { + __m128i vs = _mm_load_si128((const __m128i*)buffer); + buffer += 2; + vr = _mm_srli_epi64(vs, 6); + vg = _mm_srli_epi64(vs, 16 + 6 - 10); + vb = _mm_srli_epi64(vs, 32 + 6); + vr = _mm_and_si128(vr, cmask); + vg = _mm_and_si128(vg, gmask); + vb = _mm_and_si128(vb, cmask); + va = _mm_srli_epi64(vs, 48 + 14); + if (PixelOrder == PixelOrderRGB) + vr = _mm_slli_epi32(vr, 20); + else + vb = _mm_slli_epi32(vb, 20); + va = _mm_slli_epi32(va, 30); + __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); + vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); + _mm_storel_epi64((__m128i*)dest, vd); + dest += 2; + } + } else if (orAlpha == 0) { + for (int j = 0; j < 16; ++j) { + *dest++ = 0; + buffer++; + } + } else { + for (int j = 0; j < 16; ++j) + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); + } + } + + SIMD_EPILOGUE(i, count, 15) + *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); +} #elif defined(__ARM_NEON__) template<bool RGBA, bool maskAlpha> static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count) @@ -671,7 +1028,7 @@ static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uin const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertARGB32PMToARGB64PM_sse2<false, true>(buffer, src, count); + qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count); #elif defined(__ARM_NEON__) qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count); #else @@ -681,11 +1038,17 @@ static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uin return buffer; } -static const QRgba64 *QT_FASTCALL convertARGB32ToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); +} + +static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count); + qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count); for (int i = 0; i < count; ++i) buffer[i] = buffer[i].premultiplied(); #elif defined(__ARM_NEON__) @@ -699,11 +1062,17 @@ static const QRgba64 *QT_FASTCALL convertARGB32ToARGB64PM(QRgba64 *buffer, const return buffer; } -static const QRgba64 *QT_FASTCALL convertARGB32PMToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); +} + +static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count); + qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count); #elif defined(__ARM_NEON__) qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count); #else @@ -713,11 +1082,36 @@ static const QRgba64 *QT_FASTCALL convertARGB32PMToARGB64PM(QRgba64 *buffer, con return buffer; } -static const QRgba64 *QT_FASTCALL convertRGBA8888ToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); +} + +static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = buffer[i].premultiplied(); +} + +static void convertRGBA64PMToRGBA64PM(QRgba64 *, int) +{ +} + +static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; + for (int i = 0; i < count; ++i) + buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied(); + return buffer; +} + +static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count); + qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count); for (int i = 0; i < count; ++i) buffer[i] = buffer[i].premultiplied(); #elif defined(__ARM_NEON__) @@ -731,11 +1125,17 @@ static const QRgba64 *QT_FASTCALL convertRGBA8888ToARGB64PM(QRgba64 *buffer, con return buffer; } -static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); +} + +static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count); + qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count); #elif defined(__ARM_NEON__) qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count); #else @@ -745,33 +1145,45 @@ static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, c return buffer; } -static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(qUnpremultiply(src[i])); - return buffer; + return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); } -static const uint *QT_FASTCALL convertRGBXFromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); }); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); }); } -static const uint *QT_FASTCALL convertRGBXFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); }); +} + +static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); }); +} + +template<QtPixelOrder PixelOrder> +static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply(src[i])); - return buffer; + buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { + const uint *src = reinterpret_cast<const uint *>(s) + index; if (!dither) { UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>); } else { @@ -796,7 +1208,7 @@ static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const ui #ifdef __SSE2__ template<QtPixelOrder PixelOrder> -static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count) +static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count) { if (count <= 0) return; @@ -839,11 +1251,11 @@ static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint #endif template<QtPixelOrder PixelOrder> -static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, const uint *src, int count, +static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { #ifdef __SSE2__ - qConvertA2RGB30PMToARGB64PM_sse2<PixelOrder>(buffer, src, count); + qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count); #else for (int i = 0; i < count; ++i) buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]); @@ -852,290 +1264,334 @@ static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, co } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertArgb32ToA2rgb30<PixelOrder>); - return buffer; + return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertRGB30FromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qConvertRgb32ToRgb30<PixelOrder>(src[i]); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertRGB30FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertRgb32ToRgb30<PixelOrder>); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); } -static const uint *QT_FASTCALL convertAlpha8FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +template<QtPixelOrder PixelOrder> +static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qAlpha(src[i]); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); } -static const uint *QT_FASTCALL convertGrayscale8FromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +template<bool RGBA> +void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count) { - for (int i = 0; i < count; ++i) - buffer[i] = qGray(src[i]); - return buffer; + int i = 0; +#ifdef __SSE2__ + if (((uintptr_t)dst & 0x7) && count > 0) { + uint s = (*src++).toArgb32(); + if (RGBA) + s = ARGB2RGBA(s); + *dst++ = s; + i++; + } + const __m128i vhalf = _mm_set1_epi32(0x80); + const __m128i vzero = _mm_setzero_si128(); + for (; i < count-1; i += 2) { + __m128i vs = _mm_loadu_si128((const __m128i*)src); + src += 2; + if (!RGBA) { + vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2)); + vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2)); + } + __m128i v1 = _mm_unpacklo_epi16(vs, vzero); + __m128i v2 = _mm_unpackhi_epi16(vs, vzero); + v1 = _mm_add_epi32(v1, vhalf); + v2 = _mm_add_epi32(v2, vhalf); + v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8)); + v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8)); + v1 = _mm_srli_epi32(v1, 8); + v2 = _mm_srli_epi32(v2, 8); + v1 = _mm_packs_epi32(v1, v2); + v1 = _mm_packus_epi16(v1, vzero); + _mm_storel_epi64((__m128i*)(dst), v1); + dst += 2; + } +#endif + for (; i < count; i++) { + uint s = (*src++).toArgb32(); + if (RGBA) + s = ARGB2RGBA(s); + *dst++ = s; + } } +template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count); +template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count); -static const uint *QT_FASTCALL convertGrayscale8FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) + +static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { for (int i = 0; i < count; ++i) - buffer[i] = qGray(qUnpremultiply(src[i])); - return buffer; + dest[index + i] = qAlpha(src[i]); } -template <QPixelLayout::BPP bpp> static -uint QT_FASTCALL fetchPixel(const uchar *, int) +static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - Q_UNREACHABLE(); - return 0; + for (int i = 0; i < count; ++i) + dest[index + i] = qGray(src[i]); } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index) +static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return (src[index >> 3] >> (index & 7)) & 1; + for (int i = 0; i < count; ++i) + dest[index + i] = qGray(qUnpremultiply(src[i])); } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index) +static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return (src[index >> 3] >> (~index & 7)) & 1; + const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; + for (int i = 0; i < count; ++i) + buffer[i] = toArgb32(s[i]); + return buffer; } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index) +static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return src[index]; + QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index; + for (int i = 0; i < count; ++i) + d[i] = QRgba64::fromArgb32(src[i]); } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index) +static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return reinterpret_cast<const quint16 *>(src)[index]; + const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; + for (int i = 0; i < count; ++i) + buffer[i] = toArgb32(s[i].premultiplied()); + return buffer; } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index) +static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - return reinterpret_cast<const quint24 *>(src)[index]; + QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index; + for (int i = 0; i < count; ++i) + d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied(); } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index) -{ - return reinterpret_cast<const uint *>(src)[index]; -} +// Note: +// convertToArgb32() assumes that no color channel is less than 4 bits. +// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits. +// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits. +QPixelLayout qPixelLayouts[QImage::NImageFormats] = { + { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid + { false, false, QPixelLayout::BPP1MSB, nullptr, + convertIndexedToARGB32PM, convertIndexedToRGBA64PM, + fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>, + nullptr, nullptr }, // Format_Mono + { false, false, QPixelLayout::BPP1LSB, nullptr, + convertIndexedToARGB32PM, convertIndexedToRGBA64PM, + fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>, + nullptr, nullptr }, // Format_MonoLSB + { false, false, QPixelLayout::BPP8, nullptr, + convertIndexedToARGB32PM, convertIndexedToRGBA64PM, + fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>, + nullptr, nullptr }, // Format_Indexed8 + // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong, + // but everywhere this generic conversion would be wrong is currently overloaded. + { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough, + convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32 + { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM, + convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32 + { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough, + convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied + pixelLayoutRGB<QImage::Format_RGB16>(), + pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(), + pixelLayoutRGB<QImage::Format_RGB666>(), + pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(), + pixelLayoutRGB<QImage::Format_RGB555>(), + pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(), + pixelLayoutRGB<QImage::Format_RGB888>(), + pixelLayoutRGB<QImage::Format_RGB444>(), + pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(), + { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM, + convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888 + { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM, + convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888 + { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM, + convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied + { false, false, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderBGR>, + convertA2RGB30PMToRGBA64PM<PixelOrderBGR>, + fetchA2RGB30PMToARGB32PM<PixelOrderBGR>, + fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>, + storeRGB30FromARGB32PM<PixelOrderBGR>, + storeRGB30FromRGB32<PixelOrderBGR> + }, // Format_BGR30 + { true, true, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderBGR>, + convertA2RGB30PMToRGBA64PM<PixelOrderBGR>, + fetchA2RGB30PMToARGB32PM<PixelOrderBGR>, + fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>, + storeA2RGB30PMFromARGB32PM<PixelOrderBGR>, + storeRGB30FromRGB32<PixelOrderBGR> + }, // Format_A2BGR30_Premultiplied + { false, false, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderRGB>, + convertA2RGB30PMToRGBA64PM<PixelOrderRGB>, + fetchA2RGB30PMToARGB32PM<PixelOrderRGB>, + fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>, + storeRGB30FromARGB32PM<PixelOrderRGB>, + storeRGB30FromRGB32<PixelOrderRGB> + }, // Format_RGB30 + { true, true, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderRGB>, + convertA2RGB30PMToRGBA64PM<PixelOrderRGB>, + fetchA2RGB30PMToARGB32PM<PixelOrderRGB>, + fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>, + storeA2RGB30PMFromARGB32PM<PixelOrderRGB>, + storeRGB30FromRGB32<PixelOrderRGB> + }, // Format_A2RGB30_Premultiplied + { true, true, QPixelLayout::BPP8, nullptr, + convertAlpha8ToRGB32, convertAlpha8ToRGB64, + fetchAlpha8ToRGB32, fetchAlpha8ToRGB64, + storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8 + { false, false, QPixelLayout::BPP8, nullptr, + convertGrayscale8ToRGB32, convertGrayscale8ToRGB64, + fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64, + storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8 + { false, false, QPixelLayout::BPP64, nullptr, + convertPassThrough, nullptr, + fetchRGB64ToRGB32, fetchPassThrough64, + storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64 + { true, false, QPixelLayout::BPP64, nullptr, + convertARGB32ToARGB32PM, nullptr, + fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM, + storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64 + { true, true, QPixelLayout::BPP64, nullptr, + convertPassThrough, nullptr, + fetchRGB64ToRGB32, fetchPassThrough64, + storeRGB64FromRGB32, storeRGB64FromRGB32 } // Format_RGBA64_Premultiplied +}; -template <QPixelLayout::BPP bpp> -inline const uint *QT_FASTCALL fetchPixels(uint *buffer, const uchar *src, int index, int count) -{ - for (int i = 0; i < count; ++i) - buffer[i] = fetchPixel<bpp>(src, index + i); - return buffer; -} +Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats); -template <> -inline const uint *QT_FASTCALL fetchPixels<QPixelLayout::BPP32>(uint *, const uchar *src, int index, int) +static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length) { - return reinterpret_cast<const uint *>(src) + index; + for (int i = 0; i < length; ++i) { + dest[i] = toArgb32(src[i]); + } } -template <QPixelLayout::BPP width> static -void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel); - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP1LSB>(uchar *dest, int index, uint pixel) +template<QImage::Format format> +static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *clut, QDitherInfo *dither) { - if (pixel) - dest[index >> 3] |= 1 << (index & 7); - else - dest[index >> 3] &= ~(1 << (index & 7)); + uint buffer[BufferSize]; + convertFromRgb64(buffer, src, count); + qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither); } -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP1MSB>(uchar *dest, int index, uint pixel) +static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - if (pixel) - dest[index >> 3] |= 1 << (~index & 7); - else - dest[index >> 3] &= ~(1 << (~index & 7)); + uint *d = (uint*)dest + index; + for (int i = 0; i < count; ++i) + d[i] = toArgb32(src[i].unpremultiplied()); } -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP8>(uchar *dest, int index, uint pixel) +static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - dest[index] = uchar(pixel); + uint *d = (uint*)dest + index; + for (int i = 0; i < count; ++i) + d[i] = toRgba8888(src[i].unpremultiplied()); } -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel) +template<QtPixelOrder PixelOrder> +static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel); + uint *d = (uint*)dest + index; +#ifdef __SSE2__ + qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count); +#else + for (int i = 0; i < count; ++i) + d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]); +#endif } -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel) +static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel); + QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; + for (int i = 0; i < count; ++i) { + d[i] = src[i].unpremultiplied(); + d[i].setAlpha(65535); + } } -template <QPixelLayout::BPP width> -inline void QT_FASTCALL storePixels(uchar *dest, const uint *src, int index, int count) +static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { + QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; for (int i = 0; i < count; ++i) - storePixel<width>(dest, index + i, src[i]); + d[i] = src[i].unpremultiplied(); } -template <> -inline void QT_FASTCALL storePixels<QPixelLayout::BPP32>(uchar *dest, const uint *src, int index, int count) +static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - memcpy(reinterpret_cast<uint *>(dest) + index, src, count * sizeof(uint)); -} - -// Note: -// convertToArgb32() assumes that no color channel is less than 4 bits. -// convertFromArgb32() assumes that no color channel is more than 8 bits. -// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits. -QPixelLayout qPixelLayouts[QImage::NImageFormats] = { - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPPNone, 0, 0, 0, 0 }, // Format_Invalid - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1MSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Mono - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1LSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_MonoLSB - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Indexed8 - // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong, - // but everywhere this generic conversion would be wrong is currently overloaded. - { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertRGB32ToRGB64 }, // Format_RGB32 - { 8, 16, 8, 8, 8, 0, 8, 24, false, QPixelLayout::BPP32, convertARGB32ToARGB32PM, convertARGB32FromARGB32PM, convertPassThrough, convertARGB32ToARGB64PM }, // Format_ARGB32 - { 8, 16, 8, 8, 8, 0, 8, 24, true, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertARGB32PMToARGB64PM }, // Format_ARGB32_Premultiplied -#ifdef Q_COMPILER_CONSTEXPR - pixelLayoutRGB<QImage::Format_RGB16>(), - pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(), - pixelLayoutRGB<QImage::Format_RGB666>(), - pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(), - pixelLayoutRGB<QImage::Format_RGB555>(), - pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(), - pixelLayoutRGB<QImage::Format_RGB888>(), - pixelLayoutRGB<QImage::Format_RGB444>(), - pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(), -#else - { 5, 11, 6, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB16>, - convertRGBFromARGB32PM<QImage::Format_RGB16, false>, - convertRGBFromARGB32PM<QImage::Format_RGB16, true>, - convertToRGB64<QImage::Format_RGB16>, - }, - { 5, 19, 6, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB8565_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8565_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8565_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB8565_Premultiplied>, - }, - { 6, 12, 6, 6, 6, 0, 0, 0, false, QPixelLayout::BPP24, - convertToRGB32<QImage::Format_RGB666>, - convertRGBFromARGB32PM<QImage::Format_RGB666, false>, - convertRGBFromARGB32PM<QImage::Format_RGB666, true>, - convertToRGB64<QImage::Format_RGB666>, - }, - { 6, 12, 6, 6, 6, 0, 6, 18, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB6666_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB6666_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB6666_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB6666_Premultiplied>, - }, - { 5, 10, 5, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB555>, - convertRGBFromARGB32PM<QImage::Format_RGB555, false>, - convertRGBFromARGB32PM<QImage::Format_RGB555, true>, - convertToRGB64<QImage::Format_RGB555>, - }, - { 5, 18, 5, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB8555_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8555_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8555_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB8555_Premultiplied>, - }, - { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP24, - convertToRGB32<QImage::Format_RGB888>, - convertRGBFromARGB32PM<QImage::Format_RGB888, false>, - convertRGBFromARGB32PM<QImage::Format_RGB888, true>, - convertToRGB64<QImage::Format_RGB888>, - }, - { 4, 8, 4, 4, 4, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB444>, - convertRGBFromARGB32PM<QImage::Format_RGB444, false>, - convertRGBFromARGB32PM<QImage::Format_RGB444, true>, - convertToRGB64<QImage::Format_RGB444>, - }, - { 4, 8, 4, 4, 4, 0, 4, 12, true, QPixelLayout::BPP16, - convertARGBPMToARGB32PM<QImage::Format_ARGB4444_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB4444_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB4444_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB4444_Premultiplied>, - }, -#endif -#if Q_BYTE_ORDER == Q_BIG_ENDIAN - { 8, 24, 8, 16, 8, 8, 0, 0, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888 - { 8, 24, 8, 16, 8, 8, 8, 0, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888 - { 8, 24, 8, 16, 8, 8, 8, 0, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM}, // Format_RGBA8888_Premultiplied -#else - { 8, 0, 8, 8, 8, 16, 0, 24, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888 - { 8, 0, 8, 8, 8, 16, 8, 24, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888 (ABGR32) - { 8, 0, 8, 8, 8, 16, 8, 24, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBA8888_Premultiplied -#endif - { 10, 20, 10, 10, 10, 0, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertRGB30FromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_BGR30 - { 10, 20, 10, 10, 10, 0, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertA2RGB30PMFromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_A2BGR30_Premultiplied - { 10, 0, 10, 10, 10, 20, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertRGB30FromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_RGB30 - { 10, 0, 10, 10, 10, 20, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertA2RGB30PMFromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_A2RGB30_Premultiplied - { 0, 0, 0, 0, 0, 0, 8, 0, true, QPixelLayout::BPP8, convertAlpha8ToRGB32, convertAlpha8FromARGB32PM, 0, convertAlpha8ToRGB64 }, // Format_Alpha8 - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertGrayscale8ToRGB32, convertGrayscale8FromARGB32PM, convertGrayscale8FromRGB32, convertGrayscale8ToRGB64 } // Format_Grayscale8 -}; - -const FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount] = { - 0, // BPPNone - fetchPixels<QPixelLayout::BPP1MSB>, // BPP1MSB - fetchPixels<QPixelLayout::BPP1LSB>, // BPP1LSB - fetchPixels<QPixelLayout::BPP8>, // BPP8 - fetchPixels<QPixelLayout::BPP16>, // BPP16 - fetchPixels<QPixelLayout::BPP24>, // BPP24 - fetchPixels<QPixelLayout::BPP32> // BPP32 -}; - -StorePixelsFunc qStorePixels[QPixelLayout::BPPCount] = { - 0, // BPPNone - storePixels<QPixelLayout::BPP1MSB>, // BPP1MSB - storePixels<QPixelLayout::BPP1LSB>, // BPP1LSB - storePixels<QPixelLayout::BPP8>, // BPP8 - storePixels<QPixelLayout::BPP16>, // BPP16 - storePixels<QPixelLayout::BPP24>, // BPP24 - storePixels<QPixelLayout::BPP32> // BPP32 -}; - -typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index); - -static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = { - 0, // BPPNone - fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB - fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB - fetchPixel<QPixelLayout::BPP8>, // BPP8 - fetchPixel<QPixelLayout::BPP16>, // BPP16 - fetchPixel<QPixelLayout::BPP24>, // BPP24 - fetchPixel<QPixelLayout::BPP32> // BPP32 + QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; + if (d != src) + memcpy(d, src, count * sizeof(QRgba64)); +} + +ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = { + nullptr, + nullptr, + nullptr, + nullptr, + storeGenericFromRGBA64PM<QImage::Format_RGB32>, + storeARGB32FromRGBA64PM, + storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>, + storeGenericFromRGBA64PM<QImage::Format_RGB16>, + storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>, + storeGenericFromRGBA64PM<QImage::Format_RGB666>, + storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>, + storeGenericFromRGBA64PM<QImage::Format_RGB555>, + storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>, + storeGenericFromRGBA64PM<QImage::Format_RGB888>, + storeGenericFromRGBA64PM<QImage::Format_RGB444>, + storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>, + storeGenericFromRGBA64PM<QImage::Format_RGBX8888>, + storeRGBA8888FromRGBA64PM, + storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>, + storeRGB30FromRGBA64PM<PixelOrderBGR>, + storeRGB30FromRGBA64PM<PixelOrderBGR>, + storeRGB30FromRGBA64PM<PixelOrderRGB>, + storeRGB30FromRGBA64PM<PixelOrderRGB>, + storeGenericFromRGBA64PM<QImage::Format_Alpha8>, + storeGenericFromRGBA64PM<QImage::Format_Grayscale8>, + storeRGBX64FromRGBA64PM, + storeRGBA64FromRGBA64PM, + storeRGBA64PMFromRGBA64PM }; /* @@ -1185,23 +1641,23 @@ static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuff static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) { const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - const uint *ptr = qFetchPixels[layout->bpp](buffer, rasterBuffer->scanLine(y), x, length); - return const_cast<uint *>(layout->convertToARGB32PM(buffer, ptr, length, 0, 0)); + return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); +} + +static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int) +{ + return buffer; } static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) { const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - uint buffer32[buffer_size]; - const uint *ptr = qFetchPixels[layout->bpp](buffer32, rasterBuffer->scanLine(y), x, length); - return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, ptr, length, 0, 0)); + return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); } -static QRgba64 *QT_FASTCALL destFetch64uint32(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) +static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int) { - const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - const uint *src = ((const uint *)rasterBuffer->scanLine(y)) + x; - return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, src, length, 0, 0)); + return (QRgba64 *)rasterBuffer->scanLine(y) + x; } static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int) @@ -1236,6 +1692,9 @@ static DestFetchProc destFetchProc[QImage::NImageFormats] = destFetch, // Format_A2RGB30_Premultiplied destFetch, // Format_Alpha8 destFetch, // Format_Grayscale8 + destFetch, // Format_RGBX64 + destFetch, // Format_RGBA64 + destFetch, // Format_RGBA64_Premultiplied }; static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = @@ -1244,9 +1703,9 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = 0, // Format_Mono, 0, // Format_MonoLSB 0, // Format_Indexed8 - destFetch64uint32, // Format_RGB32 - destFetch64uint32, // Format_ARGB32, - destFetch64uint32, // Format_ARGB32_Premultiplied + destFetch64, // Format_RGB32 + destFetch64, // Format_ARGB32, + destFetch64, // Format_ARGB32_Premultiplied destFetch64, // Format_RGB16 destFetch64, // Format_ARGB8565_Premultiplied destFetch64, // Format_RGB666 @@ -1256,15 +1715,18 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = destFetch64, // Format_RGB888 destFetch64, // Format_RGB444 destFetch64, // Format_ARGB4444_Premultiplied - destFetch64uint32, // Format_RGBX8888 - destFetch64uint32, // Format_RGBA8888 - destFetch64uint32, // Format_RGBA8888_Premultiplied - destFetch64uint32, // Format_BGR30 - destFetch64uint32, // Format_A2BGR30_Premultiplied - destFetch64uint32, // Format_RGB30 - destFetch64uint32, // Format_A2RGB30_Premultiplied + destFetch64, // Format_RGBX8888 + destFetch64, // Format_RGBA8888 + destFetch64, // Format_RGBA8888_Premultiplied + destFetch64, // Format_BGR30 + destFetch64, // Format_A2BGR30_Premultiplied + destFetch64, // Format_RGB30 + destFetch64, // Format_A2RGB30_Premultiplied destFetch64, // Format_Alpha8 destFetch64, // Format_Grayscale8 + destFetchRGB64, // Format_RGBX64 + destFetch64, // Format_RGBA64 + destFetchRGB64, // Format_RGBA64_Premultiplied }; /* @@ -1365,143 +1827,29 @@ static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) { - uint buf[buffer_size]; const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - StorePixelsFunc store = qStorePixels[layout->bpp]; + ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM; + if (!layout->premultiplied && !layout->hasAlphaChannel) + store = layout->storeFromRGB32; uchar *dest = rasterBuffer->scanLine(y); - while (length) { - int l = qMin(length, buffer_size); - const uint *ptr = 0; - if (!layout->premultiplied && !layout->alphaWidth) - ptr = layout->convertFromRGB32(buf, buffer, l, 0, 0); - else - ptr = layout->convertFromARGB32PM(buf, buffer, l, 0, 0); - store(dest, ptr, x, l); - length -= l; - buffer += l; - x += l; - } -} - -static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length) -{ - for (int i = 0; i < length; ++i) { - dest[i] = toArgb32(src[i]); - } + store(dest, buffer, x, length, nullptr, nullptr); } static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) { - uint buf[buffer_size]; - const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - StorePixelsFunc store = qStorePixels[layout->bpp]; + auto store = qStoreFromRGBA64PM[rasterBuffer->format]; uchar *dest = rasterBuffer->scanLine(y); - while (length) { - int l = qMin(length, buffer_size); - const uint *ptr = 0; - convertFromRgb64(buf, buffer, l); - if (!layout->premultiplied && !layout->alphaWidth) - ptr = layout->convertFromRGB32(buf, buf, l, 0, 0); - else - ptr = layout->convertFromARGB32PM(buf, buf, l, 0, 0); - store(dest, ptr, x, l); - length -= l; - buffer += l; - x += l; - } + store(dest, buffer, x, length, nullptr, nullptr); } -#ifdef __SSE2__ -template<QtPixelOrder PixelOrder> -static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count) +static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) { - const __m128i gmask = _mm_set1_epi32(0x000ffc00); - const __m128i cmask = _mm_set1_epi32(0x000003ff); - int i = 0; - __m128i vr, vg, vb, va; - for (; i < count && uintptr_t(buffer) & 0xF; ++i) { - *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); - } - - for (; i < count-15; i += 16) { - // Repremultiplying is really expensive and hard to do in SIMD without AVX2, - // so we try to avoid it by checking if it is needed 16 samples at a time. - __m128i vOr = _mm_set1_epi32(0); - __m128i vAnd = _mm_set1_epi32(0xffffffff); - for (int j = 0; j < 16; j += 2) { - __m128i vs = _mm_load_si128((const __m128i*)(buffer + j)); - vOr = _mm_or_si128(vOr, vs); - vAnd = _mm_and_si128(vAnd, vs); - } - const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7)); - const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7)); - - if (andAlpha == 0xffff) { - for (int j = 0; j < 16; j += 2) { - __m128i vs = _mm_load_si128((const __m128i*)buffer); - buffer += 2; - vr = _mm_srli_epi64(vs, 6); - vg = _mm_srli_epi64(vs, 16 + 6 - 10); - vb = _mm_srli_epi64(vs, 32 + 6); - vr = _mm_and_si128(vr, cmask); - vg = _mm_and_si128(vg, gmask); - vb = _mm_and_si128(vb, cmask); - va = _mm_srli_epi64(vs, 48 + 14); - if (PixelOrder == PixelOrderRGB) - vr = _mm_slli_epi32(vr, 20); - else - vb = _mm_slli_epi32(vb, 20); - va = _mm_slli_epi32(va, 30); - __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va)); - vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0)); - _mm_storel_epi64((__m128i*)dest, vd); - dest += 2; - } - } else if (orAlpha == 0) { - for (int j = 0; j < 16; ++j) { - *dest++ = 0; - buffer++; - } - } else { - for (int j = 0; j < 16; ++j) - *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); - } - } - - SIMD_EPILOGUE(i, count, 15) - *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++); -} -#endif - -static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) -{ - uint *dest = (uint*)rasterBuffer->scanLine(y) + x; + QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x; for (int i = 0; i < length; ++i) { - dest[i] = toArgb32(buffer[i].unpremultiplied()); + dest[i] = buffer[i].unpremultiplied(); } } -static void QT_FASTCALL destStore64RGBA8888(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) -{ - uint *dest = (uint*)rasterBuffer->scanLine(y) + x; - for (int i = 0; i < length; ++i) { - dest[i] = toRgba8888(buffer[i].unpremultiplied()); - } -} - -template<QtPixelOrder PixelOrder> -static void QT_FASTCALL destStore64RGB30(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) -{ - uint *dest = (uint*)rasterBuffer->scanLine(y) + x; -#ifdef __SSE2__ - qConvertARGB64PMToA2RGB30PM_sse2<PixelOrder>(dest, buffer, length); -#else - for (int i = 0; i < length; ++i) { - dest[i] = qConvertRgb64ToRgb30<PixelOrder>(buffer[i]); - } -#endif -} - static DestStoreProc destStoreProc[QImage::NImageFormats] = { 0, // Format_Invalid @@ -1529,16 +1877,19 @@ static DestStoreProc destStoreProc[QImage::NImageFormats] = destStore, // Format_A2RGB30_Premultiplied destStore, // Format_Alpha8 destStore, // Format_Grayscale8 + destStore, // Format_RGBX64 + destStore, // Format_RGBA64 + destStore, // Format_RGBA64_Premultiplied }; static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = { 0, // Format_Invalid - destStore64, // Format_Mono, - destStore64, // Format_MonoLSB + 0, // Format_Mono, + 0, // Format_MonoLSB 0, // Format_Indexed8 destStore64, // Format_RGB32 - destStore64ARGB32, // Format_ARGB32, + destStore64, // Format_ARGB32, destStore64, // Format_ARGB32_Premultiplied destStore64, // Format_RGB16 destStore64, // Format_ARGB8565_Premultiplied @@ -1550,14 +1901,17 @@ static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = destStore64, // Format_RGB444 destStore64, // Format_ARGB4444_Premultiplied destStore64, // Format_RGBX8888 - destStore64RGBA8888, // Format_RGBA8888 + destStore64, // Format_RGBA8888 destStore64, // Format_RGBA8888_Premultiplied - destStore64RGB30<PixelOrderBGR>, // Format_BGR30 - destStore64RGB30<PixelOrderBGR>, // Format_A2BGR30_Premultiplied - destStore64RGB30<PixelOrderRGB>, // Format_RGB30 - destStore64RGB30<PixelOrderRGB>, // Format_A2RGB30_Premultiplied + destStore64, // Format_BGR30 + destStore64, // Format_A2BGR30_Premultiplied + destStore64, // Format_RGB30 + destStore64, // Format_A2RGB30_Premultiplied destStore64, // Format_Alpha8 destStore64, // Format_Grayscale8 + 0, // Format_RGBX64 + destStore64RGBA64, // Format_RGBA64 + 0 // Format_RGBA64_Premultiplied }; /* @@ -1589,15 +1943,14 @@ static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator * const QSpanData *data, int y, int x, int length) { const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; - const uint *ptr = qFetchPixels[layout->bpp](buffer, data->texture.scanLine(y), x, length); - return layout->convertToARGB32PM(buffer, ptr, length, data->texture.colorTable, 0); + return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr); } static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *, const QSpanData *data, int y, int x, int) { const uchar *scanLine = data->texture.scanLine(y); - return ((const uint *)scanLine) + x; + return reinterpret_cast<const uint *>(scanLine) + x; } static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *, @@ -1614,39 +1967,47 @@ static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Op const QSpanData *data, int y, int x, int length) { const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; - if (layout->bpp != QPixelLayout::BPP32) { - uint buffer32[buffer_size]; - const uint *ptr = qFetchPixels[layout->bpp](buffer32, data->texture.scanLine(y), x, length); - return layout->convertToARGB64PM(buffer, ptr, length, data->texture.colorTable, 0); + return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr); +} + +static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *, + const QSpanData *data, int y, int x, int) +{ + const uchar *scanLine = data->texture.scanLine(y); + return reinterpret_cast<const QRgba64 *>(scanLine) + x; +} + +template<TextureBlendType blendType> +inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v) +{ + Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); + if (blendType == BlendTransformedTiled) { + if (v < 0 || v >= max) { + v %= max; + if (v < 0) v += max; + } } else { - const uint *src = (const uint *)data->texture.scanLine(y) + x; - return layout->convertToARGB64PM(buffer, src, length, data->texture.colorTable, 0); + v = qBound(l1, v, l2); } } -template<TextureBlendType blendType, QPixelLayout::BPP bpp> -static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data, - int y, int x, int length) +template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T> +static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data, + int y, int x, int length) { Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); - const int image_width = data->texture.width; - const int image_height = data->texture.height; - const int image_x1 = data->texture.x1; - const int image_y1 = data->texture.y1; - const int image_x2 = data->texture.x2 - 1; - const int image_y2 = data->texture.y2 - 1; + const QTextureData &image = data->texture; const qreal cx = x + qreal(0.5); const qreal cy = y + qreal(0.5); + constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint); const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; - if (bpp != QPixelLayout::BPPNone) // Like this to not ICE on GCC 5.3.1 + if (!useFetch) Q_ASSERT(layout->bpp == bpp); // When templated 'fetch' should be inlined at compile time: const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>); - uint *const end = buffer + length; - uint *b = buffer; if (data->fast_matrix) { // The increment pr x in the scanline int fdx = (int)(data->m11 * fixed_scale); @@ -1657,24 +2018,105 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale); - while (b < end) { - int px = fx >> 16; - int py = fy >> 16; - - if (blendType == BlendTransformedTiled) { - px %= image_width; - py %= image_height; - if (px < 0) px += image_width; - if (py < 0) py += image_height; - } else { - px = qBound(image_x1, px, image_x2); - py = qBound(image_y1, py, image_y2); + if (fdy == 0) { // simple scale, no rotation or shear + int py = (fy >> 16); + fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); + const uchar *src = image.scanLine(py); + + int i = 0; + if (blendType == BlendTransformed) { + int fastLen = length; + if (fdx > 0) + fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); + else if (fdx < 0) + fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); + + for (; i < fastLen; ++i) { + int x1 = (fx >> 16); + int x2 = x1; + fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1); + if (x1 == x2) + break; + if (useFetch) + buffer[i] = fetch(src, x1); + else + buffer[i] = reinterpret_cast<const T*>(src)[x1]; + fx += fdx; + } + + for (; i < fastLen; ++i) { + int px = (fx >> 16); + if (useFetch) + buffer[i] = fetch(src, px); + else + buffer[i] = reinterpret_cast<const T*>(src)[px]; + fx += fdx; + } } - *b = fetch(data->texture.scanLine(py), px); - fx += fdx; - fy += fdy; - ++b; + for (; i < length; ++i) { + int px = (fx >> 16); + fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); + if (useFetch) + buffer[i] = fetch(src, px); + else + buffer[i] = reinterpret_cast<const T*>(src)[px]; + fx += fdx; + } + } else { // rotation or shear + int i = 0; + if (blendType == BlendTransformed) { + int fastLen = length; + if (fdx > 0) + fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); + else if (fdx < 0) + fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); + if (fdy > 0) + fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy)); + else if (fdy < 0) + fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy)); + + for (; i < fastLen; ++i) { + int x1 = (fx >> 16); + int y1 = (fy >> 16); + int x2 = x1; + int y2 = y1; + fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1); + fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1); + if (x1 == x2 && y1 == y2) + break; + if (useFetch) + buffer[i] = fetch(image.scanLine(y1), x1); + else + buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1]; + fx += fdx; + fy += fdy; + } + + for (; i < fastLen; ++i) { + int px = (fx >> 16); + int py = (fy >> 16); + if (useFetch) + buffer[i] = fetch(image.scanLine(py), px); + else + buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px]; + fx += fdx; + fy += fdy; + } + } + + for (; i < length; ++i) { + int px = (fx >> 16); + int py = (fy >> 16); + fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); + fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); + if (useFetch) + buffer[i] = fetch(image.scanLine(py), px); + else + buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px]; + fx += fdx; + fy += fdy; + } } } else { const qreal fdx = data->m11; @@ -1685,23 +2127,21 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, qreal fy = data->m22 * cy + data->m12 * cx + data->dy; qreal fw = data->m23 * cy + data->m13 * cx + data->m33; + T *const end = buffer + length; + T *b = buffer; while (b < end) { const qreal iw = fw == 0 ? 1 : 1 / fw; const qreal tx = fx * iw; const qreal ty = fy * iw; - int px = int(tx) - (tx < 0); - int py = int(ty) - (ty < 0); - - if (blendType == BlendTransformedTiled) { - px %= image_width; - py %= image_height; - if (px < 0) px += image_width; - if (py < 0) py += image_height; - } else { - px = qBound(image_x1, px, image_x2); - py = qBound(image_y1, py, image_y2); - } - *b = fetch(data->texture.scanLine(py), px); + int px = qFloor(tx); + int py = qFloor(ty); + + fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); + fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); + if (useFetch) + *b = fetch(image.scanLine(py), px); + else + *b = reinterpret_cast<const T*>(image.scanLine(py))[px]; fx += fdx; fy += fdy; @@ -1713,115 +2153,37 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, ++b; } } - return layout->convertToARGB32PM(buffer, buffer, length, data->texture.colorTable, 0); +} + +template<TextureBlendType blendType, QPixelLayout::BPP bpp> +static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data, + int y, int x, int length) +{ + Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); + const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; + fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length); + layout->convertToARGB32PM(buffer, length, data->texture.colorTable); + return buffer; } template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data, int y, int x, int length) { - const int image_width = data->texture.width; - const int image_height = data->texture.height; - const int image_x1 = data->texture.x1; - const int image_y1 = data->texture.y1; - const int image_x2 = data->texture.x2 - 1; - const int image_y2 = data->texture.y2 - 1; - - const qreal cx = x + qreal(0.5); - const qreal cy = y + qreal(0.5); - const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; - FetchPixelFunc fetch = qFetchPixel[layout->bpp]; - const QVector<QRgb> *clut = data->texture.colorTable; - - uint buffer32[buffer_size]; - QRgba64 *b = buffer; - if (data->fast_matrix) { - // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); - - int fx = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int fy = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - - int i = 0, j = 0; - while (i < length) { - if (j == buffer_size) { - layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0); - b += buffer_size; - j = 0; - } - int px = fx >> 16; - int py = fy >> 16; - - if (blendType == BlendTransformedTiled) { - px %= image_width; - py %= image_height; - if (px < 0) px += image_width; - if (py < 0) py += image_height; - } else { - px = qBound(image_x1, px, image_x2); - py = qBound(image_y1, py, image_y2); - } - buffer32[j] = fetch(data->texture.scanLine(py), px); - - fx += fdx; - fy += fdy; - ++i; ++j; - } - if (j > 0) { - layout->convertToARGB64PM(b, buffer32, j, clut, 0); - b += j; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - - qreal fx = data->m21 * cy + data->m11 * cx + data->dx; - qreal fy = data->m22 * cy + data->m12 * cx + data->dy; - qreal fw = data->m23 * cy + data->m13 * cx + data->m33; - - int i = 0, j = 0; - while (i < length) { - if (j == buffer_size) { - layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0); - b += buffer_size; - j = 0; - } - const qreal iw = fw == 0 ? 1 : 1 / fw; - const qreal tx = fx * iw; - const qreal ty = fy * iw; - int px = int(tx) - (tx < 0); - int py = int(ty) - (ty < 0); - - if (blendType == BlendTransformedTiled) { - px %= image_width; - py %= image_height; - if (px < 0) px += image_width; - if (py < 0) py += image_height; - } else { - px = qBound(image_x1, px, image_x2); - py = qBound(image_y1, py, image_y2); - } - buffer32[j] = fetch(data->texture.scanLine(py), px); - - fx += fdx; - fy += fdy; - fw += fdw; - //force increment to avoid /0 - if (!fw) { - fw += fdw; - } - ++i; ++j; - } - if (j > 0) { - layout->convertToARGB64PM(b, buffer32, j, clut, 0); - b += j; - } + if (layout->bpp != QPixelLayout::BPP64) { + uint buffer32[BufferSize]; + Q_ASSERT(length <= BufferSize); + if (layout->bpp == QPixelLayout::BPP32) + fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length); + else + fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length); + return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr); } + + fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length); + if (data->texture.format == QImage::Format_RGBA64) + convertRGBA64ToRGBA64PM(buffer, length); return buffer; } @@ -1915,43 +2277,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u } #endif -#if defined(__SSE2__) -static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty) -{ - __m128i vt = _mm_loadu_si128((const __m128i*)t); - if (disty) { - __m128i vb = _mm_loadu_si128((const __m128i*)b); - vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty)); - vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty)); - vt = _mm_add_epi16(vt, vb); - } - if (distx) { - const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); - const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); - vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); - vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); - } -#ifdef Q_PROCESSOR_X86_64 - return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt)); -#else - QRgba64 out; - _mm_storel_epi64((__m128i*)&out, vt); - return out; -#endif -} -#else -static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty) -{ - const uint dx = distx>>8; - const uint dy = disty>>8; - const uint idx = 256 - dx; - const uint idy = 256 - dy; - QRgba64 xtop = interpolate256(t[0], idx, t[1], dx); - QRgba64 xbot = interpolate256(b[0], idx, b[1], dx); - return interpolate256(xtop, idy, xbot, dy); -} -#endif - template<TextureBlendType blendType> void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2); @@ -1982,7 +2307,7 @@ inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, } enum FastTransformTypes { - SimpleUpscaleTransform, + SimpleScaleTransform, UpscaleTransform, DownscaleTransform, RotateTransform, @@ -1990,11 +2315,38 @@ enum FastTransformTypes { NFastTransformTypes }; +// Completes the partial interpolation stored in IntermediateBuffer. +// by performing the x-axis interpolation and joining the RB and AG buffers. +static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx) +{ +#if defined(QT_COMPILER_SUPPORTS_AVX2) + extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx); + if (qCpuHasFeature(AVX2)) + return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx); +#endif + + // Switch to intermediate buffer coordinates + fx -= offset * fixed_scale; + + while (b < end) { + const int x = (fx >> 16); + + const uint distx = (fx & 0x0000ffff) >> 8; + const uint idistx = 256 - distx; + const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00; + const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00; + *b = (rb >> 8) | ag; + b++; + fx += fdx; + } + fx += offset * fixed_scale; +} + typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); template<TextureBlendType blendType> -static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/) +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) { int y1 = (fy >> 16); int y2; @@ -2011,16 +2363,12 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u const int offset = (fx + adjust) >> 16; int x = offset; - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate between two pixel of this buffer. - - // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB - // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - quint32 intermediate_buffer[2][buffer_size + 2]; - // count is the size used in the intermediate_buffer. + IntermediateBuffer intermediate; + // count is the size used in the intermediate.buffer. int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + // length is supposed to be <= BufferSize either because data->m11 < 1 or + // data->m11 < 2, and any larger buffers split + Q_ASSERT(count <= BufferSize + 2); int f = 0; int lim = count; if (blendType == BlendTransformedBilinearTiled) { @@ -2035,8 +2383,8 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; do { - intermediate_buffer[0][f] = rb; - intermediate_buffer[1][f] = ag; + intermediate.buffer_rb[f] = rb; + intermediate.buffer_ag[f] = ag; f++; x++; } while (x < image.x1 && f < lim); @@ -2069,10 +2417,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u // Add the values, and shift to only keep 8 significant bits per colors __m128i rAG =_mm_add_epi16(topAG, bottomAG); rAG = _mm_srli_epi16(rAG, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG); + _mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG); __m128i rRB =_mm_add_epi16(topRB, bottomRB); rRB = _mm_srli_epi16(rRB, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB); + _mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB); } #elif defined(__ARM_NEON__) const int16x8_t disty_ = vdupq_n_s16(disty); @@ -2099,10 +2447,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u // Add the values, and shift to only keep 8 significant bits per colors int16x8_t rAG = vaddq_s16(topAG, bottomAG); rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG); + vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG); int16x8_t rRB = vaddq_s16(topRB, bottomRB); rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB); + vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB); } #endif } @@ -2116,28 +2464,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u uint t = s1[x]; uint b = s2[x]; - intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; x++; } - // Now interpolate the values from the intermediate_buffer to get the final result. - fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates - - while (b < end) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); - - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff; - int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00; - *b = rb | ag; - b++; - fx += fdx; - } + // Now interpolate the values from the intermediate.buffer to get the final result. + intermediate_adder(b, end, intermediate, offset, fx, fdx); } template<TextureBlendType blendType> @@ -2603,14 +2936,14 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = { { - fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear> }, { - fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>, @@ -2645,7 +2978,13 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c if (fdy == 0) { // simple scale, no rotation or shear if (qAbs(fdx) <= fixed_scale) { // simple scale up on X - bilinearFastTransformHelperARGB32PM[tiled][SimpleUpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + } else if (qAbs(fdx) <= 2 * fixed_scale) { + // simple scale down on X, less than 2x + const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2); + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy); + if (mid != length) + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy); } else if (qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); @@ -2713,16 +3052,13 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c return buffer; } -template<TextureBlendType blendType, QPixelLayout::BPP bpp> -static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/) +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) { const QPixelLayout *layout = &qPixelLayouts[image.format]; const QVector<QRgb> *clut = image.colorTable; - Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout->bpp); - // When templated 'fetch' should be inlined at compile time: - const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : fetchPixels<bpp>; - const ConvertFunc convertToARGB32PM = layout->convertToARGB32PM; + const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM; int y1 = (fy >> 16); int y2; @@ -2739,16 +3075,14 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, const int offset = (fx + adjust) >> 16; int x = offset; - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate between two pixel of this buffer. - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - uint buf1[buffer_size + 2]; - uint buf2[buffer_size + 2]; + IntermediateBuffer intermediate; + uint *buf1 = intermediate.buffer_rb; + uint *buf2 = intermediate.buffer_ag; const uint *ptr1; const uint *ptr2; int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + Q_ASSERT(count <= BufferSize + 2); if (blendType == BlendTransformedBilinearTiled) { x %= image.width; @@ -2757,10 +3091,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, int len1 = qMin(count, image.width - x); int len2 = qMin(x, count - len1); - ptr1 = fetch(buf1, s1, x, len1); - ptr1 = convertToARGB32PM(buf1, ptr1, len1, clut, 0); - ptr2 = fetch(buf2, s2, x, len1); - ptr2 = convertToARGB32PM(buf2, ptr2, len1, clut, 0); + ptr1 = fetch(buf1, s1, x, len1, clut, nullptr); + ptr2 = fetch(buf2, s2, x, len1, clut, nullptr); for (int i = 0; i < len1; ++i) { uint t = ptr1[i]; uint b = ptr2[i]; @@ -2769,10 +3101,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, } if (len2) { - ptr1 = fetch(buf1 + len1, s1, 0, len2); - ptr1 = convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0); - ptr2 = fetch(buf2 + len1, s2, 0, len2); - ptr2 = convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0); + ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr); + ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr); for (int i = 0; i < len2; ++i) { uint t = ptr1[i]; uint b = ptr2[i]; @@ -2780,6 +3110,7 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; } } + // Generate the rest by repeatedly repeating the previous set of pixels for (int i = image.width; i < count; ++i) { buf1[i] = buf1[i - image.width]; buf2[i] = buf2[i - image.width]; @@ -2790,10 +3121,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, int len = qMax(1, end - start); int leading = start - x; - ptr1 = fetch(buf1 + leading, s1, start, len); - ptr1 = convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0); - ptr2 = fetch(buf2 + leading, s2, start, len); - ptr2 = convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0); + ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr); + ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr); for (int i = 0; i < len; ++i) { uint t = ptr1[i]; @@ -2812,35 +3141,21 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, } } - // Now interpolate the values from the intermediate_buffer to get the final result. - fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates - - while (b < end) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); - - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff; - int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00; - *b++ = rb | ag; - fx += fdx; - } + // Now interpolate the values from the intermediate.buffer to get the final result. + intermediate_adder(b, end, intermediate, offset, fx, fdx); } -typedef void (QT_FASTCALL *BilinearFastTransformFetcher)(uint *buf1, uint *buf2, const int len, const QTextureData &image, - int fx, int fy, const int fdx, const int fdy); - -template<TextureBlendType blendType, QPixelLayout::BPP bpp> -static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const int len, const QTextureData &image, +template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T> +static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image, int fx, int fy, const int fdx, const int fdy) { const QPixelLayout &layout = qPixelLayouts[image.format]; - Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout.bpp); - // When templated 'fetch1' should be inlined at compile time: + constexpr bool useFetch = (bpp < QPixelLayout::BPP32); + if (useFetch) + Q_ASSERT(sizeof(T) == sizeof(uint)); + else + Q_ASSERT(layout.bpp == bpp); const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>; if (fdy == 0) { int y1 = (fy >> 16); @@ -2857,8 +3172,13 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); if (x1 != x2) break; - buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1); - buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1); + if (useFetch) { + buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1); + buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1); + } else { + buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1]; + buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1]; + } fx += fdx; } int fastLen = len; @@ -2869,10 +3189,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, for (; i < fastLen; ++i) { int x = (fx >> 16); - buf1[i * 2 + 0] = fetch1(s1, x); - buf1[i * 2 + 1] = fetch1(s1, x + 1); - buf2[i * 2 + 0] = fetch1(s2, x); - buf2[i * 2 + 1] = fetch1(s2, x + 1); + if (useFetch) { + buf1[i * 2 + 0] = fetch1(s1, x); + buf1[i * 2 + 1] = fetch1(s1, x + 1); + buf2[i * 2 + 0] = fetch1(s2, x); + buf2[i * 2 + 1] = fetch1(s2, x + 1); + } else { + buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x]; + buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1]; + buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x]; + buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1]; + } fx += fdx; } } @@ -2881,10 +3208,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, int x1 = (fx >> 16); int x2; fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); - buf1[i * 2 + 0] = fetch1(s1, x1); - buf1[i * 2 + 1] = fetch1(s1, x2); - buf2[i * 2 + 0] = fetch1(s2, x1); - buf2[i * 2 + 1] = fetch1(s2, x2); + if (useFetch) { + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + } else { + buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1]; + buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2]; + buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1]; + buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2]; + } fx += fdx; } } else { @@ -2901,10 +3235,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, break; const uchar *s1 = image.scanLine(y1); const uchar *s2 = image.scanLine(y2); - buf1[i * 2 + 0] = fetch1(s1, x1); - buf1[i * 2 + 1] = fetch1(s1, x2); - buf2[i * 2 + 0] = fetch1(s2, x1); - buf2[i * 2 + 1] = fetch1(s2, x2); + if (useFetch) { + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + } else { + buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1]; + buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2]; + buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1]; + buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2]; + } fx += fdx; fy += fdy; } @@ -2923,10 +3264,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, int y = (fy >> 16); const uchar *s1 = image.scanLine(y); const uchar *s2 = s1 + image.bytesPerLine; - buf1[i * 2 + 0] = fetch1(s1, x); - buf1[i * 2 + 1] = fetch1(s1, x + 1); - buf2[i * 2 + 0] = fetch1(s2, x); - buf2[i * 2 + 1] = fetch1(s2, x + 1); + if (useFetch) { + buf1[i * 2 + 0] = fetch1(s1, x); + buf1[i * 2 + 1] = fetch1(s1, x + 1); + buf2[i * 2 + 0] = fetch1(s2, x); + buf2[i * 2 + 1] = fetch1(s2, x + 1); + } else { + buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x]; + buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1]; + buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x]; + buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1]; + } fx += fdx; fy += fdy; } @@ -2942,10 +3290,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const uchar *s1 = image.scanLine(y1); const uchar *s2 = image.scanLine(y2); - buf1[i * 2 + 0] = fetch1(s1, x1); - buf1[i * 2 + 1] = fetch1(s1, x2); - buf2[i * 2 + 0] = fetch1(s2, x1); - buf2[i * 2 + 1] = fetch1(s2, x2); + if (useFetch) { + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + } else { + buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1]; + buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2]; + buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1]; + buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2]; + } fx += fdx; fy += fdy; } @@ -2977,18 +3332,23 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper if (fdy == 0) { // simple scale, no rotation or shear if (qAbs(fdx) <= fixed_scale) { // scale up on X - fetchTransformedBilinear_simple_upscale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy); + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy); + } else if (qAbs(fdx) <= 2 * fixed_scale) { // scale down on X less than 2x + const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2); + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy); + if (mid != length) + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy); } else { - const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>; + const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0); - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) int disty = (fy & 0x0000ffff) >> 8; @@ -3014,16 +3374,16 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper } } } else { // rotation or shear - const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>; + const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy); - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) { // If we are zooming more than 8 times, we use 8bit precision for the position. @@ -3070,15 +3430,15 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper qreal fy = data->m22 * cy + data->m12 * cx + data->dy; qreal fw = data->m23 * cy + data->m13 * cx + data->m33; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; - int distxs[buffer_size / 2]; - int distys[buffer_size / 2]; + int distxs[BufferSize / 2]; + int distys[BufferSize / 2]; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); for (int i = 0; i < len; ++i) { const qreal iw = fw == 0 ? 1 : 1 / fw; const qreal px = fx * iw - qreal(0.5); @@ -3110,8 +3470,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper fw += fdw; } - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); for (int i = 0; i < len; ++i) { int distx = distxs[i]; @@ -3128,19 +3488,27 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper } template<TextureBlendType blendType> -static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *, - const QSpanData *data, int y, int x, int length) +static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data, + int y, int x, int length) { - const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; + const QTextureData &texture = data->texture; + const QPixelLayout *layout = &qPixelLayouts[texture.format]; const QVector<QRgb> *clut = data->texture.colorTable; const qreal cx = x + qreal(0.5); const qreal cy = y + qreal(0.5); + uint sbuf1[BufferSize]; + uint sbuf2[BufferSize]; + QRgba64 buf1[BufferSize]; + QRgba64 buf2[BufferSize]; + QRgba64 *end = buffer + length; + QRgba64 *b = buffer; + if (data->fast_matrix) { // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); + const int fdx = (int)(data->m11 * fixed_scale); + const int fdy = (int)(data->m12 * fixed_scale); int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale); int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale); @@ -3148,20 +3516,14 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co fx -= half_point; fy -= half_point; - const BilinearFastTransformFetcher fetcher = + const auto fetcher = (layout->bpp == QPixelLayout::BPP32) - ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32> - : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone>; + ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint> + : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>; if (fdy == 0) { //simple scale, no rotation - - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; - QRgba64 *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); int disty = (fy & 0x0000ffff); #if defined(__SSE2__) const __m128i vdy = _mm_set1_epi16(disty); @@ -3169,9 +3531,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co #endif fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy); - layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0); + layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0); if (disty) - layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0); + layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0); for (int i = 0; i < len; ++i) { int distx = (fx & 0x0000ffff); @@ -3191,33 +3553,26 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co } _mm_storel_epi64((__m128i*)(b+i), vt); #else - b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); #endif fx += fdx; } length -= len; b += len; } - } else { //rotation - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; - QRgba64 *end = buffer + length; - QRgba64 *b = buffer; - + } else { // rotation or shear while (b < end) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy); - layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0); - layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0); + layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0); + layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0); for (int i = 0; i < len; ++i) { int distx = (fx & 0x0000ffff); int disty = (fy & 0x0000ffff); - b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); fx += fdx; fy += fdy; } @@ -3226,7 +3581,7 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co b += len; } } - } else { + } else { // !(data->fast_matrix) const QTextureData &image = data->texture; const qreal fdx = data->m11; @@ -3238,17 +3593,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co qreal fw = data->m23 * cy + data->m13 * cx + data->m33; FetchPixelFunc fetch = qFetchPixel[layout->bpp]; - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; - QRgba64 *b = buffer; - int distxs[buffer_size / 2]; - int distys[buffer_size / 2]; + int distxs[BufferSize / 2]; + int distys[BufferSize / 2]; - while (length) { - int len = qMin(length, buffer_size / 2); + while (b < end) { + int len = qMin(length, BufferSize / 2); for (int i = 0; i < len; ++i) { const qreal iw = fw == 0 ? 1 : 1 / fw; const qreal px = fx * iw - qreal(0.5); @@ -3265,21 +3615,165 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); + const uchar *s1 = texture.scanLine(y1); + const uchar *s2 = texture.scanLine(y2); - if (layout->bpp == QPixelLayout::BPP32) { - sbuf1[i * 2 + 0] = ((const uint*)s1)[x1]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[x2]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[x1]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[x2]; + sbuf1[i * 2 + 0] = fetch(s1, x1); + sbuf1[i * 2 + 1] = fetch(s1, x2); + sbuf2[i * 2 + 0] = fetch(s2, x1); + sbuf2[i * 2 + 1] = fetch(s2, x2); - } else { - sbuf1[i * 2 + 0] = fetch(s1, x1); - sbuf1[i * 2 + 1] = fetch(s1, x2); - sbuf2[i * 2 + 0] = fetch(s2, x1); - sbuf2[i * 2 + 1] = fetch(s2, x2); + fx += fdx; + fy += fdy; + fw += fdw; + //force increment to avoid /0 + if (!fw) + fw += fdw; + } + + layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0); + layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0); + + for (int i = 0; i < len; ++i) { + int distx = distxs[i]; + int disty = distys[i]; + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); + } + + length -= len; + b += len; + } + } + return buffer; +} + +template<TextureBlendType blendType> +static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data, + int y, int x, int length) +{ + const QTextureData &texture = data->texture; + Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64); + const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM; + + const qreal cx = x + qreal(0.5); + const qreal cy = y + qreal(0.5); + + QRgba64 buf1[BufferSize]; + QRgba64 buf2[BufferSize]; + QRgba64 *end = buffer + length; + QRgba64 *b = buffer; + + if (data->fast_matrix) { + // The increment pr x in the scanline + const int fdx = (int)(data->m11 * fixed_scale); + const int fdy = (int)(data->m12 * fixed_scale); + + int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale); + int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale); + + fx -= half_point; + fy -= half_point; + const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>; + + if (fdy == 0) { //simple scale, no rotation + while (length) { + int len = qMin(length, BufferSize / 2); + int disty = (fy & 0x0000ffff); +#if defined(__SSE2__) + const __m128i vdy = _mm_set1_epi16(disty); + const __m128i vidy = _mm_set1_epi16(0x10000 - disty); +#endif + fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy); + + convert(buf1, len * 2); + if (disty) + convert(buf2, len * 2); + + for (int i = 0; i < len; ++i) { + int distx = (fx & 0x0000ffff); +#if defined(__SSE2__) + __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2)); + if (disty) { + __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2)); + vt = _mm_mulhi_epu16(vt, vidy); + vb = _mm_mulhi_epu16(vb, vdy); + vt = _mm_add_epi16(vt, vb); + } + if (distx) { + const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); + const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); + vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx)); + vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8)); + } + _mm_storel_epi64((__m128i*)(b+i), vt); +#else + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); +#endif + fx += fdx; } + length -= len; + b += len; + } + } else { // rotation or shear + while (b < end) { + int len = qMin(length, BufferSize / 2); + + fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy); + + convert(buf1, len * 2); + convert(buf2, len * 2); + + for (int i = 0; i < len; ++i) { + int distx = (fx & 0x0000ffff); + int disty = (fy & 0x0000ffff); + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); + fx += fdx; + fy += fdy; + } + + length -= len; + b += len; + } + } + } else { // !(data->fast_matrix) + const QTextureData &image = data->texture; + + const qreal fdx = data->m11; + const qreal fdy = data->m12; + const qreal fdw = data->m13; + + qreal fx = data->m21 * cy + data->m11 * cx + data->dx; + qreal fy = data->m22 * cy + data->m12 * cx + data->dy; + qreal fw = data->m23 * cy + data->m13 * cx + data->m33; + + int distxs[BufferSize / 2]; + int distys[BufferSize / 2]; + + while (b < end) { + int len = qMin(length, BufferSize / 2); + for (int i = 0; i < len; ++i) { + const qreal iw = fw == 0 ? 1 : 1 / fw; + const qreal px = fx * iw - qreal(0.5); + const qreal py = fy * iw - qreal(0.5); + + int x1 = int(px) - (px < 0); + int x2; + int y1 = int(py) - (py < 0); + int y2; + + distxs[i] = int((px - x1) * (1<<16)); + distys[i] = int((py - y1) * (1<<16)); + + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); + + const uchar *s1 = texture.scanLine(y1); + const uchar *s2 = texture.scanLine(y2); + + buf1[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s1)[x1]; + buf1[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s1)[x2]; + buf2[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s2)[x1]; + buf2[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s2)[x2]; fx += fdx; fy += fdy; @@ -3289,23 +3783,31 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co fw += fdw; } - layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0); - layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0); + convert(buf1, len * 2); + convert(buf2, len * 2); for (int i = 0; i < len; ++i) { int distx = distxs[i]; int disty = distys[i]; - b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); + b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty); } length -= len; b += len; } } - return buffer; } +template<TextureBlendType blendType> +static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *, + const QSpanData *data, int y, int x, int length) +{ + if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64) + return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length); + return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length); +} + // FetchUntransformed can have more specialized methods added depending on SIMD features. static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = { 0, // Invalid @@ -3333,6 +3835,9 @@ static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = { fetchUntransformed, // Format_A2RGB30_Premultiplied fetchUntransformed, // Alpha8 fetchUntransformed, // Grayscale8 + fetchUntransformed, // RGBX64 + fetchUntransformed, // RGBA64 + fetchUntransformed, // RGBA64_Premultiplied }; static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = { @@ -3353,6 +3858,15 @@ static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = { fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled }; +static SourceFetchProc sourceFetchAny16[NBlendTypes] = { + fetchUntransformed, // Untransformed + fetchUntransformed, // Tiled + fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed + fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled + fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear + fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled +}; + static SourceFetchProc sourceFetchAny32[NBlendTypes] = { fetchUntransformed, // Untransformed fetchUntransformed, // Tiled @@ -3371,17 +3885,35 @@ static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = { fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled }; +static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = { + fetchUntransformedRGBA64PM, // Untransformed + fetchUntransformedRGBA64PM, // Tiled + fetchTransformed64<BlendTransformed>, // Transformed + fetchTransformed64<BlendTransformedTiled>, // TransformedTiled + fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear + fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled +}; + static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format) { if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied) return sourceFetchARGB32PM[blendType]; if (blendType == BlendUntransformed || blendType == BlendTiled) return sourceFetchUntransformed[format]; + if (qPixelLayouts[format].bpp == QPixelLayout::BPP16) + return sourceFetchAny16[blendType]; if (qPixelLayouts[format].bpp == QPixelLayout::BPP32) return sourceFetchAny32[blendType]; return sourceFetchGeneric[blendType]; } +static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format) +{ + if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied) + return sourceFetchRGBA64PM[blendType]; + return sourceFetchGeneric64[blendType]; +} + #define FIXPT_BITS 8 #define FIXPT_SIZE (1<<FIXPT_BITS) @@ -3724,7 +4256,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in case QSpanData::Texture: solidSource = !data->texture.hasAlpha; op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format); - op.srcFetch64 = sourceFetchGeneric64[getBlendType(data)]; + op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);; break; default: Q_UNREACHABLE(); @@ -3752,8 +4284,9 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in // If all spans are opaque we do not need to fetch dest. // But don't clear passthrough destFetch as they are just as fast and save destStore. if (op.destFetch != destFetchARGB32P) - op.destFetch = 0; - op.destFetch64 = destFetch64Undefined; + op.destFetch = destFetchUndefined; + if (op.destFetch64 != destFetchRGB64) + op.destFetch64 = destFetch64Undefined; } } @@ -3778,7 +4311,7 @@ static void blend_color_generic(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; + uint buffer[BufferSize]; Operator op = getOperator(data, spans, count); const uint color = data->solid.color.toArgb32(); @@ -3786,8 +4319,8 @@ void blend_color_generic(int count, const QSpan *spans, void *userData) int x = spans->x; int length = spans->len; while (length) { - int l = qMin(buffer_size, length); - uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; + int l = qMin(BufferSize, length); + uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); op.funcSolid(dest, l, color, spans->coverage); if (op.destStore) op.destStore(data->rasterBuffer, x, spans->y, dest, l); @@ -3802,7 +4335,7 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); const uint color = data->solid.color.toArgb32(); if (op.mode == QPainter::CompositionMode_Source) { @@ -3838,17 +4371,31 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData) return blend_color_generic(count, spans, userData); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const QRgba64 color = data->solid.color; + bool solidFill = data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source + || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver && color.isOpaque()); + bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; while (count--) { int x = spans->x; int length = spans->len; + if (solidFill && isBpp32 && spans->coverage == 255) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + if (length > 0) { + op.destStore64(data->rasterBuffer, x, spans->y, &color, 1); + uint *dest = (uint*)data->rasterBuffer->scanLine(spans->y) + x; + qt_memfill32(dest + 1, dest[0], length - 1); + length = 0; + } + } + while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.funcSolid64(dest, l, color, spans->coverage); - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans->y, dest, l); length -= l; x += l; } @@ -3950,7 +4497,7 @@ void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handle int length = right - x; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); length -= l; int process_length = l; @@ -3997,8 +4544,8 @@ struct QBlendBase BlendType *dest; - BlendType buffer[buffer_size]; - BlendType src_buffer[buffer_size]; + BlendType buffer[BufferSize]; + BlendType src_buffer[BufferSize]; }; class BlendSrcGeneric : public QBlendBase<uint> @@ -4011,7 +4558,7 @@ public: const uint *fetch(int x, int y, int len) { - dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, y, len) : buffer; + dest = op.destFetch(buffer, data->rasterBuffer, x, y, len); return op.srcFetch(src_buffer, &op, data, y, x, len); } @@ -4037,7 +4584,7 @@ public: bool isSupported() const { - return op.func64 && op.destFetch64 && op.destStore64; + return op.func64 && op.destFetch64; } const quint64 *fetch(int x, int y, int len) @@ -4053,7 +4600,8 @@ public: void store(int x, int y, int len) { - op.destStore64(data->rasterBuffer, x, y, (QRgba64 *)dest, len); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, y, (QRgba64 *)dest, len); } }; @@ -4082,8 +4630,8 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; - uint src_buffer[buffer_size]; + uint buffer[BufferSize]; + uint src_buffer[BufferSize]; Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; @@ -4107,9 +4655,9 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use if (length > 0) { const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); - uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; + uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); op.func(dest, src, l, coverage); if (op.destStore) op.destStore(data->rasterBuffer, x, spans->y, dest, l); @@ -4132,8 +4680,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_untransformed_generic(count, spans, userData); } - quint64 buffer[buffer_size]; - quint64 src_buffer[buffer_size]; + quint64 buffer[BufferSize]; + quint64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4156,11 +4704,12 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi if (length > 0) { const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.func64(dest, src, l, coverage); - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans->y, dest, l); x += l; sx += l; length -= l; @@ -4320,8 +4869,8 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; - uint src_buffer[buffer_size]; + uint buffer[BufferSize]; + uint src_buffer[BufferSize]; Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; @@ -4347,10 +4896,10 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData) const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); - uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; + uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); op.func(dest, src, l, coverage); if (op.destStore) op.destStore(data->rasterBuffer, x, spans->y, dest, l); @@ -4373,8 +4922,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_tiled_generic(count, spans, userData); } - quint64 buffer[buffer_size]; - quint64 src_buffer[buffer_size]; + quint64 buffer[BufferSize]; + quint64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4386,6 +4935,50 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD if (yoff < 0) yoff += image_height; + bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; + if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && isBpp32) { + // If destination isn't blended into the result, we can do the tiling directly on destination pixels. + while (count--) { + int x = spans->x; + int y = spans->y; + int length = spans->len; + int sx = (xoff + spans->x) % image_width; + int sy = (spans->y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; + + int sl = qMin(image_width, length); + if (sx > 0 && sl > 0) { + int l = qMin(image_width - sx, sl); + const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); + op.destStore64(data->rasterBuffer, x, y, src, l); + x += l; + sx += l; + sl -= l; + if (sx >= image_width) + sx = 0; + } + if (sl > 0) { + Q_ASSERT(sx == 0); + const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, sl); + op.destStore64(data->rasterBuffer, x, y, src, sl); + x += sl; + sx += sl; + sl -= sl; + if (sx >= image_width) + sx = 0; + } + uint *dest = (uint*)data->rasterBuffer->scanLine(y) + x - image_width; + for (int i = image_width; i < length; ++i) { + dest[i] = dest[i - image_width]; + } + ++spans; + } + return; + } + while (count--) { int x = spans->x; int length = spans->len; @@ -4399,12 +4992,13 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.func64(dest, src, l, coverage); - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans->y, dest, l); x += l; sx += l; length -= l; @@ -4449,8 +5043,8 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData) const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const uint *src = (const uint *)data->texture.scanLine(sy) + sx; uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x; op.func(dest, src, l, coverage); @@ -4509,8 +5103,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) int tx = x; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx; const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; memcpy(dest, src, l * sizeof(quint16)); @@ -4545,8 +5139,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) if (alpha > 0) { while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x; const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha); @@ -4562,723 +5156,12 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) } } -static void blend_transformed_bilinear_rgb565(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData*>(userData); - QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; - - if (data->texture.format != QImage::Format_RGB16 - || (mode != QPainter::CompositionMode_SourceOver - && mode != QPainter::CompositionMode_Source)) - { - blend_src_generic(count, spans, userData); - return; - } - - quint16 buffer[buffer_size]; - - const int src_minx = data->texture.x1; - const int src_miny = data->texture.y1; - const int src_maxx = data->texture.x2 - 1; - const int src_maxy = data->texture.y2 - 1; - - if (data->fast_matrix) { - // The increment pr x in the scanline - const int fdx = (int)(data->m11 * fixed_scale); - const int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale) - half_point; - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale) - half_point; - int length = spans->len; - - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - int x1 = (x >> 16); - int x2; - int y1 = (y >> 16); - int y2; - - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_minx, src_maxx, x1, x2); - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_miny, src_maxy, y1, y2); - - const quint16 *src1 = (const quint16*)data->texture.scanLine(y1); - const quint16 *src2 = (const quint16*)data->texture.scanLine(y2); - quint16 tl = src1[x1]; - const quint16 tr = src1[x2]; - quint16 bl = src2[x1]; - const quint16 br = src2[x2]; - - const uint distxsl8 = x & 0xff00; - const uint distysl8 = y & 0xff00; - const uint distx = distxsl8 >> 8; - const uint disty = distysl8 >> 8; - const uint distxy = distx * disty; - - const uint tlw = 0x10000 - distxsl8 - distysl8 + distxy; // (256 - distx) * (256 - disty) - const uint trw = distxsl8 - distxy; // distx * (256 - disty) - const uint blw = distysl8 - distxy; // (256 - distx) * disty - const uint brw = distxy; // distx * disty - uint red = ((tl & 0xf800) * tlw + (tr & 0xf800) * trw - + (bl & 0xf800) * blw + (br & 0xf800) * brw) & 0xf8000000; - uint green = ((tl & 0x07e0) * tlw + (tr & 0x07e0) * trw - + (bl & 0x07e0) * blw + (br & 0x07e0) * brw) & 0x07e00000; - uint blue = ((tl & 0x001f) * tlw + (tr & 0x001f) * trw - + (bl & 0x001f) * blw + (br & 0x001f) * brw); - *b = quint16((red | green | blue) >> 16); - - ++b; - x += fdx; - y += fdy; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal px = x * iw - qreal(0.5); - const qreal py = y * iw - qreal(0.5); - - int x1 = int(px) - (px < 0); - int x2; - int y1 = int(py) - (py < 0); - int y2; - - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_minx, src_maxx, x1, x2); - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_miny, src_maxy, y1, y2); - - const quint16 *src1 = (const quint16 *)data->texture.scanLine(y1); - const quint16 *src2 = (const quint16 *)data->texture.scanLine(y2); - quint16 tl = src1[x1]; - const quint16 tr = src1[x2]; - quint16 bl = src2[x1]; - const quint16 br = src2[x2]; - - const uint distx = uint((px - x1) * 256); - const uint disty = uint((py - y1) * 256); - const uint distxsl8 = distx << 8; - const uint distysl8 = disty << 8; - const uint distxy = distx * disty; - - const uint tlw = 0x10000 - distxsl8 - distysl8 + distxy; // (256 - distx) * (256 - disty) - const uint trw = distxsl8 - distxy; // distx * (256 - disty) - const uint blw = distysl8 - distxy; // (256 - distx) * disty - const uint brw = distxy; // distx * disty - uint red = ((tl & 0xf800) * tlw + (tr & 0xf800) * trw - + (bl & 0xf800) * blw + (br & 0xf800) * brw) & 0xf8000000; - uint green = ((tl & 0x07e0) * tlw + (tr & 0x07e0) * trw - + (bl & 0x07e0) * blw + (br & 0x07e0) * brw) & 0x07e00000; - uint blue = ((tl & 0x001f) * tlw + (tr & 0x001f) * trw - + (bl & 0x001f) * blw + (br & 0x001f) * brw); - *b = quint16((red | green | blue) >> 16); - - ++b; - x += fdx; - y += fdy; - w += fdw; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } -} - -static void blend_transformed_argb(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData *>(userData); - if (data->texture.format != QImage::Format_ARGB32_Premultiplied - && data->texture.format != QImage::Format_RGB32) { - blend_src_generic(count, spans, userData); - return; - } - - CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode]; - uint buffer[buffer_size]; - quint32 mask = (data->texture.format == QImage::Format_RGB32) ? 0xff000000 : 0; - - const int image_x1 = data->texture.x1; - const int image_y1 = data->texture.y1; - const int image_x2 = data->texture.x2 - 1; - const int image_y2 = data->texture.y2 - 1; - - if (data->fast_matrix) { - // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - void *t = data->rasterBuffer->scanLine(spans->y); - - uint *target = ((uint *)t) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - - int length = spans->len; - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(length, buffer_size); - const uint *end = buffer + l; - uint *b = buffer; - while (b < end) { - int px = qBound(image_x1, x >> 16, image_x2); - int py = qBound(image_y1, y >> 16, image_y2); - *b = reinterpret_cast<const uint *>(data->texture.scanLine(py))[px] | mask; - - x += fdx; - y += fdy; - ++b; - } - func(target, buffer, l, coverage); - target += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - while (count--) { - void *t = data->rasterBuffer->scanLine(spans->y); - - uint *target = ((uint *)t) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(length, buffer_size); - const uint *end = buffer + l; - uint *b = buffer; - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal tx = x * iw; - const qreal ty = y * iw; - const int px = qBound(image_x1, int(tx) - (tx < 0), image_x2); - const int py = qBound(image_y1, int(ty) - (ty < 0), image_y2); - - *b = reinterpret_cast<const uint *>(data->texture.scanLine(py))[px] | mask; - x += fdx; - y += fdy; - w += fdw; - - ++b; - } - func(target, buffer, l, coverage); - target += l; - length -= l; - } - ++spans; - } - } -} - -static void blend_transformed_rgb565(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData*>(userData); - QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; - - if (data->texture.format != QImage::Format_RGB16 - || (mode != QPainter::CompositionMode_SourceOver - && mode != QPainter::CompositionMode_Source)) - { - blend_src_generic(count, spans, userData); - return; - } - - quint16 buffer[buffer_size]; - const int image_x1 = data->texture.x1; - const int image_y1 = data->texture.y1; - const int image_x2 = data->texture.x2 - 1; - const int image_y2 = data->texture.y2 - 1; - - if (data->fast_matrix) { - // The increment pr x in the scanline - const int fdx = (int)(data->m11 * fixed_scale); - const int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - int length = spans->len; - - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - const int px = qBound(image_x1, x >> 16, image_x2); - const int py = qBound(image_y1, y >> 16, image_y2); - - *b = ((const quint16 *)data->texture.scanLine(py))[px]; - ++b; - - x += fdx; - y += fdy; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal tx = x * iw; - const qreal ty = y * iw; - - const int px = qBound(image_x1, int(tx) - (tx < 0), image_x2); - const int py = qBound(image_y1, int(ty) - (ty < 0), image_y2); - - *b = ((const quint16 *)data->texture.scanLine(py))[px]; - ++b; - - x += fdx; - y += fdy; - w += fdw; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } -} - -static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData *>(userData); - if (data->texture.format != QImage::Format_ARGB32_Premultiplied - && data->texture.format != QImage::Format_RGB32) { - blend_src_generic(count, spans, userData); - return; - } - - CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode]; - uint buffer[buffer_size]; - - int image_width = data->texture.width; - int image_height = data->texture.height; - const qsizetype scanline_offset = data->texture.bytesPerLine / 4; - - if (data->fast_matrix) { - // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - void *t = data->rasterBuffer->scanLine(spans->y); - - uint *target = ((uint *)t) + spans->x; - const uint *image_bits = (const uint *)data->texture.imageData; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - int length = spans->len; - while (length) { - int l = qMin(length, buffer_size); - const uint *end = buffer + l; - uint *b = buffer; - int px16 = x % (image_width << 16); - int py16 = y % (image_height << 16); - int px_delta = fdx % (image_width << 16); - int py_delta = fdy % (image_height << 16); - while (b < end) { - if (px16 < 0) px16 += image_width << 16; - if (py16 < 0) py16 += image_height << 16; - int px = px16 >> 16; - int py = py16 >> 16; - int y_offset = py * scanline_offset; - - Q_ASSERT(px >= 0 && px < image_width); - Q_ASSERT(py >= 0 && py < image_height); - - *b = image_bits[y_offset + px]; - x += fdx; - y += fdy; - px16 += px_delta; - if (px16 >= image_width << 16) - px16 -= image_width << 16; - py16 += py_delta; - if (py16 >= image_height << 16) - py16 -= image_height << 16; - ++b; - } - func(target, buffer, l, coverage); - target += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - while (count--) { - void *t = data->rasterBuffer->scanLine(spans->y); - - uint *target = ((uint *)t) + spans->x; - const uint *image_bits = (const uint *)data->texture.imageData; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - int length = spans->len; - while (length) { - int l = qMin(length, buffer_size); - const uint *end = buffer + l; - uint *b = buffer; - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal tx = x * iw; - const qreal ty = y * iw; - int px = int(tx) - (tx < 0); - int py = int(ty) - (ty < 0); - - px %= image_width; - py %= image_height; - if (px < 0) px += image_width; - if (py < 0) py += image_height; - int y_offset = py * scanline_offset; - - Q_ASSERT(px >= 0 && px < image_width); - Q_ASSERT(py >= 0 && py < image_height); - - *b = image_bits[y_offset + px]; - x += fdx; - y += fdy; - w += fdw; - //force increment to avoid /0 - if (!w) { - w += fdw; - } - ++b; - } - func(target, buffer, l, coverage); - target += l; - length -= l; - } - ++spans; - } - } -} - -static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData*>(userData); - QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; - - if (data->texture.format != QImage::Format_RGB16 - || (mode != QPainter::CompositionMode_SourceOver - && mode != QPainter::CompositionMode_Source)) - { - blend_src_generic(count, spans, userData); - return; - } - - quint16 buffer[buffer_size]; - const int image_width = data->texture.width; - const int image_height = data->texture.height; - - if (data->fast_matrix) { - // The increment pr x in the scanline - const int fdx = (int)(data->m11 * fixed_scale); - const int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - int length = spans->len; - - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - int px = (x >> 16) % image_width; - int py = (y >> 16) % image_height; - - if (px < 0) - px += image_width; - if (py < 0) - py += image_height; - - *b = ((const quint16 *)data->texture.scanLine(py))[px]; - ++b; - - x += fdx; - y += fdy; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal tx = x * iw; - const qreal ty = y * iw; - - int px = int(tx) - (tx < 0); - int py = int(ty) - (ty < 0); - - px %= image_width; - py %= image_height; - if (px < 0) - px += image_width; - if (py < 0) - py += image_height; - - *b = ((const quint16 *)data->texture.scanLine(py))[px]; - ++b; - - x += fdx; - y += fdy; - w += fdw; - // force increment to avoid /0 - if (!w) - w += fdw; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } -} - - /* Image formats here are target formats */ static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = { blend_untransformed_argb, // Untransformed blend_tiled_argb, // Tiled - blend_transformed_argb, // Transformed - blend_transformed_tiled_argb, // TransformedTiled + blend_src_generic, // Transformed + blend_src_generic, // TransformedTiled blend_src_generic, // TransformedBilinear blend_src_generic // TransformedBilinearTiled }; @@ -5286,9 +5169,9 @@ static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = { static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = { blend_untransformed_rgb565, // Untransformed blend_tiled_rgb565, // Tiled - blend_transformed_rgb565, // Transformed - blend_transformed_tiled_rgb565, // TransformedTiled - blend_transformed_bilinear_rgb565, // TransformedBilinear + blend_src_generic, // Transformed + blend_src_generic, // TransformedTiled + blend_src_generic, // TransformedBilinear blend_src_generic // TransformedBilinearTiled }; @@ -5330,6 +5213,9 @@ void qBlendTexture(int count, const QSpan *spans, void *userData) case QImage::Format_A2BGR30_Premultiplied: case QImage::Format_RGB30: case QImage::Format_A2RGB30_Premultiplied: + case QImage::Format_RGBX64: + case QImage::Format_RGBA64: + case QImage::Format_RGBA64_Premultiplied: proc = processTextureSpansGeneric64[blendType]; break; case QImage::Format_Invalid: @@ -5586,7 +5472,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; @@ -5595,13 +5481,14 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, int i = x; int length = mapWidth; while (length > 0) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); for (int j=0; j < l; ++j) { const int coverage = map[j + (i - x)]; alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile); } - destStore64(rasterBuffer, i, y + ly, dest, l); + if (destStore64) + destStore64(rasterBuffer, i, y + ly, dest, l); length -= l; i += l; } @@ -5624,14 +5511,15 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, int end = qMin<int>(x + mapWidth, clip.x + clip.len); if (end <= start) continue; - Q_ASSERT(end - start <= buffer_size); + Q_ASSERT(end - start <= BufferSize); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start); for (int xp=start; xp<end; ++xp) { const int coverage = map[xp - x]; alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile); } - destStore64(rasterBuffer, start, clip.y, dest, end - start); + if (destStore64) + destStore64(rasterBuffer, start, clip.y, dest, end - start); } // for (i -> line.count) map += mapStride; } // for (yp -> bottom) @@ -5903,7 +5791,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; @@ -5912,13 +5800,14 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, int i = x; int length = mapWidth; while (length > 0) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); for (int j=0; j < l; ++j) { const uint coverage = src[j + (i - x)]; alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile); } - destStore64(rasterBuffer, i, y + ly, dest, l); + if (destStore64) + destStore64(rasterBuffer, i, y + ly, dest, l); length -= l; i += l; } @@ -5941,14 +5830,15 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, int end = qMin<int>(x + mapWidth, clip.x + clip.len); if (end <= start) continue; - Q_ASSERT(end - start <= buffer_size); + Q_ASSERT(end - start <= BufferSize); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start); for (int xp=start; xp<end; ++xp) { const uint coverage = src[xp - x]; alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile); } - destStore64(rasterBuffer, start, clip.y, dest, end - start); + if (destStore64) + destStore64(rasterBuffer, start, clip.y, dest, end - start); } // for (i -> line.count) src += srcStride; } // for (yp -> bottom) @@ -6031,8 +5921,24 @@ static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer, int x, int y, int width, int height, const QRgba64 &color) { + const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format]; + quint32 c32 = color.toArgb32(); + quint16 c16; + layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr); qt_rectfill<quint16>(reinterpret_cast<quint16 *>(rasterBuffer->buffer()), - color.toRgb16(), x, y, width, height, rasterBuffer->bytesPerLine()); + c16, x, y, width, height, rasterBuffer->bytesPerLine()); +} + +static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer, + int x, int y, int width, int height, + const QRgba64 &color) +{ + const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format]; + quint32 c32 = color.toArgb32(); + quint24 c24; + layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr); + qt_rectfill<quint24>(reinterpret_cast<quint24 *>(rasterBuffer->buffer()), + c24, x, y, width, height, rasterBuffer->bytesPerLine()); } static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer, @@ -6084,6 +5990,17 @@ static void qt_rectfill_gray(QRasterBuffer *rasterBuffer, qGray(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine()); } +static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer, + int x, int y, int width, int height, + const QRgba64 &color) +{ + const auto store = qStoreFromRGBA64PM[rasterBuffer->format]; + quint64 c64; + store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr); + qt_rectfill<quint64>(reinterpret_cast<quint64 *>(rasterBuffer->buffer()), + c64, x, y, width, height, rasterBuffer->bytesPerLine()); +} + // Map table for destination image format. Contains function pointers // for blends of various types unto the destination @@ -6152,7 +6069,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB666 { @@ -6161,7 +6078,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_ARGB6666_Premultiplied { @@ -6170,7 +6087,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB555 { @@ -6179,7 +6096,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_ARGB8555_Premultiplied { @@ -6188,7 +6105,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB888 { @@ -6197,7 +6114,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB444 { @@ -6206,7 +6123,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_ARGB4444_Premultiplied { @@ -6215,7 +6132,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_RGBX8888 { @@ -6298,6 +6215,33 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = qt_alphargbblit_generic, qt_rectfill_gray }, + // Format_RGBX64 + { + blend_color_generic_rgb64, + blend_src_generic_rgb64, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + qt_rectfill_quint64 + }, + // Format_RGBA64 + { + blend_color_generic_rgb64, + blend_src_generic_rgb64, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + qt_rectfill_quint64 + }, + // Format_RGBA64_Premultiplied + { + blend_color_generic_rgb64, + blend_src_generic_rgb64, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + qt_rectfill_quint64 + }, }; #if defined(Q_CC_MSVC) && !defined(_MIPS_) @@ -6370,7 +6314,7 @@ void qt_memfill32(quint32 *dest, quint32 color, int count) #endif #ifdef QT_COMPILER_SUPPORTS_SSE4_1 -template<QtPixelOrder> const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); +template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); #endif extern void qInitBlendFunctions(); @@ -6439,42 +6383,54 @@ static void qInitDrawhelperFunctions() int w, int h, int const_alpha); - extern void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count); extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data, int y, int x, int length); qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; - qStorePixels[QPixelLayout::BPP24] = storePixelsBPP24_ssse3; sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3; } #endif // SSSE3 #if defined(QT_COMPILER_SUPPORTS_SSE4_1) if (qCpuHasFeature(SSE4_1)) { - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *); + extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); + extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); + qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; - qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; - qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].convertFromARGB32PM = convertA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>; - qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].convertFromARGB32PM = convertA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>; + qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4; + qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>; + qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>; + qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4; + qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4; + destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4; + destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4; } #endif #if defined(QT_COMPILER_SUPPORTS_AVX2) - if (qCpuHasFeature(AVX2)) { + if (qCpuHasFeature(ArchHaswell)) { extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha); @@ -6504,14 +6460,14 @@ static void qInitDrawhelperFunctions() qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2; qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2; - extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/); + extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/); extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int /*fdy*/); extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); - bilinearFastTransformHelperARGB32PM[0][SimpleUpscaleTransform] = fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2; + bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2; } @@ -6543,12 +6499,25 @@ static void qInitDrawhelperFunctions() sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon; #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *); + extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon; qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon; + qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon; #endif #if defined(ENABLE_PIXMAN_DRAWHELPERS) |