diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 1707 |
1 files changed, 817 insertions, 890 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index f3df62b855..c6d7ffd835 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. ** Copyright (C) 2016 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** @@ -73,9 +73,6 @@ enum { half_point = 1 << 15 }; -// must be multiple of 4 for easier SIMD implementations -static const int buffer_size = 2048; - template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth(); template<QImage::Format> Q_DECL_CONSTEXPR uint redShift(); template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth(); @@ -94,6 +91,10 @@ template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied> template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; } @@ -103,6 +104,15 @@ template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied> template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; } template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } +#else +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } +#endif template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; } @@ -112,6 +122,10 @@ template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; } template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; } @@ -121,6 +135,15 @@ template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; } template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } +#else +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } +#endif template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; } @@ -130,6 +153,10 @@ template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; } @@ -139,6 +166,15 @@ template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } +#else +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; } +template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } +#endif template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; } @@ -148,6 +184,10 @@ template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; } template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; } +template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } + template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; } @@ -157,54 +197,129 @@ template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplie template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; } template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; } +#if Q_BYTE_ORDER == Q_BIG_ENDIAN +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } +#else +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; } +template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } +#endif -template<QImage::Format> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel(); -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; } -template<> Q_DECL_CONSTEXPR QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; } +template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel(); +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; } +template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; } -template<QImage::Format Format> -static const uint *QT_FASTCALL convertToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count); + +template <QPixelLayout::BPP bpp> static +uint QT_FASTCALL fetchPixel(const uchar *, int) { - auto conversion = [](uint s) { - // MSVC needs these constexpr defined in here otherwise it will create a capture. - Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); - Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); - Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); + Q_UNREACHABLE(); + return 0; +} - Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); - Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); - Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index) +{ + return (src[index >> 3] >> (index & 7)) & 1; +} - Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; - Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; - Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index) +{ + return (src[index >> 3] >> (~index & 7)) & 1; +} - uint red = (s >> redShift<Format>()) & redMask; - uint green = (s >> greenShift<Format>()) & greenMask; - uint blue = (s >> blueShift<Format>()) & blueMask; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index) +{ + return src[index]; +} - red = ((red << redLeftShift) | (red >> redRightShift)) << 16; - green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - return 0xff000000 | red | green | blue; - }; +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index) +{ + return reinterpret_cast<const quint16 *>(src)[index]; +} + +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index) +{ + return reinterpret_cast<const quint24 *>(src)[index]; +} + +template <> +inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index) +{ + return reinterpret_cast<const uint *>(src)[index]; +} - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); +template <QPixelLayout::BPP bpp> +inline const uint *QT_FASTCALL fetchPixels(uint *buffer, const uchar *src, int index, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = fetchPixel<bpp>(src, index + i); return buffer; } +template <> +inline const uint *QT_FASTCALL fetchPixels<QPixelLayout::BPP32>(uint *, const uchar *src, int index, int) +{ + return reinterpret_cast<const uint *>(src) + index; +} + +template <QPixelLayout::BPP width> static +void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel); + +template <> +inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel) +{ + reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel); +} + +template <> +inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel) +{ + reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel); +} + +static FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount] = { + 0, // BPPNone + fetchPixels<QPixelLayout::BPP1MSB>, // BPP1MSB + fetchPixels<QPixelLayout::BPP1LSB>, // BPP1LSB + fetchPixels<QPixelLayout::BPP8>, // BPP8 + fetchPixels<QPixelLayout::BPP16>, // BPP16 + fetchPixels<QPixelLayout::BPP24>, // BPP24 + fetchPixels<QPixelLayout::BPP32> // BPP32 +}; + +typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index); + +static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = { + 0, // BPPNone + fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB + fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB + fetchPixel<QPixelLayout::BPP8>, // BPP8 + fetchPixel<QPixelLayout::BPP16>, // BPP16 + fetchPixel<QPixelLayout::BPP24>, // BPP24 + fetchPixel<QPixelLayout::BPP32> // BPP32 +}; + template<QImage::Format Format> -static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s) { Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); @@ -218,23 +333,54 @@ static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *sr Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; - for (int i = 0; i < count; ++i) { - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; + uint red = (s >> redShift<Format>()) & redMask; + uint green = (s >> greenShift<Format>()) & greenMask; + uint blue = (s >> blueShift<Format>()) & blueMask; + + red = ((red << redLeftShift) | (red >> redRightShift)) << 16; + green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; + blue = (blue << blueLeftShift) | (blue >> blueRightShift); + return 0xff000000 | red | green | blue; +} + +template<QImage::Format Format> +static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB32<Format>(buffer[i]); +} - red = ((red << redLeftShift) | (red >> redRightShift)); - green = ((green << greenLeftShift) | (green >> greenRightShift)); - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = QRgba64::fromRgba(red, green, blue, 255); +template<QImage::Format Format> +static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); +#if defined(__SSE2__) && !defined(__SSSE3__) + if (BPP == QPixelLayout::BPP24) { + // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 + // to vectorize the deforested version below. + qFetchPixels[BPP](buffer, src, index, count); + convertToRGB32<Format>(buffer, count, nullptr); + return buffer; } +#endif + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i)); + return buffer; +} + +template<QImage::Format Format> +static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = QRgba64::fromArgb32(convertPixelToRGB32<Format>(src[i])); return buffer; } template<QImage::Format Format> -static const uint *QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s) { Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1); Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); @@ -255,34 +401,48 @@ static const uint *QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, const uint (alphaWidth<Format>() != greenWidth<Format>()) || (alphaWidth<Format>() != blueWidth<Format>()); + uint alpha = (s >> alphaShift<Format>()) & alphaMask; + uint red = (s >> redShift<Format>()) & redMask; + uint green = (s >> greenShift<Format>()) & greenMask; + uint blue = (s >> blueShift<Format>()) & blueMask; + + alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); + red = (red << redLeftShift) | (red >> redRightShift); + green = (green << greenLeftShift) | (green >> greenRightShift); + blue = (blue << blueLeftShift) | (blue >> blueRightShift); + if (mustMin) { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = qMin(alpha, (red << redLeftShift) | (red >> redRightShift)); - green = qMin(alpha, (green << greenLeftShift) | (green >> greenRightShift)); - blue = qMin(alpha, (blue << blueLeftShift) | (blue >> blueRightShift)); - buffer[i] = (alpha << 24) | (red << 16) | (green << 8) | blue; - } - } else { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = ((alpha << alphaLeftShift) | (alpha >> alphaRightShift)) << 24; - red = ((red << redLeftShift) | (red >> redRightShift)) << 16; - green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = alpha | red | green | blue; - } + red = qMin(alpha, red); + green = qMin(alpha, green); + blue = qMin(alpha, blue); } + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + +template<QImage::Format Format> +static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]); +} + +template<QImage::Format Format> +static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); +#if defined(__SSE2__) && !defined(__SSSE3__) + if (BPP == QPixelLayout::BPP24) { + // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 + // to vectorize the deforested version below. + qFetchPixels[BPP](buffer, src, index, count); + convertARGBPMToARGB32PM<Format>(buffer, count, nullptr); + return buffer; + } +#endif + for (int i = 0; i < count; ++i) + buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i)); return buffer; } @@ -290,81 +450,37 @@ template<QImage::Format Format> static const QRgba64 *QT_FASTCALL convertARGBPMToARGB64PM(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { - Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1); - Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); - Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); - Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); - - Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>(); - Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); - Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); - Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); - - Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8; - Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; - Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; - Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; - - Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) || - (alphaWidth<Format>() != greenWidth<Format>()) || - (alphaWidth<Format>() != blueWidth<Format>()); - - if (mustMin) { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = qMin(alpha, (red << redLeftShift) | (red >> redRightShift)); - green = qMin(alpha, (green << greenLeftShift) | (green >> greenRightShift)); - blue = qMin(alpha, (blue << blueLeftShift) | (blue >> blueRightShift)); - buffer[i] = QRgba64::fromRgba(red, green, blue, alpha); - } - } else { - for (int i = 0; i < count; ++i) { - uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask; - uint red = (src[i] >> redShift<Format>()) & redMask; - uint green = (src[i] >> greenShift<Format>()) & greenMask; - uint blue = (src[i] >> blueShift<Format>()) & blueMask; - - alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); - red = (red << redLeftShift) | (red >> redRightShift); - green = (green << greenLeftShift) | (green >> greenRightShift); - blue = (blue << blueLeftShift) | (blue >> blueRightShift); - buffer[i] = QRgba64::fromRgba(red, green, blue, alpha); - } - } + for (int i = 0; i < count; ++i) + buffer[i] = QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(src[i])); return buffer; } template<QImage::Format Format, bool fromRGB> -static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { Q_CONSTEXPR uchar rWidth = redWidth<Format>(); Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); // RGB32 -> RGB888 is not a precision loss. if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) { - auto conversion = [](uint s) { - const uint c = fromRGB ? s : qUnpremultiply(s); - Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; - Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; - Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; - Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); - Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); - Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; + Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; + Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; + Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); + Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); + Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + for (int i = 0; i < count; ++i) { + const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]); const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); - return r | g | b; + storePixel<BPP>(dest, index + i, r | g | b); }; - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); } else { // We do ordered dither by using a rounding conversion, but instead of // adding half of input precision, we add the adjusted result from the @@ -384,38 +500,39 @@ static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint * r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); - buffer[i] = (r << redShift<Format>()) - | (g << greenShift<Format>()) - | (b << blueShift<Format>()); + const uint s = (r << redShift<Format>()) + | (g << greenShift<Format>()) + | (b << blueShift<Format>()); + storePixel<BPP>(dest, index + i, s); } } - return buffer; } template<QImage::Format Format, bool fromRGB> -static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { + constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); if (!dither) { - auto conversion = [](uint c) { - Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1; - Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; - Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; - Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; - - Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>(); - Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); - Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); - Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); - - Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>(); + Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1; + Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; + Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; + Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; + + Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>(); + Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); + Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); + Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); + + Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>(); + for (int i = 0; i < count; ++i) { + const uint c = src[i]; const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>()); const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); - return a | r | g | b; + storePixel<BPP>(dest, index + i, a | r | g | b); }; - UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion); } else { Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); Q_CONSTEXPR uchar rWidth = redWidth<Format>(); @@ -441,55 +558,117 @@ static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uin r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); - buffer[i] = (a << alphaShift<Format>()) - | (r << redShift<Format>()) - | (g << greenShift<Format>()) - | (b << blueShift<Format>()); + uint s = (a << alphaShift<Format>()) + | (r << redShift<Format>()) + | (g << greenShift<Format>()) + | (b << blueShift<Format>()); + storePixel<BPP>(dest, index + i, s); } } - return buffer; } +template<QImage::Format Format> +static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count) +{ + Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); + Q_CONSTEXPR uchar aShift = alphaShift<Format>(); + Q_CONSTEXPR uchar rWidth = redWidth<Format>(); + Q_CONSTEXPR uchar rShift = redShift<Format>(); + Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); + Q_CONSTEXPR uchar gShift = greenShift<Format>(); + Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); + Q_CONSTEXPR uchar bShift = blueShift<Format>(); #ifdef Q_COMPILER_CONSTEXPR + Q_STATIC_ASSERT(rWidth == bWidth); +#endif + Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1; + Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift) + | (((1 << gWidth) - 1) << gShift); + constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>(); + + for (int i = 0; i < count; ++i) { + const uint c = fetchPixel<bpp>(src, i); + const uint r = (c >> rShift) & redBlueMask; + const uint b = (c >> bShift) & redBlueMask; + const uint t = (c & alphaGreenMask) + | (r << bShift) + | (b << rShift); + storePixel<bpp>(dst, i, t); + } +} + +static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count) +{ + const uint *src = reinterpret_cast<const uint *>(s); + uint *dest = reinterpret_cast<uint *>(d); + for (int i = 0; i < count; ++i) { + const uint c = src[i]; + const uint ag = c & 0xff00ff00; + const uint rb = c & 0x00ff00ff; + dest[i] = ag | (rb << 16) | (rb >> 16); + } +} + +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN +template<> +void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count) +{ + return rbSwap_rgb32(d, s, count); +} +#endif + +static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count) +{ + const uint *src = reinterpret_cast<const uint *>(s); + uint *dest = reinterpret_cast<uint *>(d); + for (int i = 0; i < count; ++i) + dest[i] = qRgbSwapRgb30(src[i]); +} template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB() { return QPixelLayout{ - uchar(redWidth<Format>()), uchar(redShift<Format>()), - uchar(greenWidth<Format>()), uchar(greenShift<Format>()), - uchar(blueWidth<Format>()), uchar(blueShift<Format>()), - 0, 0, - false, bitsPerPixel<Format>(), + false, + false, + bitsPerPixel<Format>(), + rbSwap<Format>, convertToRGB32<Format>, - convertRGBFromARGB32PM<Format, false>, - convertRGBFromARGB32PM<Format, true>, - convertToRGB64<Format> + convertToRGB64<Format>, + fetchRGBToRGB32<Format>, + storeRGBFromARGB32PM<Format, false>, + storeRGBFromARGB32PM<Format, true> }; } template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM() { return QPixelLayout{ - uchar(redWidth<Format>()), uchar(redShift<Format>()), - uchar(greenWidth<Format>()), uchar(greenShift<Format>()), - uchar(blueWidth<Format>()), uchar(blueShift<Format>()), - uchar(alphaWidth<Format>()), uchar(alphaShift<Format>()), - true, bitsPerPixel<Format>(), + true, + true, + bitsPerPixel<Format>(), + rbSwap<Format>, convertARGBPMToARGB32PM<Format>, - convertARGBPMFromARGB32PM<Format, false>, - convertARGBPMFromARGB32PM<Format, true>, - convertARGBPMToARGB64PM<Format> + convertARGBPMToARGB64PM<Format>, + fetchARGBPMToARGB32PM<Format>, + storeARGBPMFromARGB32PM<Format, false>, + storeARGBPMFromARGB32PM<Format, true> }; } -#endif - -// To convert in place, let 'dest' and 'src' be the same. -static const uint *QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *clut, QDitherInfo *) +static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut) { for (int i = 0; i < count; ++i) - buffer[i] = qPremultiply(clut->at(src[i])); + buffer[i] = qPremultiply(clut->at(buffer[i])); +} + +template<QPixelLayout::BPP BPP> +static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *clut, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) { + const uint s = fetchPixel<BPP>(src, index + i); + buffer[i] = qPremultiply(clut->at(s)); + } return buffer; } @@ -501,44 +680,71 @@ static const QRgba64 *QT_FASTCALL convertIndexedToARGB64PM(QRgba64 *buffer, cons return buffer; } -static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *) { - return src; } -static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int, + const QVector<QRgb> *, QDitherInfo *) { - return qt_convertARGB32ToARGB32PM(buffer, src, count); + return reinterpret_cast<const uint *>(src) + index; } -static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, RGBA2ARGB); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + if (d != src) + memcpy(d, src, count * sizeof(uint)); } -static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) { - return qt_convertRGBA8888ToARGB32PM(buffer, src, count); + qt_convertARGB32ToARGB32PM(buffer, buffer, count); } -static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); +} + +static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = qRgba(0, 0, 0, src[i]); + buffer[i] = RGBA2ARGB(buffer[i]); +} + +static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + const uint *s = reinterpret_cast<const uint *>(src) + index; + UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB); return buffer; } -static const uint *QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) +{ + qt_convertRGBA8888ToARGB32PM(buffer, buffer, count); +} + +static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); +} + +static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = qRgb(src[i], src[i], src[i]); + buffer[i] = qRgba(0, 0, 0, buffer[i]); +} + +static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qRgba(0, 0, 0, src[index + i]); return buffer; } @@ -550,6 +756,24 @@ static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const ui return buffer; } +static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) +{ + for (int i = 0; i < count; ++i) { + const uint s = buffer[i]; + buffer[i] = qRgb(s, s, s); + } +} + +static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + for (int i = 0; i < count; ++i) { + const uint s = src[index + i]; + buffer[i] = qRgb(s, s, s); + } + return buffer; +} + static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { @@ -558,19 +782,18 @@ static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, cons return buffer; } -static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qUnpremultiply(src[i]); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); }); } -static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, ARGB2RGBA); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA); } #ifdef __SSE2__ @@ -684,33 +907,39 @@ static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, c return buffer; } -static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(qUnpremultiply(src[i])); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); }); } -static const uint *QT_FASTCALL convertRGBXFromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); }); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); }); } -static const uint *QT_FASTCALL convertRGBXFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); }); +} + +template<QtPixelOrder PixelOrder> +static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) { for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply(src[i])); - return buffer; + buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *dither) +static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count, + const QVector<QRgb> *, QDitherInfo *dither) { + const uint *src = reinterpret_cast<const uint *>(s) + index; if (!dither) { UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>); } else { @@ -791,178 +1020,71 @@ static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, co } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertArgb32ToA2rgb30<PixelOrder>); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertRGB30FromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qConvertRgb32ToRgb30<PixelOrder>(src[i]); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); } template<QtPixelOrder PixelOrder> -static const uint *QT_FASTCALL convertRGB30FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) -{ - UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertRgb32ToRgb30<PixelOrder>); - return buffer; -} - -static const uint *QT_FASTCALL convertAlpha8FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = qAlpha(src[i]); - return buffer; -} - -static const uint *QT_FASTCALL convertGrayscale8FromRGB32(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { - for (int i = 0; i < count; ++i) - buffer[i] = qGray(src[i]); - return buffer; + uint *d = reinterpret_cast<uint *>(dest) + index; + UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); } -static const uint *QT_FASTCALL convertGrayscale8FromARGB32PM(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) +static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { for (int i = 0; i < count; ++i) - buffer[i] = qGray(qUnpremultiply(src[i])); - return buffer; -} - -template <QPixelLayout::BPP bpp> static -uint QT_FASTCALL fetchPixel(const uchar *, int) -{ - Q_UNREACHABLE(); - return 0; -} - -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index) -{ - return (src[index >> 3] >> (index & 7)) & 1; + dest[index + i] = qAlpha(src[i]); } -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index) -{ - return (src[index >> 3] >> (~index & 7)) & 1; -} - -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index) -{ - return src[index]; -} - -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index) -{ - return reinterpret_cast<const quint16 *>(src)[index]; -} - -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index) -{ - return reinterpret_cast<const quint24 *>(src)[index]; -} - -template <> -inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index) -{ - return reinterpret_cast<const uint *>(src)[index]; -} - -template <QPixelLayout::BPP bpp> -inline const uint *QT_FASTCALL fetchPixels(uint *buffer, const uchar *src, int index, int count) +static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { for (int i = 0; i < count; ++i) - buffer[i] = fetchPixel<bpp>(src, index + i); - return buffer; + dest[index + i] = qGray(src[i]); } -template <> -inline const uint *QT_FASTCALL fetchPixels<QPixelLayout::BPP32>(uint *, const uchar *src, int index, int) -{ - return reinterpret_cast<const uint *>(src) + index; -} - -template <QPixelLayout::BPP width> static -void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel); - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP1LSB>(uchar *dest, int index, uint pixel) -{ - if (pixel) - dest[index >> 3] |= 1 << (index & 7); - else - dest[index >> 3] &= ~(1 << (index & 7)); -} - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP1MSB>(uchar *dest, int index, uint pixel) -{ - if (pixel) - dest[index >> 3] |= 1 << (~index & 7); - else - dest[index >> 3] &= ~(1 << (~index & 7)); -} - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP8>(uchar *dest, int index, uint pixel) -{ - dest[index] = uchar(pixel); -} - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel) -{ - reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel); -} - -template <> -inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel) -{ - reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel); -} - -template <QPixelLayout::BPP width> -inline void QT_FASTCALL storePixels(uchar *dest, const uint *src, int index, int count) +static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count, + const QVector<QRgb> *, QDitherInfo *) { for (int i = 0; i < count; ++i) - storePixel<width>(dest, index + i, src[i]); + dest[index + i] = qGray(qUnpremultiply(src[i])); } -template <> -inline void QT_FASTCALL storePixels<QPixelLayout::BPP32>(uchar *dest, const uint *src, int index, int count) -{ - memcpy(reinterpret_cast<uint *>(dest) + index, src, count * sizeof(uint)); -} // Note: // convertToArgb32() assumes that no color channel is less than 4 bits. -// convertFromArgb32() assumes that no color channel is more than 8 bits. +// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits. // QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits. QPixelLayout qPixelLayouts[QImage::NImageFormats] = { - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPPNone, 0, 0, 0, 0 }, // Format_Invalid - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1MSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Mono - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1LSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_MonoLSB - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Indexed8 + { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid + { false, false, QPixelLayout::BPP1MSB, nullptr, convertIndexedToARGB32PM, + convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, nullptr, nullptr }, // Format_Mono + { false, false, QPixelLayout::BPP1LSB, nullptr, convertIndexedToARGB32PM, + convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, nullptr, nullptr }, // Format_MonoLSB + { false, false, QPixelLayout::BPP8, nullptr, convertIndexedToARGB32PM, + convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP8>, nullptr, nullptr }, // Format_Indexed8 // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong, // but everywhere this generic conversion would be wrong is currently overloaded. - { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertRGB32ToRGB64 }, // Format_RGB32 - { 8, 16, 8, 8, 8, 0, 8, 24, false, QPixelLayout::BPP32, convertARGB32ToARGB32PM, convertARGB32FromARGB32PM, convertPassThrough, convertARGB32ToARGB64PM }, // Format_ARGB32 - { 8, 16, 8, 8, 8, 0, 8, 24, true, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertARGB32PMToARGB64PM }, // Format_ARGB32_Premultiplied -#ifdef Q_COMPILER_CONSTEXPR + { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough, + convertRGB32ToRGB64, fetchPassThrough, storePassThrough, storePassThrough }, // Format_RGB32 + { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM, + convertARGB32ToARGB64PM, fetchARGB32ToARGB32PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32 + { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough, + convertARGB32PMToARGB64PM, fetchPassThrough, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied pixelLayoutRGB<QImage::Format_RGB16>(), pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(), pixelLayoutRGB<QImage::Format_RGB666>(), @@ -972,109 +1094,44 @@ QPixelLayout qPixelLayouts[QImage::NImageFormats] = { pixelLayoutRGB<QImage::Format_RGB888>(), pixelLayoutRGB<QImage::Format_RGB444>(), pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(), -#else - { 5, 11, 6, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB16>, - convertRGBFromARGB32PM<QImage::Format_RGB16, false>, - convertRGBFromARGB32PM<QImage::Format_RGB16, true>, - convertToRGB64<QImage::Format_RGB16>, - }, - { 5, 19, 6, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB8565_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8565_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8565_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB8565_Premultiplied>, - }, - { 6, 12, 6, 6, 6, 0, 0, 0, false, QPixelLayout::BPP24, - convertToRGB32<QImage::Format_RGB666>, - convertRGBFromARGB32PM<QImage::Format_RGB666, false>, - convertRGBFromARGB32PM<QImage::Format_RGB666, true>, - convertToRGB64<QImage::Format_RGB666>, - }, - { 6, 12, 6, 6, 6, 0, 6, 18, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB6666_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB6666_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB6666_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB6666_Premultiplied>, - }, - { 5, 10, 5, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB555>, - convertRGBFromARGB32PM<QImage::Format_RGB555, false>, - convertRGBFromARGB32PM<QImage::Format_RGB555, true>, - convertToRGB64<QImage::Format_RGB555>, - }, - { 5, 18, 5, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24, - convertARGBPMToARGB32PM<QImage::Format_ARGB8555_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8555_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB8555_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB8555_Premultiplied>, - }, - { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP24, - convertToRGB32<QImage::Format_RGB888>, - convertRGBFromARGB32PM<QImage::Format_RGB888, false>, - convertRGBFromARGB32PM<QImage::Format_RGB888, true>, - convertToRGB64<QImage::Format_RGB888>, - }, - { 4, 8, 4, 4, 4, 0, 0, 0, false, QPixelLayout::BPP16, - convertToRGB32<QImage::Format_RGB444>, - convertRGBFromARGB32PM<QImage::Format_RGB444, false>, - convertRGBFromARGB32PM<QImage::Format_RGB444, true>, - convertToRGB64<QImage::Format_RGB444>, - }, - { 4, 8, 4, 4, 4, 0, 4, 12, true, QPixelLayout::BPP16, - convertARGBPMToARGB32PM<QImage::Format_ARGB4444_Premultiplied>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB4444_Premultiplied, false>, - convertARGBPMFromARGB32PM<QImage::Format_ARGB4444_Premultiplied, true>, - convertARGBPMToARGB64PM<QImage::Format_ARGB4444_Premultiplied>, - }, -#endif -#if Q_BYTE_ORDER == Q_BIG_ENDIAN - { 8, 24, 8, 16, 8, 8, 0, 0, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888 - { 8, 24, 8, 16, 8, 8, 8, 0, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888 - { 8, 24, 8, 16, 8, 8, 8, 0, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM}, // Format_RGBA8888_Premultiplied -#else - { 8, 0, 8, 8, 8, 16, 0, 24, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888 - { 8, 0, 8, 8, 8, 16, 8, 24, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888 (ABGR32) - { 8, 0, 8, 8, 8, 16, 8, 24, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBA8888_Premultiplied -#endif - { 10, 20, 10, 10, 10, 0, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertRGB30FromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_BGR30 - { 10, 20, 10, 10, 10, 0, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertA2RGB30PMFromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_A2BGR30_Premultiplied - { 10, 0, 10, 10, 10, 20, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertRGB30FromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_RGB30 - { 10, 0, 10, 10, 10, 20, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertA2RGB30PMFromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_A2RGB30_Premultiplied - { 0, 0, 0, 0, 0, 0, 8, 0, true, QPixelLayout::BPP8, convertAlpha8ToRGB32, convertAlpha8FromARGB32PM, 0, convertAlpha8ToRGB64 }, // Format_Alpha8 - { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertGrayscale8ToRGB32, convertGrayscale8FromARGB32PM, convertGrayscale8FromRGB32, convertGrayscale8ToRGB64 } // Format_Grayscale8 -}; - -const FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount] = { - 0, // BPPNone - fetchPixels<QPixelLayout::BPP1MSB>, // BPP1MSB - fetchPixels<QPixelLayout::BPP1LSB>, // BPP1LSB - fetchPixels<QPixelLayout::BPP8>, // BPP8 - fetchPixels<QPixelLayout::BPP16>, // BPP16 - fetchPixels<QPixelLayout::BPP24>, // BPP24 - fetchPixels<QPixelLayout::BPP32> // BPP32 -}; - -StorePixelsFunc qStorePixels[QPixelLayout::BPPCount] = { - 0, // BPPNone - storePixels<QPixelLayout::BPP1MSB>, // BPP1MSB - storePixels<QPixelLayout::BPP1LSB>, // BPP1LSB - storePixels<QPixelLayout::BPP8>, // BPP8 - storePixels<QPixelLayout::BPP16>, // BPP16 - storePixels<QPixelLayout::BPP24>, // BPP24 - storePixels<QPixelLayout::BPP32> // BPP32 -}; - -typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index); - -static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = { - 0, // BPPNone - fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB - fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB - fetchPixel<QPixelLayout::BPP8>, // BPP8 - fetchPixel<QPixelLayout::BPP16>, // BPP16 - fetchPixel<QPixelLayout::BPP24>, // BPP24 - fetchPixel<QPixelLayout::BPP32> // BPP32 + { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM, + convertRGBA8888PMToARGB64PM, fetchRGBA8888PMToARGB32PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888 + { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM, + convertRGBA8888ToARGB64PM, fetchRGBA8888ToARGB32PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888 + { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM, + convertRGBA8888PMToARGB64PM, fetchRGBA8888PMToARGB32PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied + { false, false, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderBGR>, + convertA2RGB30PMToARGB64PM<PixelOrderBGR>, + fetchA2RGB30PMToARGB32PM<PixelOrderBGR>, + storeRGB30FromARGB32PM<PixelOrderBGR>, + storeRGB30FromRGB32<PixelOrderBGR> + }, // Format_BGR30 + { true, true, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderBGR>, + convertA2RGB30PMToARGB64PM<PixelOrderBGR>, + fetchA2RGB30PMToARGB32PM<PixelOrderBGR>, + storeA2RGB30PMFromARGB32PM<PixelOrderBGR>, + storeRGB30FromRGB32<PixelOrderBGR> + }, // Format_A2BGR30_Premultiplied + { false, false, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderRGB>, + convertA2RGB30PMToARGB64PM<PixelOrderRGB>, + fetchA2RGB30PMToARGB32PM<PixelOrderRGB>, + storeRGB30FromARGB32PM<PixelOrderRGB>, + storeRGB30FromRGB32<PixelOrderRGB> + }, // Format_RGB30 + { true, true, QPixelLayout::BPP32, rbSwap_rgb30, + convertA2RGB30PMToARGB32PM<PixelOrderRGB>, + convertA2RGB30PMToARGB64PM<PixelOrderRGB>, + fetchA2RGB30PMToARGB32PM<PixelOrderRGB>, + storeA2RGB30PMFromARGB32PM<PixelOrderRGB>, + storeRGB30FromRGB32<PixelOrderRGB> + }, // Format_A2RGB30_Premultiplied + { true, true, QPixelLayout::BPP8, nullptr, convertAlpha8ToRGB32, + convertAlpha8ToRGB64, fetchAlpha8ToRGB32, storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8 + { false, false, QPixelLayout::BPP8, nullptr, convertGrayscale8ToRGB32, + convertGrayscale8ToRGB64, fetchGrayscale8ToRGB32, storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 } // Format_Grayscale8 }; /* @@ -1124,14 +1181,13 @@ static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuff static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) { const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - const uint *ptr = qFetchPixels[layout->bpp](buffer, rasterBuffer->scanLine(y), x, length); - return const_cast<uint *>(layout->convertToARGB32PM(buffer, ptr, length, 0, 0)); + return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); } static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) { const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - uint buffer32[buffer_size]; + uint buffer32[BufferSize]; const uint *ptr = qFetchPixels[layout->bpp](buffer32, rasterBuffer->scanLine(y), x, length); return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, ptr, length, 0, 0)); } @@ -1304,22 +1360,12 @@ static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) { - uint buf[buffer_size]; const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - StorePixelsFunc store = qStorePixels[layout->bpp]; + ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM; + if (!layout->premultiplied && !layout->hasAlphaChannel) + store = layout->storeFromRGB32; uchar *dest = rasterBuffer->scanLine(y); - while (length) { - int l = qMin(length, buffer_size); - const uint *ptr = 0; - if (!layout->premultiplied && !layout->alphaWidth) - ptr = layout->convertFromRGB32(buf, buffer, l, 0, 0); - else - ptr = layout->convertFromARGB32PM(buf, buffer, l, 0, 0); - store(dest, ptr, x, l); - length -= l; - buffer += l; - x += l; - } + store(dest, buffer, x, length, nullptr, nullptr); } static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length) @@ -1331,19 +1377,16 @@ static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int len static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) { - uint buf[buffer_size]; + uint buf[BufferSize]; const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; - StorePixelsFunc store = qStorePixels[layout->bpp]; + ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM; + if (!layout->premultiplied && !layout->hasAlphaChannel) + store = layout->storeFromRGB32; uchar *dest = rasterBuffer->scanLine(y); while (length) { - int l = qMin(length, buffer_size); - const uint *ptr = 0; + int l = qMin(length, BufferSize); convertFromRgb64(buf, buffer, l); - if (!layout->premultiplied && !layout->alphaWidth) - ptr = layout->convertFromRGB32(buf, buf, l, 0, 0); - else - ptr = layout->convertFromARGB32PM(buf, buf, l, 0, 0); - store(dest, ptr, x, l); + store(dest, buf, x, l, nullptr, nullptr); length -= l; buffer += l; x += l; @@ -1528,8 +1571,7 @@ static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator * const QSpanData *data, int y, int x, int length) { const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; - const uint *ptr = qFetchPixels[layout->bpp](buffer, data->texture.scanLine(y), x, length); - return layout->convertToARGB32PM(buffer, ptr, length, data->texture.colorTable, 0); + return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr); } static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *, @@ -1554,7 +1596,7 @@ static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Op { const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; if (layout->bpp != QPixelLayout::BPP32) { - uint buffer32[buffer_size]; + uint buffer32[BufferSize]; const uint *ptr = qFetchPixels[layout->bpp](buffer32, data->texture.scanLine(y), x, length); return layout->convertToARGB64PM(buffer, ptr, length, data->texture.colorTable, 0); } else { @@ -1652,7 +1694,8 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, ++b; } } - return layout->convertToARGB32PM(buffer, buffer, length, data->texture.colorTable, 0); + layout->convertToARGB32PM(buffer, length, data->texture.colorTable); + return buffer; } template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */ @@ -1673,7 +1716,7 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper FetchPixelFunc fetch = qFetchPixel[layout->bpp]; const QVector<QRgb> *clut = data->texture.colorTable; - uint buffer32[buffer_size]; + uint buffer32[BufferSize]; QRgba64 *b = buffer; if (data->fast_matrix) { // The increment pr x in the scanline @@ -1687,9 +1730,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper int i = 0, j = 0; while (i < length) { - if (j == buffer_size) { - layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0); - b += buffer_size; + if (j == BufferSize) { + layout->convertToARGB64PM(b, buffer32, BufferSize, clut, 0); + b += BufferSize; j = 0; } int px = fx >> 16; @@ -1725,9 +1768,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper int i = 0, j = 0; while (i < length) { - if (j == buffer_size) { - layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0); - b += buffer_size; + if (j == BufferSize) { + layout->convertToARGB64PM(b, buffer32, BufferSize, clut, 0); + b += BufferSize; j = 0; } const qreal iw = fw == 0 ? 1 : 1 / fw; @@ -1921,7 +1964,7 @@ inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, } enum FastTransformTypes { - SimpleUpscaleTransform, + SimpleScaleTransform, UpscaleTransform, DownscaleTransform, RotateTransform, @@ -1929,11 +1972,38 @@ enum FastTransformTypes { NFastTransformTypes }; +// Completes the partial interpolation stored in IntermediateBuffer. +// by performing the x-axis interpolation and joining the RB and AG buffers. +static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx) +{ +#if defined(QT_COMPILER_SUPPORTS_AVX2) + extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx); + if (qCpuHasFeature(AVX2)) + return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx); +#endif + + // Switch to intermediate buffer coordinates + fx -= offset * fixed_scale; + + while (b < end) { + const int x = (fx >> 16); + + const uint distx = (fx & 0x0000ffff) >> 8; + const uint idistx = 256 - distx; + const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00; + const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00; + *b = (rb >> 8) | ag; + b++; + fx += fdx; + } + fx += offset * fixed_scale; +} + typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); template<TextureBlendType blendType> -static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/) +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) { int y1 = (fy >> 16); int y2; @@ -1950,16 +2020,12 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u const int offset = (fx + adjust) >> 16; int x = offset; - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate between two pixel of this buffer. - - // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB - // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - quint32 intermediate_buffer[2][buffer_size + 2]; - // count is the size used in the intermediate_buffer. + IntermediateBuffer intermediate; + // count is the size used in the intermediate.buffer. int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + // length is supposed to be <= BufferSize either because data->m11 < 1 or + // data->m11 < 2, and any larger buffers split + Q_ASSERT(count <= BufferSize + 2); int f = 0; int lim = count; if (blendType == BlendTransformedBilinearTiled) { @@ -1974,8 +2040,8 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; do { - intermediate_buffer[0][f] = rb; - intermediate_buffer[1][f] = ag; + intermediate.buffer_rb[f] = rb; + intermediate.buffer_ag[f] = ag; f++; x++; } while (x < image.x1 && f < lim); @@ -2008,10 +2074,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u // Add the values, and shift to only keep 8 significant bits per colors __m128i rAG =_mm_add_epi16(topAG, bottomAG); rAG = _mm_srli_epi16(rAG, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG); + _mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG); __m128i rRB =_mm_add_epi16(topRB, bottomRB); rRB = _mm_srli_epi16(rRB, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB); + _mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB); } #elif defined(__ARM_NEON__) const int16x8_t disty_ = vdupq_n_s16(disty); @@ -2038,10 +2104,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u // Add the values, and shift to only keep 8 significant bits per colors int16x8_t rAG = vaddq_s16(topAG, bottomAG); rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG); + vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG); int16x8_t rRB = vaddq_s16(topRB, bottomRB); rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB); + vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB); } #endif } @@ -2055,28 +2121,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u uint t = s1[x]; uint b = s2[x]; - intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; x++; } - // Now interpolate the values from the intermediate_buffer to get the final result. - fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates - - while (b < end) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); - - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff; - int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00; - *b = rb | ag; - b++; - fx += fdx; - } + // Now interpolate the values from the intermediate.buffer to get the final result. + intermediate_adder(b, end, intermediate, offset, fx, fdx); } template<TextureBlendType blendType> @@ -2542,14 +2593,14 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = { { - fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>, fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear> }, { - fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>, fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>, @@ -2584,7 +2635,13 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c if (fdy == 0) { // simple scale, no rotation or shear if (qAbs(fdx) <= fixed_scale) { // simple scale up on X - bilinearFastTransformHelperARGB32PM[tiled][SimpleUpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + } else if (qAbs(fdx) <= 2 * fixed_scale) { + // simple scale down on X, less than 2x + const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2); + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy); + if (mid != length) + bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy); } else if (qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); @@ -2652,16 +2709,13 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c return buffer; } -template<TextureBlendType blendType, QPixelLayout::BPP bpp> -static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/) +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) { const QPixelLayout *layout = &qPixelLayouts[image.format]; const QVector<QRgb> *clut = image.colorTable; - Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout->bpp); - // When templated 'fetch' should be inlined at compile time: - const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : fetchPixels<bpp>; - const ConvertFunc convertToARGB32PM = layout->convertToARGB32PM; + const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM; int y1 = (fy >> 16); int y2; @@ -2678,16 +2732,14 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, const int offset = (fx + adjust) >> 16; int x = offset; - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate between two pixel of this buffer. - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - uint buf1[buffer_size + 2]; - uint buf2[buffer_size + 2]; + IntermediateBuffer intermediate; + uint *buf1 = intermediate.buffer_rb; + uint *buf2 = intermediate.buffer_ag; const uint *ptr1; const uint *ptr2; int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + Q_ASSERT(count <= BufferSize + 2); if (blendType == BlendTransformedBilinearTiled) { x %= image.width; @@ -2696,10 +2748,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, int len1 = qMin(count, image.width - x); int len2 = qMin(x, count - len1); - ptr1 = fetch(buf1, s1, x, len1); - ptr1 = convertToARGB32PM(buf1, ptr1, len1, clut, 0); - ptr2 = fetch(buf2, s2, x, len1); - ptr2 = convertToARGB32PM(buf2, ptr2, len1, clut, 0); + ptr1 = fetch(buf1, s1, x, len1, clut, nullptr); + ptr2 = fetch(buf2, s2, x, len1, clut, nullptr); for (int i = 0; i < len1; ++i) { uint t = ptr1[i]; uint b = ptr2[i]; @@ -2708,10 +2758,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, } if (len2) { - ptr1 = fetch(buf1 + len1, s1, 0, len2); - ptr1 = convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0); - ptr2 = fetch(buf2 + len1, s2, 0, len2); - ptr2 = convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0); + ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr); + ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr); for (int i = 0; i < len2; ++i) { uint t = ptr1[i]; uint b = ptr2[i]; @@ -2719,6 +2767,7 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; } } + // Generate the rest by repeatedly repeating the previous set of pixels for (int i = image.width; i < count; ++i) { buf1[i] = buf1[i - image.width]; buf2[i] = buf2[i - image.width]; @@ -2729,10 +2778,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, int len = qMax(1, end - start); int leading = start - x; - ptr1 = fetch(buf1 + leading, s1, start, len); - ptr1 = convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0); - ptr2 = fetch(buf2 + leading, s2, start, len); - ptr2 = convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0); + ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr); + ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr); for (int i = 0; i < len; ++i) { uint t = ptr1[i]; @@ -2751,22 +2798,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, } } - // Now interpolate the values from the intermediate_buffer to get the final result. - fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates - - while (b < end) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); - - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff; - int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00; - *b++ = rb | ag; - fx += fdx; - } + // Now interpolate the values from the intermediate.buffer to get the final result. + intermediate_adder(b, end, intermediate, offset, fx, fdx); } @@ -2916,18 +2949,23 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper if (fdy == 0) { // simple scale, no rotation or shear if (qAbs(fdx) <= fixed_scale) { // scale up on X - fetchTransformedBilinear_simple_upscale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy); + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy); + } else if (qAbs(fdx) <= 2 * fixed_scale) { // scale down on X less than 2x + const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2); + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy); + if (mid != length) + fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy); } else { const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0); - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) int disty = (fy & 0x0000ffff) >> 8; @@ -2955,14 +2993,14 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper } else { // rotation or shear const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy); - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) { // If we are zooming more than 8 times, we use 8bit precision for the position. @@ -3009,15 +3047,15 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper qreal fy = data->m22 * cy + data->m12 * cx + data->dy; qreal fw = data->m23 * cy + data->m13 * cx + data->m33; - uint buf1[buffer_size]; - uint buf2[buffer_size]; + uint buf1[BufferSize]; + uint buf2[BufferSize]; uint *b = buffer; - int distxs[buffer_size / 2]; - int distys[buffer_size / 2]; + int distxs[BufferSize / 2]; + int distys[BufferSize / 2]; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); for (int i = 0; i < len; ++i) { const qreal iw = fw == 0 ? 1 : 1 / fw; const qreal px = fx * iw - qreal(0.5); @@ -3049,8 +3087,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper fw += fdw; } - layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); - layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); + layout->convertToARGB32PM(buf1, len * 2, clut); + layout->convertToARGB32PM(buf2, len * 2, clut); for (int i = 0; i < len; ++i) { int distx = distxs[i]; @@ -3094,13 +3132,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co if (fdy == 0) { //simple scale, no rotation - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; + uint sbuf1[BufferSize]; + uint sbuf2[BufferSize]; + quint64 buf1[BufferSize]; + quint64 buf2[BufferSize]; QRgba64 *b = buffer; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); int disty = (fy & 0x0000ffff); #if defined(__SSE2__) const __m128i vdy = _mm_set1_epi16(disty); @@ -3138,15 +3176,15 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co b += len; } } else { //rotation - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; + uint sbuf1[BufferSize]; + uint sbuf2[BufferSize]; + quint64 buf1[BufferSize]; + quint64 buf2[BufferSize]; QRgba64 *end = buffer + length; QRgba64 *b = buffer; while (b < end) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy); @@ -3177,17 +3215,17 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co qreal fw = data->m23 * cy + data->m13 * cx + data->m33; FetchPixelFunc fetch = qFetchPixel[layout->bpp]; - uint sbuf1[buffer_size]; - uint sbuf2[buffer_size]; - quint64 buf1[buffer_size]; - quint64 buf2[buffer_size]; + uint sbuf1[BufferSize]; + uint sbuf2[BufferSize]; + quint64 buf1[BufferSize]; + quint64 buf2[BufferSize]; QRgba64 *b = buffer; - int distxs[buffer_size / 2]; - int distys[buffer_size / 2]; + int distxs[BufferSize / 2]; + int distys[BufferSize / 2]; while (length) { - int len = qMin(length, buffer_size / 2); + int len = qMin(length, BufferSize / 2); for (int i = 0; i < len; ++i) { const qreal iw = fw == 0 ? 1 : 1 / fw; const qreal px = fx * iw - qreal(0.5); @@ -3292,6 +3330,15 @@ static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = { fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled }; +static SourceFetchProc sourceFetchAny16[NBlendTypes] = { + fetchUntransformed, // Untransformed + fetchUntransformed, // Tiled + fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed + fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled + fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear + fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled +}; + static SourceFetchProc sourceFetchAny32[NBlendTypes] = { fetchUntransformed, // Untransformed fetchUntransformed, // Tiled @@ -3316,6 +3363,8 @@ static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage: return sourceFetchARGB32PM[blendType]; if (blendType == BlendUntransformed || blendType == BlendTiled) return sourceFetchUntransformed[format]; + if (qPixelLayouts[format].bpp == QPixelLayout::BPP16) + return sourceFetchAny16[blendType]; if (qPixelLayouts[format].bpp == QPixelLayout::BPP32) return sourceFetchAny32[blendType]; return sourceFetchGeneric[blendType]; @@ -3717,7 +3766,7 @@ static void blend_color_generic(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; + uint buffer[BufferSize]; Operator op = getOperator(data, spans, count); const uint color = data->solid.color.toArgb32(); @@ -3725,7 +3774,7 @@ void blend_color_generic(int count, const QSpan *spans, void *userData) int x = spans->x; int length = spans->len; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; op.funcSolid(dest, l, color, spans->coverage); if (op.destStore) @@ -3777,14 +3826,27 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData) return blend_color_generic(count, spans, userData); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const QRgba64 color = data->solid.color; + bool solidFill = data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source + || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver && color.isOpaque()); + bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; while (count--) { int x = spans->x; int length = spans->len; + if (solidFill && isBpp32 && spans->coverage == 255) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + if (length > 0) { + op.destStore64(data->rasterBuffer, x, spans->y, &color, 1); + uint *dest = (uint*)data->rasterBuffer->scanLine(spans->y) + x; + qt_memfill32(dest + 1, dest[0], length - 1); + length = 0; + } + } + while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.funcSolid64(dest, l, color, spans->coverage); op.destStore64(data->rasterBuffer, x, spans->y, dest, l); @@ -3889,7 +3951,7 @@ void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handle int length = right - x; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); length -= l; int process_length = l; @@ -3936,8 +3998,8 @@ struct QBlendBase BlendType *dest; - BlendType buffer[buffer_size]; - BlendType src_buffer[buffer_size]; + BlendType buffer[BufferSize]; + BlendType src_buffer[BufferSize]; }; class BlendSrcGeneric : public QBlendBase<uint> @@ -4021,8 +4083,8 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; - uint src_buffer[buffer_size]; + uint buffer[BufferSize]; + uint src_buffer[BufferSize]; Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; @@ -4046,7 +4108,7 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use if (length > 0) { const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; op.func(dest, src, l, coverage); @@ -4071,8 +4133,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_untransformed_generic(count, spans, userData); } - quint64 buffer[buffer_size]; - quint64 src_buffer[buffer_size]; + quint64 buffer[BufferSize]; + quint64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4095,7 +4157,7 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi if (length > 0) { const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.func64(dest, src, l, coverage); @@ -4259,8 +4321,8 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[buffer_size]; - uint src_buffer[buffer_size]; + uint buffer[BufferSize]; + uint src_buffer[BufferSize]; Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; @@ -4286,8 +4348,8 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData) const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer; op.func(dest, src, l, coverage); @@ -4312,8 +4374,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_tiled_generic(count, spans, userData); } - quint64 buffer[buffer_size]; - quint64 src_buffer[buffer_size]; + quint64 buffer[BufferSize]; + quint64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4325,6 +4387,50 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD if (yoff < 0) yoff += image_height; + bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; + if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && isBpp32) { + // If destination isn't blended into the result, we can do the tiling directly on destination pixels. + while (count--) { + int x = spans->x; + int y = spans->y; + int length = spans->len; + int sx = (xoff + spans->x) % image_width; + int sy = (spans->y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; + + int sl = qMin(image_width, length); + if (sx > 0 && sl > 0) { + int l = qMin(image_width - sx, sl); + const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); + op.destStore64(data->rasterBuffer, x, y, src, l); + x += l; + sx += l; + sl -= l; + if (sx >= image_width) + sx = 0; + } + if (sl > 0) { + Q_ASSERT(sx == 0); + const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, sl); + op.destStore64(data->rasterBuffer, x, y, src, sl); + x += sl; + sx += sl; + sl -= sl; + if (sx >= image_width) + sx = 0; + } + uint *dest = (uint*)data->rasterBuffer->scanLine(y) + x - image_width; + for (int i = image_width; i < length; ++i) { + dest[i] = dest[i - image_width]; + } + ++spans; + } + return; + } + while (count--) { int x = spans->x; int length = spans->len; @@ -4338,8 +4444,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l); QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l); op.func64(dest, src, l, coverage); @@ -4388,8 +4494,8 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData) const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; const uint *src = (const uint *)data->texture.scanLine(sy) + sx; uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x; op.func(dest, src, l, coverage); @@ -4448,8 +4554,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) int tx = x; while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx; const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; memcpy(dest, src, l * sizeof(quint16)); @@ -4484,8 +4590,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) if (alpha > 0) { while (length) { int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; + if (BufferSize < l) + l = BufferSize; quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x; const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha); @@ -4501,199 +4607,6 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) } } -static void blend_transformed_bilinear_rgb565(int count, const QSpan *spans, void *userData) -{ - QSpanData *data = reinterpret_cast<QSpanData*>(userData); - QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; - - if (data->texture.format != QImage::Format_RGB16 - || (mode != QPainter::CompositionMode_SourceOver - && mode != QPainter::CompositionMode_Source)) - { - blend_src_generic(count, spans, userData); - return; - } - - quint16 buffer[buffer_size]; - - const int src_minx = data->texture.x1; - const int src_miny = data->texture.y1; - const int src_maxx = data->texture.x2 - 1; - const int src_maxy = data->texture.y2 - 1; - - if (data->fast_matrix) { - // The increment pr x in the scanline - const int fdx = (int)(data->m11 * fixed_scale); - const int fdy = (int)(data->m12 * fixed_scale); - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - int x = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale) - half_point; - int y = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale) - half_point; - int length = spans->len; - - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - int x1 = (x >> 16); - int x2; - int y1 = (y >> 16); - int y2; - - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_minx, src_maxx, x1, x2); - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_miny, src_maxy, y1, y2); - - const quint16 *src1 = (const quint16*)data->texture.scanLine(y1); - const quint16 *src2 = (const quint16*)data->texture.scanLine(y2); - quint16 tl = src1[x1]; - const quint16 tr = src1[x2]; - quint16 bl = src2[x1]; - const quint16 br = src2[x2]; - - const uint distxsl8 = x & 0xff00; - const uint distysl8 = y & 0xff00; - const uint distx = distxsl8 >> 8; - const uint disty = distysl8 >> 8; - const uint distxy = distx * disty; - - const uint tlw = 0x10000 - distxsl8 - distysl8 + distxy; // (256 - distx) * (256 - disty) - const uint trw = distxsl8 - distxy; // distx * (256 - disty) - const uint blw = distysl8 - distxy; // (256 - distx) * disty - const uint brw = distxy; // distx * disty - uint red = ((tl & 0xf800) * tlw + (tr & 0xf800) * trw - + (bl & 0xf800) * blw + (br & 0xf800) * brw) & 0xf8000000; - uint green = ((tl & 0x07e0) * tlw + (tr & 0x07e0) * trw - + (bl & 0x07e0) * blw + (br & 0x07e0) * brw) & 0x07e00000; - uint blue = ((tl & 0x001f) * tlw + (tr & 0x001f) * trw - + (bl & 0x001f) * blw + (br & 0x001f) * brw); - *b = quint16((red | green | blue) >> 16); - - ++b; - x += fdx; - y += fdy; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } else { - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha == 0) { - ++spans; - continue; - } - - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + spans->x; - - const qreal cx = spans->x + qreal(0.5); - const qreal cy = spans->y + qreal(0.5); - - qreal x = data->m21 * cy + data->m11 * cx + data->dx; - qreal y = data->m22 * cy + data->m12 * cx + data->dy; - qreal w = data->m23 * cy + data->m13 * cx + data->m33; - - int length = spans->len; - while (length) { - int l; - quint16 *b; - if (ialpha == 0) { - l = length; - b = dest; - } else { - l = qMin(length, buffer_size); - b = buffer; - } - const quint16 *end = b + l; - - while (b < end) { - const qreal iw = w == 0 ? 1 : 1 / w; - const qreal px = x * iw - qreal(0.5); - const qreal py = y * iw - qreal(0.5); - - int x1 = int(px) - (px < 0); - int x2; - int y1 = int(py) - (py < 0); - int y2; - - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_minx, src_maxx, x1, x2); - fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(0, src_miny, src_maxy, y1, y2); - - const quint16 *src1 = (const quint16 *)data->texture.scanLine(y1); - const quint16 *src2 = (const quint16 *)data->texture.scanLine(y2); - quint16 tl = src1[x1]; - const quint16 tr = src1[x2]; - quint16 bl = src2[x1]; - const quint16 br = src2[x2]; - - const uint distx = uint((px - x1) * 256); - const uint disty = uint((py - y1) * 256); - const uint distxsl8 = distx << 8; - const uint distysl8 = disty << 8; - const uint distxy = distx * disty; - - const uint tlw = 0x10000 - distxsl8 - distysl8 + distxy; // (256 - distx) * (256 - disty) - const uint trw = distxsl8 - distxy; // distx * (256 - disty) - const uint blw = distysl8 - distxy; // (256 - distx) * disty - const uint brw = distxy; // distx * disty - uint red = ((tl & 0xf800) * tlw + (tr & 0xf800) * trw - + (bl & 0xf800) * blw + (br & 0xf800) * brw) & 0xf8000000; - uint green = ((tl & 0x07e0) * tlw + (tr & 0x07e0) * trw - + (bl & 0x07e0) * blw + (br & 0x07e0) * brw) & 0x07e00000; - uint blue = ((tl & 0x001f) * tlw + (tr & 0x001f) * trw - + (bl & 0x001f) * blw + (br & 0x001f) * brw); - *b = quint16((red | green | blue) >> 16); - - ++b; - x += fdx; - y += fdy; - w += fdw; - } - - if (ialpha != 0) - blend_sourceOver_rgb16_rgb16(dest, buffer, l, alpha, ialpha); - - dest += l; - length -= l; - } - ++spans; - } - } -} - static void blend_transformed_argb(int count, const QSpan *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); @@ -4704,7 +4617,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData } CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode]; - uint buffer[buffer_size]; + uint buffer[BufferSize]; quint32 mask = (data->texture.format == QImage::Format_RGB32) ? 0xff000000 : 0; const int image_x1 = data->texture.x1; @@ -4733,7 +4646,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData int length = spans->len; const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(length, buffer_size); + int l = qMin(length, BufferSize); const uint *end = buffer + l; uint *b = buffer; while (b < end) { @@ -4770,7 +4683,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData int length = spans->len; const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; while (length) { - int l = qMin(length, buffer_size); + int l = qMin(length, BufferSize); const uint *end = buffer + l; uint *b = buffer; while (b < end) { @@ -4809,7 +4722,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa return; } - quint16 buffer[buffer_size]; + quint16 buffer[BufferSize]; const int image_x1 = data->texture.x1; const int image_y1 = data->texture.y1; const int image_x2 = data->texture.x2 - 1; @@ -4845,7 +4758,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa l = length; b = dest; } else { - l = qMin(length, buffer_size); + l = qMin(length, BufferSize); b = buffer; } const quint16 *end = b + l; @@ -4900,7 +4813,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa l = length; b = dest; } else { - l = qMin(length, buffer_size); + l = qMin(length, BufferSize); b = buffer; } const quint16 *end = b + l; @@ -4942,7 +4855,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us } CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode]; - uint buffer[buffer_size]; + uint buffer[BufferSize]; int image_width = data->texture.width; int image_height = data->texture.height; @@ -4970,7 +4883,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; int length = spans->len; while (length) { - int l = qMin(length, buffer_size); + int l = qMin(length, BufferSize); const uint *end = buffer + l; uint *b = buffer; int px16 = x % (image_width << 16); @@ -5024,7 +4937,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; int length = spans->len; while (length) { - int l = qMin(length, buffer_size); + int l = qMin(length, BufferSize); const uint *end = buffer + l; uint *b = buffer; while (b < end) { @@ -5075,7 +4988,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void * return; } - quint16 buffer[buffer_size]; + quint16 buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -5109,7 +5022,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void * l = length; b = dest; } else { - l = qMin(length, buffer_size); + l = qMin(length, BufferSize); b = buffer; } const quint16 *end = b + l; @@ -5169,7 +5082,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void * l = length; b = dest; } else { - l = qMin(length, buffer_size); + l = qMin(length, BufferSize); b = buffer; } const quint16 *end = b + l; @@ -5227,7 +5140,7 @@ static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = { blend_tiled_rgb565, // Tiled blend_transformed_rgb565, // Transformed blend_transformed_tiled_rgb565, // TransformedTiled - blend_transformed_bilinear_rgb565, // TransformedBilinear + blend_src_generic, // TransformedBilinear blend_src_generic // TransformedBilinearTiled }; @@ -5523,7 +5436,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; @@ -5532,7 +5445,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, int i = x; int length = mapWidth; while (length > 0) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); for (int j=0; j < l; ++j) { const int coverage = map[j + (i - x)]; @@ -5561,7 +5474,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, int end = qMin<int>(x + mapWidth, clip.x + clip.len); if (end <= start) continue; - Q_ASSERT(end - start <= buffer_size); + Q_ASSERT(end - start <= BufferSize); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start); for (int xp=start; xp<end; ++xp) { @@ -5800,7 +5713,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); } - quint64 buffer[buffer_size]; + quint64 buffer[BufferSize]; const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; @@ -5809,7 +5722,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, int i = x; int length = mapWidth; while (length > 0) { - int l = qMin(buffer_size, length); + int l = qMin(BufferSize, length); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); for (int j=0; j < l; ++j) { const uint coverage = src[j + (i - x)]; @@ -5838,7 +5751,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, int end = qMin<int>(x + mapWidth, clip.x + clip.len); if (end <= start) continue; - Q_ASSERT(end - start <= buffer_size); + Q_ASSERT(end - start <= BufferSize); QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start); for (int xp=start; xp<end; ++xp) { @@ -5928,8 +5841,24 @@ static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer, int x, int y, int width, int height, const QRgba64 &color) { + const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format]; + quint32 c32 = color.toArgb32(); + quint16 c16; + layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr); qt_rectfill<quint16>(reinterpret_cast<quint16 *>(rasterBuffer->buffer()), - color.toRgb16(), x, y, width, height, rasterBuffer->bytesPerLine()); + c16, x, y, width, height, rasterBuffer->bytesPerLine()); +} + +static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer, + int x, int y, int width, int height, + const QRgba64 &color) +{ + const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format]; + quint32 c32 = color.toArgb32(); + quint24 c24; + layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr); + qt_rectfill<quint24>(reinterpret_cast<quint24 *>(rasterBuffer->buffer()), + c24, x, y, width, height, rasterBuffer->bytesPerLine()); } static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer, @@ -6049,7 +5978,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB666 { @@ -6058,7 +5987,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_ARGB6666_Premultiplied { @@ -6067,7 +5996,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB555 { @@ -6076,7 +6005,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_ARGB8555_Premultiplied { @@ -6085,7 +6014,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB888 { @@ -6094,7 +6023,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint24 }, // Format_RGB444 { @@ -6103,7 +6032,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_ARGB4444_Premultiplied { @@ -6112,7 +6041,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = 0, qt_alphamapblit_generic, qt_alphargbblit_generic, - 0 + qt_rectfill_quint16 }, // Format_RGBX8888 { @@ -6267,7 +6196,7 @@ void qt_memfill32(quint32 *dest, quint32 color, int count) #endif #ifdef QT_COMPILER_SUPPORTS_SSE4_1 -template<QtPixelOrder> const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); +template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); #endif extern void qInitBlendFunctions(); @@ -6336,37 +6265,37 @@ static void qInitDrawhelperFunctions() int w, int h, int const_alpha); - extern void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count); extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data, int y, int x, int length); qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3; - qStorePixels[QPixelLayout::BPP24] = storePixelsBPP24_ssse3; sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3; +#ifndef __SSSE3__ + extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count); + qFetchPixels[QPixelLayout::BPP24] = fetchPixelsBPP24_ssse3; +#endif } #endif // SSSE3 #if defined(QT_COMPILER_SUPPORTS_SSE4_1) if (qCpuHasFeature(SSE4_1)) { - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, + extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *); qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; - qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; - qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].convertFromARGB32PM = convertA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>; - qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].convertFromARGB32PM = convertA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>; + qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4; + qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>; + qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>; } #endif @@ -6401,14 +6330,14 @@ static void qInitDrawhelperFunctions() qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2; qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2; - extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image, - int &fx, int &fy, int fdx, int /*fdy*/); + extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/); extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int /*fdy*/); extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); - bilinearFastTransformHelperARGB32PM[0][SimpleUpscaleTransform] = fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2; + bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2; bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2; } @@ -6440,10 +6369,8 @@ static void qInitDrawhelperFunctions() sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon; #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); + extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *); + extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *); qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon; #endif |