diff options
Diffstat (limited to 'src/gui/painting')
23 files changed, 2103 insertions, 1087 deletions
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri index 86e35c39f8..63e345545c 100644 --- a/src/gui/painting/painting.pri +++ b/src/gui/painting/painting.pri @@ -8,6 +8,7 @@ HEADERS += \ painting/qbrush.h \ painting/qcolor.h \ painting/qcolor_p.h \ + painting/qcolorprofile_p.h \ painting/qcosmeticstroker_p.h \ painting/qdatabuffer_p.h \ painting/qdrawhelper_p.h \ @@ -63,11 +64,11 @@ SOURCES += \ painting/qblittable.cpp \ painting/qbrush.cpp \ painting/qcolor.cpp \ + painting/qcolorprofile.cpp \ painting/qcompositionfunctions.cpp \ painting/qcosmeticstroker.cpp \ painting/qdrawhelper.cpp \ painting/qemulationpaintengine.cpp \ - painting/qgammatables.cpp \ painting/qgrayraster.c \ painting/qimagescale.cpp \ painting/qmatrix.cpp \ diff --git a/src/gui/painting/qbrush.cpp b/src/gui/painting/qbrush.cpp index ebb035a2c1..cc3ee76f0d 100644 --- a/src/gui/painting/qbrush.cpp +++ b/src/gui/painting/qbrush.cpp @@ -1006,7 +1006,7 @@ bool QBrush::operator==(const QBrush &b) const */ QDebug operator<<(QDebug dbg, const QBrush &b) { - static const char *const BRUSH_STYLES[] = { + static const char BRUSH_STYLES[][24] = { "NoBrush", "SolidPattern", "Dense1Pattern", @@ -1025,7 +1025,7 @@ QDebug operator<<(QDebug dbg, const QBrush &b) "LinearGradientPattern", "RadialGradientPattern", "ConicalGradientPattern", - 0, 0, 0, 0, 0, 0, + "", "", "", "", "", "", "TexturePattern" // 24 }; @@ -1419,6 +1419,25 @@ void QGradient::setColorAt(qreal pos, const QColor &color) m_stops.insert(index, QGradientStop(pos, color)); } +static inline bool ok(QGradientStop stop) +{ + return stop.first >= 0 && stop.first <= 1; // rejects NaNs +} + +static inline bool ok(const QGradientStops &stops) +{ + qreal lastPos = -1; + for (const QGradientStop &stop : stops) { + if (Q_UNLIKELY(!ok(stop))) + return false; + const bool sorted = stop.first > lastPos; // rejects duplicates + if (Q_UNLIKELY(!sorted)) + return false; + lastPos = stop.first; + } + return true; +} + /*! 
\fn void QGradient::setStops(const QGradientStops &stopPoints) @@ -1430,6 +1449,14 @@ void QGradient::setColorAt(qreal pos, const QColor &color) */ void QGradient::setStops(const QGradientStops &stops) { + // ## Qt 6: consider taking \a stops by value, so we can move into m_stops + if (Q_LIKELY(ok(stops))) { + // fast path for the common case: if everything is ok with the stops, just copy them + m_stops = stops; + return; + } + // otherwise, to keep the pre-5.9 behavior, add them one after another, + // so each stop is checked, invalid ones are skipped, they are added in-order (which may be O(N^2)). m_stops.clear(); for (int i=0; i<stops.size(); ++i) setColorAt(stops.at(i).first, stops.at(i).second); diff --git a/src/gui/painting/qgammatables.cpp b/src/gui/painting/qcolorprofile.cpp index 1d76f7ee3c..3b7b0a248b 100644 --- a/src/gui/painting/qgammatables.cpp +++ b/src/gui/painting/qcolorprofile.cpp @@ -37,28 +37,51 @@ ** ****************************************************************************/ -#include <private/qdrawhelper_p.h> +#include "qcolorprofile_p.h" +#include <qmath.h> QT_BEGIN_NAMESPACE +QColorProfile *QColorProfile::fromGamma(qreal gamma) +{ + QColorProfile *cp = new QColorProfile; + + for (int i = 0; i <= (255 * 16); ++i) { + cp->m_toLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), gamma) * (255 * 256))); + cp->m_fromLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), qreal(1) / gamma) * (255 * 256))); + } + + return cp; +} -QDrawHelperGammaTables::QDrawHelperGammaTables(qreal smoothing) +static qreal srgbToLinear(qreal v) { - const qreal gray_gamma = 2.31; - for (int i=0; i<256; ++i) - qt_pow_gamma[i] = uint(qRound(qPow(i / qreal(255.), gray_gamma) * 2047)); - for (int i=0; i<2048; ++i) - qt_pow_invgamma[i] = uchar(qRound(qPow(i / qreal(2047.0), 1 / gray_gamma) * 255)); + const qreal a = 0.055; + if (v <= qreal(0.04045)) + return v / qreal(12.92); + else + return qPow((v + a) / (qreal(1) + a), qreal(2.4)); +} - refresh(smoothing); +static qreal 
linearToSrgb(qreal v) +{ + const qreal a = 0.055; + if (v <= qreal(0.0031308)) + return v * qreal(12.92); + else + return (qreal(1) + a) * qPow(v, qreal(1.0 / 2.4)) - a; } -void QDrawHelperGammaTables::refresh(qreal smoothing) +QColorProfile *QColorProfile::fromSRgb() { - for (int i=0; i<256; ++i) { - qt_pow_rgb_gamma[i] = uchar(qRound(qPow(i / qreal(255.0), smoothing) * 255)); - qt_pow_rgb_invgamma[i] = uchar(qRound(qPow(i / qreal(255.), 1 / smoothing) * 255)); + QColorProfile *cp = new QColorProfile; + + for (int i = 0; i <= (255 * 16); ++i) { + cp->m_toLinear[i] = ushort(qRound(srgbToLinear(i / qreal(255 * 16)) * (255 * 256))); + cp->m_fromLinear[i] = ushort(qRound(linearToSrgb(i / qreal(255 * 16)) * (255 * 256))); } + + return cp; } QT_END_NAMESPACE diff --git a/src/gui/painting/qcolorprofile_p.h b/src/gui/painting/qcolorprofile_p.h new file mode 100644 index 0000000000..ca1786ee6d --- /dev/null +++ b/src/gui/painting/qcolorprofile_p.h @@ -0,0 +1,157 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QCOLORPROFILE_P_H +#define QCOLORPROFILE_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. 
+// + +#include <QtGui/private/qtguiglobal_p.h> +#include <QtGui/qrgb.h> +#include <QtGui/qrgba64.h> + +QT_BEGIN_NAMESPACE + +class Q_GUI_EXPORT QColorProfile +{ +public: + static QColorProfile *fromGamma(qreal gamma); + static QColorProfile *fromSRgb(); + + // The following methods all convert opaque or unpremultiplied colors: + + QRgba64 toLinear64(QRgb rgb32) const + { + ushort r = m_toLinear[qRed(rgb32) << 4]; + ushort g = m_toLinear[qGreen(rgb32) << 4]; + ushort b = m_toLinear[qBlue(rgb32) << 4]; + r = r + (r >> 8); + g = g + (g >> 8); + b = b + (b >> 8); + return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257); + } + + QRgb toLinear(QRgb rgb32) const + { + uchar r = (m_toLinear[qRed(rgb32) << 4] + 0x80) >> 8; + uchar g = (m_toLinear[qGreen(rgb32) << 4] + 0x80) >> 8; + uchar b = (m_toLinear[qBlue(rgb32) << 4] + 0x80) >> 8; + return qRgba(r, g, b, qAlpha(rgb32)); + } + + QRgba64 toLinear(QRgba64 rgb64) const + { + ushort r = rgb64.red(); + ushort g = rgb64.green(); + ushort b = rgb64.blue(); + r = r - (r >> 8); + g = g - (g >> 8); + b = b - (b >> 8); + r = m_toLinear[r >> 4]; + g = m_toLinear[g >> 4]; + b = m_toLinear[b >> 4]; + r = r + (r >> 8); + g = g + (g >> 8); + b = b + (b >> 8); + return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); + } + + QRgb fromLinear64(QRgba64 rgb64) const + { + ushort r = rgb64.red(); + ushort g = rgb64.green(); + ushort b = rgb64.blue(); + r = r - (r >> 8); + g = g - (g >> 8); + b = b - (b >> 8); + r = (m_fromLinear[r >> 4] + 0x80) >> 8; + g = (m_fromLinear[g >> 4] + 0x80) >> 8; + b = (m_fromLinear[b >> 4] + 0x80) >> 8; + return qRgba(r, g, b, rgb64.alpha8()); + } + + QRgb fromLinear(QRgb rgb32) const + { + uchar r = (m_fromLinear[qRed(rgb32) << 4] + 0x80) >> 8; + uchar g = (m_fromLinear[qGreen(rgb32) << 4] + 0x80) >> 8; + uchar b = (m_fromLinear[qBlue(rgb32) << 4] + 0x80) >> 8; + return qRgba(r, g, b, qAlpha(rgb32)); + } + + QRgba64 fromLinear(QRgba64 rgb64) const + { + ushort r = rgb64.red(); + ushort g = rgb64.green(); + 
ushort b = rgb64.blue(); + r = r - (r >> 8); + g = g - (g >> 8); + b = b - (b >> 8); + r = m_fromLinear[r >> 4]; + g = m_fromLinear[g >> 4]; + b = m_fromLinear[b >> 4]; + r = r + (r >> 8); + g = g + (g >> 8); + b = b + (b >> 8); + return QRgba64::fromRgba64(r, g, b, rgb64.alpha()); + } + +private: + QColorProfile() { } + + // We translate to 0-65280 (255*256) instead to 0-65535 to make simple + // shifting an accurate conversion. + // We translate from 0-4080 (255*16) for the same speed up, and to keep + // the tables small enough to fit in most inner caches. + ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] + ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280] + +}; + +QT_END_NAMESPACE + +#endif // QCOLORPROFILE_P_H diff --git a/src/gui/painting/qcoregraphics.mm b/src/gui/painting/qcoregraphics.mm index 3753fa4e88..98fdd7f35e 100644 --- a/src/gui/painting/qcoregraphics.mm +++ b/src/gui/painting/qcoregraphics.mm @@ -1,31 +1,37 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: http://www.qt.io/licensing/ +** Copyright (C) 2017 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. ** -** $QT_BEGIN_LICENSE:LGPL21$ +** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms -** and conditions see http://www.qt.io/terms-conditions. For further -** information use the contact form at http://www.qt.io/contact-us. +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. 
** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 or version 3 as published by the Free -** Software Foundation and appearing in the file LICENSE.LGPLv21 and -** LICENSE.LGPLv3 included in the packaging of this file. Please review the -** following information to ensure the GNU Lesser General Public License -** requirements will be met: https://www.gnu.org/licenses/lgpl.html and -** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. ** -** As a special exception, The Qt Company gives you certain additional -** rights. These rights are described in The Qt Company LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
** ** $QT_END_LICENSE$ ** @@ -39,6 +45,7 @@ #include <QtGui/private/qpaintengine_p.h> #include <QtCore/qdebug.h> #include <QtCore/qcoreapplication.h> +#include <QtCore/qoperatingsystemversion.h> QT_BEGIN_NAMESPACE @@ -106,29 +113,6 @@ QImage qt_mac_toQImage(CGImageRef image) #ifdef Q_OS_MACOS -QT_END_NAMESPACE - -@interface NSGraphicsContext (QtAdditions) - -+ (NSGraphicsContext *)qt_graphicsContextWithCGContext:(CGContextRef)graphicsPort flipped:(BOOL)initialFlippedState; - -@end - -@implementation NSGraphicsContext (QtAdditions) - -+ (NSGraphicsContext *)qt_graphicsContextWithCGContext:(CGContextRef)graphicsPort flipped:(BOOL)initialFlippedState -{ -#if QT_MAC_PLATFORM_SDK_EQUAL_OR_ABOVE(__MAC_10_10, __IPHONE_NA) - if (QT_PREPEND_NAMESPACE(QSysInfo::MacintoshVersion) >= QT_PREPEND_NAMESPACE(QSysInfo::MV_10_10)) - return [self graphicsContextWithCGContext:graphicsPort flipped:initialFlippedState]; -#endif - return [self graphicsContextWithGraphicsPort:graphicsPort flipped:initialFlippedState]; -} - -@end - -QT_BEGIN_NAMESPACE - static NSImage *qt_mac_cgimage_to_nsimage(CGImageRef image) { NSImage *newImage = [[NSImage alloc] initWithCGImage:image size:NSZeroSize]; @@ -155,7 +139,7 @@ NSImage *qt_mac_create_nsimage(const QIcon &icon, int defaultSize) QList<QSize> availableSizes = icon.availableSizes(); if (availableSizes.isEmpty() && defaultSize > 0) availableSizes << QSize(defaultSize, defaultSize); - foreach (QSize size, availableSizes) { + for (QSize size : qAsConst(availableSizes)) { QPixmap pm = icon.pixmap(size); if (pm.isNull()) continue; @@ -179,7 +163,7 @@ QPixmap qt_mac_toQPixmap(const NSImage *image, const QSizeF &size) QMacCGContext ctx(&pixmap); if (!ctx) return QPixmap(); - NSGraphicsContext *gc = [NSGraphicsContext qt_graphicsContextWithCGContext:ctx flipped:YES]; + NSGraphicsContext *gc = [NSGraphicsContext graphicsContextWithCGContext:ctx flipped:YES]; if (!gc) return QPixmap(); [NSGraphicsContext saveGraphicsState]; diff --git 
a/src/gui/painting/qcoregraphics_p.h b/src/gui/painting/qcoregraphics_p.h index 065910222d..54de3f332e 100644 --- a/src/gui/painting/qcoregraphics_p.h +++ b/src/gui/painting/qcoregraphics_p.h @@ -1,31 +1,37 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: http://www.qt.io/licensing/ +** Copyright (C) 2017 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtGui module of the Qt Toolkit. ** -** $QT_BEGIN_LICENSE:LGPL21$ +** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms -** and conditions see http://www.qt.io/terms-conditions. For further -** information use the contact form at http://www.qt.io/contact-us. +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 or version 3 as published by the Free -** Software Foundation and appearing in the file LICENSE.LGPLv21 and -** LICENSE.LGPLv3 included in the packaging of this file. Please review the -** following information to ensure the GNU Lesser General Public License -** requirements will be met: https://www.gnu.org/licenses/lgpl.html and -** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. ** -** As a special exception, The Qt Company gives you certain additional -** rights. These rights are described in The Qt Company LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
** ** $QT_END_LICENSE$ ** diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 7b3e1b991d..5c38648fe7 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -43,6 +43,7 @@ #include <qstylehints.h> #include <qguiapplication.h> #include <qatomic.h> +#include <private/qcolorprofile_p.h> #include <private/qdrawhelper_p.h> #include <private/qpaintengine_raster_p.h> #include <private/qpainter_p.h> @@ -1143,6 +1144,11 @@ static QRgba64 *QT_FASTCALL destFetch64uint32(QRgba64 *buffer, QRasterBuffer *ra return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, src, length, 0, 0)); } +static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int) +{ + return buffer; +} + static DestFetchProc destFetchProc[QImage::NImageFormats] = { 0, // Format_Invalid @@ -1175,8 +1181,8 @@ static DestFetchProc destFetchProc[QImage::NImageFormats] = static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = { 0, // Format_Invalid - destFetch64, // Format_Mono, - destFetch64, // Format_MonoLSB + 0, // Format_Mono, + 0, // Format_MonoLSB 0, // Format_Indexed8 destFetch64uint32, // Format_RGB32 destFetch64uint32, // Format_ARGB32, @@ -1320,7 +1326,7 @@ static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, con static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length) { for (int i = 0; i < length; ++i) { - dest[i] = src[i].toArgb32(); + dest[i] = toArgb32(src[i]); } } @@ -1411,7 +1417,7 @@ static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, in { uint *dest = (uint*)rasterBuffer->scanLine(y) + x; for (int i = 0; i < length; ++i) { - dest[i] = buffer[i].unpremultiplied().toArgb32(); + dest[i] = toArgb32(buffer[i].unpremultiplied()); } } @@ -1419,7 +1425,7 @@ static void QT_FASTCALL destStore64RGBA8888(QRasterBuffer *rasterBuffer, int x, { uint *dest = (uint*)rasterBuffer->scanLine(y) + x; for (int i = 0; i < 
length; ++i) { - dest[i] = ARGB2RGBA(buffer[i].unpremultiplied().toArgb32()); + dest[i] = toRgba8888(buffer[i].unpremultiplied()); } } @@ -1914,562 +1920,695 @@ inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, Q_ASSERT(v2 >= l1 && v2 <= l2); } -template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */ -static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *, - const QSpanData *data, int y, int x, - int length) -{ - int image_width = data->texture.width; - int image_height = data->texture.height; - - int image_x1 = data->texture.x1; - int image_y1 = data->texture.y1; - int image_x2 = data->texture.x2 - 1; - int image_y2 = data->texture.y2 - 1; - - const qreal cx = x + qreal(0.5); - const qreal cy = y + qreal(0.5); - - uint *end = buffer + length; - uint *b = buffer; - if (data->fast_matrix) { - // The increment pr x in the scanline - int fdx = (int)(data->m11 * fixed_scale); - int fdy = (int)(data->m12 * fixed_scale); - - int fx = int((data->m21 * cy - + data->m11 * cx + data->dx) * fixed_scale); - int fy = int((data->m22 * cy - + data->m12 * cx + data->dy) * fixed_scale); - - fx -= half_point; - fy -= half_point; - - if (fdy == 0) { //simple scale, no rotation - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); - const uint *s1 = (const uint *)data->texture.scanLine(y1); - const uint *s2 = (const uint *)data->texture.scanLine(y2); - - if (fdx <= fixed_scale && fdx > 0) { // scale up on X - int disty = (fy & 0x0000ffff) >> 8; - int idisty = 256 - disty; - int x = fx >> 16; +enum FastTransformTypes { + SimpleUpscaleTransform, + UpscaleTransform, + DownscaleTransform, + RotateTransform, + FastRotateTransform, + NFastTransformTypes +}; - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate 
between two pixel of this buffer. +typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); - // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB - // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - quint32 intermediate_buffer[2][buffer_size + 2]; - // count is the size used in the intermediate_buffer. - int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case - int f = 0; - int lim = count; - if (blendType == BlendTransformedBilinearTiled) { - x %= image_width; - if (x < 0) x += image_width; - } else { - lim = qMin(count, image_x2-x+1); - if (x < image_x1) { - Q_ASSERT(x <= image_x2); - uint t = s1[image_x1]; - uint b = s2[image_x1]; - quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; - do { - intermediate_buffer[0][f] = rb; - intermediate_buffer[1][f] = ag; - f++; - x++; - } while (x < image_x1 && f < lim); - } - } +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + + int disty = (fy & 0x0000ffff) >> 8; + int idisty = 256 - disty; + int x = fx >> 16; + int length = end - b; + + // The idea is first to do the interpolation between the row s1 and the row s2 + // into an intermediate 
buffer, then we interpolate between two pixel of this buffer. + + // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB + // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG + // +1 for the last pixel to interpolate with, and +1 for rounding errors. + quint32 intermediate_buffer[2][buffer_size + 2]; + // count is the size used in the intermediate_buffer. + int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2; + Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + int f = 0; + int lim = count; + if (blendType == BlendTransformedBilinearTiled) { + x %= image.width; + if (x < 0) x += image.width; + } else { + lim = qMin(count, image.x2 - x); + if (x < image.x1) { + Q_ASSERT(x < image.x2); + uint t = s1[image.x1]; + uint b = s2[image.x1]; + quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + do { + intermediate_buffer[0][f] = rb; + intermediate_buffer[1][f] = ag; + f++; + x++; + } while (x < image.x1 && f < lim); + } + } - if (blendType != BlendTransformedBilinearTiled) { + if (blendType != BlendTransformedBilinearTiled) { #if defined(__SSE2__) - const __m128i disty_ = _mm_set1_epi16(disty); - const __m128i idisty_ = _mm_set1_epi16(idisty); - const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - - lim -= 3; - for (; f < lim; x += 4, f += 4) { - // Load 4 pixels from s1, and split the alpha-green and red-blue component - __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x)); - __m128i topAG = _mm_srli_epi16(top, 8); - __m128i topRB = _mm_and_si128(top, colorMask); - // Multiplies each colour component by idisty - topAG = _mm_mullo_epi16 (topAG, idisty_); - topRB = _mm_mullo_epi16 (topRB, idisty_); - - // Same for the s2 vector - __m128i bottom = _mm_loadu_si128((const 
__m128i*)((const uint *)(s2)+x)); - __m128i bottomAG = _mm_srli_epi16(bottom, 8); - __m128i bottomRB = _mm_and_si128(bottom, colorMask); - bottomAG = _mm_mullo_epi16 (bottomAG, disty_); - bottomRB = _mm_mullo_epi16 (bottomRB, disty_); - - // Add the values, and shift to only keep 8 significant bits per colors - __m128i rAG =_mm_add_epi16(topAG, bottomAG); - rAG = _mm_srli_epi16(rAG, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG); - __m128i rRB =_mm_add_epi16(topRB, bottomRB); - rRB = _mm_srli_epi16(rRB, 8); - _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB); - } + const __m128i disty_ = _mm_set1_epi16(disty); + const __m128i idisty_ = _mm_set1_epi16(idisty); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + + lim -= 3; + for (; f < lim; x += 4, f += 4) { + // Load 4 pixels from s1, and split the alpha-green and red-blue component + __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x)); + __m128i topAG = _mm_srli_epi16(top, 8); + __m128i topRB = _mm_and_si128(top, colorMask); + // Multiplies each color component by idisty + topAG = _mm_mullo_epi16 (topAG, idisty_); + topRB = _mm_mullo_epi16 (topRB, idisty_); + + // Same for the s2 vector + __m128i bottom = _mm_loadu_si128((const __m128i*)((const uint *)(s2)+x)); + __m128i bottomAG = _mm_srli_epi16(bottom, 8); + __m128i bottomRB = _mm_and_si128(bottom, colorMask); + bottomAG = _mm_mullo_epi16 (bottomAG, disty_); + bottomRB = _mm_mullo_epi16 (bottomRB, disty_); + + // Add the values, and shift to only keep 8 significant bits per colors + __m128i rAG =_mm_add_epi16(topAG, bottomAG); + rAG = _mm_srli_epi16(rAG, 8); + _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG); + __m128i rRB =_mm_add_epi16(topRB, bottomRB); + rRB = _mm_srli_epi16(rRB, 8); + _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB); + } #elif defined(__ARM_NEON__) - const int16x8_t disty_ = vdupq_n_s16(disty); - const int16x8_t idisty_ = vdupq_n_s16(idisty); - const int16x8_t 
colorMask = vdupq_n_s16(0x00ff); - - lim -= 3; - for (; f < lim; x += 4, f += 4) { - // Load 4 pixels from s1, and split the alpha-green and red-blue component - int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x)); - int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8)); - int16x8_t topRB = vandq_s16(top, colorMask); - // Multiplies each colour component by idisty - topAG = vmulq_s16(topAG, idisty_); - topRB = vmulq_s16(topRB, idisty_); - - // Same for the s2 vector - int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x)); - int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8)); - int16x8_t bottomRB = vandq_s16(bottom, colorMask); - bottomAG = vmulq_s16(bottomAG, disty_); - bottomRB = vmulq_s16(bottomRB, disty_); - - // Add the values, and shift to only keep 8 significant bits per colors - int16x8_t rAG = vaddq_s16(topAG, bottomAG); - rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG); - int16x8_t rRB = vaddq_s16(topRB, bottomRB); - rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); - vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB); - } + const int16x8_t disty_ = vdupq_n_s16(disty); + const int16x8_t idisty_ = vdupq_n_s16(idisty); + const int16x8_t colorMask = vdupq_n_s16(0x00ff); + + lim -= 3; + for (; f < lim; x += 4, f += 4) { + // Load 4 pixels from s1, and split the alpha-green and red-blue component + int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x)); + int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8)); + int16x8_t topRB = vandq_s16(top, colorMask); + // Multiplies each color component by idisty + topAG = vmulq_s16(topAG, idisty_); + topRB = vmulq_s16(topRB, idisty_); + + // Same for the s2 vector + int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x)); + int16x8_t bottomAG = 
vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8)); + int16x8_t bottomRB = vandq_s16(bottom, colorMask); + bottomAG = vmulq_s16(bottomAG, disty_); + bottomRB = vmulq_s16(bottomRB, disty_); + + // Add the values, and shift to only keep 8 significant bits per colors + int16x8_t rAG = vaddq_s16(topAG, bottomAG); + rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8)); + vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG); + int16x8_t rRB = vaddq_s16(topRB, bottomRB); + rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); + vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB); + } #endif - } - for (; f < count; f++) { // Same as above but without sse2 - if (blendType == BlendTransformedBilinearTiled) { - if (x >= image_width) x -= image_width; - } else { - x = qMin(x, image_x2); - } + } + for (; f < count; f++) { // Same as above but without simd + if (blendType == BlendTransformedBilinearTiled) { + if (x >= image.width) x -= image.width; + } else { + x = qMin(x, image.x2 - 1); + } - uint t = s1[x]; - uint b = s2[x]; + uint t = s1[x]; + uint b = s2[x]; - intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; - x++; - } - // Now interpolate the values from the intermediate_buffer to get the final result. - fx &= fixed_scale - 1; - Q_ASSERT((fx >> 16) == 0); - while (b < end) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); + intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + x++; + } + // Now interpolate the values from the intermediate_buffer to get the final result. 
+ fx &= fixed_scale - 1; + Q_ASSERT((fx >> 16) == 0); + while (b < end) { + int x1 = (fx >> 16); + int x2 = x1 + 1; + Q_ASSERT(x1 >= 0); + Q_ASSERT(x2 < count); + + int distx = (fx & 0x0000ffff) >> 8; + int idistx = 256 - distx; + int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff; + int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00; + *b = rb | ag; + b++; + fx += fdx; + } +} - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff; - int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00; - *b = rb | ag; - b++; - fx += fdx; - } - } else if ((fdx < 0 && fdx > -(fixed_scale / 8)) || std::abs(data->m22) < (1./8.)) { // scale up more than 8x - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); - const uint *s1 = (const uint *)data->texture.scanLine(y1); - const uint *s2 = (const uint *)data->texture.scanLine(y2); - int disty = (fy & 0x0000ffff) >> 8; - while (b < end) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - uint tl = s1[x1]; - uint tr = s1[x2]; - uint bl = s2[x1]; - uint br = s2[x2]; - int distx = (fx & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + const int disty = (fy & 0x0000ffff) >> 8; + + if 
(blendType != BlendTransformedBilinearTiled) { + const qint64 min_fx = qint64(image.x1) * fixed_scale; + const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + if (x1 != x2) + break; + uint top = s1[x1]; + uint bot = s2[x1]; + *b = INTERPOLATE_PIXEL_256(top, 256 - disty, bot, disty); + fx += fdx; + ++b; + } + uint *boundedEnd = end; + if (fdx > 0) + boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); + else if (fdx < 0) + boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); + + // A fast middle part without boundary checks + while (b < boundedEnd) { + int x = (fx >> 16); + int distx = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty); + fx += fdx; + ++b; + } + } - fx += fdx; - ++b; - } - } else { //scale down - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); - const uint *s1 = (const uint *)data->texture.scanLine(y1); - const uint *s2 = (const uint *)data->texture.scanLine(y2); - const int disty8 = (fy & 0x0000ffff) >> 8; - const int disty4 = (disty8 + 0x08) >> 4; - - if (blendType != BlendTransformedBilinearTiled) { -#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \ - const qint64 min_fx = qint64(image_x1) * fixed_scale; \ - const qint64 max_fx = qint64(image_x2) * fixed_scale; \ - while (b < end) { \ - int x1 = (fx >> 16); \ - int x2; \ - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \ - if (x1 != x2) \ - break; \ - uint top = s1[x1]; \ - uint bot = s2[x1]; \ - *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8); \ - fx += fdx; \ - ++b; \ - } \ - uint *boundedEnd = end; \ - if (fdx > 0) \ - boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); \ - else if (fdx < 0) \ - boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); \ - boundedEnd -= 3; + while 
(b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2); + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; + int distx = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + + fx += fdx; + ++b; + } +} +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + const int disty8 = (fy & 0x0000ffff) >> 8; + const int disty4 = (disty8 + 0x08) >> 4; + + if (blendType != BlendTransformedBilinearTiled) { + const qint64 min_fx = qint64(image.x1) * fixed_scale; + const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + if (x1 != x2) + break; + uint top = s1[x1]; + uint bot = s2[x1]; + *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8); + fx += fdx; + ++b; + } + uint *boundedEnd = end; + if (fdx > 0) + boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); + else if (fdx < 0) + boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); + // A fast middle part without boundary checks #if defined(__SSE2__) - BILINEAR_DOWNSCALE_BOUNDS_PROLOG - - const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - const __m128i v_256 = _mm_set1_epi16(256); - const __m128i v_disty = _mm_set1_epi16(disty4); - const __m128i v_fdx = _mm_set1_epi32(fdx*4); - const __m128i v_fx_r = _mm_set1_epi32(0x8); - __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); - - while (b < boundedEnd) { - 
__m128i offset = _mm_srli_epi32(v_fx, 16); - const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset3 = _mm_cvtsi128_si32(offset); - const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]); - const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]); - const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]); - const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]); - - __m128i v_distx = _mm_srli_epi16(v_fx, 8); - v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4); - v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - - interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); - b += 4; - v_fx = _mm_add_epi32(v_fx, v_fdx); - } - fx = _mm_cvtsi128_si32(v_fx); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i v_256 = _mm_set1_epi16(256); + const __m128i v_disty = _mm_set1_epi16(disty4); + const __m128i v_fdx = _mm_set1_epi32(fdx*4); + const __m128i v_fx_r = _mm_set1_epi32(0x8); + __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); + + while (b < boundedEnd - 3) { + __m128i offset = _mm_srli_epi32(v_fx, 16); + const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset3 = _mm_cvtsi128_si32(offset); + const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]); + const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], 
s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]); + const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]); + const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]); + + __m128i v_distx = _mm_srli_epi16(v_fx, 8); + v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4); + v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + + interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); + b += 4; + v_fx = _mm_add_epi32(v_fx, v_fdx); + } + fx = _mm_cvtsi128_si32(v_fx); #elif defined(__ARM_NEON__) - BILINEAR_DOWNSCALE_BOUNDS_PROLOG - - const int16x8_t colorMask = vdupq_n_s16(0x00ff); - const int16x8_t invColorMask = vmvnq_s16(colorMask); - const int16x8_t v_256 = vdupq_n_s16(256); - const int16x8_t v_disty = vdupq_n_s16(disty4); - const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4); - int32x4_t v_fdx = vdupq_n_s32(fdx*4); + const int16x8_t colorMask = vdupq_n_s16(0x00ff); + const int16x8_t invColorMask = vmvnq_s16(colorMask); + const int16x8_t v_256 = vdupq_n_s16(256); + const int16x8_t v_disty = vdupq_n_s16(disty4); + const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4); + int32x4_t v_fdx = vdupq_n_s32(fdx*4); - int32x4_t v_fx = vmovq_n_s32(fx); - v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1); - v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2); - v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3); + int32x4_t v_fx = vmovq_n_s32(fx); + v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1); + v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2); + v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3); - const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); - const int32x4_t v_fx_r = vdupq_n_s32(0x0800); + const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); + const int32x4_t v_fx_r = vdupq_n_s32(0x0800); - while (b < boundedEnd) { - uint32x4x2_t v_top, v_bot; + while (b < boundedEnd - 3) { + uint32x4x2_t v_top, v_bot; - int x1 = (fx 
>> 16); - fx += fdx; - v_top = vld2q_lane_u32(s1 + x1, v_top, 0); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0); - x1 = (fx >> 16); - fx += fdx; - v_top = vld2q_lane_u32(s1 + x1, v_top, 1); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1); - x1 = (fx >> 16); - fx += fdx; - v_top = vld2q_lane_u32(s1 + x1, v_top, 2); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2); - x1 = (fx >> 16); - fx += fdx; - v_top = vld2q_lane_u32(s1 + x1, v_top, 3); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3); - - int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12); - v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); - - interpolate_4_pixels_16_neon( - vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]), - vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]), - vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, - colorMask, invColorMask, v_256, b); - b+=4; - v_fx = vaddq_s32(v_fx, v_fdx); - } + int x1 = (fx >> 16); + fx += fdx; + v_top = vld2q_lane_u32(s1 + x1, v_top, 0); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0); + x1 = (fx >> 16); + fx += fdx; + v_top = vld2q_lane_u32(s1 + x1, v_top, 1); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1); + x1 = (fx >> 16); + fx += fdx; + v_top = vld2q_lane_u32(s1 + x1, v_top, 2); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2); + x1 = (fx >> 16); + fx += fdx; + v_top = vld2q_lane_u32(s1 + x1, v_top, 3); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3); + + int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12); + v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); + + interpolate_4_pixels_16_neon( + vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]), + vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]), + vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, + colorMask, invColorMask, v_256, b); + b+=4; + v_fx = vaddq_s32(v_fx, v_fdx); + } #endif - } + while (b < boundedEnd) { + int x = (fx >> 16); +#if defined(__SSE2__) || 
defined(__ARM_NEON__) + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8); +#else + uint tl = s1[x]; + uint tr = s1[x + 1]; + uint bl = s2[x]; + uint br = s2[x + 1]; + int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); +#endif + fx += fdx; + ++b; + } + } - while (b < end) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - uint tl = s1[x1]; - uint tr = s1[x2]; - uint bl = s2[x1]; - uint br = s2[x2]; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; #if defined(__SSE2__) || defined(__ARM_NEON__) - // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. - int distx8 = (fx & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8); + // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8); #else - int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); + int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); #endif - fx += fdx; - ++b; - } - } - } else { //rotation - if (std::abs(data->m11) < (1./8.) || std::abs(data->m22) < (1./8.)) { - //if we are zooming more than 8 times, we use 8bit precision for the position. 
- while (b < end) { - int x1 = (fx >> 16); - int x2; - int y1 = (fy >> 16); - int y2; + fx += fdx; + ++b; + } +} - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int fdy) +{ + // if we are zooming more than 8 times, we use 8bit precision for the position. + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; - const uint *s1 = (const uint *)data->texture.scanLine(y1); - const uint *s2 = (const uint *)data->texture.scanLine(y2); + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); - uint tl = s1[x1]; - uint tr = s1[x2]; - uint bl = s2[x1]; - uint br = s2[x2]; + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; - fx += fdx; - fy += fdy; - ++b; - } - } else { - //we are zooming less than 8x, use 4bit precision - - if (blendType != BlendTransformedBilinearTiled) { -#define BILINEAR_ROTATE_BOUNDS_PROLOG \ - const qint64 min_fx = qint64(image_x1) * fixed_scale; \ - const qint64 max_fx = qint64(image_x2) * fixed_scale; \ - const qint64 min_fy = qint64(image_y1) * fixed_scale; \ - const qint64 max_fy = qint64(image_y2) * fixed_scale; \ - while (b < end) { \ - int x1 = (fx >> 16); \ - int x2; \ - int y1 = (fy >> 16); \ - int y2; \ - 
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \ - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); \ - if (x1 != x2 && y1 != y2) \ - break; \ - const uint *s1 = (const uint *)data->texture.scanLine(y1); \ - const uint *s2 = (const uint *)data->texture.scanLine(y2); \ - uint tl = s1[x1]; \ - uint tr = s1[x2]; \ - uint bl = s2[x1]; \ - uint br = s2[x2]; \ - int distx = (fx & 0x0000ffff) >> 8; \ - int disty = (fy & 0x0000ffff) >> 8; \ - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); \ - fx += fdx; \ - fy += fdy; \ - ++b; \ - } \ - uint *boundedEnd = end; \ - if (fdx > 0) \ - boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); \ - else if (fdx < 0) \ - boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); \ - if (fdy > 0) \ - boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy); \ - else if (fdy < 0) \ - boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy); \ - boundedEnd -= 3; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); -#if defined(__SSE2__) - BILINEAR_ROTATE_BOUNDS_PROLOG + fx += fdx; + fy += fdy; + ++b; + } +} - const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - const __m128i v_256 = _mm_set1_epi16(256); - const __m128i v_fdx = _mm_set1_epi32(fdx*4); - const __m128i v_fdy = _mm_set1_epi32(fdy*4); - const __m128i v_fxy_r = _mm_set1_epi32(0x8); - __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); - __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy); +template<TextureBlendType blendType> +static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int fdy) +{ + //we are zooming less than 8x, use 4bit precision + if (blendType != BlendTransformedBilinearTiled) { + const qint64 min_fx = qint64(image.x1) * fixed_scale; + const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale; + const qint64 
min_fy = qint64(image.y1) * fixed_scale; + const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale; + // first handle the possibly bounded part in the beginning + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); + if (x1 != x2 && y1 != y2) + break; + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; +#if defined(__SSE2__) || defined(__ARM_NEON__) + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); +#else + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); +#endif + fx += fdx; + fy += fdy; + ++b; + } + uint *boundedEnd = end; \ + if (fdx > 0) \ + boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); \ + else if (fdx < 0) \ + boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); \ + if (fdy > 0) \ + boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy); \ + else if (fdy < 0) \ + boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy); \ + + // until boundedEnd we can now have a fast middle part without boundary checks +#if defined(__SSE2__) + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i v_256 = _mm_set1_epi16(256); + const __m128i v_fdx = _mm_set1_epi32(fdx*4); + const __m128i v_fdy = _mm_set1_epi32(fdy*4); + const __m128i v_fxy_r = _mm_set1_epi32(0x8); + __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); + __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy); + + const uchar *textureData = image.imageData; + const int bytesPerLine = 
image.bytesPerLine; + const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0)); + + while (b < boundedEnd - 3) { + const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128()); + // 4x16bit * 4x16bit -> 4x32bit + __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl)); + offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16)); + const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset3 = _mm_cvtsi128_si32(offset); + const uint *topData = (const uint *)(textureData); + const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]); + const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]); + const uint *bottomData = (const uint *)(textureData + bytesPerLine); + const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]); + const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]); + + __m128i v_distx = _mm_srli_epi16(v_fx, 8); + __m128i v_disty = _mm_srli_epi16(v_fy, 8); + v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4); + v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4); + v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); + v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); + + interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); + b += 4; + v_fx = _mm_add_epi32(v_fx, v_fdx); + v_fy = _mm_add_epi32(v_fy, v_fdy); + 
} + fx = _mm_cvtsi128_si32(v_fx); + fy = _mm_cvtsi128_si32(v_fy); +#elif defined(__ARM_NEON__) + const int16x8_t colorMask = vdupq_n_s16(0x00ff); + const int16x8_t invColorMask = vmvnq_s16(colorMask); + const int16x8_t v_256 = vdupq_n_s16(256); + int32x4_t v_fdx = vdupq_n_s32(fdx * 4); + int32x4_t v_fdy = vdupq_n_s32(fdy * 4); + + const uchar *textureData = image.imageData; + const int bytesPerLine = image.bytesPerLine; + + int32x4_t v_fx = vmovq_n_s32(fx); + int32x4_t v_fy = vmovq_n_s32(fy); + v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1); + v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1); + v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2); + v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2); + v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3); + v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3); + + const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); + const int32x4_t v_round = vdupq_n_s32(0x0800); + + while (b < boundedEnd - 3) { + uint32x4x2_t v_top, v_bot; + + int x1 = (fx >> 16); + int y1 = (fy >> 16); + fx += fdx; fy += fdy; + const uchar *sl = textureData + bytesPerLine * y1; + const uint *s1 = reinterpret_cast<const uint *>(sl); + const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); + v_top = vld2q_lane_u32(s1 + x1, v_top, 0); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0); + x1 = (fx >> 16); + y1 = (fy >> 16); + fx += fdx; fy += fdy; + sl = textureData + bytesPerLine * y1; + s1 = reinterpret_cast<const uint *>(sl); + s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); + v_top = vld2q_lane_u32(s1 + x1, v_top, 1); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1); + x1 = (fx >> 16); + y1 = (fy >> 16); + fx += fdx; fy += fdy; + sl = textureData + bytesPerLine * y1; + s1 = reinterpret_cast<const uint *>(sl); + s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); + v_top = vld2q_lane_u32(s1 + x1, v_top, 2); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2); + x1 = (fx >> 16); + y1 = (fy >> 16); + fx += fdx; fy += fdy; + sl = textureData + bytesPerLine * y1; + s1 = 
reinterpret_cast<const uint *>(sl); + s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); + v_top = vld2q_lane_u32(s1 + x1, v_top, 3); + v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3); + + int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12); + int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12); + v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); + v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16)); + int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4); + + interpolate_4_pixels_16_neon( + vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]), + vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]), + vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty), + v_disty_, colorMask, invColorMask, v_256, b); + b += 4; + v_fx = vaddq_s32(v_fx, v_fdx); + v_fy = vaddq_s32(v_fy, v_fdy); + } +#endif + while (b < boundedEnd) { + int x = (fx >> 16); + int y = (fy >> 16); - const uchar *textureData = data->texture.imageData; - const int bytesPerLine = data->texture.bytesPerLine; - const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0)); + const uint *s1 = (const uint *)image.scanLine(y); + const uint *s2 = (const uint *)image.scanLine(y + 1); - while (b < boundedEnd) { - const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128()); - // 4x16bit * 4x16bit -> 4x32bit - __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl)); - offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16)); - const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); - const int offset3 = _mm_cvtsi128_si32(offset); - const uint *topData = (const uint *)(textureData); - const 
__m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]); - const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]); - const uint *bottomData = (const uint *)(textureData + bytesPerLine); - const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]); - const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]); - - __m128i v_distx = _mm_srli_epi16(v_fx, 8); - __m128i v_disty = _mm_srli_epi16(v_fy, 8); - v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4); - v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4); - v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); - v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); - - interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); - b += 4; - v_fx = _mm_add_epi32(v_fx, v_fdx); - v_fy = _mm_add_epi32(v_fy, v_fdy); - } - fx = _mm_cvtsi128_si32(v_fx); - fy = _mm_cvtsi128_si32(v_fy); -#elif defined(__ARM_NEON__) - BILINEAR_ROTATE_BOUNDS_PROLOG +#if defined(__SSE2__) || defined(__ARM_NEON__) + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty); +#else + uint tl = s1[x]; + uint tr = s1[x + 1]; + uint bl = s2[x]; + uint br = s2[x + 1]; + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); +#endif - const int16x8_t colorMask = vdupq_n_s16(0x00ff); - const int16x8_t invColorMask = vmvnq_s16(colorMask); - const int16x8_t v_256 = vdupq_n_s16(256); - int32x4_t v_fdx = vdupq_n_s32(fdx * 4); - int32x4_t v_fdy = 
vdupq_n_s32(fdy * 4); + fx += fdx; + fy += fdy; + ++b; + } + } - const uchar *textureData = data->texture.imageData; - const int bytesPerLine = data->texture.bytesPerLine; + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; - int32x4_t v_fx = vmovq_n_s32(fx); - int32x4_t v_fy = vmovq_n_s32(fy); - v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1); - v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1); - v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2); - v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2); - v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3); - v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3); + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); - const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); - const int32x4_t v_round = vdupq_n_s32(0x0800); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); - while (b < boundedEnd) { - uint32x4x2_t v_top, v_bot; + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; - int x1 = (fx >> 16); - int y1 = (fy >> 16); - fx += fdx; fy += fdy; - const uchar *sl = textureData + bytesPerLine * y1; - const uint *s1 = reinterpret_cast<const uint *>(sl); - const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); - v_top = vld2q_lane_u32(s1 + x1, v_top, 0); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0); - x1 = (fx >> 16); - y1 = (fy >> 16); - fx += fdx; fy += fdy; - sl = textureData + bytesPerLine * y1; - s1 = reinterpret_cast<const uint *>(sl); - s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); - v_top = vld2q_lane_u32(s1 + x1, v_top, 1); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1); - x1 = (fx >> 16); - y1 = (fy >> 16); - fx += fdx; fy += fdy; - sl = textureData + bytesPerLine * y1; - s1 = reinterpret_cast<const uint *>(sl); - s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); - v_top = 
vld2q_lane_u32(s1 + x1, v_top, 2); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2); - x1 = (fx >> 16); - y1 = (fy >> 16); - fx += fdx; fy += fdy; - sl = textureData + bytesPerLine * y1; - s1 = reinterpret_cast<const uint *>(sl); - s2 = reinterpret_cast<const uint *>(sl + bytesPerLine); - v_top = vld2q_lane_u32(s1 + x1, v_top, 3); - v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3); - - int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12); - int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12); - v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); - v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16)); - int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4); - - interpolate_4_pixels_16_neon( - vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]), - vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]), - vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty), - v_disty_, colorMask, invColorMask, v_256, b); - b += 4; - v_fx = vaddq_s32(v_fx, v_fdx); - v_fy = vaddq_s32(v_fy, v_fdy); - } +#if defined(__SSE2__) || defined(__ARM_NEON__) + // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. 
+ int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); +#else + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); #endif - } - while (b < end) { - int x1 = (fx >> 16); - int x2; - int y1 = (fy >> 16); - int y2; + fx += fdx; + fy += fdy; + ++b; + } +} - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); - const uint *s1 = (const uint *)data->texture.scanLine(y1); - const uint *s2 = (const uint *)data->texture.scanLine(y2); +static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = { + { + fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>, + fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear> + }, + { + fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>, + fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled> + } +}; - uint tl = s1[x1]; - uint tr = s1[x2]; - uint bl = s2[x1]; - uint br = s2[x2]; +template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */ +static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *, + const QSpanData 
*data, int y, int x, + int length) +{ + const qreal cx = x + qreal(0.5); + const qreal cy = y + qreal(0.5); + Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0; -#if defined(__SSE2__) || defined(__ARM_NEON__) - // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); -#else - int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; - int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); -#endif + uint *end = buffer + length; + uint *b = buffer; + if (data->fast_matrix) { + // The increment pr x in the scanline + int fdx = (int)(data->m11 * fixed_scale); + int fdy = (int)(data->m12 * fixed_scale); - fx += fdx; - fy += fdy; - ++b; - } + int fx = int((data->m21 * cy + + data->m11 * cx + data->dx) * fixed_scale); + int fy = int((data->m22 * cy + + data->m12 * cx + data->dy) * fixed_scale); + + fx -= half_point; + fy -= half_point; + + if (fdy == 0) { // simple scale, no rotation or shear + if (fdx <= fixed_scale && fdx > 0) { + // simple scale up on X without mirroring + bilinearFastTransformHelperARGB32PM[tiled][SimpleUpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + } else if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { + // scale up more than 8x (on either Y or on X mirrored) + bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + } else { + // scale down on X (or up on X mirrored less than 8x) + bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy); + } + } else { // rotation or shear + if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) { + // if we are zooming more than 8 times, we use 8bit precision for the position. 
+ bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy); + } else { + // we are zooming less than 8x, use 4bit precision + bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy); } } } else { + const QTextureData &image = data->texture; + const qreal fdx = data->m11; const qreal fdy = data->m12; const qreal fdw = data->m13; @@ -2491,8 +2630,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c int distx = int((px - x1) * 256); int disty = int((py - y1) * 256); - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); + fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2); const uint *s1 = (const uint *)data->texture.scanLine(y1); const uint *s2 = (const uint *)data->texture.scanLine(y2); @@ -2674,7 +2813,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); - if ((fdx < 0 && fdx > -(fixed_scale / 8)) || std::abs(data->m22) < (1./8.)) { // scale up more than 8x + if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x int disty = (fy & 0x0000ffff) >> 8; for (int i = 0; i < len; ++i) { int distx = (fracX & 0x0000ffff) >> 8; @@ -2726,7 +2865,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); - if (std::abs(data->m11) < (1./8.) || std::abs(data->m22) < (1./8.)) { + if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) 
) { //if we are zooming more than 8 times, we use 8bit precision for the position. for (int i = 0; i < len; ++i) { int distx = (fracX & 0x0000ffff) >> 8; @@ -3603,27 +3742,23 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in op.destFetch = destFetchProc[data->rasterBuffer->format]; op.destFetch64 = destFetchProc64[data->rasterBuffer->format]; - if (op.mode == QPainter::CompositionMode_Source) { - switch (data->rasterBuffer->format) { - case QImage::Format_RGB32: - case QImage::Format_ARGB32_Premultiplied: - // don't clear destFetch as it sets up the pointer correctly to save one copy - break; - default: { - if (data->type == QSpanData::Texture && data->texture.const_alpha != 256) + if (op.mode == QPainter::CompositionMode_Source && + (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) { + const QSpan *lastSpan = spans + spanCount; + bool alphaSpans = false; + while (spans < lastSpan) { + if (spans->coverage != 255) { + alphaSpans = true; break; - const QSpan *lastSpan = spans + spanCount; - bool alphaSpans = false; - while (spans < lastSpan) { - if (spans->coverage != 255) { - alphaSpans = true; - break; - } - ++spans; } - if (!alphaSpans) - op.destFetch = 0; + ++spans; } + if (!alphaSpans) { + // If all spans are opaque we do not need to fetch dest. + // But don't clear passthrough destFetch as they are just as fast and save destStore. 
+ if (op.destFetch != destFetchARGB32P) + op.destFetch = 0; + op.destFetch64 = destFetch64Undefined; } } @@ -5191,6 +5326,8 @@ void qBlendTexture(int count, const QSpan *spans, void *userData) case QImage::Format_RGB16: proc = processTextureSpansRGB16[blendType]; break; + case QImage::Format_ARGB32: + case QImage::Format_RGBA8888: case QImage::Format_BGR30: case QImage::Format_A2BGR30_Premultiplied: case QImage::Format_RGB30: @@ -5403,134 +5540,200 @@ inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer, map, mapWidth, mapHeight, mapStride); } -static void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer, +static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorProfile *colorProfile) +{ + if (coverage == 0) { + // nothing + } else if (coverage == 255) { + dest[x] = src; + } else { + QRgba64 dstColor = dest[x]; + if (colorProfile) { + if (dstColor.isOpaque()) + dstColor = colorProfile->toLinear(dstColor); + else if (!dstColor.isTransparent()) + dstColor = colorProfile->toLinear(dstColor.unpremultiplied()).premultiplied(); + } + + dstColor = interpolate255(srcLinear, coverage, dstColor, 255 - coverage); + if (colorProfile) { + if (dstColor.isOpaque()) + dstColor = colorProfile->fromLinear(dstColor); + else if (!dstColor.isTransparent()) + dstColor = colorProfile->fromLinear(dstColor.unpremultiplied()).premultiplied(); + } + dest[x] = dstColor; + } +} + +static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *map, int mapWidth, int mapHeight, int mapStride, - const QClipData *) + const QClipData *clip, bool useGammaCorrection) { - const quint16 c = color.toRgb16(); - quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; - const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); + if (color.isTransparent()) + return; - while (mapHeight--) { - for (int i = 0; i < mapWidth; ++i) { - 
const int coverage = map[i]; + const QColorProfile *colorProfile = nullptr; - if (coverage == 0) { - // nothing - } else if (coverage == 255) { - dest[i] = c; - } else { - int ialpha = 255 - coverage; - dest[i] = BYTE_MUL_RGB16(c, coverage) - + BYTE_MUL_RGB16(dest[i], ialpha); + if (useGammaCorrection) + colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text(); + + QRgba64 srcColor = color; + if (colorProfile) { + if (color.isOpaque()) + srcColor = colorProfile->toLinear(srcColor); + else + srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); + } + + quint64 buffer[buffer_size]; + const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; + const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; + + if (!clip) { + for (int ly = 0; ly < mapHeight; ++ly) { + int i = x; + int length = mapWidth; + while (length > 0) { + int l = qMin(buffer_size, length); + QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); + for (int j=0; j < l; ++j) { + const int coverage = map[j + (i - x)]; + alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile); + } + destStore64(rasterBuffer, i, y + ly, dest, l); + length -= l; + i += l; } + map += mapStride; } - dest += destStride; - map += mapStride; - } -} + } else { + int bottom = qMin(y + mapHeight, rasterBuffer->height()); -static inline void rgbBlendPixel(quint32 *dst, int coverage, int sr, int sg, int sb, const uchar *gamma, const uchar *invgamma) -{ - // Do a gray alphablend... 
- int da = qAlpha(*dst); - int dr = qRed(*dst); - int dg = qGreen(*dst); - int db = qBlue(*dst); + int top = qMax(y, 0); + map += (top - y) * mapStride; + + const_cast<QClipData *>(clip)->initialize(); + for (int yp = top; yp<bottom; ++yp) { + const QClipData::ClipLine &line = clip->m_clipLines[yp]; - if (da != 255 - ) { + for (int i=0; i<line.count; ++i) { + const QSpan &clip = line.spans[i]; - int a = qGray(coverage); - sr = qt_div_255(invgamma[sr] * a); - sg = qt_div_255(invgamma[sg] * a); - sb = qt_div_255(invgamma[sb] * a); + int start = qMax<int>(x, clip.x); + int end = qMin<int>(x + mapWidth, clip.x + clip.len); + Q_ASSERT(clip.len <= buffer_size); + QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, clip.len); - int ia = 255 - a; - dr = qt_div_255(dr * ia); - dg = qt_div_255(dg * ia); - db = qt_div_255(db * ia); + for (int xp=start; xp<end; ++xp) { + const int coverage = map[xp - x]; + alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile); + } + destStore64(rasterBuffer, start, clip.y, dest, clip.len); + } // for (i -> line.count) + map += mapStride; + } // for (yp -> bottom) + } +} - *dst = ((a + qt_div_255((255 - a) * da)) << 24) - | ((sr + dr) << 16) - | ((sg + dg) << 8) - | ((sb + db)); +static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor) +{ + if (coverage == 0) { + // nothing + } else if (coverage == 255) { + dest[x] = srcColor; + } else { + dest[x] = BYTE_MUL_RGB16(srcColor, coverage) + + BYTE_MUL_RGB16(dest[x], 255 - coverage); + } +} + +void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer, + int x, int y, const QRgba64 &color, + const uchar *map, + int mapWidth, int mapHeight, int mapStride, + const QClipData *clip, bool useGammaCorrection) +{ + if (useGammaCorrection) { + qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection); return; } - int mr = qRed(coverage); - int mg = 
qGreen(coverage); - int mb = qBlue(coverage); + const quint16 c = color.toRgb16(); - dr = gamma[dr]; - dg = gamma[dg]; - db = gamma[db]; + if (!clip) { + quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; + const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); + while (mapHeight--) { + for (int i = 0; i < mapWidth; ++i) + alphamapblend_quint16(map[i], dest, i, c); + dest += destStride; + map += mapStride; + } + } else { + int top = qMax(y, 0); + int bottom = qMin(y + mapHeight, rasterBuffer->height()); + map += (top - y) * mapStride; - int nr = qt_div_255(sr * mr + dr * (255 - mr)); - int ng = qt_div_255(sg * mg + dg * (255 - mg)); - int nb = qt_div_255(sb * mb + db * (255 - mb)); + const_cast<QClipData *>(clip)->initialize(); + for (int yp = top; yp<bottom; ++yp) { + const QClipData::ClipLine &line = clip->m_clipLines[yp]; - nr = invgamma[nr]; - ng = invgamma[ng]; - nb = invgamma[nb]; + quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(yp)); - *dst = qRgb(nr, ng, nb); -} + for (int i=0; i<line.count; ++i) { + const QSpan &clip = line.spans[i]; + + int start = qMax<int>(x, clip.x); + int end = qMin<int>(x + mapWidth, clip.x + clip.len); -#if defined(Q_OS_WIN) -Q_GUI_EXPORT bool qt_needs_a8_gamma_correction = false; + for (int xp=start; xp<end; ++xp) + alphamapblend_quint16(map[xp - x], dest, xp, c); + } // for (i -> line.count) + map += mapStride; + } // for (yp -> bottom) + } +} -static inline void grayBlendPixel(quint32 *dst, int coverage, int sr, int sg, int sb, const uint *gamma, const uchar *invgamma) +static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorProfile *colorProfile, bool useGammaCorrection) { - // Do a gammacorrected gray alphablend... - int dr = qRed(*dst); - int dg = qGreen(*dst); - int db = qBlue(*dst); + // Do a gammacorrected RGB alphablend... + const QRgba64 dlinear = useGammaCorrection ? 
colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst); - dr = gamma[dr]; - dg = gamma[dg]; - db = gamma[db]; + QRgba64 blend = rgbBlend(dlinear, slinear, coverage); - int alpha = coverage; - int ialpha = 255 - alpha; - int nr = qt_div_255(sr * alpha + dr * ialpha); - int ng = qt_div_255(sg * alpha + dg * ialpha); - int nb = qt_div_255(sb * alpha + db * ialpha); + *dst = useGammaCorrection ? colorProfile->fromLinear64(blend) : toArgb32(blend); +} + +static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorProfile *colorProfile) +{ + // Do a gammacorrected gray alphablend... + const QRgba64 dlinear = colorProfile->toLinear64(*dst); - nr = invgamma[nr]; - ng = invgamma[ng]; - nb = invgamma[nb]; + QRgba64 blend = interpolate255(slinear, coverage, dlinear, 255 - coverage); - *dst = qRgb(nr, ng, nb); + *dst = colorProfile->fromLinear64(blend); } -#endif static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer, int x, int y, quint32 color, const uchar *map, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip) + const QClipData *clip, bool useGammaCorrection) { const quint32 c = color; const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); -#if defined(Q_OS_WIN) - const QDrawHelperGammaTables *tables = QGuiApplicationPrivate::instance()->gammaTables(); - if (!tables) + const QColorProfile *colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text(); + if (!colorProfile) return; - const uint *gamma = tables->qt_pow_gamma; - const uchar *invgamma = tables->qt_pow_invgamma; - - int sr = gamma[qRed(color)]; - int sg = gamma[qGreen(color)]; - int sb = gamma[qBlue(color)]; + const QRgba64 slinear = colorProfile->toLinear64(c); bool opaque_src = (qAlpha(color) == 255); - bool doGrayBlendPixel = opaque_src && qt_needs_a8_gamma_correction; -#endif + bool doGrayBlendPixel = opaque_src && useGammaCorrection; if (!clip) { quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + 
x; @@ -5543,13 +5746,9 @@ static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer, } else if (coverage == 255) { dest[i] = c; } else { -#if defined(Q_OS_WIN) - if (QSysInfo::WindowsVersion >= QSysInfo::WV_XP && doGrayBlendPixel - && qAlpha(dest[i]) == 255) { - grayBlendPixel(dest+i, coverage, sr, sg, sb, gamma, invgamma); - } else -#endif - { + if (doGrayBlendPixel && qAlpha(dest[i]) == 255) { + grayBlendPixel(dest+i, coverage, slinear, colorProfile); + } else { int ialpha = 255 - coverage; dest[i] = INTERPOLATE_PIXEL_255(c, coverage, dest[i], ialpha); } @@ -5584,13 +5783,9 @@ static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer, } else if (coverage == 255) { dest[xp] = c; } else { -#if defined(Q_OS_WIN) - if (QSysInfo::WindowsVersion >= QSysInfo::WV_XP && doGrayBlendPixel - && qAlpha(dest[xp]) == 255) { - grayBlendPixel(dest+xp, coverage, sr, sg, sb, gamma, invgamma); - } else -#endif - { + if (doGrayBlendPixel && qAlpha(dest[xp]) == 255) { + grayBlendPixel(dest+xp, coverage, slinear, colorProfile); + } else { int ialpha = 255 - coverage; dest[xp] = INTERPOLATE_PIXEL_255(c, coverage, dest[xp], ialpha); } @@ -5608,9 +5803,9 @@ static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *map, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip) + const QClipData *clip, bool useGammaCorrection) { - qt_alphamapblit_uint32(rasterBuffer, x, y, color.toArgb32(), map, mapWidth, mapHeight, mapStride, clip); + qt_alphamapblit_uint32(rasterBuffer, x, y, color.toArgb32(), map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection); } #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN @@ -5618,38 +5813,132 @@ static void qt_alphamapblit_rgba8888(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *map, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip) + const QClipData *clip, bool useGammaCorrection) { - qt_alphamapblit_uint32(rasterBuffer, x, y, 
ARGB2RGBA(color.toArgb32()), map, mapWidth, mapHeight, mapStride, clip); + qt_alphamapblit_uint32(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()), map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection); } #endif -static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, - int x, int y, const QRgba64 &color, - const uint *src, int mapWidth, int mapHeight, int srcStride, - const QClipData *clip) +static inline int qRgbAvg(QRgb rgb) { - const quint32 c = color.toArgb32(); + return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16; +} - int sr = qRed(c); - int sg = qGreen(c); - int sb = qBlue(c); - int sa = qAlpha(c); +static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorProfile *colorProfile) +{ + if (coverage == 0xff000000) { + // nothing + } else if (coverage == 0xffffffff) { + dest[x] = src; + } else { + QRgba64 dstColor = dest[x]; + if (dstColor.isOpaque()) { + if (colorProfile) + dstColor = colorProfile->toLinear(dstColor); + dstColor = rgbBlend(dstColor, srcLinear, coverage); + if (colorProfile) + dstColor = colorProfile->fromLinear(dstColor); + dest[x] = dstColor; + } else { + // Give up and do a gray alphablend. 
+ if (colorProfile && !dstColor.isTransparent()) + dstColor = colorProfile->toLinear(dstColor.unpremultiplied()).premultiplied(); + const int a = qRgbAvg(coverage); + dstColor = interpolate255(srcLinear, coverage, dstColor, 255 - a); + if (colorProfile && !dstColor.isTransparent()) + dstColor = colorProfile->fromLinear(dstColor.unpremultiplied()).premultiplied(); + dest[x] = dstColor; + } + } +} - const QDrawHelperGammaTables *tables = QGuiApplicationPrivate::instance()->gammaTables(); - if (!tables) +static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, + int x, int y, const QRgba64 &color, + const uint *src, int mapWidth, int mapHeight, int srcStride, + const QClipData *clip, bool useGammaCorrection) +{ + if (color.isTransparent()) return; - const uchar *gamma = tables->qt_pow_rgb_gamma; - const uchar *invgamma = tables->qt_pow_rgb_invgamma; + const QColorProfile *colorProfile = nullptr; + + if (useGammaCorrection) + colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text(); + + QRgba64 srcColor = color; + if (colorProfile) { + if (color.isOpaque()) + srcColor = colorProfile->toLinear(srcColor); + else + srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied(); + } + + quint64 buffer[buffer_size]; + const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format]; + const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format]; + + if (!clip) { + for (int ly = 0; ly < mapHeight; ++ly) { + int i = x; + int length = mapWidth; + while (length > 0) { + int l = qMin(buffer_size, length); + QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l); + for (int j=0; j < l; ++j) { + const uint coverage = src[j + (i - x)]; + alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile); + } + destStore64(rasterBuffer, i, y + ly, dest, l); + length -= l; + i += l; + } + src += srcStride; + } + } else { + int bottom = qMin(y + mapHeight, rasterBuffer->height()); + + int top = 
qMax(y, 0); + src += (top - y) * srcStride; + + const_cast<QClipData *>(clip)->initialize(); + for (int yp = top; yp<bottom; ++yp) { + const QClipData::ClipLine &line = clip->m_clipLines[yp]; + + for (int i=0; i<line.count; ++i) { + const QSpan &clip = line.spans[i]; + + int start = qMax<int>(x, clip.x); + int end = qMin<int>(x + mapWidth, clip.x + clip.len); + Q_ASSERT(clip.len <= buffer_size); + QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, clip.len); + + for (int xp=start; xp<end; ++xp) { + const uint coverage = src[xp - x]; + alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile); + } + destStore64(rasterBuffer, start, clip.y, dest, clip.len); + } // for (i -> line.count) + src += srcStride; + } // for (yp -> bottom) + } +} + +static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, + int x, int y, const QRgba64 &color, + const uint *src, int mapWidth, int mapHeight, int srcStride, + const QClipData *clip, bool useGammaCorrection) +{ + if (color.isTransparent()) + return; - sr = gamma[sr]; - sg = gamma[sg]; - sb = gamma[sb]; + const quint32 c = color.toArgb32(); - if (sa == 0) + const QColorProfile *colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text(); + if (!colorProfile) return; + const QRgba64 slinear = useGammaCorrection ? colorProfile->toLinear64(c) : color; + if (!clip) { quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); @@ -5659,7 +5948,16 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, if (coverage == 0xffffffff) { dst[i] = c; } else if (coverage != 0xff000000) { - rgbBlendPixel(dst+i, coverage, sr, sg, sb, gamma, invgamma); + if (dst[i] >= 0xff000000) { + rgbBlendPixel(dst+i, coverage, slinear, colorProfile, useGammaCorrection); + } else { + // Give up and do a gray blend. 
+ const int a = qRgbAvg(coverage); + if (useGammaCorrection) + grayBlendPixel(dst+i, a, slinear, colorProfile); + else + dst[i] = INTERPOLATE_PIXEL_255(c, a, dst[i], 255 - a); + } } } @@ -5689,7 +5987,16 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, if (coverage == 0xffffffff) { dst[xp] = c; } else if (coverage != 0xff000000) { - rgbBlendPixel(dst+xp, coverage, sr, sg, sb, gamma, invgamma); + if (dst[xp] >= 0xff000000) { + rgbBlendPixel(dst+xp, coverage, slinear, colorProfile, useGammaCorrection); + } else { + // Give up and do a gray blend. + const int a = qRgbAvg(coverage); + if (useGammaCorrection) + grayBlendPixel(dst+xp, a, slinear, colorProfile); + else + dst[xp] = INTERPOLATE_PIXEL_255(c, a, dst[xp], 255 - coverage); + } } } } // for (i -> line.count) @@ -5822,56 +6129,80 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = qt_gradient_quint16, qt_bitmapblit_quint16, qt_alphamapblit_quint16, - 0, + qt_alphargbblit_generic, qt_rectfill_quint16 }, // Format_ARGB8565_Premultiplied { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_RGB666 { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_ARGB6666_Premultiplied { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_RGB555 { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_ARGB8555_Premultiplied { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_RGB888 { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_RGB444 { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, 
// Format_ARGB4444_Premultiplied { blend_color_generic, blend_src_generic, - 0, 0, 0, 0 + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, + 0 }, // Format_RGBX8888 { @@ -5881,9 +6212,9 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN qt_alphamapblit_rgba8888, #else - 0, + qt_alphamapblit_generic, #endif - 0, + qt_alphargbblit_generic, qt_rectfill_rgba }, // Format_RGBA8888 @@ -5894,9 +6225,9 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN qt_alphamapblit_rgba8888, #else - 0, + qt_alphamapblit_generic, #endif - 0, + qt_alphargbblit_generic, qt_rectfill_nonpremul_rgba }, // Format_RGB8888_Premultiplied @@ -5907,9 +6238,9 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN qt_alphamapblit_rgba8888, #else - 0, + qt_alphamapblit_generic, #endif - 0, + qt_alphargbblit_generic, qt_rectfill_rgba }, // Format_BGR30 @@ -5917,8 +6248,8 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = blend_color_generic_rgb64, blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderBGR>, - 0, - 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, qt_rectfill_rgb30<PixelOrderBGR> }, // Format_A2BGR30_Premultiplied @@ -5926,8 +6257,8 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = blend_color_generic_rgb64, blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderBGR>, - 0, - 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, qt_rectfill_rgb30<PixelOrderBGR> }, // Format_RGB30 @@ -5935,8 +6266,8 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = blend_color_generic_rgb64, blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderRGB>, - 0, - 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, qt_rectfill_rgb30<PixelOrderRGB> }, // Format_A2RGB30_Premultiplied @@ -5944,22 +6275,26 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = blend_color_generic_rgb64, blend_src_generic_rgb64, qt_bitmapblit_rgb30<PixelOrderRGB>, - 0, - 0, + qt_alphamapblit_generic, + 
qt_alphargbblit_generic, qt_rectfill_rgb30<PixelOrderRGB> }, // Format_Alpha8 { blend_color_generic, blend_src_generic, - 0, 0, 0, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, qt_rectfill_alpha }, // Format_Grayscale8 { blend_color_generic, blend_src_generic, - 0, 0, 0, + 0, + qt_alphamapblit_generic, + qt_alphargbblit_generic, qt_rectfill_gray }, }; @@ -6117,20 +6452,18 @@ static void qInitDrawhelperFunctions() #if defined(QT_COMPILER_SUPPORTS_SSE4_1) if (qCpuHasFeature(SSE4_1)) { -#if !defined(__SSE4_1__) extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); - qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; -#endif extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; @@ -6141,14 +6474,6 @@ static void qInitDrawhelperFunctions() #if defined(QT_COMPILER_SUPPORTS_AVX2) if 
(qCpuHasFeature(AVX2)) { -#if !defined(__AVX2__) - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2; - qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2; -#endif extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha); @@ -6170,6 +6495,17 @@ static void qInitDrawhelperFunctions() qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2; qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2; qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2; + + extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/); + extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/); + extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int fdy); + + bilinearFastTransformHelperARGB32PM[0][SimpleUpscaleTransform] = fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2; + bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2; + bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2; } #endif @@ -6198,6 +6534,15 @@ static void qInitDrawhelperFunctions() 
sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon; +#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN + extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon; +#endif + #if defined(ENABLE_PIXMAN_DRAWHELPERS) // The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon; @@ -6215,8 +6560,8 @@ static void qInitDrawhelperFunctions() destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; - qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; - qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; + qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon; + qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon; #endif #endif // defined(__ARM_NEON__) diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp index 9c1335298e..a7e03a7bb3 100644 --- a/src/gui/painting/qdrawhelper_avx2.cpp +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -44,18 +44,12 @@ QT_BEGIN_NAMESPACE -// Autovectorized premultiply functions: -const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *) -{ - return qt_convertARGB32ToARGB32PM(buffer, src, count); -} +static Q_CONSTEXPR int BufferSize = 2048; -const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint 
*src, int count, - const QVector<QRgb> *, QDitherInfo *) -{ - return qt_convertRGBA8888ToARGB32PM(buffer, src, count); -} +enum { + FixedScale = 1 << 16, + HalfPoint = 1 << 15 +}; // Vectorized blend functions: @@ -356,6 +350,413 @@ void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, u } } +#define interpolate_4_pixels_16_avx2(tlr1, tlr2, blr1, blr2, distx, disty, colorMask, v_256, b) \ +{ \ + /* Correct for later unpack */ \ + const __m256i vdistx = _mm256_permute4x64_epi64(distx, _MM_SHUFFLE(3, 1, 2, 0)); \ + const __m256i vdisty = _mm256_permute4x64_epi64(disty, _MM_SHUFFLE(3, 1, 2, 0)); \ + \ + __m256i dxdy = _mm256_mullo_epi16 (vdistx, vdisty); \ + const __m256i distx_ = _mm256_slli_epi16(vdistx, 4); \ + const __m256i disty_ = _mm256_slli_epi16(vdisty, 4); \ + __m256i idxidy = _mm256_add_epi16(dxdy, _mm256_sub_epi16(v_256, _mm256_add_epi16(distx_, disty_))); \ + __m256i dxidy = _mm256_sub_epi16(distx_, dxdy); \ + __m256i idxdy = _mm256_sub_epi16(disty_, dxdy); \ + \ + __m256i tlr1AG = _mm256_srli_epi16(tlr1, 8); \ + __m256i tlr1RB = _mm256_and_si256(tlr1, colorMask); \ + __m256i tlr2AG = _mm256_srli_epi16(tlr2, 8); \ + __m256i tlr2RB = _mm256_and_si256(tlr2, colorMask); \ + __m256i blr1AG = _mm256_srli_epi16(blr1, 8); \ + __m256i blr1RB = _mm256_and_si256(blr1, colorMask); \ + __m256i blr2AG = _mm256_srli_epi16(blr2, 8); \ + __m256i blr2RB = _mm256_and_si256(blr2, colorMask); \ + \ + __m256i odxidy1 = _mm256_unpacklo_epi32(idxidy, dxidy); \ + __m256i odxidy2 = _mm256_unpackhi_epi32(idxidy, dxidy); \ + tlr1AG = _mm256_mullo_epi16(tlr1AG, odxidy1); \ + tlr1RB = _mm256_mullo_epi16(tlr1RB, odxidy1); \ + tlr2AG = _mm256_mullo_epi16(tlr2AG, odxidy2); \ + tlr2RB = _mm256_mullo_epi16(tlr2RB, odxidy2); \ + __m256i odxdy1 = _mm256_unpacklo_epi32(idxdy, dxdy); \ + __m256i odxdy2 = _mm256_unpackhi_epi32(idxdy, dxdy); \ + blr1AG = _mm256_mullo_epi16(blr1AG, odxdy1); \ + blr1RB = _mm256_mullo_epi16(blr1RB, odxdy1); \ + blr2AG = 
_mm256_mullo_epi16(blr2AG, odxdy2); \ + blr2RB = _mm256_mullo_epi16(blr2RB, odxdy2); \ + \ + /* Add the values, and shift to only keep 8 significant bits per colors */ \ + __m256i topAG = _mm256_hadd_epi32(tlr1AG, tlr2AG); \ + __m256i topRB = _mm256_hadd_epi32(tlr1RB, tlr2RB); \ + __m256i botAG = _mm256_hadd_epi32(blr1AG, blr2AG); \ + __m256i botRB = _mm256_hadd_epi32(blr1RB, blr2RB); \ + __m256i rAG = _mm256_add_epi16(topAG, botAG); \ + __m256i rRB = _mm256_add_epi16(topRB, botRB); \ + rRB = _mm256_srli_epi16(rRB, 8); \ + /* Correct for hadd */ \ + rAG = _mm256_permute4x64_epi64(rAG, _MM_SHUFFLE(3, 1, 2, 0)); \ + rRB = _mm256_permute4x64_epi64(rRB, _MM_SHUFFLE(3, 1, 2, 0)); \ + _mm256_storeu_si256((__m256i*)(b), _mm256_blendv_epi8(rAG, rRB, colorMask)); \ +} + +inline void fetchTransformedBilinear_pixelBounds(int, int l1, int l2, int &v1, int &v2) +{ + if (v1 < l1) + v2 = v1 = l1; + else if (v1 >= l2) + v2 = v1 = l2; + else + v2 = v1 + 1; + Q_ASSERT(v1 >= l1 && v1 <= l2); + Q_ASSERT(v2 >= l1 && v2 <= l2); +} + +void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + + int disty = (fy & 0x0000ffff) >> 8; + int idisty = 256 - disty; + int x = fx >> 16; + int length = end - b; + + // The idea is first to do the interpolation between the row s1 and the row s2 + // into an intermediate buffer, then we interpolate between two pixel of this buffer. + + // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB + // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG + // +1 for the last pixel to interpolate with, and +1 for rounding errors. 
+ quint32 intermediate_buffer[2][BufferSize + 2]; + // count is the size used in the intermediate_buffer. + int count = (qint64(length) * fdx + FixedScale - 1) / FixedScale + 2; + Q_ASSERT(count <= BufferSize + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + int f = 0; + int lim = qMin(count, image.x2 - x); + if (x < image.x1) { + Q_ASSERT(x < image.x2); + uint t = s1[image.x1]; + uint b = s2[image.x1]; + quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + do { + intermediate_buffer[0][f] = rb; + intermediate_buffer[1][f] = ag; + f++; + x++; + } while (x < image.x1 && f < lim); + } + + const __m256i disty_ = _mm256_set1_epi16(disty); + const __m256i idisty_ = _mm256_set1_epi16(idisty); + const __m256i colorMask = _mm256_set1_epi32(0x00ff00ff); + + lim -= 7; + for (; f < lim; x += 8, f += 8) { + // Load 8 pixels from s1, and split the alpha-green and red-blue component + __m256i top = _mm256_loadu_si256((const __m256i*)((const uint *)(s1)+x)); + __m256i topAG = _mm256_srli_epi16(top, 8); + __m256i topRB = _mm256_and_si256(top, colorMask); + // Multiplies each color component by idisty + topAG = _mm256_mullo_epi16 (topAG, idisty_); + topRB = _mm256_mullo_epi16 (topRB, idisty_); + + // Same for the s2 vector + __m256i bottom = _mm256_loadu_si256((const __m256i*)((const uint *)(s2)+x)); + __m256i bottomAG = _mm256_srli_epi16(bottom, 8); + __m256i bottomRB = _mm256_and_si256(bottom, colorMask); + bottomAG = _mm256_mullo_epi16 (bottomAG, disty_); + bottomRB = _mm256_mullo_epi16 (bottomRB, disty_); + + // Add the values, and shift to only keep 8 significant bits per colors + __m256i rAG =_mm256_add_epi16(topAG, bottomAG); + rAG = _mm256_srli_epi16(rAG, 8); + _mm256_storeu_si256((__m256i*)(&intermediate_buffer[1][f]), rAG); + __m256i rRB =_mm256_add_epi16(topRB, bottomRB); + rRB = _mm256_srli_epi16(rRB, 8); + 
_mm256_storeu_si256((__m256i*)(&intermediate_buffer[0][f]), rRB); + } + + for (; f < count; f++) { // Same as above but without simd + x = qMin(x, image.x2 - 1); + + uint t = s1[x]; + uint b = s2[x]; + + intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + x++; + } + // Now interpolate the values from the intermediate_buffer to get the final result. + fx &= FixedScale - 1; + Q_ASSERT((fx >> 16) == 0); + + const __m128i v_fdx = _mm_set1_epi32(fdx * 4); + const __m128i v_blend = _mm_set1_epi32(0x00800080); + __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); + + while (b < end - 3) { + const __m128i offset = _mm_srli_epi32(v_fx, 16); + __m256i vrb = _mm256_i32gather_epi64((const long long *)intermediate_buffer[0], offset, 4); + __m256i vag = _mm256_i32gather_epi64((const long long *)intermediate_buffer[1], offset, 4); + + __m128i vdx = _mm_and_si128(v_fx, _mm_set1_epi32(0x0000ffff)); + vdx = _mm_srli_epi16(vdx, 8); + __m128i vidx = _mm_sub_epi32(_mm_set1_epi32(256), vdx); + __m256i vmulx = _mm256_castsi128_si256(_mm_unpacklo_epi32(vidx, vdx)); + vmulx = _mm256_inserti128_si256(vmulx, _mm_unpackhi_epi32(vidx, vdx), 1); + + vrb = _mm256_mullo_epi32(vrb, vmulx); + vag = _mm256_mullo_epi32(vag, vmulx); + + __m256i vrbag = _mm256_hadd_epi32(vrb, vag); + vrbag = _mm256_permute4x64_epi64(vrbag, _MM_SHUFFLE(3, 1, 2, 0)); + + __m128i rb = _mm256_castsi256_si128(vrbag); + __m128i ag = _mm256_extracti128_si256(vrbag, 1); + rb = _mm_srli_epi16(rb, 8); + + _mm_storeu_si128((__m128i*)b, _mm_blendv_epi8(ag, rb, v_blend)); + + b += 4; + fx += 4 * fdx; + v_fx = _mm_add_epi32(v_fx, v_fdx); + } + while (b < end) { + int x = (fx >> 16); + + uint distx = (fx & 0x0000ffff) >> 8; + uint idistx = 256 - distx; + + uint rb = ((intermediate_buffer[0][x] * idistx + intermediate_buffer[0][x + 1] * distx) >> 8) 
& 0xff00ff; + uint ag = (intermediate_buffer[1][x] * idistx + intermediate_buffer[1][x + 1] * distx) & 0xff00ff00; + *b = rb | ag; + b++; + fx += fdx; + } +} + +void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + const int disty8 = (fy & 0x0000ffff) >> 8; + const int disty4 = (disty8 + 0x08) >> 4; + + const qint64 min_fx = qint64(image.x1) * FixedScale; + const qint64 max_fx = qint64(image.x2 - 1) * FixedScale; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + if (x1 != x2) + break; + uint top = s1[x1]; + uint bot = s2[x1]; + *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8); + fx += fdx; + ++b; + } + uint *boundedEnd = end; + if (fdx > 0) + boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); + else if (fdx < 0) + boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); + + // A fast middle part without boundary checks + const __m256i vdistShuffle = + _mm256_setr_epi8(0, char(0x80), 0, char(0x80), 4, char(0x80), 4, char(0x80), 8, char(0x80), 8, char(0x80), 12, char(0x80), 12, char(0x80), + 0, char(0x80), 0, char(0x80), 4, char(0x80), 4, char(0x80), 8, char(0x80), 8, char(0x80), 12, char(0x80), 12, char(0x80)); + const __m256i colorMask = _mm256_set1_epi32(0x00ff00ff); + const __m256i v_256 = _mm256_set1_epi16(256); + const __m256i v_disty = _mm256_set1_epi16(disty4); + const __m256i v_fdx = _mm256_set1_epi32(fdx * 8); + const __m256i v_fx_r = _mm256_set1_epi32(0x08); + const __m256i v_index = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i v_fx = _mm256_set1_epi32(fx); + v_fx = _mm256_add_epi32(v_fx, 
_mm256_mullo_epi32(_mm256_set1_epi32(fdx), v_index)); + + while (b < boundedEnd - 7) { + const __m256i offset = _mm256_srli_epi32(v_fx, 16); + const __m128i offsetLo = _mm256_castsi256_si128(offset); + const __m128i offsetHi = _mm256_extracti128_si256(offset, 1); + const __m256i toplo = _mm256_i32gather_epi64((const long long *)s1, offsetLo, 4); + const __m256i tophi = _mm256_i32gather_epi64((const long long *)s1, offsetHi, 4); + const __m256i botlo = _mm256_i32gather_epi64((const long long *)s2, offsetLo, 4); + const __m256i bothi = _mm256_i32gather_epi64((const long long *)s2, offsetHi, 4); + + __m256i v_distx = _mm256_srli_epi16(v_fx, 8); + v_distx = _mm256_srli_epi16(_mm256_add_epi32(v_distx, v_fx_r), 4); + v_distx = _mm256_shuffle_epi8(v_distx, vdistShuffle); + + interpolate_4_pixels_16_avx2(toplo, tophi, botlo, bothi, v_distx, v_disty, colorMask, v_256, b); + b += 8; + v_fx = _mm256_add_epi32(v_fx, v_fdx); + } + fx = _mm_extract_epi32(_mm256_castsi256_si128(v_fx) , 0); + + while (b < boundedEnd) { + int x = (fx >> 16); + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8); + fx += fdx; + ++b; + } + + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8); + fx += fdx; + ++b; + } +} + +void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int fdy) +{ + const qint64 min_fx = qint64(image.x1) * FixedScale; + const qint64 max_fx = qint64(image.x2 - 1) * FixedScale; + const qint64 min_fy = qint64(image.y1) * FixedScale; + const qint64 max_fy = qint64(image.y2 - 1) * FixedScale; + // first handle the possibly bounded part in the beginning + while (b < end) { + int x1 = 
(fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + if (x1 != x2 && y1 != y2) + break; + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + fx += fdx; + fy += fdy; + ++b; + } + uint *boundedEnd = end; + if (fdx > 0) + boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); + else if (fdx < 0) + boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); + if (fdy > 0) + boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy); + else if (fdy < 0) + boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy); + + // until boundedEnd we can now have a fast middle part without boundary checks + const __m256i vdistShuffle = + _mm256_setr_epi8(0, char(0x80), 0, char(0x80), 4, char(0x80), 4, char(0x80), 8, char(0x80), 8, char(0x80), 12, char(0x80), 12, char(0x80), + 0, char(0x80), 0, char(0x80), 4, char(0x80), 4, char(0x80), 8, char(0x80), 8, char(0x80), 12, char(0x80), 12, char(0x80)); + const __m256i colorMask = _mm256_set1_epi32(0x00ff00ff); + const __m256i v_256 = _mm256_set1_epi16(256); + const __m256i v_fdx = _mm256_set1_epi32(fdx * 8); + const __m256i v_fdy = _mm256_set1_epi32(fdy * 8); + const __m256i v_fxy_r = _mm256_set1_epi32(0x08); + const __m256i v_index = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i v_fx = _mm256_set1_epi32(fx); + __m256i v_fy = _mm256_set1_epi32(fy); + v_fx = _mm256_add_epi32(v_fx, _mm256_mullo_epi32(_mm256_set1_epi32(fdx), v_index)); + v_fy = _mm256_add_epi32(v_fy, _mm256_mullo_epi32(_mm256_set1_epi32(fdy), v_index)); + + const uchar *textureData = image.imageData; + const int bytesPerLine = 
image.bytesPerLine; + const __m256i vbpl = _mm256_set1_epi16(bytesPerLine/4); + + while (b < boundedEnd - 7) { + const __m256i vy = _mm256_packs_epi32(_mm256_srli_epi32(v_fy, 16), _mm256_setzero_si256()); + // 8x16bit * 8x16bit -> 8x32bit + __m256i offset = _mm256_unpacklo_epi16(_mm256_mullo_epi16(vy, vbpl), _mm256_mulhi_epi16(vy, vbpl)); + offset = _mm256_add_epi32(offset, _mm256_srli_epi32(v_fx, 16)); + const __m128i offsetLo = _mm256_castsi256_si128(offset); + const __m128i offsetHi = _mm256_extracti128_si256(offset, 1); + const uint *topData = (const uint *)(textureData); + const uint *botData = (const uint *)(textureData + bytesPerLine); + const __m256i toplo = _mm256_i32gather_epi64((const long long *)topData, offsetLo, 4); + const __m256i tophi = _mm256_i32gather_epi64((const long long *)topData, offsetHi, 4); + const __m256i botlo = _mm256_i32gather_epi64((const long long *)botData, offsetLo, 4); + const __m256i bothi = _mm256_i32gather_epi64((const long long *)botData, offsetHi, 4); + + __m256i v_distx = _mm256_srli_epi16(v_fx, 8); + __m256i v_disty = _mm256_srli_epi16(v_fy, 8); + v_distx = _mm256_srli_epi16(_mm256_add_epi32(v_distx, v_fxy_r), 4); + v_disty = _mm256_srli_epi16(_mm256_add_epi32(v_disty, v_fxy_r), 4); + v_distx = _mm256_shuffle_epi8(v_distx, vdistShuffle); + v_disty = _mm256_shuffle_epi8(v_disty, vdistShuffle); + + interpolate_4_pixels_16_avx2(toplo, tophi, botlo, bothi, v_distx, v_disty, colorMask, v_256, b); + b += 8; + v_fx = _mm256_add_epi32(v_fx, v_fdx); + v_fy = _mm256_add_epi32(v_fy, v_fdy); + } + fx = _mm_extract_epi32(_mm256_castsi256_si128(v_fx) , 0); + fy = _mm_extract_epi32(_mm256_castsi256_si128(v_fy) , 0); + + while (b < boundedEnd) { + int x = (fx >> 16); + int y = (fy >> 16); + + const uint *s1 = (const uint *)image.scanLine(y); + const uint *s2 = (const uint *)image.scanLine(y + 1); + + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty); + + fx 
+= fdx; + fy += fdy; + ++b; + } + + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + + const uint *s1 = (const uint *)image.scanLine(y1); + const uint *s2 = (const uint *)image.scanLine(y2); + + uint tl = s1[x1]; + uint tr = s1[x2]; + uint bl = s2[x1]; + uint br = s2[x2]; + + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + + fx += fdx; + fy += fdy; + ++b; + } +} + QT_END_NAMESPACE #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index a833520b00..4cbac009d8 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -535,12 +535,23 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl, } #if defined(ENABLE_PIXMAN_DRAWHELPERS) +extern void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer, + int x, int y, const QRgba64 &color, + const uchar *map, + int mapWidth, int mapHeight, int mapStride, + const QClipData *clip, bool useGammaCorrection); + void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *bitmap, int mapWidth, int mapHeight, int mapStride, - const QClipData *) + const QClipData *clip, bool useGammaCorrection) { + if (clip || useGammaCorrection) { + qt_alphamapblit_quint16(rasterBuffer, x, y, color, bitmap, mapWidth, mapHeight, mapStride, clip, useGammaCorrection); + return; + } + quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); @@ -1069,6 +1080,67 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_neon(uint *buffer, const Oper return buffer; } +#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN 
+template<bool RGBA> +static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int count) +{ + int i = 0; + const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15}; + const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7}; + const uint32x4_t blendMask = vdupq_n_u32(0xff000000); + + for (; i < count - 3; i += 4) { + uint32x4_t srcVector = vld1q_u32(src + i); + uint32x4_t alphaVector = vshrq_n_u32(srcVector, 24); + uint32_t alphaSum = vaddvq_u32(alphaVector); + if (alphaSum) { + if (alphaSum != 255 * 4) { + if (RGBA) + srcVector = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask)); + const uint8x8_t s1 = vreinterpret_u8_u32(vget_low_u32(srcVector)); + const uint8x8_t s2 = vreinterpret_u8_u32(vget_high_u32(srcVector)); + const uint8x8_t alpha1 = vtbl1_u8(s1, shuffleMask); + const uint8x8_t alpha2 = vtbl1_u8(s2, shuffleMask); + uint16x8_t src1 = vmull_u8(s1, alpha1); + uint16x8_t src2 = vmull_u8(s2, alpha2); + src1 = vsraq_n_u16(src1, src1, 8); + src2 = vsraq_n_u16(src2, src2, 8); + const uint8x8_t d1 = vrshrn_n_u16(src1, 8); + const uint8x8_t d2 = vrshrn_n_u16(src2, 8); + const uint32x4_t d = vbslq_u32(blendMask, srcVector, vreinterpretq_u32_u8(vcombine_u8(d1, d2))); + vst1q_u32(buffer + i, d); + } else { + if (RGBA) + vst1q_u32(buffer + i, vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask))); + else if (buffer != src) + vst1q_u32(buffer + i, srcVector); + } + } else { + vst1q_u32(buffer + i, vdupq_n_u32(0)); + } + } + + SIMD_EPILOGUE(i, count, 3) { + uint v = qPremultiply(src[i]); + buffer[i] = RGBA ? 
RGBA2ARGB(v) : v; + } +} + +const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToARGB32PM_neon<false>(buffer, src, count); + return buffer; +} + +const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *) +{ + convertARGBToARGB32PM_neon<true>(buffer, src, count); + return buffer; +} +#endif + QT_END_NAMESPACE #endif // __ARM_NEON__ diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 3cf949fc32..40475a9bde 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -91,7 +91,7 @@ void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *bitmap, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip); + const QClipData *clip, bool /*useGammaCorrection*/); void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int srch, diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 0e46962784..cf2213042d 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -113,13 +113,13 @@ typedef void (*AlphamapBlitFunc)(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uchar *bitmap, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip); + const QClipData *clip, bool useGammaCorrection); typedef void (*AlphaRGBBlitFunc)(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 &color, const uint *rgbmask, int mapWidth, int mapHeight, int mapStride, - const QClipData *clip); + const QClipData *clip, bool useGammaCorrection); typedef void (*RectFillFunc)(QRasterBuffer *rasterBuffer, int x, int y, int width, int height, @@ -159,7 +159,6 @@ struct DrawHelper { extern SrcOverBlendFunc 
qBlendFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverScaleFunc qScaleFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverTransformFunc qTransformFunctions[QImage::NImageFormats][QImage::NImageFormats]; -extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3]; extern DrawHelper qDrawHelper[QImage::NImageFormats]; @@ -351,18 +350,6 @@ struct QSpanData void adjustSpanMethods(); }; -struct QDrawHelperGammaTables -{ - explicit QDrawHelperGammaTables(qreal smoothing); - - void refresh(qreal smoothing); - - uchar qt_pow_rgb_gamma[256]; - uchar qt_pow_rgb_invgamma[256]; - uint qt_pow_gamma[256]; - uchar qt_pow_invgamma[2048]; -}; - static inline uint qt_gradient_clamp(const QGradientData *data, int ipos) { if (ipos < 0 || ipos >= GRADIENT_STOPTABLE_SIZE) { @@ -1244,6 +1231,7 @@ extern QPixelLayout qPixelLayouts[QImage::NImageFormats]; extern const FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount]; extern StorePixelsFunc qStorePixels[QPixelLayout::BPPCount]; +extern MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3]; QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index 257bad9eca..14bfaabf09 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -44,16 +44,67 @@ QT_BEGIN_NAMESPACE +template<bool RGBA> +static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count) +{ + int i = 0; + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); + const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15); + const __m128i half = _mm_set1_epi16(0x0080); + const __m128i zero = _mm_setzero_si128(); + + for (; i < count - 3; i += 4) { + __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); + if (!_mm_testz_si128(srcVector, alphaMask)) { + if 
(!_mm_testc_si128(srcVector, alphaMask)) { + if (RGBA) + srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); + __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); + __m128i src2 = _mm_unpackhi_epi8(srcVector, zero); + __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); + __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); + src1 = _mm_mullo_epi16(src1, alpha1); + src2 = _mm_mullo_epi16(src2, alpha2); + src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8)); + src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8)); + src1 = _mm_add_epi16(src1, half); + src2 = _mm_add_epi16(src2, half); + src1 = _mm_srli_epi16(src1, 8); + src2 = _mm_srli_epi16(src2, 8); + src1 = _mm_blend_epi16(src1, alpha1, 0x88); + src2 = _mm_blend_epi16(src2, alpha2, 0x88); + srcVector = _mm_packus_epi16(src1, src2); + _mm_storeu_si128((__m128i *)&buffer[i], srcVector); + } else { + if (RGBA) + _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask)); + else if (buffer != src) + _mm_storeu_si128((__m128i *)&buffer[i], srcVector); + } + } else { + _mm_storeu_si128((__m128i *)&buffer[i], _mm_setzero_si128()); + } + } + + SIMD_EPILOGUE(i, count, 3) { + uint v = qPremultiply(src[i]); + buffer[i] = RGBA ? 
RGBA2ARGB(v) : v; + } +} + const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { - return qt_convertARGB32ToARGB32PM(buffer, src, count); + convertARGBToARGB32PM_sse4<false>(buffer, src, count); + return buffer; } const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *) { - return qt_convertRGBA8888ToARGB32PM(buffer, src, count); + convertARGBToARGB32PM_sse4<true>(buffer, src, count); + return buffer; } const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp index 3fbae76de5..25aa6a3122 100644 --- a/src/gui/painting/qmemrotate.cpp +++ b/src/gui/painting/qmemrotate.cpp @@ -41,164 +41,10 @@ QT_BEGIN_NAMESPACE -#if QT_ROTATION_ALGORITHM == QT_ROTATION_TILED static const int tileSize = 32; -#endif - -#if Q_BYTE_ORDER == Q_BIG_ENDIAN -#if QT_ROTATION_ALGORITHM == QT_ROTATION_PACKED || QT_ROTATION_ALGORITHM == QT_ROTATION_TILED -#error Big endian version not implemented for the transformed driver! 
-#endif -#endif - -template <class T> -Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate90_cachedRead(const T *src, int w, int h, int sstride, T *dest, - int dstride) -{ - const char *s = reinterpret_cast<const char*>(src); - char *d = reinterpret_cast<char*>(dest); - for (int y = 0; y < h; ++y) { - for (int x = w - 1; x >= 0; --x) { - T *destline = reinterpret_cast<T *>(d + (w - x - 1) * dstride); - destline[y] = src[x]; - } - s += sstride; - src = reinterpret_cast<const T*>(s); - } -} template <class T> Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate270_cachedRead(const T *src, int w, int h, int sstride, T *dest, - int dstride) -{ - const char *s = reinterpret_cast<const char*>(src); - char *d = reinterpret_cast<char*>(dest); - s += (h - 1) * sstride; - for (int y = h - 1; y >= 0; --y) { - src = reinterpret_cast<const T*>(s); - for (int x = 0; x < w; ++x) { - T *destline = reinterpret_cast<T *>(d + x * dstride); - destline[h - y - 1] = src[x]; - } - s -= sstride; - } -} - -#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE - -template <class T> -Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate90_cachedWrite(const T *src, int w, int h, int sstride, T *dest, - int dstride) -{ - for (int x = w - 1; x >= 0; --x) { - T *d = dest + (w - x - 1) * dstride; - for (int y = 0; y < h; ++y) { - *d++ = src[y * sstride + x]; - } - } - -} - -template <class T> -Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate270_cachedWrite(const T *src, int w, int h, int sstride, T *dest, - int dstride) -{ - for (int x = 0; x < w; ++x) { - T *d = dest + x * dstride; - for (int y = h - 1; y >= 0; --y) { - *d++ = src[y * sstride + x]; - } - } -} - -#endif // QT_ROTATION_CACHEDWRITE - -#if QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING - -// TODO: packing algorithms should probably be modified on 64-bit architectures - -template <class T> -Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate90_packing(const T *src, int w, int h, int sstride, T *dest, int dstride) -{ - sstride /= 
sizeof(T); - dstride /= sizeof(T); - - const int pack = sizeof(quint32) / sizeof(T); - const int unaligned = int((long(dest) & (sizeof(quint32)-1))) / sizeof(T); - - for (int x = w - 1; x >= 0; --x) { - int y = 0; - - for (int i = 0; i < unaligned; ++i) { - dest[(w - x - 1) * dstride + y] = src[y * sstride + x]; - ++y; - } - - quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride - + unaligned); - const int rest = (h - unaligned) % pack; - while (y < h - rest) { - quint32 c = src[y * sstride + x]; - for (int i = 1; i < pack; ++i) { - c |= src[(y + i) * sstride + x] << (sizeof(int) * 8 / pack * i); - } - *d++ = c; - y += pack; - } - - while (y < h) { - dest[(w - x - 1) * dstride + y] = src[y * sstride + x]; - ++y; - } - } -} - -template <class T> -Q_STATIC_TEMPLATE_FUNCTION -inline void qt_memrotate270_packing(const T *src, int w, int h, int sstride, T *dest, int dstride) -{ - sstride /= sizeof(T); - dstride /= sizeof(T); - - const int pack = sizeof(quint32) / sizeof(T); - const int unaligned = int((long(dest) & (sizeof(quint32)-1))) / sizeof(T); - - for (int x = 0; x < w; ++x) { - int y = h - 1; - - for (int i = 0; i < unaligned; ++i) { - dest[x * dstride + h - y - 1] = src[y * sstride + x]; - --y; - } - - quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride - + unaligned); - const int rest = (h - unaligned) % pack; - while (y > rest) { - quint32 c = src[y * sstride + x]; - for (int i = 1; i < pack; ++i) { - c |= src[(y - i) * sstride + x] << (sizeof(int) * 8 / pack * i); - } - *d++ = c; - y -= pack; - } - while (y >= 0) { - dest[x * dstride + h - y - 1] = src[y * sstride + x]; - --y; - } - } -} - -#endif // QT_ROTATION_PACKING - -#if QT_ROTATION_ALGORITHM == QT_ROTATION_TILED -template <class T> -Q_STATIC_TEMPLATE_FUNCTION inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride) { sstride /= sizeof(T); @@ -235,7 +81,7 @@ inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *des for 
(int y = starty; y < stopy; y += pack) { quint32 c = src[y * sstride + x]; for (int i = 1; i < pack; ++i) { - const int shift = (sizeof(int) * 8 / pack * i); + const int shift = (sizeof(T) * 8 * i); const T color = src[(y + i) * sstride + x]; c |= color << shift; } @@ -293,7 +139,7 @@ inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *de const int pack = sizeof(quint32) / sizeof(T); const int unaligned = - qMin(uint((long(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h)); + qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h)); const int restX = w % tileSize; const int restY = (h - unaligned) % tileSize; const int unoptimizedY = restY % pack; @@ -320,10 +166,10 @@ inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *de for (int x = startx; x < stopx; ++x) { quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride + h - 1 - starty); - for (int y = starty; y > stopy; y -= pack) { + for (int y = starty; y >= stopy; y -= pack) { quint32 c = src[y * sstride + x]; for (int i = 1; i < pack; ++i) { - const int shift = (sizeof(int) * 8 / pack * i); + const int shift = (sizeof(T) * 8 * i); const T color = src[(y - i) * sstride + x]; c |= color << shift; } @@ -371,22 +217,26 @@ inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstri } } -#endif // QT_ROTATION_ALGORITHM template <class T> Q_STATIC_TEMPLATE_FUNCTION inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride, T *dest, int dstStride) { -#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD - qt_memrotate90_cachedRead<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE - qt_memrotate90_cachedWrite<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING - qt_memrotate90_packing<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == 
QT_ROTATION_TILED - qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer + if (sizeof(quint32) % sizeof(T) == 0) + qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); + else #endif + qt_memrotate90_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); +} + +template <> +inline void qt_memrotate90_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride) +{ + // packed algorithm doesn't have any benefit for quint32 + qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride); } template <class T> @@ -394,11 +244,11 @@ Q_STATIC_TEMPLATE_FUNCTION inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride) { const char *s = (const char*)(src) + (h - 1) * sstride; - for (int y = h - 1; y >= 0; --y) { - T *d = reinterpret_cast<T*>((char *)(dest) + (h - y - 1) * dstride); + for (int dy = 0; dy < h; ++dy) { + T *d = reinterpret_cast<T*>((char *)(dest) + dy * dstride); src = reinterpret_cast<const T*>(s); - for (int x = w - 1; x >= 0; --x) { - d[w - x - 1] = src[x]; + for (int dx = 0; dx < w; ++dx) { + d[dx] = src[w - 1 - dx]; } s -= sstride; } @@ -409,32 +259,20 @@ Q_STATIC_TEMPLATE_FUNCTION inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride, T *dest, int dstStride) { -#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD - qt_memrotate270_cachedRead<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE - qt_memrotate270_cachedWrite<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING - qt_memrotate270_packing<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_TILED - 
qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer + if (sizeof(quint32) % sizeof(T) == 0) + qt_memrotate270_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); + else #endif + qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride); } template <> -inline void qt_memrotate90_template<quint24>(const quint24 *src, int srcWidth, int srcHeight, - int srcStride, quint24 *dest, int dstStride) +inline void qt_memrotate270_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride) { -#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD - qt_memrotate90_cachedRead<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE - qt_memrotate90_cachedWrite<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING - // packed algorithm not implemented - qt_memrotate90_cachedRead<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#elif QT_ROTATION_ALGORITHM == QT_ROTATION_TILED - // packed algorithm not implemented - qt_memrotate90_tiled_unpacked<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride); -#endif + // packed algorithm doesn't have any benefit for quint32 + qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); } #define QT_IMPL_MEMROTATE(type) \ @@ -458,7 +296,7 @@ Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \ Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h, int sstride, \ type *dest, int dstride) \ { \ - qt_memrotate90_tiled_unpacked<type>(src, w, h, sstride, dest, dstride); \ + qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride); \ } \ Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \ 
type *dest, int dstride) \ @@ -468,7 +306,7 @@ Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \ Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \ type *dest, int dstride) \ { \ - qt_memrotate270_tiled_unpacked<type>(src, w, h, sstride, dest, dstride); \ + qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); \ } @@ -509,6 +347,21 @@ void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *d qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); } +void qt_memrotate90_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate90((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl); +} + +void qt_memrotate180_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate180((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl); +} + +void qt_memrotate270_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate270((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl); +} + void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) { qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); @@ -524,34 +377,16 @@ void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *d qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); } -MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3] = +MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] = // 90, 180, 270 { - { 0, 0, 0 }, // Format_Invalid, - { 0, 0, 0 }, // Format_Mono, - { 0, 0, 0 }, // Format_MonoLSB, - { 0, 0, 0 }, // Format_Indexed8, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB32, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32, - { 
qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32_Premultiplied, - { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // Format_RGB16, - { 0, 0, 0 }, // Format_ARGB8565_Premultiplied, - { 0, 0, 0 }, // Format_RGB666, - { 0, 0, 0 }, // Format_ARGB6666_Premultiplied, - { 0, 0, 0 }, // Format_RGB555, - { 0, 0, 0 }, // Format_ARGB8555_Premultiplied, - { 0, 0, 0 }, // Format_RGB888, - { 0, 0, 0 }, // Format_RGB444, - { 0, 0, 0 }, // Format_ARGB4444_Premultiplied, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBX8888, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBA8888, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBA8888_Premultiplied, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_BGB30, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_A2BGR30_Premultiplied, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB30, - { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_A2RGB30_Premultiplied, - { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // Format_Alpha8, - { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // Format_Grayscale8, + { 0, 0, 0 }, // BPPNone, + { 0, 0, 0 }, // BPP1MSB, + { 0, 0, 0 }, // BPP1LSB, + { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // BPP8, + { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // BPP16, + { qt_memrotate90_24, qt_memrotate180_24, qt_memrotate270_24 }, // BPP24 + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // BPP32 }; QT_END_NAMESPACE diff --git a/src/gui/painting/qmemrotate_p.h b/src/gui/painting/qmemrotate_p.h index 62613d301a..9bc3fd1010 100644 --- a/src/gui/painting/qmemrotate_p.h +++ b/src/gui/painting/qmemrotate_p.h @@ -56,19 +56,6 @@ QT_BEGIN_NAMESPACE -#define QT_ROTATION_CACHEDREAD 1 -#define QT_ROTATION_CACHEDWRITE 2 -#define 
QT_ROTATION_PACKING 3 -#define QT_ROTATION_TILED 4 - -#ifndef QT_ROTATION_ALGORITHM -#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN -#define QT_ROTATION_ALGORITHM QT_ROTATION_TILED -#else -#define QT_ROTATION_ALGORITHM QT_ROTATION_CACHEDREAD -#endif -#endif - #define QT_DECL_MEMROTATE(type) \ void Q_GUI_EXPORT qt_memrotate90(const type*, int, int, int, type*, int); \ void Q_GUI_EXPORT qt_memrotate180(const type*, int, int, int, type*, int); \ diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index 6d5eaf5aed..eb43453ddb 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -272,6 +272,35 @@ static void qt_debug_path(const QPainterPath &path) } #endif +// QRect::normalized() will change the width/height of the rectangle due to +// its inclusive-integer definition of left/right vs width. This is not +// something we want to change in QRect as that would potentially introduce +// regressions all over the place, so we implement a straightforward +// normalized here. QRectF already does this, so QRectF::normalized() is ok to +// use. +static QRect qrect_normalized(const QRect &rect) +{ + int x, y, w, h; + if (Q_UNLIKELY(rect.width() < 0)) { + x = rect.x() + rect.width(); + w = -rect.width(); + } else { + x = rect.x(); + w = rect.width(); + } + + if (Q_UNLIKELY(rect.height() < 0)) { + y = rect.y() + rect.height(); + h = -rect.height(); + } else { + y = rect.y(); + h = rect.height(); + } + + return QRect(x, y, w, h); +} + + QRasterPaintEnginePrivate::QRasterPaintEnginePrivate() : QPaintEngineExPrivate(), cachedLines(0) @@ -1236,7 +1265,9 @@ void QRasterPaintEngine::clip(const QRect &rect, Qt::ClipOperation op) bool QRasterPaintEngine::setClipRectInDeviceCoords(const QRect &r, Qt::ClipOperation op) { Q_D(QRasterPaintEngine); - QRect clipRect = r & d->deviceRect; + // normalize before using the & operator which uses QRect::normalize() + // internally which will give us the wrong values.
+ QRect clipRect = qrect_normalized(r) & d->deviceRect; QRasterPaintEngineState *s = state(); if (op == Qt::ReplaceClip || s->clip == 0) { @@ -1471,7 +1502,7 @@ void QRasterPaintEngine::drawRects(const QRect *rects, int rectCount) int offset_x = int(s->matrix.dx()); int offset_y = int(s->matrix.dy()); while (r < lastRect) { - QRect rect = r->normalized(); + QRect rect = qrect_normalized(*r); QRect rr = rect.translated(offset_x, offset_y); fillRect_normalized(rr, &s->brushData, d); ++r; @@ -2266,8 +2297,9 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe && d->rasterBuffer->compositionMode == QPainter::CompositionMode_Source))) { RotationType rotationType = qRotationType(s->matrix); + const QPixelLayout::BPP plBpp = qPixelLayouts[d->rasterBuffer->format].bpp; - if (rotationType != NoRotation && qMemRotateFunctions[d->rasterBuffer->format][rotationType] && img.rect().contains(sr.toAlignedRect())) { + if (rotationType != NoRotation && qMemRotateFunctions[plBpp][rotationType] && img.rect().contains(sr.toAlignedRect())) { QRectF transformedTargetRect = s->matrix.mapRect(r); if ((!(s->renderHints & QPainter::SmoothPixmapTransform) && !(s->renderHints & QPainter::Antialiasing)) @@ -2297,7 +2329,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe uint cw = clippedSourceRect.width(); uint ch = clippedSourceRect.height(); - qMemRotateFunctions[d->rasterBuffer->format][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl); + qMemRotateFunctions[plBpp][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl); return; } @@ -2500,7 +2532,7 @@ void QRasterPaintEngine::drawTiledPixmap(const QRectF &r, const QPixmap &pixmap, QRectF rr = r; rr.translate(s->matrix.dx(), s->matrix.dy()); - fillRect_normalized(rr.toRect().normalized(), &d->image_filler, d); + fillRect_normalized(rr.normalized().toRect(), &d->image_filler, d); } } @@ -2523,7 +2555,7 @@ QRasterBuffer *QRasterPaintEngine::rasterBuffer() /*! 
\internal */ -void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h) +void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h, bool useGammaCorrection) { Q_D(QRasterPaintEngine); QRasterPaintEngineState *s = state(); @@ -2578,18 +2610,18 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx } else if (depth == 8) { if (s->penData.alphamapBlit) { s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color, - scanline, w, h, bpl, 0); + scanline, w, h, bpl, 0, useGammaCorrection); return; } } else if (depth == 32) { // (A)RGB Alpha mask where the alpha component is not used. if (s->penData.alphaRGBBlit) { s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color, - (const uint *) scanline, w, h, bpl / 4, 0); + (const uint *) scanline, w, h, bpl / 4, 0, useGammaCorrection); return; } } - } else if (d->deviceDepth == 32 && ((depth == 8 && s->penData.alphamapBlit) || (depth == 32 && s->penData.alphaRGBBlit))) { + } else if ((depth == 8 && s->penData.alphamapBlit) || (depth == 32 && s->penData.alphaRGBBlit)) { // (A)RGB Alpha mask where the alpha component is not used. 
if (!clip) { int nx = qMax(0, rx); @@ -2614,10 +2646,10 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx } if (depth == 8) s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color, - scanline, w, h, bpl, clip); + scanline, w, h, bpl, clip, useGammaCorrection); else if (depth == 32) s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color, - (const uint *) scanline, w, h, bpl / 4, clip); + (const uint *) scanline, w, h, bpl / 4, clip, useGammaCorrection); return; } } @@ -2775,7 +2807,8 @@ bool QRasterPaintEngine::drawCachedGlyphs(int numGlyphs, const glyph_t *glyphs, alphaPenBlt(alphaMap->constBits(), alphaMap->bytesPerLine(), alphaMap->depth(), qFloor(positions[i].x) + offset.x(), qRound(positions[i].y) + offset.y(), - alphaMap->width(), alphaMap->height()); + alphaMap->width(), alphaMap->height(), + fontEngine->expectsGammaCorrectedBlending()); fontEngine->unlockAlphaMapForGlyph(); } @@ -2836,7 +2869,7 @@ bool QRasterPaintEngine::drawCachedGlyphs(int numGlyphs, const glyph_t *glyphs, drawImage(QPoint(x, y), QImage(glyphBits, c.w, c.h, bpl, image.format())); s->matrix = originalTransform; } else { - alphaPenBlt(glyphBits, bpl, depth, x, y, c.w, c.h); + alphaPenBlt(glyphBits, bpl, depth, x, y, c.w, c.h, fontEngine->expectsGammaCorrectedBlending()); } } } @@ -2880,7 +2913,7 @@ bool QRasterPaintEnginePrivate::isUnclipped(const QRect &rect, const QRasterPaintEngineState *s = q->state(); const QClipData *cl = clip(); if (!cl) { - QRect r = rect.normalized(); + QRect r = qrect_normalized(rect); // inline contains() for performance (we know the rects are normalized) const QRect &r1 = deviceRect; return (r.left() >= r1.left() && r.right() <= r1.right() @@ -2895,7 +2928,7 @@ bool QRasterPaintEnginePrivate::isUnclipped(const QRect &rect, if (s->flags.antialiased) ++penWidth; - QRect r = rect.normalized(); + QRect r = qrect_normalized(rect); if (penWidth > 0) { r.setX(r.x() - penWidth); r.setY(r.y() - penWidth); @@ -4439,9 +4472,9 @@ 
void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode gradient.alphaColor = !brush.isOpaque() || alpha != 256; auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha); - cachedGradient = cacheInfo; gradient.colorTable32 = cacheInfo->buffer32; gradient.colorTable64 = cacheInfo->buffer64; + cachedGradient = std::move(cacheInfo); gradient.spread = g->spread(); @@ -4461,9 +4494,9 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode gradient.alphaColor = !brush.isOpaque() || alpha != 256; auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha); - cachedGradient = cacheInfo; gradient.colorTable32 = cacheInfo->buffer32; gradient.colorTable64 = cacheInfo->buffer64; + cachedGradient = std::move(cacheInfo); gradient.spread = g->spread(); @@ -4487,9 +4520,9 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode gradient.alphaColor = !brush.isOpaque() || alpha != 256; auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha); - cachedGradient = cacheInfo; gradient.colorTable32 = cacheInfo->buffer32; gradient.colorTable64 = cacheInfo->buffer64; + cachedGradient = std::move(cacheInfo); gradient.spread = QGradient::RepeatSpread; diff --git a/src/gui/painting/qpaintengine_raster_p.h b/src/gui/painting/qpaintengine_raster_p.h index 59213220a6..d0b82b3a93 100644 --- a/src/gui/painting/qpaintengine_raster_p.h +++ b/src/gui/painting/qpaintengine_raster_p.h @@ -225,7 +225,7 @@ public: #endif QRasterBuffer *rasterBuffer(); - void alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h); + void alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h, bool useGammaCorrection); Type type() const Q_DECL_OVERRIDE { return Raster; } diff --git a/src/gui/painting/qpainter.h b/src/gui/painting/qpainter.h index 46817b9c73..64d15d5296 100644 --- a/src/gui/painting/qpainter.h +++ b/src/gui/painting/qpainter.h @@ -83,7 +83,6 @@ class Q_GUI_EXPORT QPainter { 
Q_DECLARE_PRIVATE(QPainter) Q_GADGET - Q_FLAGS(RenderHint RenderHints) public: enum RenderHint { @@ -94,8 +93,10 @@ public: NonCosmeticDefaultPen = 0x10, Qt4CompatiblePainting = 0x20 }; + Q_FLAG(RenderHint) Q_DECLARE_FLAGS(RenderHints, RenderHint) + Q_FLAG(RenderHints) class PixmapFragment { public: diff --git a/src/gui/painting/qpdf.cpp b/src/gui/painting/qpdf.cpp index 84e18a64dd..7b8bae1642 100644 --- a/src/gui/painting/qpdf.cpp +++ b/src/gui/painting/qpdf.cpp @@ -1504,16 +1504,25 @@ void QPdfEnginePrivate::writeInfo() printString(creator); xprintf("\n/Producer "); printString(QString::fromLatin1("Qt " QT_VERSION_STR)); - QDateTime now = QDateTime::currentDateTimeUtc(); + QDateTime now = QDateTime::currentDateTime(); QTime t = now.time(); QDate d = now.date(); - xprintf("\n/CreationDate (D:%d%02d%02d%02d%02d%02d)\n", + xprintf("\n/CreationDate (D:%d%02d%02d%02d%02d%02d", d.year(), d.month(), d.day(), t.hour(), t.minute(), t.second()); + int offset = now.offsetFromUtc(); + int hours = (offset / 60) / 60; + int mins = (offset / 60) % 60; + if (offset < 0) + xprintf("-%02d'%02d')\n", -hours, -mins); + else if (offset > 0) + xprintf("+%02d'%02d')\n", hours , mins); + else + xprintf("Z)\n"); xprintf(">>\n" "endobj\n"); } diff --git a/src/gui/painting/qregion.cpp b/src/gui/painting/qregion.cpp index 0571e1a328..3fb6f925b3 100644 --- a/src/gui/painting/qregion.cpp +++ b/src/gui/painting/qregion.cpp @@ -739,7 +739,7 @@ bool QRegion::intersects(const QRegion &region) const */ -#if !defined (Q_OS_UNIX) && !defined (Q_OS_WIN) +#if !defined (Q_OS_UNIX) && !defined (Q_OS_WIN) || defined(Q_CLANG_QDOC) /*!
\overload \since 4.4 diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h index 0dadc038fa..7776a5b08a 100644 --- a/src/gui/painting/qrgba64_p.h +++ b/src/gui/painting/qrgba64_p.h @@ -185,6 +185,60 @@ inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b) qMin(a.alpha() + b.alpha(), 65535)); } +#if defined __SSE2__ +Q_ALWAYS_INLINE uint toArgb32(__m128i v) +{ + v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); + v = _mm_add_epi32(v, _mm_set1_epi32(128)); + v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8)); + v = _mm_srli_epi32(v, 8); + v = _mm_packs_epi32(v, v); + v = _mm_packus_epi16(v, v); + return _mm_cvtsi128_si32(v); +} +#elif defined __ARM_NEON__ +Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v) +{ + v = vsub_u16(v, vrshr_n_u16(v, 8)); + v = vrshr_n_u16(v, 8); + uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v)); + return vget_lane_u32(vreinterpret_u32_u8(v8), 0); +} +#endif + +inline uint toArgb32(QRgba64 rgba64) +{ +#if defined __SSE2__ + __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2)); + return toArgb32(v); +#elif defined __ARM_NEON__ + uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 }; + v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask)); +#else + v = vext_u16(v, v, 3); +#endif + return toArgb32(v); +#else + return rgba64.toArgb32(); +#endif +} + +inline uint toRgba8888(QRgba64 rgba64) +{ +#if defined __SSE2__ + __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + return toArgb32(v); +#elif defined __ARM_NEON__ + uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); + return toArgb32(v); +#else + return ARGB2RGBA(toArgb32(rgba64)); +#endif +} + #if defined(__SSE2__) Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b) { @@ -199,6 +253,53 @@ Q_ALWAYS_INLINE uint16x4_t 
addWithSaturation(uint16x4_t a, uint16x4_t b) } #endif +inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) +{ + QRgba64 blend; +#if defined(__SSE2__) + __m128i vd = _mm_loadl_epi64((const __m128i *)&d); + __m128i vs = _mm_loadl_epi64((const __m128i *)&s); + __m128i va = _mm_cvtsi32_si128(rgbAlpha); + va = _mm_unpacklo_epi8(va, va); + va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); + __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va); + + vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); + vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb)); + vd = _mm_add_epi32(vd, vs); + vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16)); + vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000)); + vd = _mm_srai_epi32(vd, 16); + vd = _mm_packs_epi32(vd, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i *)&blend, vd); +#elif defined(__ARM_NEON__) + uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d)); + uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s)); + uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha))); + uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]); + uint16x4_t vb = vdup_n_u16(0xffff); + vb = vsub_u16(vb, va); + + uint32x4_t vs32 = vmull_u16(vs, va); + uint32x4_t vd32 = vmull_u16(vd, vb); + vd32 = vaddq_u32(vd32, vs32); + vd32 = vsraq_n_u32(vd32, vd32, 16); + vd = vrshrn_n_u32(vd32, 16); + vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd)); +#else + const int mr = qRed(rgbAlpha); + const int mg = qGreen(rgbAlpha); + const int mb = qBlue(rgbAlpha); + blend.setRed (qt_div_255(s.red() * mr + d.red() * (255 - mr))); + blend.setGreen(qt_div_255(s.green() * mg + d.green() * (255 - mg))); + blend.setBlue (qt_div_255(s.blue() * mb + d.blue() * (255 - mb))); + blend.setAlpha(s.alpha()); +#endif + return blend; +} + + QT_END_NAMESPACE #endif // QRGBA64_P_H diff --git a/src/gui/painting/qtransform.cpp b/src/gui/painting/qtransform.cpp index 2d841b2953..673f64fbca 100644 --- 
a/src/gui/painting/qtransform.cpp +++ b/src/gui/painting/qtransform.cpp @@ -1118,16 +1118,16 @@ QDataStream & operator>>(QDataStream &s, QTransform &t) #ifndef QT_NO_DEBUG_STREAM QDebug operator<<(QDebug dbg, const QTransform &m) { - static const char *const typeStr[] = + static const char typeStr[][12] = { "TxNone", "TxTranslate", "TxScale", - 0, + "", "TxRotate", - 0, 0, 0, + "", "", "", "TxShear", - 0, 0, 0, 0, 0, 0, 0, + "", "", "", "", "", "", "", "TxProject" }; diff --git a/src/gui/painting/qtriangulator.cpp b/src/gui/painting/qtriangulator.cpp index 6604d407f0..6d57eba123 100644 --- a/src/gui/painting/qtriangulator.cpp +++ b/src/gui/painting/qtriangulator.cpp @@ -50,10 +50,6 @@ #include <QtCore/qglobal.h> #include <QtCore/qpoint.h> #include <QtCore/qalgorithms.h> -#ifndef QT_NO_OPENGL -# include <private/qopenglcontext_p.h> -# include <private/qopenglextensions_p.h> -#endif #include <private/qrbtree_p.h> QT_BEGIN_NAMESPACE @@ -2266,23 +2262,12 @@ void QTriangulator<T>::MonotoneToTriangles::decompose() // qTriangulate // //============================================================================// -static bool hasElementIndexUint() -{ -#ifndef QT_NO_OPENGL - QOpenGLContext *context = QOpenGLContext::currentContext(); - if (!context) - return false; - return static_cast<QOpenGLExtensions *>(context->functions())->hasOpenGLExtension(QOpenGLExtensions::ElementIndexUint); -#else - return false; -#endif -} - Q_GUI_EXPORT QTriangleSet qTriangulate(const qreal *polygon, - int count, uint hint, const QTransform &matrix) + int count, uint hint, const QTransform &matrix, + bool allowUintIndices) { QTriangleSet triangleSet; - if (hasElementIndexUint()) { + if (allowUintIndices) { QTriangulator<quint32> triangulator; triangulator.initialize(polygon, count, hint, matrix); QVertexSet<quint32> vertexSet = triangulator.triangulate(); @@ -2300,10 +2285,13 @@ Q_GUI_EXPORT QTriangleSet qTriangulate(const qreal *polygon, } Q_GUI_EXPORT QTriangleSet qTriangulate(const 
QVectorPath &path, - const QTransform &matrix, qreal lod) + const QTransform &matrix, qreal lod, bool allowUintIndices) { QTriangleSet triangleSet; - if (hasElementIndexUint()) { + // For now systems that support 32-bit index values will always get 32-bit + // index values. This is not necessarily ideal since 16-bit would be enough in + // many cases. TODO revisit this at a later point. + if (allowUintIndices) { QTriangulator<quint32> triangulator; triangulator.initialize(path, matrix, lod); QVertexSet<quint32> vertexSet = triangulator.triangulate(); @@ -2320,10 +2308,10 @@ Q_GUI_EXPORT QTriangleSet qTriangulate(const QVectorPath &path, } QTriangleSet qTriangulate(const QPainterPath &path, - const QTransform &matrix, qreal lod) + const QTransform &matrix, qreal lod, bool allowUintIndices) { QTriangleSet triangleSet; - if (hasElementIndexUint()) { + if (allowUintIndices) { QTriangulator<quint32> triangulator; triangulator.initialize(path, matrix, lod); QVertexSet<quint32> vertexSet = triangulator.triangulate(); @@ -2340,10 +2328,10 @@ QTriangleSet qTriangulate(const QPainterPath &path, } QPolylineSet qPolyline(const QVectorPath &path, - const QTransform &matrix, qreal lod) + const QTransform &matrix, qreal lod, bool allowUintIndices) { QPolylineSet polyLineSet; - if (hasElementIndexUint()) { + if (allowUintIndices) { QTriangulator<quint32> triangulator; triangulator.initialize(path, matrix, lod); QVertexSet<quint32> vertexSet = triangulator.polyline(); @@ -2360,10 +2348,10 @@ QPolylineSet qPolyline(const QVectorPath &path, } QPolylineSet qPolyline(const QPainterPath &path, - const QTransform &matrix, qreal lod) + const QTransform &matrix, qreal lod, bool allowUintIndices) { QPolylineSet polyLineSet; - if (hasElementIndexUint()) { + if (allowUintIndices) { QTriangulator<quint32> triangulator; triangulator.initialize(path, matrix, lod); QVertexSet<quint32> vertexSet = triangulator.polyline(); diff --git a/src/gui/painting/qtriangulator_p.h
b/src/gui/painting/qtriangulator_p.h index 4d1aba099c..8f043fc925 100644 --- a/src/gui/painting/qtriangulator_p.h +++ b/src/gui/painting/qtriangulator_p.h @@ -137,11 +137,18 @@ struct Q_GUI_EXPORT QPolylineSet // integers, the polygon is triangulated, and then scaled back by 1/32. // 'hint' should be a combination of QVectorPath::Hints. // 'lod' is the level of detail. Default is 1. Curves are split into more lines when 'lod' is higher. -QTriangleSet Q_GUI_EXPORT qTriangulate(const qreal *polygon, int count, uint hint = QVectorPath::PolygonHint | QVectorPath::OddEvenFill, const QTransform &matrix = QTransform()); -QTriangleSet Q_GUI_EXPORT qTriangulate(const QVectorPath &path, const QTransform &matrix = QTransform(), qreal lod = 1); -QTriangleSet Q_GUI_EXPORT qTriangulate(const QPainterPath &path, const QTransform &matrix = QTransform(), qreal lod = 1); -QPolylineSet qPolyline(const QVectorPath &path, const QTransform &matrix = QTransform(), qreal lod = 1); -QPolylineSet Q_GUI_EXPORT qPolyline(const QPainterPath &path, const QTransform &matrix = QTransform(), qreal lod = 1); +QTriangleSet Q_GUI_EXPORT qTriangulate(const qreal *polygon, int count, + uint hint = QVectorPath::PolygonHint | QVectorPath::OddEvenFill, + const QTransform &matrix = QTransform(), + bool allowUintIndices = true); +QTriangleSet Q_GUI_EXPORT qTriangulate(const QVectorPath &path, const QTransform &matrix = QTransform(), + qreal lod = 1, bool allowUintIndices = true); +QTriangleSet Q_GUI_EXPORT qTriangulate(const QPainterPath &path, const QTransform &matrix = QTransform(), + qreal lod = 1, bool allowUintIndices = true); +QPolylineSet qPolyline(const QVectorPath &path, const QTransform &matrix = QTransform(), + qreal lod = 1, bool allowUintIndices = true); +QPolylineSet Q_GUI_EXPORT qPolyline(const QPainterPath &path, const QTransform &matrix = QTransform(), + qreal lod = 1, bool allowUintIndices = true); QT_END_NAMESPACE |