summary refs log tree commit diff stats
path: root/src/gui/painting
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- src/gui/painting/painting.pri | 3
-rw-r--r-- src/gui/painting/qbrush.cpp | 27
-rw-r--r-- src/gui/painting/qcolorprofile.cpp (renamed from src/gui/painting/qgammatables.cpp) | 47
-rw-r--r-- src/gui/painting/qcolorprofile_p.h | 157
-rw-r--r-- src/gui/painting/qcoregraphics.mm | 26
-rw-r--r-- src/gui/painting/qdrawhelper.cpp | 1323
-rw-r--r-- src/gui/painting/qdrawhelper_neon.cpp | 2
-rw-r--r-- src/gui/painting/qdrawhelper_neon_p.h | 2
-rw-r--r-- src/gui/painting/qdrawhelper_p.h | 18
-rw-r--r-- src/gui/painting/qmemrotate.cpp | 275
-rw-r--r-- src/gui/painting/qmemrotate_p.h | 13
-rw-r--r-- src/gui/painting/qpaintengine_raster.cpp | 67
-rw-r--r-- src/gui/painting/qpaintengine_raster_p.h | 2
-rw-r--r-- src/gui/painting/qpainter.h | 3
-rw-r--r-- src/gui/painting/qpdf.cpp | 13
-rw-r--r-- src/gui/painting/qregion.cpp | 2
-rw-r--r-- src/gui/painting/qrgba64_p.h | 100
-rw-r--r-- src/gui/painting/qtriangulator.cpp | 40
-rw-r--r-- src/gui/painting/qtriangulator_p.h | 17
19 files changed, 1173 insertions, 964 deletions
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index 86e35c39f8..63e345545c 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -8,6 +8,7 @@ HEADERS += \
painting/qbrush.h \
painting/qcolor.h \
painting/qcolor_p.h \
+ painting/qcolorprofile_p.h \
painting/qcosmeticstroker_p.h \
painting/qdatabuffer_p.h \
painting/qdrawhelper_p.h \
@@ -63,11 +64,11 @@ SOURCES += \
painting/qblittable.cpp \
painting/qbrush.cpp \
painting/qcolor.cpp \
+ painting/qcolorprofile.cpp \
painting/qcompositionfunctions.cpp \
painting/qcosmeticstroker.cpp \
painting/qdrawhelper.cpp \
painting/qemulationpaintengine.cpp \
- painting/qgammatables.cpp \
painting/qgrayraster.c \
painting/qimagescale.cpp \
painting/qmatrix.cpp \
diff --git a/src/gui/painting/qbrush.cpp b/src/gui/painting/qbrush.cpp
index ebb035a2c1..06a820a859 100644
--- a/src/gui/painting/qbrush.cpp
+++ b/src/gui/painting/qbrush.cpp
@@ -1419,6 +1419,25 @@ void QGradient::setColorAt(qreal pos, const QColor &color)
m_stops.insert(index, QGradientStop(pos, color));
}
+static inline bool ok(QGradientStop stop) // true when the stop's position (stop.first) lies within [0, 1]
+{
+ return stop.first >= 0 && stop.first <= 1; // both comparisons are false for NaN, so NaN positions are rejected
+}
+
+static inline bool ok(const QGradientStops &stops) // true when every stop is valid and positions strictly increase (also true for an empty list)
+{
+ qreal lastPos = -1; // below every valid position, so the first valid stop always passes the ordering check
+ for (const QGradientStop &stop : stops) {
+ if (Q_UNLIKELY(!ok(stop))) // position outside [0, 1] or NaN
+ return false;
+ const bool sorted = stop.first > lastPos; // strict '>' rejects duplicates as well as out-of-order positions
+ if (Q_UNLIKELY(!sorted))
+ return false;
+ lastPos = stop.first;
+ }
+ return true;
+}
+
/*!
\fn void QGradient::setStops(const QGradientStops &stopPoints)
@@ -1430,6 +1449,14 @@ void QGradient::setColorAt(qreal pos, const QColor &color)
*/
void QGradient::setStops(const QGradientStops &stops)
{
+ // ## Qt 6: consider taking \a stops by value, so we can move into m_stops
+ if (Q_LIKELY(ok(stops))) {
+ // fast path for the common case: if everything is ok with the stops, just copy them
+ m_stops = stops;
+ return;
+ }
+ // otherwise, to keep the pre-5.9 behavior, add them one after another,
+ // so that each stop is checked, invalid ones are skipped, and the rest are inserted in order (which may be O(N^2)).
m_stops.clear();
for (int i=0; i<stops.size(); ++i)
setColorAt(stops.at(i).first, stops.at(i).second);
diff --git a/src/gui/painting/qgammatables.cpp b/src/gui/painting/qcolorprofile.cpp
index 1d76f7ee3c..3b7b0a248b 100644
--- a/src/gui/painting/qgammatables.cpp
+++ b/src/gui/painting/qcolorprofile.cpp
@@ -37,28 +37,51 @@
**
****************************************************************************/
-#include <private/qdrawhelper_p.h>
+#include "qcolorprofile_p.h"
+#include <qmath.h>
QT_BEGIN_NAMESPACE
+QColorProfile *QColorProfile::fromGamma(qreal gamma) // builds a profile whose tables hold pow(x, gamma) and pow(x, 1 / gamma)
+{
+ QColorProfile *cp = new QColorProfile; // heap-allocated and returned raw; NOTE(review): presumably the caller takes ownership - confirm
+
+ for (int i = 0; i <= (255 * 16); ++i) { // fills every table index, 0..4080 inclusive
+ cp->m_toLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), gamma) * (255 * 256))); // index normalized to 0..1, raised to gamma, scaled to 0..65280
+ cp->m_fromLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), qreal(1) / gamma) * (255 * 256))); // inverse curve: exponent 1 / gamma, same scaling
+ }
+
+ return cp;
+}
-QDrawHelperGammaTables::QDrawHelperGammaTables(qreal smoothing)
+static qreal srgbToLinear(qreal v)
{
- const qreal gray_gamma = 2.31;
- for (int i=0; i<256; ++i)
- qt_pow_gamma[i] = uint(qRound(qPow(i / qreal(255.), gray_gamma) * 2047));
- for (int i=0; i<2048; ++i)
- qt_pow_invgamma[i] = uchar(qRound(qPow(i / qreal(2047.0), 1 / gray_gamma) * 255));
+ const qreal a = 0.055;
+ if (v <= qreal(0.04045))
+ return v / qreal(12.92);
+ else
+ return qPow((v + a) / (qreal(1) + a), qreal(2.4));
+}
- refresh(smoothing);
+static qreal linearToSrgb(qreal v)
+{
+ const qreal a = 0.055;
+ if (v <= qreal(0.0031308))
+ return v * qreal(12.92);
+ else
+ return (qreal(1) + a) * qPow(v, qreal(1.0 / 2.4)) - a;
}
-void QDrawHelperGammaTables::refresh(qreal smoothing)
+QColorProfile *QColorProfile::fromSRgb()
{
- for (int i=0; i<256; ++i) {
- qt_pow_rgb_gamma[i] = uchar(qRound(qPow(i / qreal(255.0), smoothing) * 255));
- qt_pow_rgb_invgamma[i] = uchar(qRound(qPow(i / qreal(255.), 1 / smoothing) * 255));
+ QColorProfile *cp = new QColorProfile;
+
+ for (int i = 0; i <= (255 * 16); ++i) {
+ cp->m_toLinear[i] = ushort(qRound(srgbToLinear(i / qreal(255 * 16)) * (255 * 256)));
+ cp->m_fromLinear[i] = ushort(qRound(linearToSrgb(i / qreal(255 * 16)) * (255 * 256)));
}
+
+ return cp;
}
QT_END_NAMESPACE
diff --git a/src/gui/painting/qcolorprofile_p.h b/src/gui/painting/qcolorprofile_p.h
new file mode 100644
index 0000000000..ca1786ee6d
--- /dev/null
+++ b/src/gui/painting/qcolorprofile_p.h
@@ -0,0 +1,157 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QCOLORPROFILE_P_H
+#define QCOLORPROFILE_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include <QtGui/private/qtguiglobal_p.h>
+#include <QtGui/qrgb.h>
+#include <QtGui/qrgba64.h>
+
+QT_BEGIN_NAMESPACE
+
+class Q_GUI_EXPORT QColorProfile // table-driven conversion of color channels to and from linear values
+{
+public:
+ static QColorProfile *fromGamma(qreal gamma); // factory: tables follow a pure power-law curve with exponent gamma
+ static QColorProfile *fromSRgb(); // factory: tables follow the sRGB transfer curve
+
+ // The following methods all convert opaque or unpremultiplied colors:
+
+ QRgba64 toLinear64(QRgb rgb32) const // 8-bit channels in, linear 16-bit channels out; alpha is only widened (x 257)
+ {
+ ushort r = m_toLinear[qRed(rgb32) << 4]; // << 4 maps 0..255 onto table index 0..4080
+ ushort g = m_toLinear[qGreen(rgb32) << 4];
+ ushort b = m_toLinear[qBlue(rgb32) << 4];
+ r = r + (r >> 8); // expands the table range 0..65280 to the full 0..65535
+ g = g + (g >> 8);
+ b = b + (b >> 8);
+ return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257);
+ }
+
+ QRgb toLinear(QRgb rgb32) const // 8-bit channels in, linear 8-bit channels out; alpha is passed through
+ {
+ uchar r = (m_toLinear[qRed(rgb32) << 4] + 0x80) >> 8; // + 0x80 rounds before the table value is reduced to 8 bits
+ uchar g = (m_toLinear[qGreen(rgb32) << 4] + 0x80) >> 8;
+ uchar b = (m_toLinear[qBlue(rgb32) << 4] + 0x80) >> 8;
+ return qRgba(r, g, b, qAlpha(rgb32));
+ }
+
+ QRgba64 toLinear(QRgba64 rgb64) const // 16-bit channels in, linear 16-bit channels out; alpha is passed through
+ {
+ ushort r = rgb64.red();
+ ushort g = rgb64.green();
+ ushort b = rgb64.blue();
+ r = r - (r >> 8); // compresses 0..65535 to 0..65280 so that >> 4 below yields a table index in 0..4080
+ g = g - (g >> 8);
+ b = b - (b >> 8);
+ r = m_toLinear[r >> 4];
+ g = m_toLinear[g >> 4];
+ b = m_toLinear[b >> 4];
+ r = r + (r >> 8); // expands the table range 0..65280 back to 0..65535
+ g = g + (g >> 8);
+ b = b + (b >> 8);
+ return QRgba64::fromRgba64(r, g, b, rgb64.alpha());
+ }
+
+ QRgb fromLinear64(QRgba64 rgb64) const // linear 16-bit channels in, 8-bit channels out via m_fromLinear; alpha taken from alpha8()
+ {
+ ushort r = rgb64.red();
+ ushort g = rgb64.green();
+ ushort b = rgb64.blue();
+ r = r - (r >> 8); // compresses 0..65535 to 0..65280 so that >> 4 below yields a table index in 0..4080
+ g = g - (g >> 8);
+ b = b - (b >> 8);
+ r = (m_fromLinear[r >> 4] + 0x80) >> 8; // + 0x80 rounds before the table value is reduced to 8 bits
+ g = (m_fromLinear[g >> 4] + 0x80) >> 8;
+ b = (m_fromLinear[b >> 4] + 0x80) >> 8;
+ return qRgba(r, g, b, rgb64.alpha8());
+ }
+
+ QRgb fromLinear(QRgb rgb32) const // linear 8-bit channels in, 8-bit channels out via m_fromLinear; alpha is passed through
+ {
+ uchar r = (m_fromLinear[qRed(rgb32) << 4] + 0x80) >> 8; // << 4 maps 0..255 onto table index 0..4080; + 0x80 rounds
+ uchar g = (m_fromLinear[qGreen(rgb32) << 4] + 0x80) >> 8;
+ uchar b = (m_fromLinear[qBlue(rgb32) << 4] + 0x80) >> 8;
+ return qRgba(r, g, b, qAlpha(rgb32));
+ }
+
+ QRgba64 fromLinear(QRgba64 rgb64) const // linear 16-bit channels in, 16-bit channels out via m_fromLinear; alpha is passed through
+ {
+ ushort r = rgb64.red();
+ ushort g = rgb64.green();
+ ushort b = rgb64.blue();
+ r = r - (r >> 8); // compresses 0..65535 to 0..65280 so that >> 4 below yields a table index in 0..4080
+ g = g - (g >> 8);
+ b = b - (b >> 8);
+ r = m_fromLinear[r >> 4];
+ g = m_fromLinear[g >> 4];
+ b = m_fromLinear[b >> 4];
+ r = r + (r >> 8); // expands the table range 0..65280 back to 0..65535
+ g = g + (g >> 8);
+ b = b + (b >> 8);
+ return QRgba64::fromRgba64(r, g, b, rgb64.alpha());
+ }
+
+private:
+ QColorProfile() { } // private and empty: both tables start uninitialized and are filled by the static factories
+
+ // We translate to 0-65280 (255*256) instead of 0-65535 to make simple
+ // shifting an accurate conversion.
+ // We translate from 0-4080 (255*16) for the same speed-up, and to keep
+ // the tables small enough to fit in most inner caches.
+ ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
+ ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
+
+};
+
+QT_END_NAMESPACE
+
+#endif // QCOLORPROFILE_P_H
diff --git a/src/gui/painting/qcoregraphics.mm b/src/gui/painting/qcoregraphics.mm
index 3753fa4e88..a64a184e25 100644
--- a/src/gui/painting/qcoregraphics.mm
+++ b/src/gui/painting/qcoregraphics.mm
@@ -39,6 +39,7 @@
#include <QtGui/private/qpaintengine_p.h>
#include <QtCore/qdebug.h>
#include <QtCore/qcoreapplication.h>
+#include <QtCore/qoperatingsystemversion.h>
QT_BEGIN_NAMESPACE
@@ -106,29 +107,6 @@ QImage qt_mac_toQImage(CGImageRef image)
#ifdef Q_OS_MACOS
-QT_END_NAMESPACE
-
-@interface NSGraphicsContext (QtAdditions)
-
-+ (NSGraphicsContext *)qt_graphicsContextWithCGContext:(CGContextRef)graphicsPort flipped:(BOOL)initialFlippedState;
-
-@end
-
-@implementation NSGraphicsContext (QtAdditions)
-
-+ (NSGraphicsContext *)qt_graphicsContextWithCGContext:(CGContextRef)graphicsPort flipped:(BOOL)initialFlippedState
-{
-#if QT_MAC_PLATFORM_SDK_EQUAL_OR_ABOVE(__MAC_10_10, __IPHONE_NA)
- if (QT_PREPEND_NAMESPACE(QSysInfo::MacintoshVersion) >= QT_PREPEND_NAMESPACE(QSysInfo::MV_10_10))
- return [self graphicsContextWithCGContext:graphicsPort flipped:initialFlippedState];
-#endif
- return [self graphicsContextWithGraphicsPort:graphicsPort flipped:initialFlippedState];
-}
-
-@end
-
-QT_BEGIN_NAMESPACE
-
static NSImage *qt_mac_cgimage_to_nsimage(CGImageRef image)
{
NSImage *newImage = [[NSImage alloc] initWithCGImage:image size:NSZeroSize];
@@ -179,7 +157,7 @@ QPixmap qt_mac_toQPixmap(const NSImage *image, const QSizeF &size)
QMacCGContext ctx(&pixmap);
if (!ctx)
return QPixmap();
- NSGraphicsContext *gc = [NSGraphicsContext qt_graphicsContextWithCGContext:ctx flipped:YES];
+ NSGraphicsContext *gc = [NSGraphicsContext graphicsContextWithCGContext:ctx flipped:YES];
if (!gc)
return QPixmap();
[NSGraphicsContext saveGraphicsState];
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 298304c4ef..9b5f15470e 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -43,6 +43,7 @@
#include <qstylehints.h>
#include <qguiapplication.h>
#include <qatomic.h>
+#include <private/qcolorprofile_p.h>
#include <private/qdrawhelper_p.h>
#include <private/qpaintengine_raster_p.h>
#include <private/qpainter_p.h>
@@ -1320,7 +1321,7 @@ static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, con
static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
{
for (int i = 0; i < length; ++i) {
- dest[i] = src[i].toArgb32();
+ dest[i] = toArgb32(src[i]);
}
}
@@ -1411,7 +1412,7 @@ static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, in
{
uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
for (int i = 0; i < length; ++i) {
- dest[i] = buffer[i].unpremultiplied().toArgb32();
+ dest[i] = toArgb32(buffer[i].unpremultiplied());
}
}
@@ -1419,7 +1420,7 @@ static void QT_FASTCALL destStore64RGBA8888(QRasterBuffer *rasterBuffer, int x,
{
uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
for (int i = 0; i < length; ++i) {
- dest[i] = ARGB2RGBA(buffer[i].unpremultiplied().toArgb32());
+ dest[i] = toRgba8888(buffer[i].unpremultiplied());
}
}
@@ -1918,562 +1919,695 @@ inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int,
Q_ASSERT(v2 >= l1 && v2 <= l2);
}
-template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
-static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
- const QSpanData *data, int y, int x,
- int length)
-{
- int image_width = data->texture.width;
- int image_height = data->texture.height;
-
- int image_x1 = data->texture.x1;
- int image_y1 = data->texture.y1;
- int image_x2 = data->texture.x2 - 1;
- int image_y2 = data->texture.y2 - 1;
-
- const qreal cx = x + qreal(0.5);
- const qreal cy = y + qreal(0.5);
-
- uint *end = buffer + length;
- uint *b = buffer;
- if (data->fast_matrix) {
- // The increment pr x in the scanline
- int fdx = (int)(data->m11 * fixed_scale);
- int fdy = (int)(data->m12 * fixed_scale);
-
- int fx = int((data->m21 * cy
- + data->m11 * cx + data->dx) * fixed_scale);
- int fy = int((data->m22 * cy
- + data->m12 * cx + data->dy) * fixed_scale);
-
- fx -= half_point;
- fy -= half_point;
-
- if (fdy == 0) { //simple scale, no rotation
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uint *s1 = (const uint *)data->texture.scanLine(y1);
- const uint *s2 = (const uint *)data->texture.scanLine(y2);
-
- if (fdx <= fixed_scale && fdx > 0) { // scale up on X
- int disty = (fy & 0x0000ffff) >> 8;
- int idisty = 256 - disty;
- int x = fx >> 16;
+enum FastTransformTypes { // categories of fast transform; NOTE(review): presumably used to select a BilinearFastTransformHelper - confirm
+ SimpleUpscaleTransform,
+ UpscaleTransform,
+ DownscaleTransform,
+ RotateTransform,
+ FastRotateTransform,
+ NFastTransformTypes // last enumerator: its value (5) equals the number of transform types listed above
+};
- // The idea is first to do the interpolation between the row s1 and the row s2
- // into an intermediate buffer, then we interpolate between two pixel of this buffer.
+typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy); // pointer type matching the fetchTransformedBilinearARGB32PM_*_helper functions: output is written from b up to end, fx/fy are passed by reference, fdx/fdy are the per-pixel increments
- // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB
- // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG
- // +1 for the last pixel to interpolate with, and +1 for rounding errors.
- quint32 intermediate_buffer[2][buffer_size + 2];
- // count is the size used in the intermediate_buffer.
- int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
- Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
- int f = 0;
- int lim = count;
- if (blendType == BlendTransformedBilinearTiled) {
- x %= image_width;
- if (x < 0) x += image_width;
- } else {
- lim = qMin(count, image_x2-x+1);
- if (x < image_x1) {
- Q_ASSERT(x <= image_x2);
- uint t = s1[image_x1];
- uint b = s2[image_x1];
- quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
- do {
- intermediate_buffer[0][f] = rb;
- intermediate_buffer[1][f] = ag;
- f++;
- x++;
- } while (x < image_x1 && f < lim);
- }
- }
+template<TextureBlendType blendType>
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
+{
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
+
+ int disty = (fy & 0x0000ffff) >> 8;
+ int idisty = 256 - disty;
+ int x = fx >> 16;
+ int length = end - b;
+
+ // The idea is first to do the interpolation between the row s1 and the row s2
+ // into an intermediate buffer, then we interpolate between two pixels of this buffer.
+
+ // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB
+ // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG
+ // +1 for the last pixel to interpolate with, and +1 for rounding errors.
+ quint32 intermediate_buffer[2][buffer_size + 2];
+ // count is the size used in the intermediate_buffer.
+ int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
+ Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
+ int f = 0;
+ int lim = count;
+ if (blendType == BlendTransformedBilinearTiled) {
+ x %= image.width;
+ if (x < 0) x += image.width;
+ } else {
+ lim = qMin(count, image.x2 - x);
+ if (x < image.x1) {
+ Q_ASSERT(x < image.x2);
+ uint t = s1[image.x1];
+ uint b = s2[image.x1];
+ quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ do {
+ intermediate_buffer[0][f] = rb;
+ intermediate_buffer[1][f] = ag;
+ f++;
+ x++;
+ } while (x < image.x1 && f < lim);
+ }
+ }
- if (blendType != BlendTransformedBilinearTiled) {
+ if (blendType != BlendTransformedBilinearTiled) {
#if defined(__SSE2__)
- const __m128i disty_ = _mm_set1_epi16(disty);
- const __m128i idisty_ = _mm_set1_epi16(idisty);
- const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-
- lim -= 3;
- for (; f < lim; x += 4, f += 4) {
- // Load 4 pixels from s1, and split the alpha-green and red-blue component
- __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x));
- __m128i topAG = _mm_srli_epi16(top, 8);
- __m128i topRB = _mm_and_si128(top, colorMask);
- // Multiplies each colour component by idisty
- topAG = _mm_mullo_epi16 (topAG, idisty_);
- topRB = _mm_mullo_epi16 (topRB, idisty_);
-
- // Same for the s2 vector
- __m128i bottom = _mm_loadu_si128((const __m128i*)((const uint *)(s2)+x));
- __m128i bottomAG = _mm_srli_epi16(bottom, 8);
- __m128i bottomRB = _mm_and_si128(bottom, colorMask);
- bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
- bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
-
- // Add the values, and shift to only keep 8 significant bits per colors
- __m128i rAG =_mm_add_epi16(topAG, bottomAG);
- rAG = _mm_srli_epi16(rAG, 8);
- _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
- __m128i rRB =_mm_add_epi16(topRB, bottomRB);
- rRB = _mm_srli_epi16(rRB, 8);
- _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
- }
+ const __m128i disty_ = _mm_set1_epi16(disty);
+ const __m128i idisty_ = _mm_set1_epi16(idisty);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+
+ lim -= 3;
+ for (; f < lim; x += 4, f += 4) {
+ // Load 4 pixels from s1, and split the alpha-green and red-blue component
+ __m128i top = _mm_loadu_si128((const __m128i*)((const uint *)(s1)+x));
+ __m128i topAG = _mm_srli_epi16(top, 8);
+ __m128i topRB = _mm_and_si128(top, colorMask);
+ // Multiplies each color component by idisty
+ topAG = _mm_mullo_epi16 (topAG, idisty_);
+ topRB = _mm_mullo_epi16 (topRB, idisty_);
+
+ // Same for the s2 vector
+ __m128i bottom = _mm_loadu_si128((const __m128i*)((const uint *)(s2)+x));
+ __m128i bottomAG = _mm_srli_epi16(bottom, 8);
+ __m128i bottomRB = _mm_and_si128(bottom, colorMask);
+ bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
+ bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
+
+ // Add the values, and shift to keep only 8 significant bits per color
+ __m128i rAG =_mm_add_epi16(topAG, bottomAG);
+ rAG = _mm_srli_epi16(rAG, 8);
+ _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
+ __m128i rRB =_mm_add_epi16(topRB, bottomRB);
+ rRB = _mm_srli_epi16(rRB, 8);
+ _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
+ }
#elif defined(__ARM_NEON__)
- const int16x8_t disty_ = vdupq_n_s16(disty);
- const int16x8_t idisty_ = vdupq_n_s16(idisty);
- const int16x8_t colorMask = vdupq_n_s16(0x00ff);
-
- lim -= 3;
- for (; f < lim; x += 4, f += 4) {
- // Load 4 pixels from s1, and split the alpha-green and red-blue component
- int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
- int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
- int16x8_t topRB = vandq_s16(top, colorMask);
- // Multiplies each colour component by idisty
- topAG = vmulq_s16(topAG, idisty_);
- topRB = vmulq_s16(topRB, idisty_);
-
- // Same for the s2 vector
- int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
- int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
- int16x8_t bottomRB = vandq_s16(bottom, colorMask);
- bottomAG = vmulq_s16(bottomAG, disty_);
- bottomRB = vmulq_s16(bottomRB, disty_);
-
- // Add the values, and shift to only keep 8 significant bits per colors
- int16x8_t rAG = vaddq_s16(topAG, bottomAG);
- rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
- vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
- int16x8_t rRB = vaddq_s16(topRB, bottomRB);
- rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
- vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
- }
+ const int16x8_t disty_ = vdupq_n_s16(disty);
+ const int16x8_t idisty_ = vdupq_n_s16(idisty);
+ const int16x8_t colorMask = vdupq_n_s16(0x00ff);
+
+ lim -= 3;
+ for (; f < lim; x += 4, f += 4) {
+ // Load 4 pixels from s1, and split the alpha-green and red-blue component
+ int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
+ int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
+ int16x8_t topRB = vandq_s16(top, colorMask);
+ // Multiplies each color component by idisty
+ topAG = vmulq_s16(topAG, idisty_);
+ topRB = vmulq_s16(topRB, idisty_);
+
+ // Same for the s2 vector
+ int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
+ int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
+ int16x8_t bottomRB = vandq_s16(bottom, colorMask);
+ bottomAG = vmulq_s16(bottomAG, disty_);
+ bottomRB = vmulq_s16(bottomRB, disty_);
+
+ // Add the values, and shift to keep only 8 significant bits per color
+ int16x8_t rAG = vaddq_s16(topAG, bottomAG);
+ rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
+ vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
+ int16x8_t rRB = vaddq_s16(topRB, bottomRB);
+ rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
+ vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
+ }
#endif
- }
- for (; f < count; f++) { // Same as above but without sse2
- if (blendType == BlendTransformedBilinearTiled) {
- if (x >= image_width) x -= image_width;
- } else {
- x = qMin(x, image_x2);
- }
+ }
+ for (; f < count; f++) { // Same as above but without SIMD
+ if (blendType == BlendTransformedBilinearTiled) {
+ if (x >= image.width) x -= image.width;
+ } else {
+ x = qMin(x, image.x2 - 1);
+ }
- uint t = s1[x];
- uint b = s2[x];
+ uint t = s1[x];
+ uint b = s2[x];
- intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
- x++;
- }
- // Now interpolate the values from the intermediate_buffer to get the final result.
- fx &= fixed_scale - 1;
- Q_ASSERT((fx >> 16) == 0);
- while (b < end) {
- int x1 = (fx >> 16);
- int x2 = x1 + 1;
- Q_ASSERT(x1 >= 0);
- Q_ASSERT(x2 < count);
+ intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ x++;
+ }
+ // Now interpolate the values from the intermediate_buffer to get the final result.
+ fx &= fixed_scale - 1;
+ Q_ASSERT((fx >> 16) == 0);
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2 = x1 + 1;
+ Q_ASSERT(x1 >= 0);
+ Q_ASSERT(x2 < count);
+
+ int distx = (fx & 0x0000ffff) >> 8;
+ int idistx = 256 - distx;
+ int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff;
+ int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00;
+ *b = rb | ag;
+ b++;
+ fx += fdx;
+ }
+}
- int distx = (fx & 0x0000ffff) >> 8;
- int idistx = 256 - distx;
- int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff;
- int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00;
- *b = rb | ag;
- b++;
- fx += fdx;
- }
- } else if ((fdx < 0 && fdx > -(fixed_scale / 8)) || std::abs(data->m22) < (1./8.)) { // scale up more than 8x
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uint *s1 = (const uint *)data->texture.scanLine(y1);
- const uint *s2 = (const uint *)data->texture.scanLine(y2);
- int disty = (fy & 0x0000ffff) >> 8;
- while (b < end) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- uint tl = s1[x1];
- uint tr = s1[x2];
- uint bl = s2[x1];
- uint br = s2[x2];
- int distx = (fx & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+template<TextureBlendType blendType>
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
+{
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
+ const int disty = (fy & 0x0000ffff) >> 8;
+
+ if (blendType != BlendTransformedBilinearTiled) {
+ const qint64 min_fx = qint64(image.x1) * fixed_scale;
+ const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ if (x1 != x2)
+ break;
+ uint top = s1[x1];
+ uint bot = s2[x1];
+ *b = INTERPOLATE_PIXEL_256(top, 256 - disty, bot, disty);
+ fx += fdx;
+ ++b;
+ }
+ uint *boundedEnd = end;
+ if (fdx > 0)
+ boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
+ else if (fdx < 0)
+ boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
+
+ // A fast middle part without boundary checks
+ while (b < boundedEnd) {
+ int x = (fx >> 16);
+ int distx = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
+ fx += fdx;
+ ++b;
+ }
+ }
- fx += fdx;
- ++b;
- }
- } else { //scale down
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uint *s1 = (const uint *)data->texture.scanLine(y1);
- const uint *s2 = (const uint *)data->texture.scanLine(y2);
- const int disty8 = (fy & 0x0000ffff) >> 8;
- const int disty4 = (disty8 + 0x08) >> 4;
-
- if (blendType != BlendTransformedBilinearTiled) {
-#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
- const qint64 min_fx = qint64(image_x1) * fixed_scale; \
- const qint64 max_fx = qint64(image_x2) * fixed_scale; \
- while (b < end) { \
- int x1 = (fx >> 16); \
- int x2; \
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
- if (x1 != x2) \
- break; \
- uint top = s1[x1]; \
- uint bot = s2[x1]; \
- *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8); \
- fx += fdx; \
- ++b; \
- } \
- uint *boundedEnd = end; \
- if (fdx > 0) \
- boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); \
- else if (fdx < 0) \
- boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); \
- boundedEnd -= 3;
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
+ uint tl = s1[x1];
+ uint tr = s1[x2];
+ uint bl = s2[x1];
+ uint br = s2[x2];
+ int distx = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+
+ fx += fdx;
+ ++b;
+ }
+}
+template<TextureBlendType blendType>
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
+{
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
+ const int disty8 = (fy & 0x0000ffff) >> 8;
+ const int disty4 = (disty8 + 0x08) >> 4;
+
+ if (blendType != BlendTransformedBilinearTiled) {
+ const qint64 min_fx = qint64(image.x1) * fixed_scale;
+ const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ if (x1 != x2)
+ break;
+ uint top = s1[x1];
+ uint bot = s2[x1];
+ *b = INTERPOLATE_PIXEL_256(top, 256 - disty8, bot, disty8);
+ fx += fdx;
+ ++b;
+ }
+ uint *boundedEnd = end;
+ if (fdx > 0)
+ boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
+ else if (fdx < 0)
+ boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
+ // A fast middle part without boundary checks
#if defined(__SSE2__)
- BILINEAR_DOWNSCALE_BOUNDS_PROLOG
-
- const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
- const __m128i v_256 = _mm_set1_epi16(256);
- const __m128i v_disty = _mm_set1_epi16(disty4);
- const __m128i v_fdx = _mm_set1_epi32(fdx*4);
- const __m128i v_fx_r = _mm_set1_epi32(0x8);
- __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
-
- while (b < boundedEnd) {
- __m128i offset = _mm_srli_epi32(v_fx, 16);
- const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset3 = _mm_cvtsi128_si32(offset);
- const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]);
- const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]);
- const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
- const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
-
- __m128i v_distx = _mm_srli_epi16(v_fx, 8);
- v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
- v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
- v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
-
- interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
- b += 4;
- v_fx = _mm_add_epi32(v_fx, v_fdx);
- }
- fx = _mm_cvtsi128_si32(v_fx);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ const __m128i v_256 = _mm_set1_epi16(256);
+ const __m128i v_disty = _mm_set1_epi16(disty4);
+ const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ const __m128i v_fx_r = _mm_set1_epi32(0x8);
+ __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
+
+ while (b < boundedEnd - 3) {
+ __m128i offset = _mm_srli_epi32(v_fx, 16);
+ const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset3 = _mm_cvtsi128_si32(offset);
+ const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]);
+ const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]);
+ const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
+ const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
+
+ __m128i v_distx = _mm_srli_epi16(v_fx, 8);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
+ v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
+ v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
+
+ interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
+ b += 4;
+ v_fx = _mm_add_epi32(v_fx, v_fdx);
+ }
+ fx = _mm_cvtsi128_si32(v_fx);
#elif defined(__ARM_NEON__)
- BILINEAR_DOWNSCALE_BOUNDS_PROLOG
-
- const int16x8_t colorMask = vdupq_n_s16(0x00ff);
- const int16x8_t invColorMask = vmvnq_s16(colorMask);
- const int16x8_t v_256 = vdupq_n_s16(256);
- const int16x8_t v_disty = vdupq_n_s16(disty4);
- const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
- int32x4_t v_fdx = vdupq_n_s32(fdx*4);
+ const int16x8_t colorMask = vdupq_n_s16(0x00ff);
+ const int16x8_t invColorMask = vmvnq_s16(colorMask);
+ const int16x8_t v_256 = vdupq_n_s16(256);
+ const int16x8_t v_disty = vdupq_n_s16(disty4);
+ const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
+ int32x4_t v_fdx = vdupq_n_s32(fdx*4);
- int32x4_t v_fx = vmovq_n_s32(fx);
- v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
- v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
- v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
+ int32x4_t v_fx = vmovq_n_s32(fx);
+ v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
+ v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
+ v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
- const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
- const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
+ const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
+ const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
- while (b < boundedEnd) {
- uint32x4x2_t v_top, v_bot;
+ while (b < boundedEnd - 3) {
+ uint32x4x2_t v_top, v_bot;
- int x1 = (fx >> 16);
- fx += fdx;
- v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
- x1 = (fx >> 16);
- fx += fdx;
- v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
- x1 = (fx >> 16);
- fx += fdx;
- v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
- x1 = (fx >> 16);
- fx += fdx;
- v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
-
- int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
- v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
-
- interpolate_4_pixels_16_neon(
- vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
- vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
- vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
- colorMask, invColorMask, v_256, b);
- b+=4;
- v_fx = vaddq_s32(v_fx, v_fdx);
- }
+ int x1 = (fx >> 16);
+ fx += fdx;
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
+ x1 = (fx >> 16);
+ fx += fdx;
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
+ x1 = (fx >> 16);
+ fx += fdx;
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
+ x1 = (fx >> 16);
+ fx += fdx;
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
+
+ int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
+ v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
+
+ interpolate_4_pixels_16_neon(
+ vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
+ vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
+ vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
+ colorMask, invColorMask, v_256, b);
+ b+=4;
+ v_fx = vaddq_s32(v_fx, v_fdx);
+ }
#endif
- }
+ while (b < boundedEnd) {
+ int x = (fx >> 16);
+#if defined(__SSE2__) || defined(__ARM_NEON__)
+ int distx8 = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
+#else
+ uint tl = s1[x];
+ uint tr = s1[x + 1];
+ uint bl = s2[x];
+ uint br = s2[x + 1];
+ int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
+#endif
+ fx += fdx;
+ ++b;
+ }
+ }
- while (b < end) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- uint tl = s1[x1];
- uint tr = s1[x2];
- uint bl = s2[x1];
- uint br = s2[x2];
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ uint tl = s1[x1];
+ uint tr = s1[x2];
+ uint bl = s2[x1];
+ uint br = s2[x2];
#if defined(__SSE2__) || defined(__ARM_NEON__)
- // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
- int distx8 = (fx & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
+ // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
+ int distx8 = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
#else
- int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
+ int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
#endif
- fx += fdx;
- ++b;
- }
- }
- } else { //rotation
- if (std::abs(data->m11) < (1./8.) || std::abs(data->m22) < (1./8.)) {
- //if we are zooming more than 8 times, we use 8bit precision for the position.
- while (b < end) {
- int x1 = (fx >> 16);
- int x2;
- int y1 = (fy >> 16);
- int y2;
+ fx += fdx;
+ ++b;
+ }
+}
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+template<TextureBlendType blendType>
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int fdy)
+{
+ // if we are zooming more than 8 times, we use 8bit precision for the position.
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
- const uint *s1 = (const uint *)data->texture.scanLine(y1);
- const uint *s2 = (const uint *)data->texture.scanLine(y2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
- uint tl = s1[x1];
- uint tr = s1[x2];
- uint bl = s2[x1];
- uint br = s2[x2];
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
- int distx = (fx & 0x0000ffff) >> 8;
- int disty = (fy & 0x0000ffff) >> 8;
+ uint tl = s1[x1];
+ uint tr = s1[x2];
+ uint bl = s2[x1];
+ uint br = s2[x2];
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
- fx += fdx;
- fy += fdy;
- ++b;
- }
- } else {
- //we are zooming less than 8x, use 4bit precision
-
- if (blendType != BlendTransformedBilinearTiled) {
-#define BILINEAR_ROTATE_BOUNDS_PROLOG \
- const qint64 min_fx = qint64(image_x1) * fixed_scale; \
- const qint64 max_fx = qint64(image_x2) * fixed_scale; \
- const qint64 min_fy = qint64(image_y1) * fixed_scale; \
- const qint64 max_fy = qint64(image_y2) * fixed_scale; \
- while (b < end) { \
- int x1 = (fx >> 16); \
- int x2; \
- int y1 = (fy >> 16); \
- int y2; \
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); \
- if (x1 != x2 && y1 != y2) \
- break; \
- const uint *s1 = (const uint *)data->texture.scanLine(y1); \
- const uint *s2 = (const uint *)data->texture.scanLine(y2); \
- uint tl = s1[x1]; \
- uint tr = s1[x2]; \
- uint bl = s2[x1]; \
- uint br = s2[x2]; \
- int distx = (fx & 0x0000ffff) >> 8; \
- int disty = (fy & 0x0000ffff) >> 8; \
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); \
- fx += fdx; \
- fy += fdy; \
- ++b; \
- } \
- uint *boundedEnd = end; \
- if (fdx > 0) \
- boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx); \
- else if (fdx < 0) \
- boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx); \
- if (fdy > 0) \
- boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy); \
- else if (fdy < 0) \
- boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy); \
- boundedEnd -= 3;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
-#if defined(__SSE2__)
- BILINEAR_ROTATE_BOUNDS_PROLOG
+ fx += fdx;
+ fy += fdy;
+ ++b;
+ }
+}
- const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
- const __m128i v_256 = _mm_set1_epi16(256);
- const __m128i v_fdx = _mm_set1_epi32(fdx*4);
- const __m128i v_fdy = _mm_set1_epi32(fdy*4);
- const __m128i v_fxy_r = _mm_set1_epi32(0x8);
- __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
- __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
+template<TextureBlendType blendType>
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int fdy)
+{
+ //we are zooming less than 8x, use 4bit precision
+ if (blendType != BlendTransformedBilinearTiled) {
+ const qint64 min_fx = qint64(image.x1) * fixed_scale;
+ const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
+ const qint64 min_fy = qint64(image.y1) * fixed_scale;
+ const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
+ // first handle the possibly bounded part in the beginning
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ if (x1 != x2 && y1 != y2)
+ break;
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
+ uint tl = s1[x1];
+ uint tr = s1[x2];
+ uint bl = s2[x1];
+ uint br = s2[x2];
+#if defined(__SSE2__) || defined(__ARM_NEON__)
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+#else
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
+#endif
+ fx += fdx;
+ fy += fdy;
+ ++b;
+ }
+ uint *boundedEnd = end;
+ if (fdx > 0)
+ boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
+ else if (fdx < 0)
+ boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
+ if (fdy > 0)
+ boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy);
+ else if (fdy < 0)
+ boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy);
+
+ // until boundedEnd we can now have a fast middle part without boundary checks
+#if defined(__SSE2__)
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ const __m128i v_256 = _mm_set1_epi16(256);
+ const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ const __m128i v_fdy = _mm_set1_epi32(fdy*4);
+ const __m128i v_fxy_r = _mm_set1_epi32(0x8);
+ __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
+ __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
+
+ const uchar *textureData = image.imageData;
+ const int bytesPerLine = image.bytesPerLine;
+ const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
+
+ while (b < boundedEnd - 3) {
+ const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128());
+ // 4x16bit * 4x16bit -> 4x32bit
+ __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl));
+ offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16));
+ const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
+ const int offset3 = _mm_cvtsi128_si32(offset);
+ const uint *topData = (const uint *)(textureData);
+ const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]);
+ const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]);
+ const uint *bottomData = (const uint *)(textureData + bytesPerLine);
+ const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]);
+ const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]);
+
+ __m128i v_distx = _mm_srli_epi16(v_fx, 8);
+ __m128i v_disty = _mm_srli_epi16(v_fy, 8);
+ v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4);
+ v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4);
+ v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
+ v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
+ v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
+ v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
+
+ interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
+ b += 4;
+ v_fx = _mm_add_epi32(v_fx, v_fdx);
+ v_fy = _mm_add_epi32(v_fy, v_fdy);
+ }
+ fx = _mm_cvtsi128_si32(v_fx);
+ fy = _mm_cvtsi128_si32(v_fy);
+#elif defined(__ARM_NEON__)
+ const int16x8_t colorMask = vdupq_n_s16(0x00ff);
+ const int16x8_t invColorMask = vmvnq_s16(colorMask);
+ const int16x8_t v_256 = vdupq_n_s16(256);
+ int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
+ int32x4_t v_fdy = vdupq_n_s32(fdy * 4);
+
+ const uchar *textureData = image.imageData;
+ const int bytesPerLine = image.bytesPerLine;
+
+ int32x4_t v_fx = vmovq_n_s32(fx);
+ int32x4_t v_fy = vmovq_n_s32(fy);
+ v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
+ v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
+ v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
+ v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
+ v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
+ v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);
+
+ const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
+ const int32x4_t v_round = vdupq_n_s32(0x0800);
+
+ while (b < boundedEnd - 3) {
+ uint32x4x2_t v_top, v_bot;
+
+ int x1 = (fx >> 16);
+ int y1 = (fy >> 16);
+ fx += fdx; fy += fdy;
+ const uchar *sl = textureData + bytesPerLine * y1;
+ const uint *s1 = reinterpret_cast<const uint *>(sl);
+ const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
+ x1 = (fx >> 16);
+ y1 = (fy >> 16);
+ fx += fdx; fy += fdy;
+ sl = textureData + bytesPerLine * y1;
+ s1 = reinterpret_cast<const uint *>(sl);
+ s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
+ x1 = (fx >> 16);
+ y1 = (fy >> 16);
+ fx += fdx; fy += fdy;
+ sl = textureData + bytesPerLine * y1;
+ s1 = reinterpret_cast<const uint *>(sl);
+ s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
+ x1 = (fx >> 16);
+ y1 = (fy >> 16);
+ fx += fdx; fy += fdy;
+ sl = textureData + bytesPerLine * y1;
+ s1 = reinterpret_cast<const uint *>(sl);
+ s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
+
+ int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
+ int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
+ v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
+ v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
+ int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);
+
+ interpolate_4_pixels_16_neon(
+ vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
+ vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
+ vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
+ v_disty_, colorMask, invColorMask, v_256, b);
+ b += 4;
+ v_fx = vaddq_s32(v_fx, v_fdx);
+ v_fy = vaddq_s32(v_fy, v_fdy);
+ }
+#endif
+ while (b < boundedEnd) {
+ int x = (fx >> 16);
+ int y = (fy >> 16);
- const uchar *textureData = data->texture.imageData;
- const int bytesPerLine = data->texture.bytesPerLine;
- const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
+ const uint *s1 = (const uint *)image.scanLine(y);
+ const uint *s2 = (const uint *)image.scanLine(y + 1);
- while (b < boundedEnd) {
- const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128());
- // 4x16bit * 4x16bit -> 4x32bit
- __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl));
- offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16));
- const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4);
- const int offset3 = _mm_cvtsi128_si32(offset);
- const uint *topData = (const uint *)(textureData);
- const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]);
- const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]);
- const uint *bottomData = (const uint *)(textureData + bytesPerLine);
- const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]);
- const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]);
-
- __m128i v_distx = _mm_srli_epi16(v_fx, 8);
- __m128i v_disty = _mm_srli_epi16(v_fy, 8);
- v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4);
- v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4);
- v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
- v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
- v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
- v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
-
- interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
- b += 4;
- v_fx = _mm_add_epi32(v_fx, v_fdx);
- v_fy = _mm_add_epi32(v_fy, v_fdy);
- }
- fx = _mm_cvtsi128_si32(v_fx);
- fy = _mm_cvtsi128_si32(v_fy);
-#elif defined(__ARM_NEON__)
- BILINEAR_ROTATE_BOUNDS_PROLOG
+#if defined(__SSE2__) || defined(__ARM_NEON__)
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
+#else
+ uint tl = s1[x];
+ uint tr = s1[x + 1];
+ uint bl = s2[x];
+ uint br = s2[x + 1];
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
+#endif
- const int16x8_t colorMask = vdupq_n_s16(0x00ff);
- const int16x8_t invColorMask = vmvnq_s16(colorMask);
- const int16x8_t v_256 = vdupq_n_s16(256);
- int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
- int32x4_t v_fdy = vdupq_n_s32(fdy * 4);
+ fx += fdx;
+ fy += fdy;
+ ++b;
+ }
+ }
- const uchar *textureData = data->texture.imageData;
- const int bytesPerLine = data->texture.bytesPerLine;
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
- int32x4_t v_fx = vmovq_n_s32(fx);
- int32x4_t v_fy = vmovq_n_s32(fy);
- v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
- v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
- v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
- v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
- v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
- v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
- const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
- const int32x4_t v_round = vdupq_n_s32(0x0800);
+ const uint *s1 = (const uint *)image.scanLine(y1);
+ const uint *s2 = (const uint *)image.scanLine(y2);
- while (b < boundedEnd) {
- uint32x4x2_t v_top, v_bot;
+ uint tl = s1[x1];
+ uint tr = s1[x2];
+ uint bl = s2[x1];
+ uint br = s2[x2];
- int x1 = (fx >> 16);
- int y1 = (fy >> 16);
- fx += fdx; fy += fdy;
- const uchar *sl = textureData + bytesPerLine * y1;
- const uint *s1 = reinterpret_cast<const uint *>(sl);
- const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
- v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
- x1 = (fx >> 16);
- y1 = (fy >> 16);
- fx += fdx; fy += fdy;
- sl = textureData + bytesPerLine * y1;
- s1 = reinterpret_cast<const uint *>(sl);
- s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
- v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
- x1 = (fx >> 16);
- y1 = (fy >> 16);
- fx += fdx; fy += fdy;
- sl = textureData + bytesPerLine * y1;
- s1 = reinterpret_cast<const uint *>(sl);
- s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
- v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
- x1 = (fx >> 16);
- y1 = (fy >> 16);
- fx += fdx; fy += fdy;
- sl = textureData + bytesPerLine * y1;
- s1 = reinterpret_cast<const uint *>(sl);
- s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
- v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
- v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
-
- int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
- int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
- v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
- v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
- int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);
-
- interpolate_4_pixels_16_neon(
- vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
- vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
- vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
- v_disty_, colorMask, invColorMask, v_256, b);
- b += 4;
- v_fx = vaddq_s32(v_fx, v_fdx);
- v_fy = vaddq_s32(v_fy, v_fdy);
- }
+#if defined(__SSE2__) || defined(__ARM_NEON__)
+ // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+#else
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
#endif
- }
- while (b < end) {
- int x1 = (fx >> 16);
- int x2;
- int y1 = (fy >> 16);
- int y2;
+ fx += fdx;
+ fy += fdy;
+ ++b;
+ }
+}
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uint *s1 = (const uint *)data->texture.scanLine(y1);
- const uint *s2 = (const uint *)data->texture.scanLine(y2);
+static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
+ {
+ fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinear>,
+ fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
+ fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
+ fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
+ fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
+ },
+ {
+ fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinearTiled>,
+ fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
+ fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
+ fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
+ fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
+ }
+};
- uint tl = s1[x1];
- uint tr = s1[x2];
- uint bl = s2[x1];
- uint br = s2[x2];
+template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
+static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
+ const QSpanData *data, int y, int x,
+ int length)
+{
+ const qreal cx = x + qreal(0.5);
+ const qreal cy = y + qreal(0.5);
+ Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
- int distx = (fx & 0x0000ffff) >> 8;
- int disty = (fy & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
-#else
- int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
- int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
-#endif
+ uint *end = buffer + length;
+ uint *b = buffer;
+ if (data->fast_matrix) {
+ // The increment per x in the scanline
+ int fdx = (int)(data->m11 * fixed_scale);
+ int fdy = (int)(data->m12 * fixed_scale);
- fx += fdx;
- fy += fdy;
- ++b;
- }
+ int fx = int((data->m21 * cy
+ + data->m11 * cx + data->dx) * fixed_scale);
+ int fy = int((data->m22 * cy
+ + data->m12 * cx + data->dy) * fixed_scale);
+
+ fx -= half_point;
+ fy -= half_point;
+
+ if (fdy == 0) { // simple scale, no rotation or shear
+ if (fdx <= fixed_scale && fdx > 0) {
+ // simple scale up on X without mirroring
+ bilinearFastTransformHelperARGB32PM[tiled][SimpleUpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ } else if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) {
+ // scale up more than 8x (on either Y or on X mirrored)
+ bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ } else {
+ // scale down on X (or up on X mirrored less than 8x)
+ bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ }
+ } else { // rotation or shear
+ if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
+ // if we are zooming more than 8 times, we use 8bit precision for the position.
+ bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ } else {
+ // we are zooming less than 8x, use 4bit precision
+ bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
}
}
} else {
+ const QTextureData &image = data->texture;
+
const qreal fdx = data->m11;
const qreal fdy = data->m12;
const qreal fdw = data->m13;
@@ -2495,8 +2629,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
int distx = int((px - x1) * 256);
int disty = int((py - y1) * 256);
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
const uint *s1 = (const uint *)data->texture.scanLine(y1);
const uint *s2 = (const uint *)data->texture.scanLine(y2);
@@ -2678,7 +2812,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
- if ((fdx < 0 && fdx > -(fixed_scale / 8)) || std::abs(data->m22) < (1./8.)) { // scale up more than 8x
+ if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x
int disty = (fy & 0x0000ffff) >> 8;
for (int i = 0; i < len; ++i) {
int distx = (fracX & 0x0000ffff) >> 8;
@@ -2730,7 +2864,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
- if (std::abs(data->m11) < (1./8.) || std::abs(data->m22) < (1./8.)) {
+ if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
//if we are zooming more than 8 times, we use 8bit precision for the position.
for (int i = 0; i < len; ++i) {
int distx = (fracX & 0x0000ffff) >> 8;
@@ -5195,6 +5329,8 @@ void qBlendTexture(int count, const QSpan *spans, void *userData)
case QImage::Format_RGB16:
proc = processTextureSpansRGB16[blendType];
break;
+ case QImage::Format_ARGB32:
+ case QImage::Format_RGBA8888:
case QImage::Format_BGR30:
case QImage::Format_A2BGR30_Premultiplied:
case QImage::Format_RGB30:
@@ -5411,7 +5547,7 @@ static void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *map,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *)
+ const QClipData *, bool /*useGammaCorrection*/)
{
const quint16 c = color.toRgb16();
quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
@@ -5436,105 +5572,43 @@ static void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
}
}
-static inline void rgbBlendPixel(quint32 *dst, int coverage, int sr, int sg, int sb, const uchar *gamma, const uchar *invgamma)
+static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorProfile *colorProfile, bool useGammaCorrection)
{
- // Do a gray alphablend...
- int da = qAlpha(*dst);
- int dr = qRed(*dst);
- int dg = qGreen(*dst);
- int db = qBlue(*dst);
-
- if (da != 255
- ) {
-
- int a = qGray(coverage);
- sr = qt_div_255(invgamma[sr] * a);
- sg = qt_div_255(invgamma[sg] * a);
- sb = qt_div_255(invgamma[sb] * a);
-
- int ia = 255 - a;
- dr = qt_div_255(dr * ia);
- dg = qt_div_255(dg * ia);
- db = qt_div_255(db * ia);
-
- *dst = ((a + qt_div_255((255 - a) * da)) << 24)
- | ((sr + dr) << 16)
- | ((sg + dg) << 8)
- | ((sb + db));
- return;
- }
-
- int mr = qRed(coverage);
- int mg = qGreen(coverage);
- int mb = qBlue(coverage);
-
- dr = gamma[dr];
- dg = gamma[dg];
- db = gamma[db];
-
- int nr = qt_div_255(sr * mr + dr * (255 - mr));
- int ng = qt_div_255(sg * mg + dg * (255 - mg));
- int nb = qt_div_255(sb * mb + db * (255 - mb));
+ // Do a gammacorrected RGB alphablend...
+ const QRgba64 dlinear = useGammaCorrection ? colorProfile->toLinear64(*dst) : QRgba64::fromArgb32(*dst);
- nr = invgamma[nr];
- ng = invgamma[ng];
- nb = invgamma[nb];
+ QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
- *dst = qRgb(nr, ng, nb);
+ *dst = useGammaCorrection ? colorProfile->fromLinear64(blend) : toArgb32(blend);
}
-#if defined(Q_OS_WIN)
-Q_GUI_EXPORT bool qt_needs_a8_gamma_correction = false;
-
-static inline void grayBlendPixel(quint32 *dst, int coverage, int sr, int sg, int sb, const uint *gamma, const uchar *invgamma)
+static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorProfile *colorProfile)
{
// Do a gammacorrected gray alphablend...
- int dr = qRed(*dst);
- int dg = qGreen(*dst);
- int db = qBlue(*dst);
-
- dr = gamma[dr];
- dg = gamma[dg];
- db = gamma[db];
-
- int alpha = coverage;
- int ialpha = 255 - alpha;
- int nr = qt_div_255(sr * alpha + dr * ialpha);
- int ng = qt_div_255(sg * alpha + dg * ialpha);
- int nb = qt_div_255(sb * alpha + db * ialpha);
+ const QRgba64 dlinear = colorProfile->toLinear64(*dst);
- nr = invgamma[nr];
- ng = invgamma[ng];
- nb = invgamma[nb];
+ QRgba64 blend = interpolate255(slinear, coverage, dlinear, 255 - coverage);
- *dst = qRgb(nr, ng, nb);
+ *dst = colorProfile->fromLinear64(blend);
}
-#endif
static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer,
int x, int y, quint32 color,
const uchar *map,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip)
+ const QClipData *clip, bool useGammaCorrection)
{
const quint32 c = color;
const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32);
-#if defined(Q_OS_WIN)
- const QDrawHelperGammaTables *tables = QGuiApplicationPrivate::instance()->gammaTables();
- if (!tables)
+ const QColorProfile *colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
+ if (!colorProfile)
return;
- const uint *gamma = tables->qt_pow_gamma;
- const uchar *invgamma = tables->qt_pow_invgamma;
-
- int sr = gamma[qRed(color)];
- int sg = gamma[qGreen(color)];
- int sb = gamma[qBlue(color)];
+ const QRgba64 slinear = colorProfile->toLinear64(c);
bool opaque_src = (qAlpha(color) == 255);
- bool doGrayBlendPixel = opaque_src && qt_needs_a8_gamma_correction;
-#endif
+ bool doGrayBlendPixel = opaque_src && useGammaCorrection;
if (!clip) {
quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
@@ -5547,13 +5621,9 @@ static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer,
} else if (coverage == 255) {
dest[i] = c;
} else {
-#if defined(Q_OS_WIN)
- if (QSysInfo::WindowsVersion >= QSysInfo::WV_XP && doGrayBlendPixel
- && qAlpha(dest[i]) == 255) {
- grayBlendPixel(dest+i, coverage, sr, sg, sb, gamma, invgamma);
- } else
-#endif
- {
+ if (doGrayBlendPixel && qAlpha(dest[i]) == 255) {
+ grayBlendPixel(dest+i, coverage, slinear, colorProfile);
+ } else {
int ialpha = 255 - coverage;
dest[i] = INTERPOLATE_PIXEL_255(c, coverage, dest[i], ialpha);
}
@@ -5588,13 +5658,9 @@ static void qt_alphamapblit_uint32(QRasterBuffer *rasterBuffer,
} else if (coverage == 255) {
dest[xp] = c;
} else {
-#if defined(Q_OS_WIN)
- if (QSysInfo::WindowsVersion >= QSysInfo::WV_XP && doGrayBlendPixel
- && qAlpha(dest[xp]) == 255) {
- grayBlendPixel(dest+xp, coverage, sr, sg, sb, gamma, invgamma);
- } else
-#endif
- {
+ if (doGrayBlendPixel && qAlpha(dest[xp]) == 255) {
+ grayBlendPixel(dest+xp, coverage, slinear, colorProfile);
+ } else {
int ialpha = 255 - coverage;
dest[xp] = INTERPOLATE_PIXEL_255(c, coverage, dest[xp], ialpha);
}
@@ -5612,9 +5678,9 @@ static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *map,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip)
+ const QClipData *clip, bool useGammaCorrection)
{
- qt_alphamapblit_uint32(rasterBuffer, x, y, color.toArgb32(), map, mapWidth, mapHeight, mapStride, clip);
+ qt_alphamapblit_uint32(rasterBuffer, x, y, color.toArgb32(), map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
}
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
@@ -5622,34 +5688,31 @@ static void qt_alphamapblit_rgba8888(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *map,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip)
+ const QClipData *clip, bool useGammaCorrection)
{
- qt_alphamapblit_uint32(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()), map, mapWidth, mapHeight, mapStride, clip);
+ qt_alphamapblit_uint32(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()), map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
}
#endif
+inline static int qRgbAvg(QRgb rgb)
+{
+ return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
+}
+
static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uint *src, int mapWidth, int mapHeight, int srcStride,
- const QClipData *clip)
+ const QClipData *clip, bool useGammaCorrection)
{
const quint32 c = color.toArgb32();
- int sr = qRed(c);
- int sg = qGreen(c);
- int sb = qBlue(c);
int sa = qAlpha(c);
- const QDrawHelperGammaTables *tables = QGuiApplicationPrivate::instance()->gammaTables();
- if (!tables)
+ const QColorProfile *colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
+ if (!colorProfile)
return;
- const uchar *gamma = tables->qt_pow_rgb_gamma;
- const uchar *invgamma = tables->qt_pow_rgb_invgamma;
-
- sr = gamma[sr];
- sg = gamma[sg];
- sb = gamma[sb];
+ const QRgba64 slinear = useGammaCorrection ? colorProfile->toLinear64(c) : color;
if (sa == 0)
return;
@@ -5663,7 +5726,13 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
if (coverage == 0xffffffff) {
dst[i] = c;
} else if (coverage != 0xff000000) {
- rgbBlendPixel(dst+i, coverage, sr, sg, sb, gamma, invgamma);
+ if (dst[i] >= 0xff000000) {
+ rgbBlendPixel(dst+i, coverage, slinear, colorProfile, useGammaCorrection);
+ } else {
+ // Give up and do a naive blend.
+ const int a = qRgbAvg(coverage);
+ dst[i] = INTERPOLATE_PIXEL_255(c, a, dst[i], 255 - a);
+ }
}
}
@@ -5693,7 +5762,13 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
if (coverage == 0xffffffff) {
dst[xp] = c;
} else if (coverage != 0xff000000) {
- rgbBlendPixel(dst+xp, coverage, sr, sg, sb, gamma, invgamma);
+ if (dst[xp] >= 0xff000000) {
+ rgbBlendPixel(dst+xp, coverage, slinear, colorProfile, useGammaCorrection);
+ } else {
+ // Non-opaque destination: give up and do a naive blend with the averaged coverage.
+ const int a = qRgbAvg(coverage);
+ dst[xp] = INTERPOLATE_PIXEL_255(c, a, dst[xp], 255 - a);
+ }
}
}
} // for (i -> line.count)
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index a833520b00..cdb374f823 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -539,7 +539,7 @@ void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *bitmap,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *)
+ const QClipData *, bool /*useGammaCorrection*/)
{
quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index 3cf949fc32..40475a9bde 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -91,7 +91,7 @@ void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *bitmap,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip);
+ const QClipData *clip, bool /*useGammaCorrection*/);
void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl, int srch,
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 0e46962784..cf2213042d 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -113,13 +113,13 @@ typedef void (*AlphamapBlitFunc)(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *bitmap,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip);
+ const QClipData *clip, bool useGammaCorrection);
typedef void (*AlphaRGBBlitFunc)(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uint *rgbmask,
int mapWidth, int mapHeight, int mapStride,
- const QClipData *clip);
+ const QClipData *clip, bool useGammaCorrection);
typedef void (*RectFillFunc)(QRasterBuffer *rasterBuffer,
int x, int y, int width, int height,
@@ -159,7 +159,6 @@ struct DrawHelper {
extern SrcOverBlendFunc qBlendFunctions[QImage::NImageFormats][QImage::NImageFormats];
extern SrcOverScaleFunc qScaleFunctions[QImage::NImageFormats][QImage::NImageFormats];
extern SrcOverTransformFunc qTransformFunctions[QImage::NImageFormats][QImage::NImageFormats];
-extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3];
extern DrawHelper qDrawHelper[QImage::NImageFormats];
@@ -351,18 +350,6 @@ struct QSpanData
void adjustSpanMethods();
};
-struct QDrawHelperGammaTables
-{
- explicit QDrawHelperGammaTables(qreal smoothing);
-
- void refresh(qreal smoothing);
-
- uchar qt_pow_rgb_gamma[256];
- uchar qt_pow_rgb_invgamma[256];
- uint qt_pow_gamma[256];
- uchar qt_pow_invgamma[2048];
-};
-
static inline uint qt_gradient_clamp(const QGradientData *data, int ipos)
{
if (ipos < 0 || ipos >= GRADIENT_STOPTABLE_SIZE) {
@@ -1244,6 +1231,7 @@ extern QPixelLayout qPixelLayouts[QImage::NImageFormats];
extern const FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount];
extern StorePixelsFunc qStorePixels[QPixelLayout::BPPCount];
+extern MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3];
QT_END_NAMESPACE
diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp
index 3fbae76de5..25aa6a3122 100644
--- a/src/gui/painting/qmemrotate.cpp
+++ b/src/gui/painting/qmemrotate.cpp
@@ -41,164 +41,10 @@
QT_BEGIN_NAMESPACE
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
static const int tileSize = 32;
-#endif
-
-#if Q_BYTE_ORDER == Q_BIG_ENDIAN
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_PACKED || QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
-#error Big endian version not implemented for the transformed driver!
-#endif
-#endif
-
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate90_cachedRead(const T *src, int w, int h, int sstride, T *dest,
- int dstride)
-{
- const char *s = reinterpret_cast<const char*>(src);
- char *d = reinterpret_cast<char*>(dest);
- for (int y = 0; y < h; ++y) {
- for (int x = w - 1; x >= 0; --x) {
- T *destline = reinterpret_cast<T *>(d + (w - x - 1) * dstride);
- destline[y] = src[x];
- }
- s += sstride;
- src = reinterpret_cast<const T*>(s);
- }
-}
template <class T>
Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate270_cachedRead(const T *src, int w, int h, int sstride, T *dest,
- int dstride)
-{
- const char *s = reinterpret_cast<const char*>(src);
- char *d = reinterpret_cast<char*>(dest);
- s += (h - 1) * sstride;
- for (int y = h - 1; y >= 0; --y) {
- src = reinterpret_cast<const T*>(s);
- for (int x = 0; x < w; ++x) {
- T *destline = reinterpret_cast<T *>(d + x * dstride);
- destline[h - y - 1] = src[x];
- }
- s -= sstride;
- }
-}
-
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE
-
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate90_cachedWrite(const T *src, int w, int h, int sstride, T *dest,
- int dstride)
-{
- for (int x = w - 1; x >= 0; --x) {
- T *d = dest + (w - x - 1) * dstride;
- for (int y = 0; y < h; ++y) {
- *d++ = src[y * sstride + x];
- }
- }
-
-}
-
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate270_cachedWrite(const T *src, int w, int h, int sstride, T *dest,
- int dstride)
-{
- for (int x = 0; x < w; ++x) {
- T *d = dest + x * dstride;
- for (int y = h - 1; y >= 0; --y) {
- *d++ = src[y * sstride + x];
- }
- }
-}
-
-#endif // QT_ROTATION_CACHEDWRITE
-
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING
-
-// TODO: packing algorithms should probably be modified on 64-bit architectures
-
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate90_packing(const T *src, int w, int h, int sstride, T *dest, int dstride)
-{
- sstride /= sizeof(T);
- dstride /= sizeof(T);
-
- const int pack = sizeof(quint32) / sizeof(T);
- const int unaligned = int((long(dest) & (sizeof(quint32)-1))) / sizeof(T);
-
- for (int x = w - 1; x >= 0; --x) {
- int y = 0;
-
- for (int i = 0; i < unaligned; ++i) {
- dest[(w - x - 1) * dstride + y] = src[y * sstride + x];
- ++y;
- }
-
- quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride
- + unaligned);
- const int rest = (h - unaligned) % pack;
- while (y < h - rest) {
- quint32 c = src[y * sstride + x];
- for (int i = 1; i < pack; ++i) {
- c |= src[(y + i) * sstride + x] << (sizeof(int) * 8 / pack * i);
- }
- *d++ = c;
- y += pack;
- }
-
- while (y < h) {
- dest[(w - x - 1) * dstride + y] = src[y * sstride + x];
- ++y;
- }
- }
-}
-
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
-inline void qt_memrotate270_packing(const T *src, int w, int h, int sstride, T *dest, int dstride)
-{
- sstride /= sizeof(T);
- dstride /= sizeof(T);
-
- const int pack = sizeof(quint32) / sizeof(T);
- const int unaligned = int((long(dest) & (sizeof(quint32)-1))) / sizeof(T);
-
- for (int x = 0; x < w; ++x) {
- int y = h - 1;
-
- for (int i = 0; i < unaligned; ++i) {
- dest[x * dstride + h - y - 1] = src[y * sstride + x];
- --y;
- }
-
- quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride
- + unaligned);
- const int rest = (h - unaligned) % pack;
- while (y > rest) {
- quint32 c = src[y * sstride + x];
- for (int i = 1; i < pack; ++i) {
- c |= src[(y - i) * sstride + x] << (sizeof(int) * 8 / pack * i);
- }
- *d++ = c;
- y -= pack;
- }
- while (y >= 0) {
- dest[x * dstride + h - y - 1] = src[y * sstride + x];
- --y;
- }
- }
-}
-
-#endif // QT_ROTATION_PACKING
-
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
-template <class T>
-Q_STATIC_TEMPLATE_FUNCTION
inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
sstride /= sizeof(T);
@@ -235,7 +81,7 @@ inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *des
for (int y = starty; y < stopy; y += pack) {
quint32 c = src[y * sstride + x];
for (int i = 1; i < pack; ++i) {
- const int shift = (sizeof(int) * 8 / pack * i);
+ const int shift = (sizeof(T) * 8 * i);
const T color = src[(y + i) * sstride + x];
c |= color << shift;
}
@@ -293,7 +139,7 @@ inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *de
const int pack = sizeof(quint32) / sizeof(T);
const int unaligned =
- qMin(uint((long(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
+ qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
const int restX = w % tileSize;
const int restY = (h - unaligned) % tileSize;
const int unoptimizedY = restY % pack;
@@ -320,10 +166,10 @@ inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *de
for (int x = startx; x < stopx; ++x) {
quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride
+ h - 1 - starty);
- for (int y = starty; y > stopy; y -= pack) {
+ for (int y = starty; y >= stopy; y -= pack) {
quint32 c = src[y * sstride + x];
for (int i = 1; i < pack; ++i) {
- const int shift = (sizeof(int) * 8 / pack * i);
+ const int shift = (sizeof(T) * 8 * i);
const T color = src[(y - i) * sstride + x];
c |= color << shift;
}
@@ -371,22 +217,26 @@ inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstri
}
}
-#endif // QT_ROTATION_ALGORITHM
template <class T>
Q_STATIC_TEMPLATE_FUNCTION
inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride,
T *dest, int dstStride)
{
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD
- qt_memrotate90_cachedRead<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE
- qt_memrotate90_cachedWrite<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING
- qt_memrotate90_packing<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
- qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+ // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
+ if (sizeof(quint32) % sizeof(T) == 0)
+ qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
+ else
#endif
+ qt_memrotate90_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
+}
+
+template <>
+inline void qt_memrotate90_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
+{
+ // packed algorithm doesn't have any benefit for quint32
+ qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride);
}
template <class T>
@@ -394,11 +244,11 @@ Q_STATIC_TEMPLATE_FUNCTION
inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
const char *s = (const char*)(src) + (h - 1) * sstride;
- for (int y = h - 1; y >= 0; --y) {
- T *d = reinterpret_cast<T*>((char *)(dest) + (h - y - 1) * dstride);
+ for (int dy = 0; dy < h; ++dy) {
+ T *d = reinterpret_cast<T*>((char *)(dest) + dy * dstride);
src = reinterpret_cast<const T*>(s);
- for (int x = w - 1; x >= 0; --x) {
- d[w - x - 1] = src[x];
+ for (int dx = 0; dx < w; ++dx) {
+ d[dx] = src[w - 1 - dx];
}
s -= sstride;
}
@@ -409,32 +259,20 @@ Q_STATIC_TEMPLATE_FUNCTION
inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride,
T *dest, int dstStride)
{
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD
- qt_memrotate270_cachedRead<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE
- qt_memrotate270_cachedWrite<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING
- qt_memrotate270_packing<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
- qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+ // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
+ if (sizeof(quint32) % sizeof(T) == 0)
+ qt_memrotate270_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
+ else
#endif
+ qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
}
template <>
-inline void qt_memrotate90_template<quint24>(const quint24 *src, int srcWidth, int srcHeight,
- int srcStride, quint24 *dest, int dstStride)
+inline void qt_memrotate270_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
{
-#if QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDREAD
- qt_memrotate90_cachedRead<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_CACHEDWRITE
- qt_memrotate90_cachedWrite<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_PACKING
- // packed algorithm not implemented
- qt_memrotate90_cachedRead<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#elif QT_ROTATION_ALGORITHM == QT_ROTATION_TILED
- // packed algorithm not implemented
- qt_memrotate90_tiled_unpacked<quint24>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
-#endif
+ // packed algorithm doesn't have any benefit for quint32
+ qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride);
}
#define QT_IMPL_MEMROTATE(type) \
@@ -458,7 +296,7 @@ Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \
Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
- qt_memrotate90_tiled_unpacked<type>(src, w, h, sstride, dest, dstride); \
+ qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride); \
} \
Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
@@ -468,7 +306,7 @@ Q_GUI_EXPORT void qt_memrotate180(const type *src, int w, int h, int sstride, \
Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
- qt_memrotate270_tiled_unpacked<type>(src, w, h, sstride, dest, dstride); \
+ qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); \
}
@@ -509,6 +347,21 @@ void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *d
qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
}
+void qt_memrotate90_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate90((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
+}
+
+void qt_memrotate180_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate180((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
+}
+
+void qt_memrotate270_24(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate270((const quint24 *)srcPixels, w, h, sbpl, (quint24 *)destPixels, dbpl);
+}
+
void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
@@ -524,34 +377,16 @@ void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *d
qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
}
-MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3] =
+MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] =
// 90, 180, 270
{
- { 0, 0, 0 }, // Format_Invalid,
- { 0, 0, 0 }, // Format_Mono,
- { 0, 0, 0 }, // Format_MonoLSB,
- { 0, 0, 0 }, // Format_Indexed8,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB32,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32_Premultiplied,
- { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // Format_RGB16,
- { 0, 0, 0 }, // Format_ARGB8565_Premultiplied,
- { 0, 0, 0 }, // Format_RGB666,
- { 0, 0, 0 }, // Format_ARGB6666_Premultiplied,
- { 0, 0, 0 }, // Format_RGB555,
- { 0, 0, 0 }, // Format_ARGB8555_Premultiplied,
- { 0, 0, 0 }, // Format_RGB888,
- { 0, 0, 0 }, // Format_RGB444,
- { 0, 0, 0 }, // Format_ARGB4444_Premultiplied,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBX8888,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBA8888,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGBA8888_Premultiplied,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_BGB30,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_A2BGR30_Premultiplied,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB30,
- { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_A2RGB30_Premultiplied,
- { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // Format_Alpha8,
- { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // Format_Grayscale8,
+ { 0, 0, 0 }, // BPPNone,
+ { 0, 0, 0 }, // BPP1MSB,
+ { 0, 0, 0 }, // BPP1LSB,
+ { qt_memrotate90_8, qt_memrotate180_8, qt_memrotate270_8 }, // BPP8,
+ { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // BPP16,
+ { qt_memrotate90_24, qt_memrotate180_24, qt_memrotate270_24 }, // BPP24
+ { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // BPP32
};
QT_END_NAMESPACE
diff --git a/src/gui/painting/qmemrotate_p.h b/src/gui/painting/qmemrotate_p.h
index 62613d301a..9bc3fd1010 100644
--- a/src/gui/painting/qmemrotate_p.h
+++ b/src/gui/painting/qmemrotate_p.h
@@ -56,19 +56,6 @@
QT_BEGIN_NAMESPACE
-#define QT_ROTATION_CACHEDREAD 1
-#define QT_ROTATION_CACHEDWRITE 2
-#define QT_ROTATION_PACKING 3
-#define QT_ROTATION_TILED 4
-
-#ifndef QT_ROTATION_ALGORITHM
-#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
-#define QT_ROTATION_ALGORITHM QT_ROTATION_TILED
-#else
-#define QT_ROTATION_ALGORITHM QT_ROTATION_CACHEDREAD
-#endif
-#endif
-
#define QT_DECL_MEMROTATE(type) \
void Q_GUI_EXPORT qt_memrotate90(const type*, int, int, int, type*, int); \
void Q_GUI_EXPORT qt_memrotate180(const type*, int, int, int, type*, int); \
diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp
index 6d5eaf5aed..fc4f2a9944 100644
--- a/src/gui/painting/qpaintengine_raster.cpp
+++ b/src/gui/painting/qpaintengine_raster.cpp
@@ -272,6 +272,35 @@ static void qt_debug_path(const QPainterPath &path)
}
#endif
+// QRect::normalized() will change the width/height of the rectangle due to
+// its inclusive-integer definition of left/right vs width. This is not
+// something we want to change in QRect as that would potentially introduce
+// regressions all over the place, so we implement a straightforward
+// normalized here. QRectF already does this, so QRectF::normalized() is ok to
+// use.
+static QRect qrect_normalized(const QRect &rect)
+{
+ int x, y, w, h;
+ if (Q_UNLIKELY(rect.width() < 0)) {
+ x = rect.x() + rect.width();
+ w = -rect.width();
+ } else {
+ x = rect.x();
+ w = rect.width();
+ }
+
+ if (Q_UNLIKELY(rect.height() < 0)) {
+ y = rect.y() + rect.height();
+ h = -rect.height();
+ } else {
+ y = rect.y();
+ h = rect.height();
+ }
+
+ return QRect(x, y, w, h);
+}
+
+
QRasterPaintEnginePrivate::QRasterPaintEnginePrivate() :
QPaintEngineExPrivate(),
cachedLines(0)
@@ -1236,7 +1265,9 @@ void QRasterPaintEngine::clip(const QRect &rect, Qt::ClipOperation op)
bool QRasterPaintEngine::setClipRectInDeviceCoords(const QRect &r, Qt::ClipOperation op)
{
Q_D(QRasterPaintEngine);
- QRect clipRect = r & d->deviceRect;
+ // normalize before using the & operator which uses QRect::normalize()
+ // internally which will give us the wrong values.
+ QRect clipRect = qrect_normalized(r) & d->deviceRect;
QRasterPaintEngineState *s = state();
if (op == Qt::ReplaceClip || s->clip == 0) {
@@ -1471,7 +1502,7 @@ void QRasterPaintEngine::drawRects(const QRect *rects, int rectCount)
int offset_x = int(s->matrix.dx());
int offset_y = int(s->matrix.dy());
while (r < lastRect) {
- QRect rect = r->normalized();
+ QRect rect = qrect_normalized(*r);
QRect rr = rect.translated(offset_x, offset_y);
fillRect_normalized(rr, &s->brushData, d);
++r;
@@ -2266,8 +2297,9 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe
&& d->rasterBuffer->compositionMode == QPainter::CompositionMode_Source)))
{
RotationType rotationType = qRotationType(s->matrix);
+ const QPixelLayout::BPP plBpp = qPixelLayouts[d->rasterBuffer->format].bpp;
- if (rotationType != NoRotation && qMemRotateFunctions[d->rasterBuffer->format][rotationType] && img.rect().contains(sr.toAlignedRect())) {
+ if (rotationType != NoRotation && qMemRotateFunctions[plBpp][rotationType] && img.rect().contains(sr.toAlignedRect())) {
QRectF transformedTargetRect = s->matrix.mapRect(r);
if ((!(s->renderHints & QPainter::SmoothPixmapTransform) && !(s->renderHints & QPainter::Antialiasing))
@@ -2297,7 +2329,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe
uint cw = clippedSourceRect.width();
uint ch = clippedSourceRect.height();
- qMemRotateFunctions[d->rasterBuffer->format][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl);
+ qMemRotateFunctions[plBpp][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl);
return;
}
@@ -2500,7 +2532,7 @@ void QRasterPaintEngine::drawTiledPixmap(const QRectF &r, const QPixmap &pixmap,
QRectF rr = r;
rr.translate(s->matrix.dx(), s->matrix.dy());
- fillRect_normalized(rr.toRect().normalized(), &d->image_filler, d);
+ fillRect_normalized(rr.normalized().toRect(), &d->image_filler, d);
}
}
@@ -2523,7 +2555,7 @@ QRasterBuffer *QRasterPaintEngine::rasterBuffer()
/*!
\internal
*/
-void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h)
+void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h, bool useGammaCorrection)
{
Q_D(QRasterPaintEngine);
QRasterPaintEngineState *s = state();
@@ -2578,14 +2610,14 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx
} else if (depth == 8) {
if (s->penData.alphamapBlit) {
s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color,
- scanline, w, h, bpl, 0);
+ scanline, w, h, bpl, 0, useGammaCorrection);
return;
}
} else if (depth == 32) {
// (A)RGB Alpha mask where the alpha component is not used.
if (s->penData.alphaRGBBlit) {
s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color,
- (const uint *) scanline, w, h, bpl / 4, 0);
+ (const uint *) scanline, w, h, bpl / 4, 0, useGammaCorrection);
return;
}
}
@@ -2614,10 +2646,10 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx
}
if (depth == 8)
s->penData.alphamapBlit(rb, rx, ry, s->penData.solid.color,
- scanline, w, h, bpl, clip);
+ scanline, w, h, bpl, clip, useGammaCorrection);
else if (depth == 32)
s->penData.alphaRGBBlit(rb, rx, ry, s->penData.solid.color,
- (const uint *) scanline, w, h, bpl / 4, clip);
+ (const uint *) scanline, w, h, bpl / 4, clip, useGammaCorrection);
return;
}
}
@@ -2775,7 +2807,8 @@ bool QRasterPaintEngine::drawCachedGlyphs(int numGlyphs, const glyph_t *glyphs,
alphaPenBlt(alphaMap->constBits(), alphaMap->bytesPerLine(), alphaMap->depth(),
qFloor(positions[i].x) + offset.x(),
qRound(positions[i].y) + offset.y(),
- alphaMap->width(), alphaMap->height());
+ alphaMap->width(), alphaMap->height(),
+ fontEngine->expectsGammaCorrectedBlending());
fontEngine->unlockAlphaMapForGlyph();
}
@@ -2836,7 +2869,7 @@ bool QRasterPaintEngine::drawCachedGlyphs(int numGlyphs, const glyph_t *glyphs,
drawImage(QPoint(x, y), QImage(glyphBits, c.w, c.h, bpl, image.format()));
s->matrix = originalTransform;
} else {
- alphaPenBlt(glyphBits, bpl, depth, x, y, c.w, c.h);
+ alphaPenBlt(glyphBits, bpl, depth, x, y, c.w, c.h, fontEngine->expectsGammaCorrectedBlending());
}
}
}
@@ -2880,7 +2913,7 @@ bool QRasterPaintEnginePrivate::isUnclipped(const QRect &rect,
const QRasterPaintEngineState *s = q->state();
const QClipData *cl = clip();
if (!cl) {
- QRect r = rect.normalized();
+ QRect r = qrect_normalized(rect);
// inline contains() for performance (we know the rects are normalized)
const QRect &r1 = deviceRect;
return (r.left() >= r1.left() && r.right() <= r1.right()
@@ -2895,7 +2928,7 @@ bool QRasterPaintEnginePrivate::isUnclipped(const QRect &rect,
if (s->flags.antialiased)
++penWidth;
- QRect r = rect.normalized();
+ QRect r = qrect_normalized(rect);
if (penWidth > 0) {
r.setX(r.x() - penWidth);
r.setY(r.y() - penWidth);
@@ -4439,9 +4472,9 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode
gradient.alphaColor = !brush.isOpaque() || alpha != 256;
auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha);
- cachedGradient = cacheInfo;
gradient.colorTable32 = cacheInfo->buffer32;
gradient.colorTable64 = cacheInfo->buffer64;
+ cachedGradient = std::move(cacheInfo);
gradient.spread = g->spread();
@@ -4461,9 +4494,9 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode
gradient.alphaColor = !brush.isOpaque() || alpha != 256;
auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha);
- cachedGradient = cacheInfo;
gradient.colorTable32 = cacheInfo->buffer32;
gradient.colorTable64 = cacheInfo->buffer64;
+ cachedGradient = std::move(cacheInfo);
gradient.spread = g->spread();
@@ -4487,9 +4520,9 @@ void QSpanData::setup(const QBrush &brush, int alpha, QPainter::CompositionMode
gradient.alphaColor = !brush.isOpaque() || alpha != 256;
auto cacheInfo = qt_gradient_cache()->getBuffer(*g, alpha);
- cachedGradient = cacheInfo;
gradient.colorTable32 = cacheInfo->buffer32;
gradient.colorTable64 = cacheInfo->buffer64;
+ cachedGradient = std::move(cacheInfo);
gradient.spread = QGradient::RepeatSpread;
diff --git a/src/gui/painting/qpaintengine_raster_p.h b/src/gui/painting/qpaintengine_raster_p.h
index 59213220a6..d0b82b3a93 100644
--- a/src/gui/painting/qpaintengine_raster_p.h
+++ b/src/gui/painting/qpaintengine_raster_p.h
@@ -225,7 +225,7 @@ public:
#endif
QRasterBuffer *rasterBuffer();
- void alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h);
+ void alphaPenBlt(const void* src, int bpl, int depth, int rx,int ry,int w,int h, bool useGammaCorrection);
Type type() const Q_DECL_OVERRIDE { return Raster; }
diff --git a/src/gui/painting/qpainter.h b/src/gui/painting/qpainter.h
index 46817b9c73..64d15d5296 100644
--- a/src/gui/painting/qpainter.h
+++ b/src/gui/painting/qpainter.h
@@ -83,7 +83,6 @@ class Q_GUI_EXPORT QPainter
{
Q_DECLARE_PRIVATE(QPainter)
Q_GADGET
- Q_FLAGS(RenderHint RenderHints)
public:
enum RenderHint {
@@ -94,8 +93,10 @@ public:
NonCosmeticDefaultPen = 0x10,
Qt4CompatiblePainting = 0x20
};
+ Q_FLAG(RenderHint)
Q_DECLARE_FLAGS(RenderHints, RenderHint)
+ Q_FLAG(RenderHints)
class PixmapFragment {
public:
diff --git a/src/gui/painting/qpdf.cpp b/src/gui/painting/qpdf.cpp
index 84e18a64dd..7b8bae1642 100644
--- a/src/gui/painting/qpdf.cpp
+++ b/src/gui/painting/qpdf.cpp
@@ -1504,16 +1504,25 @@ void QPdfEnginePrivate::writeInfo()
printString(creator);
xprintf("\n/Producer ");
printString(QString::fromLatin1("Qt " QT_VERSION_STR));
- QDateTime now = QDateTime::currentDateTimeUtc();
+ QDateTime now = QDateTime::currentDateTime();
QTime t = now.time();
QDate d = now.date();
- xprintf("\n/CreationDate (D:%d%02d%02d%02d%02d%02d)\n",
+ xprintf("\n/CreationDate (D:%d%02d%02d%02d%02d%02d",
d.year(),
d.month(),
d.day(),
t.hour(),
t.minute(),
t.second());
+ int offset = now.offsetFromUtc();
+ int hours = (offset / 60) / 60;
+ int mins = (offset / 60) % 60;
+ if (offset < 0)
+ xprintf("-%02d'%02d')\n", -hours, -mins);
+ else if (offset > 0)
+ xprintf("+%02d'%02d')\n", hours , mins);
+ else
+ xprintf("Z)\n");
xprintf(">>\n"
"endobj\n");
}
diff --git a/src/gui/painting/qregion.cpp b/src/gui/painting/qregion.cpp
index 0571e1a328..3fb6f925b3 100644
--- a/src/gui/painting/qregion.cpp
+++ b/src/gui/painting/qregion.cpp
@@ -739,7 +739,7 @@ bool QRegion::intersects(const QRegion &region) const
*/
-#if !defined (Q_OS_UNIX) && !defined (Q_OS_WIN)
+#if !defined (Q_OS_UNIX) && !defined (Q_OS_WIN) || defined(Q_CLANG_QDOC)
/*!
\overload
\since 4.4
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 0dadc038fa..2a17d8a624 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -185,6 +185,60 @@ inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
qMin(a.alpha() + b.alpha(), 65535));
}
+#if defined __SSE2__
+// Narrows the four 16-bit values held in the low 64 bits of \a v to 8 bits
+// each and returns them packed, in unchanged lane order, as one 32-bit value.
+Q_ALWAYS_INLINE uint toArgb32(__m128i v)
+{
+ // Zero-extend the four low 16-bit lanes into 32-bit lanes.
+ v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
+ // Per lane, with y = x + 128, compute (y - (y >> 8)) >> 8.
+ v = _mm_add_epi32(v, _mm_set1_epi32(128));
+ v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8));
+ v = _mm_srli_epi32(v, 8);
+ // Pack 32 -> 16 -> 8 bits; the low 32 bits then hold the four result bytes.
+ v = _mm_packs_epi32(v, v);
+ v = _mm_packus_epi16(v, v);
+ return _mm_cvtsi128_si32(v);
+}
+#elif defined __ARM_NEON__
+// NEON variant: narrows the four 16-bit lanes of \a v to 8 bits each and
+// returns them packed, in unchanged lane order, as one 32-bit value.
+Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
+{
+ // Per lane: subtract the rounding shift (x >> 8), then apply a second
+ // rounding shift right by 8.
+ v = vsub_u16(v, vrshr_n_u16(v, 8));
+ v = vrshr_n_u16(v, 8);
+ // Keep the low byte of every lane; lane 0 of the 32-bit view holds all four.
+ uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
+ return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
+}
+#endif
+
+// Converts \a rgba64 to a 32-bit ARGB value, using the SIMD toArgb32()
+// overloads where available and QRgba64::toArgb32() otherwise.
+inline uint toArgb32(QRgba64 rgba64)
+{
+#if defined __SSE2__
+ __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
+ // Swap 16-bit lanes 0 and 2 before narrowing; lanes 1 and 3 stay in place.
+ // NOTE(review): presumably this moves red/blue from QRgba64 order into
+ // ARGB32 order - confirm against the QRgba64 layout.
+ v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2));
+ return toArgb32(v);
+#elif defined __ARM_NEON__
+ uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+ // Byte-table shuffle that swaps 16-bit lanes 0 and 2 (same as the SSE2 path).
+ const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
+ v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
+#else
+ // Big-endian: rotate the four lanes instead of swapping two of them.
+ v = vext_u16(v, v, 3);
+#endif
+ return toArgb32(v);
+#else
+ return rgba64.toArgb32();
+#endif
+}
+
+// Converts \a rgba64 to a 32-bit RGBA8888 value. The SIMD paths narrow the
+// channels without reordering them; the generic path converts to ARGB32
+// first and then applies ARGB2RGBA().
+inline uint toRgba8888(QRgba64 rgba64)
+{
+#if defined __SSE2__
+ __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
+ // No lane shuffle here, unlike toArgb32(QRgba64).
+ return toArgb32(v);
+#elif defined __ARM_NEON__
+ uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
+ return toArgb32(v);
+#else
+ return ARGB2RGBA(toArgb32(rgba64));
+#endif
+}
+
#if defined(__SSE2__)
Q_ALWAYS_INLINE __m128i addWithSaturation(__m128i a, __m128i b)
{
@@ -199,6 +253,52 @@ Q_ALWAYS_INLINE uint16x4_t addWithSaturation(uint16x4_t a, uint16x4_t b)
}
#endif
+// Blends source \a s into destination \a d with a separate coverage value per
+// colour channel. \a rgbAlpha is an ARGB32-style mask: the generic path reads
+// it with qRed()/qGreen()/qBlue(), and each channel becomes a weighted mix of
+// s (weight m) and d (weight max - m) for its own mask value m.
+//
+// The SIMD paths must pair every QRgba64 lane with the mask byte of the same
+// channel. The mask bytes are B,G,R,A from the low byte up, while QRgba64
+// lanes are R,G,B,A (toRgba8888() narrows them without reordering), so the
+// mask is reordered before multiplying.
+//
+// NOTE(review): the generic path copies s.alpha() into the result, whereas
+// the SIMD paths blend lane 3 with the mask's alpha byte - confirm callers
+// do not depend on the result alpha.
+inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
+{
+ QRgba64 blend;
+#ifdef __SSE2__
+ __m128i vd = _mm_loadl_epi64((const __m128i *)&d);
+ __m128i vs = _mm_loadl_epi64((const __m128i *)&s);
+ __m128i va = _mm_cvtsi32_si128(rgbAlpha);
+ // Widen every 8-bit mask byte m to the 16-bit value m * 0x101.
+ va = _mm_unpacklo_epi8(va, va);
+ // Swap lanes 0 and 2 so the mask order (B,G,R,A) matches QRgba64 (R,G,B,A);
+ // without this red is weighted by the blue coverage and vice versa.
+ va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2));
+ // Complementary weight: 0xffff - va.
+ __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va);
+
+ // Full 16x16 -> 32-bit products, rebuilt from their low and high halves.
+ vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
+ vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb));
+ vd = _mm_add_epi32(vd, vs);
+ // Divide by 65535 with rounding: (x + (x >> 16) + 0x8000) >> 16.
+ vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16));
+ vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000));
+ // Arithmetic shift plus signed pack keeps the 16-bit pattern intact even
+ // for results >= 0x8000, which a logical shift + signed pack would clamp.
+ vd = _mm_srai_epi32(vd, 16);
+ vd = _mm_packs_epi32(vd, _mm_setzero_si128());
+
+ _mm_storel_epi64((__m128i *)&blend, vd);
+#elif defined(__ARM_NEON__)
+ uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
+ uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
+ // Reorder the mask to R,G,B,A first so its bytes line up with the QRgba64
+ // lanes once they are widened below.
+ uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha)));
+ // Widen every 8-bit mask byte m to the 16-bit value m * 0x101.
+ uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]);
+ uint16x4_t vb = vdup_n_u16(0xffff);
+ vb = vsub_u16(vb, va);
+
+ uint32x4_t vs32 = vmull_u16(vs, va);
+ uint32x4_t vd32 = vmull_u16(vd, vb);
+ vd32 = vaddq_u32(vd32, vs32);
+ // Divide by 65535 with rounding: x += x >> 16, then a rounding narrow.
+ vd32 = vsraq_n_u32(vd32, vd32, 16);
+ vd = vrshrn_n_u32(vd32, 16);
+ vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
+#else
+ const int mr = qRed(rgbAlpha);
+ const int mg = qGreen(rgbAlpha);
+ const int mb = qBlue(rgbAlpha);
+ blend.setRed (qt_div_255(s.red() * mr + d.red() * (255 - mr)));
+ blend.setGreen(qt_div_255(s.green() * mg + d.green() * (255 - mg)));
+ blend.setBlue (qt_div_255(s.blue() * mb + d.blue() * (255 - mb)));
+ blend.setAlpha(s.alpha());
+#endif
+ return blend;
+}
+
+
QT_END_NAMESPACE
#endif // QRGBA64_P_H
diff --git a/src/gui/painting/qtriangulator.cpp b/src/gui/painting/qtriangulator.cpp
index 6604d407f0..6d57eba123 100644
--- a/src/gui/painting/qtriangulator.cpp
+++ b/src/gui/painting/qtriangulator.cpp
@@ -50,10 +50,6 @@
#include <QtCore/qglobal.h>
#include <QtCore/qpoint.h>
#include <QtCore/qalgorithms.h>
-#ifndef QT_NO_OPENGL
-# include <private/qopenglcontext_p.h>
-# include <private/qopenglextensions_p.h>
-#endif
#include <private/qrbtree_p.h>
QT_BEGIN_NAMESPACE
@@ -2266,23 +2262,12 @@ void QTriangulator<T>::MonotoneToTriangles::decompose()
// qTriangulate //
//============================================================================//
-static bool hasElementIndexUint()
-{
-#ifndef QT_NO_OPENGL
- QOpenGLContext *context = QOpenGLContext::currentContext();
- if (!context)
- return false;
- return static_cast<QOpenGLExtensions *>(context->functions())->hasOpenGLExtension(QOpenGLExtensions::ElementIndexUint);
-#else
- return false;
-#endif
-}
-
Q_GUI_EXPORT QTriangleSet qTriangulate(const qreal *polygon,
- int count, uint hint, const QTransform &matrix)
+ int count, uint hint, const QTransform &matrix,
+ bool allowUintIndices)
{
QTriangleSet triangleSet;
- if (hasElementIndexUint()) {
+ if (allowUintIndices) {
QTriangulator<quint32> triangulator;
triangulator.initialize(polygon, count, hint, matrix);
QVertexSet<quint32> vertexSet = triangulator.triangulate();
@@ -2300,10 +2285,13 @@ Q_GUI_EXPORT QTriangleSet qTriangulate(const qreal *polygon,
}
Q_GUI_EXPORT QTriangleSet qTriangulate(const QVectorPath &path,
- const QTransform &matrix, qreal lod)
+ const QTransform &matrix, qreal lod, bool allowUintIndices)
{
QTriangleSet triangleSet;
- if (hasElementIndexUint()) {
+ // For now systems that support 32-bit index values will always get 32-bit
+ // index values. This is not necessarily ideal since 16-bit would be enough in
+ // many cases. TODO revisit this at a later point.
+ if (allowUintIndices) {
QTriangulator<quint32> triangulator;
triangulator.initialize(path, matrix, lod);
QVertexSet<quint32> vertexSet = triangulator.triangulate();
@@ -2320,10 +2308,10 @@ Q_GUI_EXPORT QTriangleSet qTriangulate(const QVectorPath &path,
}
QTriangleSet qTriangulate(const QPainterPath &path,
- const QTransform &matrix, qreal lod)
+ const QTransform &matrix, qreal lod, bool allowUintIndices)
{
QTriangleSet triangleSet;
- if (hasElementIndexUint()) {
+ if (allowUintIndices) {
QTriangulator<quint32> triangulator;
triangulator.initialize(path, matrix, lod);
QVertexSet<quint32> vertexSet = triangulator.triangulate();
@@ -2340,10 +2328,10 @@ QTriangleSet qTriangulate(const QPainterPath &path,
}
QPolylineSet qPolyline(const QVectorPath &path,
- const QTransform &matrix, qreal lod)
+ const QTransform &matrix, qreal lod, bool allowUintIndices)
{
QPolylineSet polyLineSet;
- if (hasElementIndexUint()) {
+ if (allowUintIndices) {
QTriangulator<quint32> triangulator;
triangulator.initialize(path, matrix, lod);
QVertexSet<quint32> vertexSet = triangulator.polyline();
@@ -2360,10 +2348,10 @@ QPolylineSet qPolyline(const QVectorPath &path,
}
QPolylineSet qPolyline(const QPainterPath &path,
- const QTransform &matrix, qreal lod)
+ const QTransform &matrix, qreal lod, bool allowUintIndices)
{
QPolylineSet polyLineSet;
- if (hasElementIndexUint()) {
+ if (allowUintIndices) {
QTriangulator<quint32> triangulator;
triangulator.initialize(path, matrix, lod);
QVertexSet<quint32> vertexSet = triangulator.polyline();
diff --git a/src/gui/painting/qtriangulator_p.h b/src/gui/painting/qtriangulator_p.h
index 4d1aba099c..8f043fc925 100644
--- a/src/gui/painting/qtriangulator_p.h
+++ b/src/gui/painting/qtriangulator_p.h
@@ -137,11 +137,18 @@ struct Q_GUI_EXPORT QPolylineSet
// integers, the polygon is triangulated, and then scaled back by 1/32.
// 'hint' should be a combination of QVectorPath::Hints.
// 'lod' is the level of detail. Default is 1. Curves are split into more lines when 'lod' is higher.
-QTriangleSet Q_GUI_EXPORT qTriangulate(const qreal *polygon, int count, uint hint = QVectorPath::PolygonHint | QVectorPath::OddEvenFill, const QTransform &matrix = QTransform());
-QTriangleSet Q_GUI_EXPORT qTriangulate(const QVectorPath &path, const QTransform &matrix = QTransform(), qreal lod = 1);
-QTriangleSet Q_GUI_EXPORT qTriangulate(const QPainterPath &path, const QTransform &matrix = QTransform(), qreal lod = 1);
-QPolylineSet qPolyline(const QVectorPath &path, const QTransform &matrix = QTransform(), qreal lod = 1);
-QPolylineSet Q_GUI_EXPORT qPolyline(const QPainterPath &path, const QTransform &matrix = QTransform(), qreal lod = 1);
+QTriangleSet Q_GUI_EXPORT qTriangulate(const qreal *polygon, int count,
+ uint hint = QVectorPath::PolygonHint | QVectorPath::OddEvenFill,
+ const QTransform &matrix = QTransform(),
+ bool allowUintIndices = true);
+QTriangleSet Q_GUI_EXPORT qTriangulate(const QVectorPath &path, const QTransform &matrix = QTransform(),
+ qreal lod = 1, bool allowUintIndices = true);
+QTriangleSet Q_GUI_EXPORT qTriangulate(const QPainterPath &path, const QTransform &matrix = QTransform(),
+ qreal lod = 1, bool allowUintIndices = true);
+QPolylineSet qPolyline(const QVectorPath &path, const QTransform &matrix = QTransform(),
+ qreal lod = 1, bool allowUintIndices = true);
+QPolylineSet Q_GUI_EXPORT qPolyline(const QPainterPath &path, const QTransform &matrix = QTransform(),
+ qreal lod = 1, bool allowUintIndices = true);
QT_END_NAMESPACE