diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 1420 |
1 files changed, 792 insertions, 628 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 2029b2d7b9..b7a943be38 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1,42 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2021 The Qt Company Ltd. -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtGui module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2022 The Qt Company Ltd. +// Copyright (C) 2018 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qdrawhelper_p.h" @@ -61,6 +25,16 @@ #include <qloggingcategory.h> #include <qmath.h> +#if QT_CONFIG(thread) && !defined(Q_OS_WASM) +#define QT_USE_THREAD_PARALLEL_FILLS +#endif + +#if defined(QT_USE_THREAD_PARALLEL_FILLS) +#include <qsemaphore.h> +#include <qthreadpool.h> +#include <private/qthreadpool_p.h> +#endif + QT_BEGIN_NAMESPACE Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper") @@ -77,8 +51,7 @@ constexpr int half_point = 1 << 15; template <QPixelLayout::BPP bpp> static inline uint QT_FASTCALL fetch1Pixel(const uchar *, int) { - Q_UNREACHABLE(); - return 0; + Q_UNREACHABLE_RETURN(0); } template <> @@ -203,7 +176,7 @@ static void QT_FASTCALL convertRGBA32FPMToRGBA64PM(QRgba64 *buffer, int count) } } -static Convert64Func convert64ToRGBA64PM[QImage::NImageFormats] = { +static Convert64Func convert64ToRGBA64PM[] = { nullptr, nullptr, nullptr, @@ -240,7 +213,10 @@ static Convert64Func convert64ToRGBA64PM[QImage::NImageFormats] = { convertRGBA32FPMToRGBA64PM, convertRGBA32FToRGBA64PM, convertRGBA32FPMToRGBA64PM, + nullptr, }; + +static_assert(std::size(convert64ToRGBA64PM) == QImage::NImageFormats); #endif #if QT_CONFIG(raster_fp) @@ -274,7 +250,7 @@ static void QT_FASTCALL convertRGBA16FToRGBA32F(QRgbaFloat32 *buffer, const quin qFloatFromFloat16((float *)buffer, (const qfloat16 *)src, count * 4); } -static Convert64ToFPFunc convert64ToRGBA32F[QImage::NImageFormats] = { +static Convert64ToFPFunc convert64ToRGBA32F[] = { nullptr, nullptr, nullptr, @@ -311,8 +287,11 @@ static Convert64ToFPFunc convert64ToRGBA32F[QImage::NImageFormats] = { nullptr, nullptr, nullptr, + nullptr, }; +static_assert(std::size(convert64ToRGBA32F) == QImage::NImageFormats); + static void convertRGBA32FToRGBA32FPM(QRgbaFloat32 *buffer, int count) { for (int i = 0; i < count; ++i) @@ -380,7 +359,7 @@ static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, return buffer; } -static DestFetchProc destFetchProc[QImage::NImageFormats] = +static DestFetchProc destFetchProc[] = { nullptr, // Format_Invalid destFetchMono, // Format_Mono, @@ -418,8 +397,11 @@ static DestFetchProc destFetchProc[QImage::NImageFormats] = destFetch, // Format_RGBX32FPx4 destFetch, // Format_RGBA32FPx4 destFetch, // Format_RGBA32FPx4_Premultiplied + destFetch, // Format_CMYK8888 }; +static_assert(std::size(destFetchProc) == QImage::NImageFormats); + #if QT_CONFIG(raster_64bit) static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) { @@ -437,7 +419,7 @@ static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer return buffer; } -static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = +static DestFetchProc64 destFetchProc64[] = { nullptr, // Format_Invalid nullptr, // Format_Mono, @@ -475,7 +457,10 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = destFetch64, // Format_RGBX32FPx4 destFetch64, // Format_RGBA32FPx4 destFetch64, // Format_RGBA32FPx4_Premultiplied + destFetch64, // Format_CMYK8888 }; + +static_assert(std::size(destFetchProc64) == QImage::NImageFormats); #endif #if QT_CONFIG(raster_fp) @@ -484,10 +469,57 @@ static QRgbaFloat32 *QT_FASTCALL destFetchFP(QRgbaFloat32 *buffer, QRasterBuffer return const_cast<QRgbaFloat32 *>(qFetchToRGBA32F[rasterBuffer->format](buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); } +static QRgbaFloat32 *QT_FASTCALL destFetchRGBFP(QRgbaFloat32 *, QRasterBuffer *rasterBuffer, int x, int y, int) +{ + return reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->scanLine(y)) + x; +} + static QRgbaFloat32 *QT_FASTCALL destFetchFPUndefined(QRgbaFloat32 *buffer, QRasterBuffer *, int, int, int) { return buffer; } +static DestFetchProcFP destFetchProcFP[] = +{ + nullptr, // Format_Invalid + nullptr, // Format_Mono, + nullptr, // Format_MonoLSB + nullptr, // Format_Indexed8 + destFetchFP, // Format_RGB32 + destFetchFP, // Format_ARGB32, + destFetchFP, // Format_ARGB32_Premultiplied + destFetchFP, // Format_RGB16 + destFetchFP, // Format_ARGB8565_Premultiplied + destFetchFP, // Format_RGB666 + destFetchFP, // Format_ARGB6666_Premultiplied + destFetchFP, // Format_RGB555 + destFetchFP, // Format_ARGB8555_Premultiplied + destFetchFP, // Format_RGB888 + destFetchFP, // Format_RGB444 + destFetchFP, // Format_ARGB4444_Premultiplied + destFetchFP, // Format_RGBX8888 + destFetchFP, // Format_RGBA8888 + destFetchFP, // Format_RGBA8888_Premultiplied + destFetchFP, // Format_BGR30 + destFetchFP, // Format_A2BGR30_Premultiplied + destFetchFP, // Format_RGB30 + destFetchFP, // Format_A2RGB30_Premultiplied + destFetchFP, // Format_Alpha8 + destFetchFP, // Format_Grayscale8 + destFetchFP, // Format_RGBX64 + destFetchFP, // Format_RGBA64 + destFetchFP, // Format_RGBA64_Premultiplied + destFetchFP, // Format_Grayscale16 + destFetchFP, // Format_BGR888 + destFetchFP, // Format_RGBX16FPx4 + destFetchFP, // Format_RGBA16FPx4 + destFetchFP, // Format_RGBA16FPx4_Premultiplied + destFetchRGBFP, // Format_RGBX32FPx4 + destFetchFP, // Format_RGBA32FPx4 + destFetchRGBFP, // Format_RGBA32FPx4_Premultiplied + destFetchFP, // Format_CMYK8888 +}; + +static_assert(std::size(destFetchProcFP) == QImage::NImageFormats); #endif /* @@ -496,9 +528,8 @@ static QRgbaFloat32 *QT_FASTCALL destFetchFPUndefined(QRgbaFloat32 *buffer, QRas */ static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf) { - QRgb color_0 = qPremultiply(rbuf->destColor0); - QRgb color_1 = qPremultiply(rbuf->destColor1); - color = qPremultiply(color); + const QRgb color_0 = rbuf->destColor0; + const QRgb color_1 = rbuf->destColor1; int r = qRed(color); int g = qGreen(color); @@ -613,7 +644,7 @@ static void QT_FASTCALL destStoreGray8(QRasterBuffer *rasterBuffer, int x, int y QColorTransform tf = QColorSpacePrivate::get(fromCS)->transformationToXYZ(); QColorTransformPrivate *tfd = QColorTransformPrivate::get(tf); - tfd->apply(data, buffer, length, QColorTransformPrivate::InputPremultiplied); + tfd->applyReturnGray(data, buffer, length, QColorTransformPrivate::InputPremultiplied); } } @@ -637,11 +668,11 @@ static void QT_FASTCALL destStoreGray16(QRasterBuffer *rasterBuffer, int x, int QRgba64 tmp_line[BufferSize]; for (int k = 0; k < length; ++k) tmp_line[k] = QRgba64::fromArgb32(buffer[k]); - tfd->apply(data, tmp_line, length, QColorTransformPrivate::InputPremultiplied); + tfd->applyReturnGray(data, tmp_line, length, QColorTransformPrivate::InputPremultiplied); } } -static DestStoreProc destStoreProc[QImage::NImageFormats] = +static DestStoreProc destStoreProc[] = { nullptr, // Format_Invalid destStoreMono, // Format_Mono, @@ -679,8 +710,11 @@ static DestStoreProc destStoreProc[QImage::NImageFormats] = destStore, // Format_RGBX32FPx4 destStore, // Format_RGBA32FPx4 destStore, // Format_RGBA32FPx4_Premultiplied + destStore, // Format_CMYK8888 }; +static_assert(std::size(destStoreProc) == QImage::NImageFormats); + #if QT_CONFIG(raster_64bit) static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) { @@ -715,7 +749,7 @@ static void QT_FASTCALL destStore64Gray8(QRasterBuffer *rasterBuffer, int x, int QColorTransformPrivate *tfd = QColorTransformPrivate::get(tf); quint16 gray_line[BufferSize]; - tfd->apply(gray_line, buffer, length, QColorTransformPrivate::InputPremultiplied); + tfd->applyReturnGray(gray_line, buffer, length, QColorTransformPrivate::InputPremultiplied); for (int k = 0; k < length; ++k) data[k] = qt_div_257(gray_line[k]); } @@ -737,11 +771,11 @@ static void QT_FASTCALL destStore64Gray16(QRasterBuffer *rasterBuffer, int x, in QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb; QColorTransform tf = QColorSpacePrivate::get(fromCS)->transformationToXYZ(); QColorTransformPrivate *tfd = QColorTransformPrivate::get(tf); - tfd->apply(data, buffer, length, QColorTransformPrivate::InputPremultiplied); + tfd->applyReturnGray(data, buffer, length, QColorTransformPrivate::InputPremultiplied); } } -static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = +static DestStoreProc64 destStoreProc64[] = { nullptr, // Format_Invalid nullptr, // Format_Mono, @@ -779,7 +813,10 @@ static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = destStore64, // Format_RGBX32FPx4 destStore64, // Format_RGBA32FPx4 destStore64, // Format_RGBA32FPx4_Premultiplied + destStore64, // Format_CMYK8888 }; + +static_assert(std::size(destStoreProc64) == QImage::NImageFormats); #endif #if QT_CONFIG(raster_fp) @@ -3054,7 +3091,7 @@ static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP(QRgbaFloat32 * #endif // QT_CONFIG(raster_fp) // FetchUntransformed can have more specialized methods added depending on SIMD features. -static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = { +static SourceFetchProc sourceFetchUntransformed[] = { nullptr, // Invalid fetchUntransformed, // Mono fetchUntransformed, // MonoLsb @@ -3091,9 +3128,12 @@ static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = { fetchUntransformed, // RGBX32Px4 fetchUntransformed, // RGBA32FPx4 fetchUntransformed, // RGBA32FPx4_Premultiplied + fetchUntransformed, // CMYK8888 }; -static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = { +static_assert(std::size(sourceFetchUntransformed) == QImage::NImageFormats); + +static const SourceFetchProc sourceFetchGeneric[] = { fetchUntransformed, // Untransformed fetchUntransformed, // Tiled fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>, // Transformed @@ -3102,7 +3142,9 @@ static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = { fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone> // TransformedBilinearTiled }; -static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = { +static_assert(std::size(sourceFetchGeneric) == NBlendTypes); + +static SourceFetchProc sourceFetchARGB32PM[] = { fetchUntransformedARGB32PM, // Untransformed fetchUntransformedARGB32PM, // Tiled fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed @@ -3111,7 +3153,9 @@ static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = { fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled }; -static SourceFetchProc sourceFetchAny16[NBlendTypes] = { +static_assert(std::size(sourceFetchARGB32PM) == NBlendTypes); + +static SourceFetchProc sourceFetchAny16[] = { fetchUntransformed, // Untransformed fetchUntransformed, // Tiled fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed @@ -3120,7 +3164,9 @@ static SourceFetchProc sourceFetchAny16[NBlendTypes] = { fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled }; -static SourceFetchProc sourceFetchAny32[NBlendTypes] = { +static_assert(std::size(sourceFetchAny16) == NBlendTypes); + +static SourceFetchProc sourceFetchAny32[] = { fetchUntransformed, // Untransformed fetchUntransformed, // Tiled fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed @@ -3129,6 +3175,8 @@ static SourceFetchProc sourceFetchAny32[NBlendTypes] = { fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32> // TransformedBilinearTiled }; +static_assert(std::size(sourceFetchAny32) == NBlendTypes); + static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format) { if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied) @@ -3143,7 +3191,7 @@ static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage: } #if QT_CONFIG(raster_64bit) -static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = { +static const SourceFetchProc64 sourceFetchGeneric64[] = { fetchUntransformed64, // Untransformed fetchUntransformed64, // Tiled fetchTransformed64<BlendTransformed>, // Transformed @@ -3152,7 +3200,9 @@ static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = { fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled }; -static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = { +static_assert(std::size(sourceFetchGeneric64) == NBlendTypes); + +static const SourceFetchProc64 sourceFetchRGBA64PM[] = { fetchUntransformedRGBA64PM, // Untransformed fetchUntransformedRGBA64PM, // Tiled fetchTransformed64<BlendTransformed>, // Transformed @@ -3161,6 +3211,8 @@ static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = { fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled }; +static_assert(std::size(sourceFetchRGBA64PM) == NBlendTypes); + static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format) { if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied) @@ -3170,7 +3222,7 @@ static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QIm #endif #if QT_CONFIG(raster_fp) -static const SourceFetchProcFP sourceFetchGenericFP[NBlendTypes] = { +static const SourceFetchProcFP sourceFetchGenericFP[] = { fetchUntransformedFP, // Untransformed fetchUntransformedFP, // Tiled fetchTransformedFP<BlendTransformed>, // Transformed @@ -3179,6 +3231,8 @@ static const SourceFetchProcFP sourceFetchGenericFP[NBlendTypes] = { fetchTransformedBilinearFP<BlendTransformedBilinearTiled> // BilinearTiled }; +static_assert(std::size(sourceFetchGenericFP) == NBlendTypes); + static inline SourceFetchProcFP getSourceFetchFP(TextureBlendType blendType, QImage::Format /*format*/) { return sourceFetchGenericFP[blendType]; @@ -3239,6 +3293,7 @@ public: static Type null() { return 0; } static Type fetchSingle(const QGradientData& gradient, qreal v) { + Q_ASSERT(std::isfinite(v)); return qt_gradient_pixel(&gradient, v); } static Type fetchSingle(const QGradientData& gradient, int v) @@ -3259,6 +3314,7 @@ public: static Type null() { return QRgba64::fromRgba64(0); } static Type fetchSingle(const QGradientData& gradient, qreal v) { + Q_ASSERT(std::isfinite(v)); return qt_gradient_pixel64(&gradient, v); } static Type fetchSingle(const QGradientData& gradient, int v) @@ -3280,6 +3336,7 @@ public: static Type null() { return QRgbaFloat32::fromRgba64(0,0,0,0); } static Type fetchSingle(const QGradientData& gradient, qreal v) { + Q_ASSERT(std::isfinite(v)); return qt_gradient_pixelFP(&gradient, v); } static Type fetchSingle(const QGradientData& gradient, int v) @@ -3397,7 +3454,6 @@ static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const Q v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius; v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy; - v->inv2a = 1 / (2 * v->a); v->extended = !qFuzzyIsNull(data->gradient.radial.focal.radius) || v->a <= 0; } @@ -3430,7 +3486,13 @@ public: } } else { while (buffer < end) { - *buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(det) - b); + BlendType result = GradientBase::null(); + if (det >= 0) { + qreal w = qSqrt(det) - b; + result = GradientBase::fetchSingle(data->gradient, w); + } + + *buffer++ = result; det += delta_det; delta_det += delta_delta_det; @@ -3564,34 +3626,33 @@ static const CompositionFunctionFP *functionForModeFP = qt_functionForModeFP_C; static TextureBlendType getBlendType(const QSpanData *data) { TextureBlendType ft; - if (data->txop <= QTransform::TxTranslate) - if (data->texture.type == QTextureData::Tiled || data->texture.type == QTextureData::Pattern) + if (data->texture.type == QTextureData::Pattern) + ft = BlendTiled; + else if (data->txop <= QTransform::TxTranslate) + if (data->texture.type == QTextureData::Tiled) ft = BlendTiled; else ft = BlendUntransformed; else if (data->bilinear) - if (data->texture.type == QTextureData::Tiled || data->texture.type == QTextureData::Pattern) + if (data->texture.type == QTextureData::Tiled) ft = BlendTransformedBilinearTiled; else ft = BlendTransformedBilinear; else - if (data->texture.type == QTextureData::Pattern) - ft = BlendTiled; - else if (data->texture.type == QTextureData::Tiled) + if (data->texture.type == QTextureData::Tiled) ft = BlendTransformedTiled; else ft = BlendTransformed; return ft; } -static inline Operator getOperator(const QSpanData *data, const QSpan *spans, int spanCount) +static inline Operator getOperator(const QSpanData *data, const QT_FT_Span *spans, int spanCount) { Operator op; bool solidSource = false; - switch(data->type) { case QSpanData::Solid: - solidSource = data->solidColor.isOpaque(); + solidSource = data->solidColor.alphaF() >= 1.0f; op.srcFetch = nullptr; op.srcFetch64 = nullptr; op.srcFetchFP = nullptr; @@ -3632,7 +3693,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in solidSource = !data->texture.hasAlpha; op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format); #if QT_CONFIG(raster_64bit) - op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);; + op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format); #endif #if QT_CONFIG(raster_fp) op.srcFetchFP = getSourceFetchFP(getBlendType(data), data->texture.format); @@ -3660,14 +3721,13 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in op.destFetch64 = nullptr; #endif #if QT_CONFIG(raster_fp) - if (data->rasterBuffer->format > QImage::Format_Indexed8) - op.destFetchFP = destFetchFP; - else + op.destFetchFP = destFetchProcFP[data->rasterBuffer->format]; +#else + op.destFetchFP = nullptr; #endif - op.destFetchFP = nullptr; if (op.mode == QPainter::CompositionMode_Source && (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) { - const QSpan *lastSpan = spans + spanCount; + const QT_FT_Span *lastSpan = spans + spanCount; bool alphaSpans = false; while (spans < lastSpan) { if (spans->coverage != 255) { @@ -3686,7 +3746,8 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in op.destFetch64 = destFetch64Undefined; #endif #if QT_CONFIG(raster_fp) - op.destFetchFP = destFetchFPUndefined; + if (op.destFetchFP != destFetchRGBFP) + op.destFetchFP = destFetchFPUndefined; #endif } } @@ -3758,44 +3819,69 @@ static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP b // -------------------- blend methods --------------------- -static void blend_color_generic(int count, const QSpan *spans, void *userData) +#if defined(QT_USE_THREAD_PARALLEL_FILLS) +#define QT_THREAD_PARALLEL_FILLS(function) \ + const int segments = (count + 32) / 64; \ + QThreadPool *threadPool = QThreadPoolPrivate::qtGuiInstance(); \ + if (segments > 1 && qPixelLayouts[data->rasterBuffer->format].bpp >= QPixelLayout::BPP8 \ + && threadPool && !threadPool->contains(QThread::currentThread())) { \ + QSemaphore semaphore; \ + int c = 0; \ + for (int i = 0; i < segments; ++i) { \ + int cn = (count - c) / (segments - i); \ + threadPool->start([&, c, cn]() { \ + function(c, c + cn); \ + semaphore.release(1); \ + }, 1); \ + c += cn; \ + } \ + semaphore.acquire(segments); \ + } else \ + function(0, count) +#else +#define QT_THREAD_PARALLEL_FILLS(function) function(0, count) +#endif + +static void blend_color_generic(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[BufferSize]; - Operator op = getOperator(data, nullptr, 0); - const uint color = data->solidColor.toArgb32(); + const Operator op = getOperator(data, nullptr, 0); + const uint color = data->solidColor.rgba(); const bool solidFill = op.mode == QPainter::CompositionMode_Source; const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp; - while (count--) { - int x = spans->x; - int length = spans->len; - if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) { - // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels - op.destStore(data->rasterBuffer, x, spans->y, &color, 1); - spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length); - length = 0; - } + auto function = [=] (int cStart, int cEnd) { + alignas(16) uint buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + op.destStore(data->rasterBuffer, x, spans[c].y, &color, 1); + spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length); + length = 0; + } - while (length) { - int l = qMin(BufferSize, length); - uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); - op.funcSolid(dest, l, color, spans->coverage); - if (op.destStore) - op.destStore(data->rasterBuffer, x, spans->y, dest, l); - length -= l; - x += l; + while (length) { + int l = qMin(BufferSize, length); + uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l); + op.funcSolid(dest, l, color, spans[c].coverage); + if (op.destStore) + op.destStore(data->rasterBuffer, x, spans[c].y, dest, l); + length -= l; + x += l; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } -static void blend_color_argb(int count, const QSpan *spans, void *userData) +static void blend_color_argb(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); const Operator op = getOperator(data, nullptr, 0); - const uint color = data->solidColor.toArgb32(); + const uint color = data->solidColor.rgba(); if (op.mode == QPainter::CompositionMode_Source) { // inline for performance @@ -3817,155 +3903,167 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData) } return; } - - while (count--) { - uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x; - op.funcSolid(target, spans->len, color, spans->coverage); - ++spans; - } + const auto funcSolid = op.funcSolid; + auto function = [=] (int cStart, int cEnd) { + for (int c = cStart; c < cEnd; ++c) { + uint *target = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + spans[c].x; + funcSolid(target, spans[c].len, color, spans[c].coverage); + } + }; + QT_THREAD_PARALLEL_FILLS(function); } -static void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData) +static void blend_color_generic_rgb64(int count, const QT_FT_Span *spans, void *userData) { #if QT_CONFIG(raster_64bit) QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, nullptr, 0); + const Operator op = getOperator(data, nullptr, 0); if (!op.funcSolid64) { qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit"); return blend_color_generic(count, spans, userData); } - alignas(8) QRgba64 buffer[BufferSize]; - const QRgba64 color = data->solidColor; + const QRgba64 color = data->solidColor.rgba64(); const bool solidFill = op.mode == QPainter::CompositionMode_Source; const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp; - while (count--) { - int x = spans->x; - int length = spans->len; - if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStore64) { - // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels - op.destStore64(data->rasterBuffer, x, spans->y, &color, 1); - spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length); - length = 0; - } + auto function = [=, &op] (int cStart, int cEnd) + { + alignas(16) QRgba64 buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore64) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + op.destStore64(data->rasterBuffer, x, spans[c].y, &color, 1); + spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length); + length = 0; + } - while (length) { - int l = qMin(BufferSize, length); - QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l); - op.funcSolid64(dest, l, color, spans->coverage); - if (op.destStore64) - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); - length -= l; - x += l; + while (length) { + int l = qMin(BufferSize, length); + QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l); + op.funcSolid64(dest, l, color, spans[c].coverage); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l); + length -= l; + x += l; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); #else blend_color_generic(count, spans, userData); #endif } -static void blend_color_generic_fp(int count, const QSpan *spans, void *userData) +static void blend_color_generic_fp(int count, const QT_FT_Span *spans, void *userData) { #if QT_CONFIG(raster_fp) QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, nullptr, 0); + const Operator op = getOperator(data, nullptr, 0); if (!op.funcSolidFP || !op.destFetchFP) { qCDebug(lcQtGuiDrawHelper, "blend_color_generic_fp: unsupported 4xF16 blend attempted, falling back to 32-bit"); return blend_color_generic(count, spans, userData); } - QRgbaFloat32 buffer[BufferSize]; - const QRgbaFloat32 color = qConvertRgb64ToRgbaF32(data->solidColor); + float r, g, b, a; + data->solidColor.getRgbF(&r, &g, &b, &a); + const QRgbaFloat32 color{r, g, b, a}; const bool solidFill = op.mode == QPainter::CompositionMode_Source; QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp; - while (count--) { - int x = spans->x; - int length = spans->len; - if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStoreFP) { - // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels - op.destStoreFP(data->rasterBuffer, x, spans->y, &color, 1); - spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length); - length = 0; - } + auto function = [=, &op] (int cStart, int cEnd) + { + alignas(16) QRgbaFloat32 buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStoreFP) { + // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels + op.destStoreFP(data->rasterBuffer, x, spans[c].y, &color, 1); + spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length); + length = 0; + } - while (length) { - int l = qMin(BufferSize, length); - QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l); - op.funcSolidFP(dest, l, color, spans->coverage); - if (op.destStoreFP) - op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l); - length -= l; - x += l; + while (length) { + int l = qMin(BufferSize, length); + QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l); + op.funcSolidFP(dest, l, color, spans[c].coverage); + if (op.destStoreFP) + op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l); + length -= l; + x += l; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); #else blend_color_generic(count, spans, userData); #endif } template <typename T> -void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handler) +void handleSpans(int count, const QT_FT_Span *spans, const QSpanData *data, const Operator &op) { - uint const_alpha = 256; - if (data->type == QSpanData::Texture) - const_alpha = data->texture.const_alpha; + const int const_alpha = (data->type == QSpanData::Texture) ? data->texture.const_alpha : 256; + const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256; - int coverage = 0; - while (count) { - if (!spans->len) { - ++spans; - --count; - continue; - } - int x = spans->x; - const int y = spans->y; - int right = x + spans->len; + auto function = [=, &op] (int cStart, int cEnd) + { + T handler(data, op); + int coverage = 0; + for (int c = cStart; c < cEnd;) { + if (!spans[c].len) { + ++c; + continue; + } + int x = spans[c].x; + const int y = spans[c].y; + int right = x + spans[c].len; + const bool fetchDest = !solidSource || spans[c].coverage < 255; - // compute length of adjacent spans - for (int i = 1; i < count && spans[i].y == y && spans[i].x == right; ++i) - right += spans[i].len; - int length = right - x; + // compute length of adjacent spans + for (int i = c + 1; i < cEnd && spans[i].y == y && spans[i].x == right && fetchDest == (!solidSource || spans[i].coverage < 255); ++i) + right += spans[i].len; + int length = right - x; - while (length) { - int l = qMin(BufferSize, length); - length -= l; + while (length) { + int l = qMin(BufferSize, length); + length -= l; - int process_length = l; - int process_x = x; + int process_length = l; + int process_x = x; - const auto *src = handler.fetch(process_x, y, process_length); - int offset = 0; - while (l > 0) { - if (x == spans->x) // new span? - coverage = (spans->coverage * const_alpha) >> 8; + const auto *src = handler.fetch(process_x, y, process_length, fetchDest); + int offset = 0; + while (l > 0) { + if (x == spans[c].x) // new span? + coverage = (spans[c].coverage * const_alpha) >> 8; - int right = spans->x + spans->len; - int len = qMin(l, right - x); + int right = spans[c].x + spans[c].len; + int len = qMin(l, right - x); - handler.process(x, y, len, coverage, src, offset); + handler.process(x, y, len, coverage, src, offset); - l -= len; - x += len; - offset += len; + l -= len; + x += len; + offset += len; - if (x == right) { // done with current span? - ++spans; - --count; + if (x == right) // done with current span? + ++c; } + handler.store(process_x, y, process_length); } - handler.store(process_x, y, process_length); } - } + }; + QT_THREAD_PARALLEL_FILLS(function); } struct QBlendBase { - QSpanData *data; - Operator op; + const QSpanData *data; + const Operator &op; }; class BlendSrcGeneric : public QBlendBase @@ -3974,14 +4072,17 @@ public: uint *dest = nullptr; alignas(16) uint buffer[BufferSize]; alignas(16) uint src_buffer[BufferSize]; - BlendSrcGeneric(QSpanData *d, const Operator &o) + BlendSrcGeneric(const QSpanData *d, const Operator &o) : QBlendBase{d, o} { } - const uint *fetch(int x, int y, int len) + const uint *fetch(int x, int y, int len, bool fetchDest) { - dest = op.destFetch(buffer, data->rasterBuffer, x, y, len); + if (fetchDest || op.destFetch == destFetchARGB32P) + dest = op.destFetch(buffer, data->rasterBuffer, x, y, len); + else + dest = buffer; return op.srcFetch(src_buffer, &op, data, y, x, len); } @@ -4004,7 +4105,7 @@ public: QRgba64 *dest = nullptr; alignas(16) QRgba64 buffer[BufferSize]; alignas(16) QRgba64 src_buffer[BufferSize]; - BlendSrcGenericRGB64(QSpanData *d, const Operator &o) + BlendSrcGenericRGB64(const QSpanData *d, const Operator &o) : QBlendBase{d, o} { } @@ -4014,9 +4115,12 @@ public: return op.func64 && op.destFetch64; } - const QRgba64 *fetch(int x, int y, int len) + const QRgba64 *fetch(int x, int y, int len, bool fetchDest) { - dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len); + if (fetchDest || op.destFetch64 == destFetchRGB64) + dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len); + else + dest = buffer; return op.srcFetch64(src_buffer, &op, data, y, x, len); } @@ -4040,7 +4144,7 @@ public: QRgbaFloat32 *dest = nullptr; alignas(16) QRgbaFloat32 buffer[BufferSize]; alignas(16) QRgbaFloat32 src_buffer[BufferSize]; - BlendSrcGenericRGBFP(QSpanData *d, const Operator &o) + BlendSrcGenericRGBFP(const QSpanData *d, const Operator &o) : QBlendBase{d, o} { } @@ -4050,9 +4154,12 @@ public: return op.funcFP && op.destFetchFP && op.srcFetchFP; } - const QRgbaFloat32 *fetch(int x, int y, int len) + const QRgbaFloat32 *fetch(int x, int y, int len, bool fetchDest) { - dest = op.destFetchFP(buffer, data->rasterBuffer, x, y, len); + if (fetchDest || op.destFetchFP == destFetchRGBFP) + dest = op.destFetchFP(buffer, data->rasterBuffer, x, y, len); + else + dest = buffer; return op.srcFetchFP(src_buffer, &op, data, y, x, len); } @@ -4069,196 +4176,212 @@ public: }; #endif -static void blend_src_generic(int count, const QSpan *spans, void *userData) +static void blend_src_generic(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - BlendSrcGeneric blend(data, getOperator(data, spans, count)); - handleSpans(count, spans, data, blend); + const Operator op = getOperator(data, nullptr, 0); + handleSpans<BlendSrcGeneric>(count, spans, data, op); } #if QT_CONFIG(raster_64bit) -static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userData) +static void blend_src_generic_rgb64(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); - BlendSrcGenericRGB64 blend64(data, op); - if (blend64.isSupported()) - handleSpans(count, spans, data, blend64); - else { + const Operator op = getOperator(data, nullptr, 0); + if (op.func64 && op.destFetch64) { + handleSpans<BlendSrcGenericRGB64>(count, spans, data, op); + } else { qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); - BlendSrcGeneric blend32(data, op); - handleSpans(count, spans, data, blend32); + handleSpans<BlendSrcGeneric>(count, spans, data, op); } } #endif #if QT_CONFIG(raster_fp) -static void blend_src_generic_fp(int count, const QSpan *spans, void *userData) +static void blend_src_generic_fp(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); - BlendSrcGenericRGBFP blendFP(data, op); - if (blendFP.isSupported()) - handleSpans(count, spans, data, blendFP); - else { + const Operator op = getOperator(data, spans, count); + if (op.funcFP && op.destFetchFP && op.srcFetchFP) { + handleSpans<BlendSrcGenericRGBFP>(count, spans, data, op); + } else { qCDebug(lcQtGuiDrawHelper, "blend_src_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit"); - BlendSrcGeneric blend32(data, op); - handleSpans(count, spans, data, blend32); + handleSpans<BlendSrcGeneric>(count, spans, data, op); } } #endif -static void blend_untransformed_generic(int count, const QSpan *spans, void *userData) +static void blend_untransformed_generic(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[BufferSize]; - uint src_buffer[BufferSize]; - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; const int image_height = data->texture.height; - int xoff = -qRound(-data->dx); - int yoff = -qRound(-data->dy); + const int const_alpha = data->texture.const_alpha; + const int xoff = -qRound(-data->dx); + const int yoff = -qRound(-data->dy); + const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch != destFetchARGB32P; - for (; count--; spans++) { - if (!spans->len) - continue; - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) - length = image_width - sx; - if (length > 0) { - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(BufferSize, length); - const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); - uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); - op.func(dest, src, l, coverage); - if (op.destStore) - op.destStore(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; + auto function = [=, &op] (int cStart, int cEnd) + { + alignas(16) uint buffer[BufferSize]; + alignas(16) uint src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + if (!spans[c].len) + continue; + int x = spans[c].x; + int length = spans[c].len; + int sx = xoff + x; + int sy = yoff + spans[c].y; + const bool fetchDest = !solidSource || spans[c].coverage < 255; + if (sy >= 0 && sy < image_height && sx < image_width) { + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; + } + if (sx + length > image_width) + length = image_width - sx; + if (length > 0) { + const int coverage = (spans[c].coverage * const_alpha) >> 8; + while (length) { + int l = qMin(BufferSize, length); + const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); + uint *dest = fetchDest ? op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer; + op.func(dest, src, l, coverage); + if (op.destStore) + op.destStore(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + } } } } - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #if QT_CONFIG(raster_64bit) -static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, void *userData) +static void blend_untransformed_generic_rgb64(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); if (!op.func64) { qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_untransformed_generic(count, spans, userData); } - alignas(8) QRgba64 buffer[BufferSize]; - alignas(8) QRgba64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; - int xoff = -qRound(-data->dx); - int yoff = -qRound(-data->dy); + const int const_alpha = data->texture.const_alpha; + const int xoff = -qRound(-data->dx); + const int yoff = -qRound(-data->dy); + const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch64 != destFetchRGB64; - for (; count--; spans++) { - if (!spans->len) - continue; - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) - length = image_width - sx; - if (length > 0) { - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(BufferSize, length); - const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l); - QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l); - op.func64(dest, src, l, coverage); - if (op.destStore64) - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; + auto function = [=, &op] (int cStart, int cEnd) + { + alignas(16) QRgba64 buffer[BufferSize]; + alignas(16) QRgba64 src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + if (!spans[c].len) + continue; + int x = spans[c].x; + int length = spans[c].len; + int sx = xoff + x; + int sy = yoff + spans[c].y; + const bool fetchDest = !solidSource || spans[c].coverage < 255; + if (sy >= 0 && sy < image_height && sx < image_width) { + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; + } + if (sx + length > image_width) + length = image_width - sx; + if (length > 0) { + const int coverage = (spans[c].coverage * const_alpha) >> 8; + while (length) { + int l = qMin(BufferSize, length); + const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l); + QRgba64 *dest = fetchDest ? op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer; + op.func64(dest, src, l, coverage); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + } } } } - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #endif #if QT_CONFIG(raster_fp) -static void blend_untransformed_generic_fp(int count, const QSpan *spans, void *userData) +static void blend_untransformed_generic_fp(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); if (!op.funcFP) { qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgbaf16: unsupported 4xFP16 blend attempted, falling back to 32-bit"); return blend_untransformed_generic(count, spans, userData); } - QRgbaFloat32 buffer[BufferSize]; - QRgbaFloat32 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; - int xoff = -qRound(-data->dx); - int yoff = -qRound(-data->dy); + const int xoff = -qRound(-data->dx); + const int yoff = -qRound(-data->dy); + const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetchFP != destFetchRGBFP; - for (; count--; spans++) { - if (!spans->len) - continue; - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) - length = image_width - sx; - if (length > 0) { - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(BufferSize, length); - const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l); - QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l); - op.funcFP(dest, src, l, coverage); - if (op.destStoreFP) - op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; + auto function = [=, &op] (int cStart, int cEnd) + { + alignas(16) QRgbaFloat32 buffer[BufferSize]; + alignas(16) QRgbaFloat32 src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + if (!spans[c].len) + continue; + int x = spans[c].x; + int length = spans[c].len; + int sx = xoff + x; + int sy = yoff + spans[c].y; + const bool fetchDest = !solidSource || spans[c].coverage < 255; + if (sy >= 0 && sy < image_height && sx < image_width) { + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; + } + if (sx + length > image_width) + length = image_width - sx; + if (length > 0) { + const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8; + while (length) { + int l = qMin(BufferSize, length); + const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l); + QRgbaFloat32 *dest = fetchDest ? op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer; + op.funcFP(dest, src, l, coverage); + if (op.destStoreFP) + op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + } } } } - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #endif -static void blend_untransformed_argb(int count, const QSpan *spans, void *userData) +static void blend_untransformed_argb(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); if (data->texture.format != QImage::Format_ARGB32_Premultiplied @@ -4267,36 +4390,41 @@ static void blend_untransformed_argb(int count, const QSpan *spans, void *userDa return; } - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; const int image_height = data->texture.height; - int xoff = -qRound(-data->dx); - int yoff = -qRound(-data->dy); + const int const_alpha = data->texture.const_alpha; + const int xoff = -qRound(-data->dx); + const int yoff = -qRound(-data->dy); - for (; count--; spans++) { - if (!spans->len) - continue; - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) - length = image_width - sx; - if (length > 0) { - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - const uint *src = (const uint *)data->texture.scanLine(sy) + sx; - uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x; - op.func(dest, src, length, coverage); + auto function = [=, &op] (int cStart, int cEnd) + { + for (int c = cStart; c < cEnd; ++c) { + if (!spans[c].len) + continue; + int x = spans[c].x; + int length = spans[c].len; + int sx = xoff + x; + int sy = yoff + spans[c].y; + if (sy >= 0 && sy < image_height && sx < image_width) { + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; + } + if (sx + length > image_width) + length = image_width - sx; + if (length > 0) { + const int coverage = (spans[c].coverage * const_alpha) >> 8; + const uint *src = (const uint *)data->texture.scanLine(sy) + sx; + uint *dest = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + x; + op.func(dest, src, length, coverage); + } } } - } + }; + QT_THREAD_PARALLEL_FILLS(function); } static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a, @@ -4350,7 +4478,7 @@ static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest, } } -static void blend_untransformed_rgb565(int count, const QSpan *spans, void *userData) +static void blend_untransformed_rgb565(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData*>(userData); QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; @@ -4368,57 +4496,54 @@ static void blend_untransformed_rgb565(int count, const QSpan *spans, void *user int xoff = -qRound(-data->dx); int yoff = -qRound(-data->dy); - const QSpan *end = spans + count; - while (spans < end) { - if (!spans->len) { - ++spans; - continue; - } - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - if (coverage == 0) { - ++spans; - continue; - } - - int x = spans->x; - int length = spans->len; - int sx = xoff + x; - int sy = yoff + spans->y; - if (sy >= 0 && sy < image_height && sx < image_width) { - if (sx < 0) { - x -= sx; - length += sx; - sx = 0; - } - if (sx + length > image_width) - length = image_width - sx; - if (length > 0) { - quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + x; - const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; - if (coverage == 255) { - memcpy(dest, src, length * sizeof(quint16)); - } else { - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha > 0) - blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha); + auto function = [=](int cStart, int cEnd) + { + for (int c = cStart; c < cEnd; ++c) { + if (!spans[c].len) + continue; + const quint8 coverage = (data->texture.const_alpha * spans[c].coverage) >> 8; + if (coverage == 0) + continue; + + int x = spans[c].x; + int length = spans[c].len; + int sx = xoff + x; + int sy = yoff + spans[c].y; + if (sy >= 0 && sy < image_height && sx < image_width) { + if (sx < 0) { + x -= sx; + length += sx; + sx = 0; + } + if (sx + length > image_width) + length = image_width - sx; + if (length > 0) { + quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans[c].y) + x; + const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; + if (coverage == 255) { + memcpy(dest, src, length * sizeof(quint16)); + } else { + const quint8 alpha = (coverage + 1) >> 3; + const quint8 ialpha = 0x20 - alpha; + if (alpha > 0) + blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha); + } } } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } -static void blend_tiled_generic(int count, const QSpan *spans, void *userData) +static void blend_tiled_generic(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - uint buffer[BufferSize]; - uint src_buffer[BufferSize]; - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); const int image_width = data->texture.width; const int image_height = data->texture.height; + const int const_alpha = data->texture.const_alpha; int xoff = -qRound(-data->dx) % image_width; int yoff = -qRound(-data->dy) % image_height; @@ -4427,48 +4552,51 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData) if (yoff < 0) yoff += image_height; - while (count--) { - int x = spans->x; - int length = spans->len; - int sx = (xoff + spans->x) % image_width; - int sy = (spans->y + yoff) % image_height; - if (sx < 0) - sx += image_width; - if (sy < 0) - sy += image_height; - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(image_width - sx, length); - if (BufferSize < l) - l = BufferSize; - const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); - uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l); - op.func(dest, src, l, coverage); - if (op.destStore) - op.destStore(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; - if (sx >= image_width) - sx = 0; + auto function = [=, &op](int cStart, int cEnd) + { + alignas(16) uint buffer[BufferSize]; + alignas(16) uint src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + int sx = (xoff + spans[c].x) % image_width; + int sy = (spans[c].y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; + + const int coverage = (spans[c].coverage * const_alpha) >> 8; + while (length) { + int l = qMin(image_width - sx, length); + if (BufferSize < l) + l = BufferSize; + const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l); + uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l); + op.func(dest, src, l, coverage); + if (op.destStore) + op.destStore(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + if (sx >= image_width) + sx = 0; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #if QT_CONFIG(raster_64bit) -static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userData) +static void blend_tiled_generic_rgb64(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); if (!op.func64) { qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit"); return blend_tiled_generic(count, spans, userData); } - alignas(8) QRgba64 buffer[BufferSize]; - alignas(8) QRgba64 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4483,6 +4611,7 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32; bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64; if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) { + alignas(16) QRgba64 src_buffer[BufferSize]; // If destination isn't blended into the result, we can do the tiling directly on destination pixels. while (count--) { int x = spans->x; @@ -4530,49 +4659,52 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD return; } - while (count--) { - int x = spans->x; - int length = spans->len; - int sx = (xoff + spans->x) % image_width; - int sy = (spans->y + yoff) % image_height; - if (sx < 0) - sx += image_width; - if (sy < 0) - sy += image_height; - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(image_width - sx, length); - if (BufferSize < l) - l = BufferSize; - const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l); - QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l); - op.func64(dest, src, l, coverage); - if (op.destStore64) - op.destStore64(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; - if (sx >= image_width) - sx = 0; + auto function = [=, &op](int cStart, int cEnd) + { + alignas(16) QRgba64 buffer[BufferSize]; + alignas(16) QRgba64 src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + int sx = (xoff + spans[c].x) % image_width; + int sy = (spans[c].y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; + + const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8; + while (length) { + int l = qMin(image_width - sx, length); + if (BufferSize < l) + l = BufferSize; + const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l); + QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l); + op.func64(dest, src, l, coverage); + if (op.destStore64) + op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + if (sx >= image_width) + sx = 0; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #endif #if QT_CONFIG(raster_fp) -static void blend_tiled_generic_fp(int count, const QSpan *spans, void *userData) +static void blend_tiled_generic_fp(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); if (!op.funcFP) { qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit"); return blend_tiled_generic(count, spans, userData); } - QRgbaFloat32 buffer[BufferSize]; - QRgbaFloat32 src_buffer[BufferSize]; const int image_width = data->texture.width; const int image_height = data->texture.height; @@ -4586,38 +4718,43 @@ static void blend_tiled_generic_fp(int count, const QSpan *spans, void *userData // Consider tiling optimizing like the other versions. - while (count--) { - int x = spans->x; - int length = spans->len; - int sx = (xoff + spans->x) % image_width; - int sy = (spans->y + yoff) % image_height; - if (sx < 0) - sx += image_width; - if (sy < 0) - sy += image_height; - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(image_width - sx, length); - if (BufferSize < l) - l = BufferSize; - const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l); - QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l); - op.funcFP(dest, src, l, coverage); - if (op.destStoreFP) - op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l); - x += l; - sx += l; - length -= l; - if (sx >= image_width) - sx = 0; + auto function = [=, &op](int cStart, int cEnd) + { + alignas(16) QRgbaFloat32 buffer[BufferSize]; + alignas(16) QRgbaFloat32 src_buffer[BufferSize]; + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + int sx = (xoff + spans[c].x) % image_width; + int sy = (spans[c].y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; + + const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8; + while (length) { + int l = qMin(image_width - sx, length); + if (BufferSize < l) + l = BufferSize; + const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l); + QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l); + op.funcFP(dest, src, l, coverage); + if (op.destStoreFP) + op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l); + x += l; + sx += l; + length -= l; + if (sx >= image_width) + sx = 0; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } #endif -static void blend_tiled_argb(int count, const QSpan *spans, void *userData) +static void blend_tiled_argb(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); if (data->texture.format != QImage::Format_ARGB32_Premultiplied @@ -4626,10 +4763,10 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData) return; } - Operator op = getOperator(data, spans, count); + const Operator op = getOperator(data, spans, count); - int image_width = data->texture.width; - int image_height = data->texture.height; + const int image_width = data->texture.width; + const int image_height = data->texture.height; int xoff = -qRound(-data->dx) % image_width; int yoff = -qRound(-data->dy) % image_height; @@ -4637,36 +4774,40 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData) xoff += image_width; if (yoff < 0) yoff += image_height; + const auto func = op.func; + const int const_alpha = data->texture.const_alpha; + + auto function = [=] (int cStart, int cEnd) { + for (int c = cStart; c < cEnd; ++c) { + int x = spans[c].x; + int length = spans[c].len; + int sx = (xoff + spans[c].x) % image_width; + int sy = (spans[c].y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; - while (count--) { - int x = spans->x; - int length = spans->len; - int sx = (xoff + spans->x) % image_width; - int sy = (spans->y + yoff) % image_height; - if (sx < 0) - sx += image_width; - if (sy < 0) - sy += image_height; - - const int coverage = (spans->coverage * data->texture.const_alpha) >> 8; - while (length) { - int l = qMin(image_width - sx, length); - if (BufferSize < l) - l = BufferSize; - const uint *src = (const uint *)data->texture.scanLine(sy) + sx; - uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x; - op.func(dest, src, l, coverage); - x += l; - sx += l; - length -= l; - if (sx >= image_width) - sx = 0; + const int coverage = (spans[c].coverage * const_alpha) >> 8; + while (length) { + int l = qMin(image_width - sx, length); + if (BufferSize < l) + l = BufferSize; + const uint *src = (const uint *)data->texture.scanLine(sy) + sx; + uint *dest = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + x; + func(dest, src, l, coverage); + x += l; + sx += l; + length -= l; + if (sx >= image_width) + sx = 0; + } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } -static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) +static void blend_tiled_rgb565(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData*>(userData); QPainter::CompositionMode mode = data->rasterBuffer->compositionMode; @@ -4689,79 +4830,80 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData) if (yoff < 0) yoff += image_height; - while (count--) { - const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8; - if (coverage == 0) { - ++spans; - continue; - } - - int x = spans->x; - int length = spans->len; - int sx = (xoff + spans->x) % image_width; - int sy = (spans->y + yoff) % image_height; - if (sx < 0) - sx += image_width; - if (sy < 0) - sy += image_height; - - if (coverage == 255) { - // Copy the first texture block - length = qMin(image_width,length); - int tx = x; - while (length) { - int l = qMin(image_width - sx, length); - if (BufferSize < l) - l = BufferSize; - quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx; - const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; - memcpy(dest, src, l * sizeof(quint16)); - length -= l; - tx += l; - sx += l; - if (sx >= image_width) - sx = 0; - } + const int const_alpha = data->texture.const_alpha; + auto function = [=] (int cStart, int cEnd) { + for (int c = cStart; c < cEnd; ++c) { + const quint8 coverage = (const_alpha * spans[c].coverage) >> 8; + if (coverage == 0) + continue; + + int x = spans[c].x; + int length = spans[c].len; + int sx = (xoff + spans[c].x) % image_width; + int sy = (spans[c].y + yoff) % image_height; + if (sx < 0) + sx += image_width; + if (sy < 0) + sy += image_height; - // Now use the rasterBuffer as the source of the texture, - // We can now progressively copy larger blocks - // - Less cpu time in code figuring out what to copy - // We are dealing with one block of data - // - More likely to fit in the cache - // - can use memcpy - int copy_image_width = qMin(image_width, int(spans->len)); - length = spans->len - copy_image_width; - quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x; - quint16 *dest = src + copy_image_width; - while (copy_image_width < length) { - memcpy(dest, src, copy_image_width * sizeof(quint16)); - dest += copy_image_width; - length -= copy_image_width; - copy_image_width *= 2; - } - if (length > 0) - memcpy(dest, src, length * sizeof(quint16)); - } else { - const quint8 alpha = (coverage + 1) >> 3; - const quint8 ialpha = 0x20 - alpha; - if (alpha > 0) { + if (coverage == 255) { + // Copy the first texture block + length = qMin(image_width,length); + int tx = x; while (length) { int l = qMin(image_width - sx, length); if (BufferSize < l) l = BufferSize; - quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x; + quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + tx; const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; - blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha); - x += l; - sx += l; + memcpy(dest, src, l * sizeof(quint16)); length -= l; + tx += l; + sx += l; if (sx >= image_width) sx = 0; } + + // Now use the rasterBuffer as the source of the texture, + // We can now progressively copy larger blocks + // - Less cpu time in code figuring out what to copy + // We are dealing with one block of data + // - More likely to fit in the cache + // - can use memcpy + int copy_image_width = qMin(image_width, int(spans[c].len)); + length = spans[c].len - copy_image_width; + quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + x; + quint16 *dest = src + copy_image_width; + while (copy_image_width < length) { + memcpy(dest, src, copy_image_width * sizeof(quint16)); + dest += copy_image_width; + length -= copy_image_width; + copy_image_width *= 2; + } + if (length > 0) + memcpy(dest, src, length * sizeof(quint16)); + } else { + const quint8 alpha = (coverage + 1) >> 3; + const quint8 ialpha = 0x20 - alpha; + if (alpha > 0) { + while (length) { + int l = qMin(image_width - sx, length); + if (BufferSize < l) + l = BufferSize; + quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + x; + const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx; + blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha); + x += l; + sx += l; + length -= l; + if (sx >= image_width) + sx = 0; + } + } } } - ++spans; - } + }; + QT_THREAD_PARALLEL_FILLS(function); } /* Image formats here are target formats */ @@ -4813,15 +4955,14 @@ static const ProcessSpans processTextureSpansGenericFP[NBlendTypes] = { blend_src_generic_fp // TransformedBilinearTiled }; #endif -void qBlendTexture(int count, const QSpan *spans, void *userData) +void qBlendTexture(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); TextureBlendType blendType = getBlendType(data); ProcessSpans proc; switch (data->rasterBuffer->format) { case QImage::Format_Invalid: - Q_UNREACHABLE(); - return; + Q_UNREACHABLE_RETURN(); case QImage::Format_ARGB32_Premultiplied: proc = processTextureSpansARGB32PM[blendType]; break; @@ -4869,16 +5010,11 @@ void qBlendTexture(int count, const QSpan *spans, void *userData) proc(count, spans, userData); } -static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *userData) +static inline bool calculate_fixed_gradient_factors(int count, const QT_FT_Span *spans, + const QSpanData *data, + const LinearGradientValues &linear, + int *pyinc, int *poff) { - QSpanData *data = reinterpret_cast<QSpanData *>(userData); - - LinearGradientValues linear; - getLinearGradientValues(&linear, data); - - CompositionFunctionSolid funcSolid = - functionForModeSolid[data->rasterBuffer->compositionMode]; - /* The logic for vertical gradient calculations is a mathematically reduced copy of that in fetchLinearGradient() - which is basically: @@ -4893,8 +5029,32 @@ static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *us This has then been converted to fixed point to improve performance. */ const int gss = GRADIENT_STOPTABLE_SIZE - 1; - int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); - int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); + qreal ryinc = linear.dy * data->m22 * gss * FIXPT_SIZE; + qreal roff = (linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss * FIXPT_SIZE; + const int limit = std::numeric_limits<int>::max() - FIXPT_SIZE; + if (count && (std::fabs(ryinc) < limit) && (std::fabs(roff) < limit) + && (std::fabs(ryinc * spans->y + roff) < limit) + && (std::fabs(ryinc * (spans + count - 1)->y + roff) < limit)) { + *pyinc = int(ryinc); + *poff = int(roff); + return true; + } + return false; +} + +static bool blend_vertical_gradient_argb(int count, const QT_FT_Span *spans, void *userData) +{ + QSpanData *data = reinterpret_cast<QSpanData *>(userData); + + LinearGradientValues linear; + getLinearGradientValues(&linear, data); + + CompositionFunctionSolid funcSolid = + functionForModeSolid[data->rasterBuffer->compositionMode]; + + int yinc(0), off(0); + if (!calculate_fixed_gradient_factors(count, spans, data, linear, &yinc, &off)) + return false; while (count--) { int y = spans->y; @@ -4907,21 +5067,20 @@ static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *us funcSolid(dst, spans->len, color, spans->coverage); ++spans; } + return true; } template<ProcessSpans blend_color> -static void blend_vertical_gradient(int count, const QSpan *spans, void *userData) +static bool blend_vertical_gradient(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); LinearGradientValues linear; getLinearGradientValues(&linear, data); - // Based on the same logic as blend_vertical_gradient_argb. - - const int gss = GRADIENT_STOPTABLE_SIZE - 1; - int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE); - int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE)); + int yinc(0), off(0); + if (!calculate_fixed_gradient_factors(count, spans, data, linear, &yinc, &off)) + return false; while (count--) { int y = spans->y; @@ -4929,14 +5088,15 @@ static void blend_vertical_gradient(int count, const QSpan *spans, void *userDat #if QT_CONFIG(raster_64bit) data->solidColor = qt_gradient_pixel64_fixed(&data->gradient, yinc * y + off); #else - data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off)); + data->solidColor = qt_gradient_pixel_fixed(&data->gradient, yinc * y + off); #endif blend_color(1, spans, userData); ++spans; } + return true; } -void qBlendGradient(int count, const QSpan *spans, void *userData) +void qBlendGradient(int count, const QT_FT_Span *spans, void *userData) { QSpanData *data = reinterpret_cast<QSpanData *>(userData); bool isVerticalGradient = @@ -4948,8 +5108,8 @@ void qBlendGradient(int count, const QSpan *spans, void *userData) break; case QImage::Format_RGB32: case QImage::Format_ARGB32_Premultiplied: - if (isVerticalGradient) - return blend_vertical_gradient_argb(count, spans, userData); + if (isVerticalGradient && blend_vertical_gradient_argb(count, spans, userData)) + return; return blend_src_generic(count, spans, userData); #if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8) case QImage::Format_ARGB32: @@ -4971,8 +5131,8 @@ void qBlendGradient(int count, const QSpan *spans, void *userData) case QImage::Format_RGBA32FPx4_Premultiplied: #endif #if QT_CONFIG(raster_64bit) - if (isVerticalGradient) - return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData); + if (isVerticalGradient && blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData)) + return; return blend_src_generic_rgb64(count, spans, userData); #endif // QT_CONFIG(raster_64bit) #if QT_CONFIG(raster_fp) @@ -4982,13 +5142,13 @@ void qBlendGradient(int count, const QSpan *spans, void *userData) case QImage::Format_RGBX32FPx4: case QImage::Format_RGBA32FPx4: case QImage::Format_RGBA32FPx4_Premultiplied: - if (isVerticalGradient) - return blend_vertical_gradient<blend_color_generic_fp>(count, spans, userData); + if (isVerticalGradient && blend_vertical_gradient<blend_color_generic_fp>(count, spans, userData)) + return; return blend_src_generic_fp(count, spans, userData); #endif default: - if (isVerticalGradient) - return blend_vertical_gradient<blend_color_generic>(count, spans, userData); + if (isVerticalGradient && blend_vertical_gradient<blend_color_generic>(count, spans, userData)) + return; return blend_src_generic(count, spans, userData); } Q_UNREACHABLE(); @@ -5004,7 +5164,7 @@ inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer, const int destStride = rasterBuffer->stride<DST>(); if (mapWidth > 8) { - while (mapHeight--) { + while (--mapHeight >= 0) { int x0 = 0; int n = 0; for (int x = 0; x < mapWidth; x += 8) { @@ -5034,7 +5194,7 @@ inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer, map += mapStride; } } else { - while (mapHeight--) { + while (--mapHeight >= 0) { int x0 = 0; int n = 0; for (uchar s = *map; s; s <<= 1) { @@ -5218,7 +5378,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, const QClipData::ClipLine &line = clip->m_clipLines[yp]; for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5292,7 +5452,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer, const QClipData::ClipLine &line = clip->m_clipLines[yp]; for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5342,7 +5502,7 @@ void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer, if (!clip) { quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; const int destStride = rasterBuffer->stride<quint16>(); - while (mapHeight--) { + while (--mapHeight >= 0) { for (int i = 0; i < mapWidth; ++i) alphamapblend_quint16(map[i], dest, i, c); dest += destStride; @@ -5360,7 +5520,7 @@ void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer, quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(yp)); for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5396,7 +5556,7 @@ static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer, if (!clip) { quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; - while (mapHeight--) { + while (--mapHeight >= 0) { for (int i = 0; i < mapWidth; ++i) { const int coverage = map[i]; alphamapblend_argb32(dest + i, coverage, srcColor, c, colorProfile); @@ -5417,7 +5577,7 @@ static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer, quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp)); for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5518,7 +5678,7 @@ static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile) { // Do a gammacorrected RGB alphablend... - const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(dst) : dst; + const QRgba64 dlinear = colorProfile ? colorProfile->toLinear(dst) : dst; QRgba64 blend = rgbBlend(dlinear, slinear, coverage); @@ -5597,7 +5757,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, const QClipData::ClipLine &line = clip->m_clipLines[yp]; for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5670,7 +5830,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer, const QClipData::ClipLine &line = clip->m_clipLines[yp]; for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5714,7 +5874,7 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, if (!clip) { quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; const int destStride = rasterBuffer->stride<quint32>(); - while (mapHeight--) { + while (--mapHeight >= 0) { for (int i = 0; i < mapWidth; ++i) { const uint coverage = src[i]; alphargbblend_argb32(dst + i, coverage, srcColor, c, colorProfile); @@ -5736,7 +5896,7 @@ static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer, quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(yp)); for (int i=0; i<line.count; ++i) { - const QSpan &clip = line.spans[i]; + const QT_FT_Span &clip = line.spans[i]; int start = qMax<int>(x, clip.x); int end = qMin<int>(x + mapWidth, clip.x + clip.len); @@ -5858,7 +6018,7 @@ static void qt_rectfill_fp32x4(QRasterBuffer *rasterBuffer, // Map table for destination image format. Contains function pointers // for blends of various types unto the destination -DrawHelper qDrawHelper[QImage::NImageFormats] = +DrawHelper qDrawHelper[] = { // Format_Invalid, { nullptr, nullptr, nullptr, nullptr, nullptr }, @@ -6135,14 +6295,16 @@ DrawHelper qDrawHelper[QImage::NImageFormats] = }, }; -#if !defined(__SSE2__) +static_assert(std::size(qDrawHelper) == QImage::NImageFormats); + +#if !defined(Q_PROCESSOR_X86) void qt_memfill64(quint64 *dest, quint64 color, qsizetype count) { qt_memfill_template<quint64>(dest, color, count); } #endif -#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG) +#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG) __attribute__((optimize("no-tree-vectorize"))) #endif void qt_memfill24(quint24 *dest, quint24 color, qsizetype count) @@ -6202,16 +6364,15 @@ void qt_memfill16(quint16 *dest, quint16 value, qsizetype count) qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2); } -#if !defined(__SSE2__) && !defined(__ARM_NEON__) && !defined(__MIPS_DSP__) +#if defined(Q_PROCESSOR_X86) +void (*qt_memfill32)(quint32 *dest, quint32 value, qsizetype count) = nullptr; +void (*qt_memfill64)(quint64 *dest, quint64 value, qsizetype count) = nullptr; +#elif !defined(__ARM_NEON__) && !defined(__MIPS_DSP__) void qt_memfill32(quint32 *dest, quint32 color, qsizetype count) { qt_memfill_template<quint32>(dest, color, count); } #endif -#ifdef __SSE2__ -decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr; -decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr; -#endif #ifdef QT_COMPILER_SUPPORTS_SSE4_1 template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *); @@ -6224,8 +6385,11 @@ static void qInitDrawhelperFunctions() // Set up basic blend function tables. qInitBlendFunctions(); -#ifdef __SSE2__ -# ifndef __AVX2__ +#if defined(Q_PROCESSOR_X86) && !defined(__SSE2__) + qt_memfill32 = qt_memfill_template<quint32>; + qt_memfill64 = qt_memfill_template<quint64>; +#elif defined(__SSE2__) +# ifndef __haswell__ qt_memfill32 = qt_memfill32_sse2; qt_memfill64 = qt_memfill64_sse2; # endif @@ -6332,7 +6496,7 @@ static void qInitDrawhelperFunctions() extern void QT_FASTCALL storeRGBx64FromRGBA64PM_sse4(uchar *, const QRgba64 *, int, int, const QList<QRgb> *, QDitherInfo *); extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length); -# ifndef __AVX2__ +# ifndef __haswell__ qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4; |