From 3ce88e13b9cc56e00bc3f075e9649c6b291ae01c Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Mon, 9 Mar 2015 10:16:18 +0100 Subject: Add AVX2 autovectorized versions of premultiply Following up on using GCC's autovectorizing for faster SSE4.1 premultiply, this patch adds specialized autovectorized versions of premultiply for AVX2, giving another almost doubling in speed. To make the speed up for AVX2 and also SSE4_1 available to non-GCC compilers, the target-specific methods have been moved to separate files. Change-Id: I97ce05be67f4adeeb9a096eef80fd5fb662099f3 Reviewed-by: Gunnar Sletta --- src/gui/image/image.pri | 2 + src/gui/image/qimage_avx2.cpp | 61 +++++++++++++++++++++++++ src/gui/image/qimage_conversions.cpp | 42 +++++++---------- src/gui/image/qimage_sse4.cpp | 70 +++++++++++++++++++++++++++++ src/gui/painting/painting.pri | 2 + src/gui/painting/qdrawhelper.cpp | 85 +++++++++-------------------------- src/gui/painting/qdrawhelper_avx2.cpp | 54 ++++++++++++++++++++++ src/gui/painting/qdrawhelper_p.h | 16 +++++++ src/gui/painting/qdrawhelper_sse4.cpp | 79 ++++++++++++++++++++++++++++++++ 9 files changed, 321 insertions(+), 90 deletions(-) create mode 100644 src/gui/image/qimage_avx2.cpp create mode 100644 src/gui/image/qimage_sse4.cpp create mode 100644 src/gui/painting/qdrawhelper_avx2.cpp create mode 100644 src/gui/painting/qdrawhelper_sse4.cpp (limited to 'src/gui') diff --git a/src/gui/image/image.pri b/src/gui/image/image.pri index 7022a6efd0..8db944e5e3 100644 --- a/src/gui/image/image.pri +++ b/src/gui/image/image.pri @@ -80,6 +80,8 @@ contains(QT_CONFIG, gif):include($$PWD/qgifhandler.pri) # SIMD SSE2_SOURCES += image/qimage_sse2.cpp SSSE3_SOURCES += image/qimage_ssse3.cpp +SSE4_1_SOURCES += image/qimage_sse4.cpp +AVX2_SOURCES += image/qimage_avx2.cpp NEON_SOURCES += image/qimage_neon.cpp MIPS_DSPR2_SOURCES += image/qimage_mips_dspr2.cpp MIPS_DSPR2_ASM += image/qimage_mips_dspr2_asm.S diff --git a/src/gui/image/qimage_avx2.cpp b/src/gui/image/qimage_avx2.cpp new file mode 100644 index 0000000000..c52baec948 --- /dev/null +++ b/src/gui/image/qimage_avx2.cpp @@ -0,0 +1,61 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include +#include +#include +#include + +#ifdef QT_COMPILER_SUPPORTS_AVX2 + +QT_BEGIN_NAMESPACE + +void convert_ARGB_to_ARGB_PM_avx2(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +{ + Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); + Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); + Q_ASSERT(src->width == dest->width); + Q_ASSERT(src->height == dest->height); + + const uint *src_data = (uint *) src->data; + uint *dest_data = (uint *) dest->data; + for (int i = 0; i < src->height; ++i) { + qt_convertARGB32ToARGB32PM(dest_data, src_data, src->width); + src_data += src->bytes_per_line >> 2; + dest_data += dest->bytes_per_line >> 2; + } +} + +QT_END_NAMESPACE + +#endif // QT_COMPILER_SUPPORTS_AVX2 diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index 5103d820d6..fe76c6d3ba 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -32,7 +32,6 @@ ****************************************************************************/ #include -#include #include #include #include @@ -110,17 +109,6 @@ static const uint *QT_FASTCALL convertRGB32FromARGB32PM(uint *buffer, const uint return buffer; } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertRGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = 0xff000000 | qUnpremultiply_sse4(src[i]); - return buffer; -} -#endif - static const uint *QT_FASTCALL convertRGB32ToARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -129,6 +117,10 @@ static const uint *QT_FASTCALL convertRGB32ToARGB32PM(uint *buffer, const uint * return buffer; } +#ifdef QT_COMPILER_SUPPORTS_SSE4_1 +extern const uint *QT_FASTCALL convertRGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); +#endif + void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { // Cannot be used with indexed formats. @@ -152,7 +144,7 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio if (src->format == QImage::Format_RGB32) convertToARGB32PM = convertRGB32ToARGB32PM; if (dest->format == QImage::Format_RGB32) { -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) +#ifdef QT_COMPILER_SUPPORTS_SSE4_1 if (qCpuHasFeature(SSE4_1)) convertFromARGB32PM = convertRGB32FromARGB32PM_sse4; else @@ -201,7 +193,7 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im if (data->format == QImage::Format_RGB32) convertToARGB32PM = convertRGB32ToARGB32PM; if (dst_format == QImage::Format_RGB32) { -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) +#ifdef QT_COMPILER_SUPPORTS_SSE4_1 if (qCpuHasFeature(SSE4_1)) convertFromARGB32PM = convertRGB32FromARGB32PM_sse4; else @@ -257,7 +249,7 @@ static bool convert_passthrough_inplace(QImageData *data, Qt::ImageConversionFla return true; } -static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +static void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) { Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); @@ -281,15 +273,6 @@ static inline void convert_ARGB_to_ARGB_PM(QImageData *dest, const QImageData *s } } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) -QT_FUNCTION_TARGET(SSE4_1) -static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags flags) -{ - // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. - convert_ARGB_to_ARGB_PM(dest, src, flags); -} -#endif - Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len) { int pixel = 0; @@ -2804,13 +2787,22 @@ void qInitImageConversions() } #endif -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) +#if defined(QT_COMPILER_SUPPORTS_SSE4_1) && !defined(__SSE4_1__) if (qCpuHasFeature(SSE4_1)) { + extern void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_sse4; } #endif +#if defined(QT_COMPILER_SUPPORTS_AVX2) && !defined(__AVX2__) + if (qCpuHasFeature(AVX2)) { + extern void convert_ARGB_to_ARGB_PM_avx2(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); + qimage_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_avx2; + qimage_converter_map[QImage::Format_RGBA8888][QImage::Format_RGBA8888_Premultiplied] = convert_ARGB_to_ARGB_PM_avx2; + } +#endif + #if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; diff --git a/src/gui/image/qimage_sse4.cpp b/src/gui/image/qimage_sse4.cpp new file mode 100644 index 0000000000..5fad4f572a --- /dev/null +++ b/src/gui/image/qimage_sse4.cpp @@ -0,0 +1,70 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include +#include +#include +#include +#include + +#ifdef QT_COMPILER_SUPPORTS_SSE4_1 + +QT_BEGIN_NAMESPACE + +const uint *QT_FASTCALL convertRGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = 0xff000000 | qUnpremultiply_sse4(src[i]); + return buffer; +} + +void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +{ + Q_ASSERT(src->format == QImage::Format_ARGB32 || src->format == QImage::Format_RGBA8888); + Q_ASSERT(dest->format == QImage::Format_ARGB32_Premultiplied || dest->format == QImage::Format_RGBA8888_Premultiplied); + Q_ASSERT(src->width == dest->width); + Q_ASSERT(src->height == dest->height); + + const uint *src_data = (uint *) src->data; + uint *dest_data = (uint *) dest->data; + for (int i = 0; i < src->height; ++i) { + qt_convertARGB32ToARGB32PM(dest_data, src_data, src->width); + src_data += src->bytes_per_line >> 2; + dest_data += dest->bytes_per_line >> 2; + } +} + +QT_END_NAMESPACE + +#endif // QT_COMPILER_SUPPORTS_SSE4_1 diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri index 6ee1beaa3c..2f2d3daaf8 100644 --- a/src/gui/painting/painting.pri +++ b/src/gui/painting/painting.pri @@ -93,6 +93,8 @@ SOURCES += \ SSE2_SOURCES += painting/qdrawhelper_sse2.cpp SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp +SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp +AVX2_SOURCES += painting/qdrawhelper_avx2.cpp !ios { CONFIG += no_clang_integrated_as diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 04857fb0d6..0b269cc5a7 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -355,24 +355,11 @@ static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int, return src; } -static inline const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = qPremultiply(src[i]); - return buffer; -} - -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *layout, const QRgb *clut) +static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) { - // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. - return convertARGB32ToARGB32PM(buffer, src, count, layout, clut); + return qt_convertARGB32ToARGB32PM(buffer, src, count); } -#endif - static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) @@ -382,24 +369,12 @@ static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const u return buffer; } -static inline const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, +static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { - for (int i = 0; i < count; ++i) - buffer[i] = qPremultiply(RGBA2ARGB(src[i])); - return buffer; + return qt_convertRGBA8888ToARGB32PM(buffer, src, count); } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) && !defined(__SSE4_1__) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *layout, const QRgb *clut) -{ - // Twice as fast autovectorized due to SSE4.1 PMULLD instructions. - return convertRGBA8888ToARGB32PM(buffer, src, count, layout, clut); -} -#endif - static const uint *QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -424,18 +399,6 @@ static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uin return buffer; } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = qUnpremultiply_sse4(src[i]); - return buffer; -} -#endif - - static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -452,17 +415,6 @@ static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const u return buffer; } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(qUnpremultiply_sse4(src[i])); - return buffer; -} -#endif - static const uint *QT_FASTCALL convertRGBXFromRGB32(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) { @@ -479,17 +431,6 @@ static const uint *QT_FASTCALL convertRGBXFromARGB32PM(uint *buffer, const uint return buffer; } -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) -QT_FUNCTION_TARGET(SSE4_1) -static const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, - const QPixelLayout *, const QRgb *) -{ - for (int i = 0; i < count; ++i) - buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply_sse4(src[i])); - return buffer; -} -#endif - template static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *) @@ -6793,18 +6734,32 @@ void qInitDrawhelperAsm() } #endif // SSSE3 -#if QT_COMPILER_SUPPORTS_HERE(SSE4_1) +#if QT_COMPILER_SUPPORTS_SSE4_1 if (qCpuHasFeature(SSE4_1)) { #if !defined(__SSE4_1__) + extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; #endif + extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; } #endif +#if QT_COMPILER_SUPPORTS_AVX2 && !defined(__AVX2__) + if (qCpuHasFeature(AVX2)) { + extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, const QPixelLayout *, const QRgb *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2; + } +#endif + functionForModeAsm = qt_functionForMode_SSE2; functionForModeSolidAsm = qt_functionForModeSolid_SSE2; #endif // SSE2 diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp new file mode 100644 index 0000000000..5716be682b --- /dev/null +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -0,0 +1,54 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include + +#if defined(QT_COMPILER_SUPPORTS_AVX2) + +QT_BEGIN_NAMESPACE + +const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + return qt_convertARGB32ToARGB32PM(buffer, src, count); +} + +const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + return qt_convertRGBA8888ToARGB32PM(buffer, src, count); +} + +QT_END_NAMESPACE + +#endif diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 08bc0776f7..51e51fd53f 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -893,6 +893,22 @@ inline int qBlue565(quint16 rgb) { return (b << 3) | (b >> 2); } + +static Q_ALWAYS_INLINE const uint *qt_convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qPremultiply(src[i]); + return buffer; +} + +static Q_ALWAYS_INLINE const uint *qt_convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qPremultiply(RGBA2ARGB(src[i])); + return buffer; +} + + const uint qt_bayer_matrix[16][16] = { { 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc, 0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff}, diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp new file mode 100644 index 0000000000..43a3958997 --- /dev/null +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -0,0 +1,79 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include +#include + +#if defined(QT_COMPILER_SUPPORTS_SSE4_1) + +QT_BEGIN_NAMESPACE + +const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + return qt_convertARGB32ToARGB32PM(buffer, src, count); +} + +const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + return qt_convertRGBA8888ToARGB32PM(buffer, src, count); +} + +const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = qUnpremultiply_sse4(src[i]); + return buffer; +} + +const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = ARGB2RGBA(qUnpremultiply_sse4(src[i])); + return buffer; +} + +const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, + const QPixelLayout *, const QRgb *) +{ + for (int i = 0; i < count; ++i) + buffer[i] = ARGB2RGBA(0xff000000 | qUnpremultiply_sse4(src[i])); + return buffer; +} + +QT_END_NAMESPACE + +#endif -- cgit v1.2.3