diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper_sse4.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper_sse4.cpp | 123 |
1 files changed, 75 insertions, 48 deletions
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp index dc9755e414..a7b4e6ba76 100644 --- a/src/gui/painting/qdrawhelper_sse4.cpp +++ b/src/gui/painting/qdrawhelper_sse4.cpp @@ -1,41 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtGui module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include <private/qdrawhelper_p.h> #include <private/qdrawingprimitive_sse2_p.h> @@ -46,7 +10,7 @@ QT_BEGIN_NAMESPACE -#ifndef __AVX2__ +#ifndef __haswell__ template<bool RGBA> static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count) { @@ -142,7 +106,7 @@ static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int cou buffer[i] = QRgba64::fromArgb32(s).premultiplied(); } } -#endif // __AVX2__ +#endif // __haswell__ static inline __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(__m128 a, float mul) { @@ -411,7 +375,7 @@ static inline void convertRGBA64FromRGBA64PM_sse4(QRgba64 *buffer, const QRgba64 } } -#ifndef __AVX2__ +#ifndef __haswell__ void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *) { convertARGBToARGB32PM_sse4<false>(buffer, buffer, count); @@ -463,7 +427,7 @@ const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const u convertARGBToRGBA64PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); return buffer; } -#endif // __AVX2__ +#endif // __haswell__ void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *) @@ -502,6 +466,13 @@ void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, i d[i] = qConvertArgb32ToA2rgb30_sse4<PixelOrder>(src[i]); } +template +void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count, + const QList<QRgb> *, QDitherInfo *); +template +void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>(uchar *dest, const uint *src, int index, int count, + const QList<QRgb> *, QDitherInfo *); + #if QT_CONFIG(raster_64bit) void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) { @@ -544,12 +515,68 @@ void QT_FASTCALL storeRGBx64FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, i convertRGBA64FromRGBA64PM_sse4<true>(d, src, count); } -template -void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count, - const QList<QRgb> *, QDitherInfo *); -template -void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>(uchar *dest, const uint *src, int index, int count, - const QList<QRgb> *, QDitherInfo *); +#if QT_CONFIG(raster_fp) +const QRgbaFloat32 *QT_FASTCALL fetchRGBA32FToRGBA32F_sse4(QRgbaFloat32 *buffer, const uchar *src, int index, int count, + const QList<QRgb> *, QDitherInfo *) +{ + const QRgbaFloat32 *s = reinterpret_cast<const QRgbaFloat32 *>(src) + index; + for (int i = 0; i < count; ++i) { + __m128 vsf = _mm_load_ps(reinterpret_cast<const float *>(s + i)); + __m128 vsa = _mm_shuffle_ps(vsf, vsf, _MM_SHUFFLE(3, 3, 3, 3)); + vsf = _mm_mul_ps(vsf, vsa); + vsf = _mm_insert_ps(vsf, vsa, 0x30); + _mm_store_ps(reinterpret_cast<float *>(buffer + i), vsf); + } + return buffer; +} + +void QT_FASTCALL storeRGBX32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, + const QList<QRgb> *, QDitherInfo *) +{ + QRgbaFloat32 *d = reinterpret_cast<QRgbaFloat32 *>(dest) + index; + const __m128 zero = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); + for (int i = 0; i < count; ++i) { + __m128 vsf = _mm_load_ps(reinterpret_cast<const float *>(src + i)); + const __m128 vsa = _mm_shuffle_ps(vsf, vsf, _MM_SHUFFLE(3, 3, 3, 3)); + const float a = _mm_cvtss_f32(vsa); + if (a == 1.0f) + { } + else if (a == 0.0f) + vsf = zero; + else { + __m128 vsr = _mm_rcp_ps(vsa); + vsr = _mm_sub_ps(_mm_add_ps(vsr, vsr), _mm_mul_ps(vsr, _mm_mul_ps(vsr, vsa))); + vsf = _mm_mul_ps(vsf, vsr); + vsf = _mm_insert_ps(vsf, _mm_set_ss(1.0f), 0x30); + } + _mm_store_ps(reinterpret_cast<float *>(d + i), vsf); + } +} + +void QT_FASTCALL storeRGBA32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, + const QList<QRgb> *, QDitherInfo *) +{ + QRgbaFloat32 *d = reinterpret_cast<QRgbaFloat32 *>(dest) + index; + const __m128 zero = _mm_set1_ps(0.0f); + for (int i = 0; i < count; ++i) { + __m128 vsf = _mm_load_ps(reinterpret_cast<const float *>(src + i)); + const __m128 vsa = _mm_shuffle_ps(vsf, vsf, _MM_SHUFFLE(3, 3, 3, 3)); + const float a = _mm_cvtss_f32(vsa); + if (a == 1.0f) + { } + else if (a == 0.0f) + vsf = zero; + else { + __m128 vsr = _mm_rcp_ps(vsa); + vsr = _mm_sub_ps(_mm_add_ps(vsr, vsr), _mm_mul_ps(vsr, _mm_mul_ps(vsr, vsa))); + vsr = _mm_insert_ps(vsr, _mm_set_ss(1.0f), 0x30); + vsf = _mm_mul_ps(vsf, vsr); + } + _mm_store_ps(reinterpret_cast<float *>(d + i), vsf); + } +} +#endif + QT_END_NAMESPACE |