diff options
Diffstat (limited to 'src/gui/painting/qrgba64_p.h')
-rw-r--r-- | src/gui/painting/qrgba64_p.h | 181 |
1 files changed, 111 insertions, 70 deletions
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h index 58eefad68b..ae8b6fd8cb 100644 --- a/src/gui/painting/qrgba64_p.h +++ b/src/gui/painting/qrgba64_p.h @@ -1,41 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtGui module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #ifndef QRGBA64_P_H #define QRGBA64_P_H @@ -64,15 +28,7 @@ inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256) return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8); } -inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535) -{ - return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535), - qt_div_65535(rgba64.green() * alpha65535), - qt_div_65535(rgba64.blue() * alpha65535), - qt_div_65535(rgba64.alpha() * alpha65535)); -} - -#ifdef __SSE2__ +#if defined(__SSE2__) static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, __m128i va) { __m128i vs = rgba64; @@ -80,7 +36,7 @@ static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, __m12 vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16)); vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000)); vs = _mm_srai_epi32(vs, 16); - vs = _mm_packs_epi32(vs, _mm_setzero_si128()); + vs = _mm_packs_epi32(vs, vs); return vs; } static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, uint alpha65535) @@ -103,28 +59,49 @@ static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535) } #endif +static inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535) +{ +#if defined(__SSE2__) + const __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgba64)); + const __m128i vr = multiplyAlpha65535(v, alpha65535); + QRgba64 r; + _mm_storel_epi64(reinterpret_cast<__m128i *>(&r), vr); + return r; +#elif defined(__ARM_NEON__) + const uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); + const uint16x4_t vr = multiplyAlpha65535(v, alpha65535); + QRgba64 r; + vst1_u64(reinterpret_cast<uint64_t *>(&r), vreinterpret_u64_u16(vr)); + return r; +#else + return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535), + qt_div_65535(rgba64.green() * alpha65535), + qt_div_65535(rgba64.blue() * alpha65535), + qt_div_65535(rgba64.alpha() * alpha65535)); +#endif +} + +#if defined(__SSE2__) || defined(__ARM_NEON__) template<typename T> static inline T Q_DECL_VECTORCALL multiplyAlpha255(T rgba64, uint alpha255) { -#if defined(__SSE2__) || defined(__ARM_NEON__) return multiplyAlpha65535(rgba64, alpha255 * 257); +} #else +template<typename T> +static inline T multiplyAlpha255(T rgba64, uint alpha255) +{ return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255), qt_div_255(rgba64.green() * alpha255), qt_div_255(rgba64.blue() * alpha255), qt_div_255(rgba64.alpha() * alpha255)); -#endif -} - -inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) -{ - return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2)); } +#endif #if defined __SSE2__ static inline __m128i Q_DECL_VECTORCALL interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2) { - return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2)); + return _mm_add_epi16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2)); } #endif @@ -135,20 +112,36 @@ inline uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint a } #endif -inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) +static inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) { - return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2)); +#if defined(__SSE2__) + const __m128i vx = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&x)); + const __m128i vy = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&y)); + const __m128i vr = interpolate255(vx, alpha1, vy, alpha2); + QRgba64 r; + _mm_storel_epi64(reinterpret_cast<__m128i *>(&r), vr); + return r; +#elif defined(__ARM_NEON__) + const uint16x4_t vx = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&x))); + const uint16x4_t vy = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&y))); + const uint16x4_t vr = interpolate255(vx, alpha1, vy, alpha2); + QRgba64 r; + vst1_u64(reinterpret_cast<uint64_t *>(&r), vreinterpret_u64_u16(vr)); + return r; +#else + return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2)); +#endif } #if defined __SSE2__ static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2) { - return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); + return _mm_add_epi16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); } -// alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument. -static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2) + +static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, __m128i alpha1, __m128i y, __m128i alpha2) { - return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); + return _mm_add_epi16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); } #endif @@ -163,12 +156,42 @@ inline uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y } #endif -inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b) +static inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) { +#if defined(__SSE2__) + const __m128i vx = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&x)); + const __m128i vy = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&y)); + const __m128i vr = interpolate65535(vx, alpha1, vy, alpha2); + QRgba64 r; + _mm_storel_epi64(reinterpret_cast<__m128i *>(&r), vr); + return r; +#elif defined(__ARM_NEON__) + const uint16x4_t vx = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&x))); + const uint16x4_t vy = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&y))); + const uint16x4_t vr = interpolate65535(vx, alpha1, vy, alpha2); + QRgba64 r; + vst1_u64(reinterpret_cast<uint64_t *>(&r), vreinterpret_u64_u16(vr)); + return r; +#else + return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2)); +#endif +} + +static inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b) +{ +#if defined(__SSE2__) + const __m128i va = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&a)); + const __m128i vb = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&b)); + const __m128i vr = _mm_adds_epu16(va, vb); + QRgba64 r; + _mm_storel_epi64(reinterpret_cast<__m128i *>(&r), vr); + return r; +#else return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535), qMin(a.green() + b.green(), 65535), qMin(a.blue() + b.blue(), 65535), qMin(a.alpha() + b.alpha(), 65535)); +#endif } #if QT_COMPILER_SUPPORTS_HERE(SSE2) @@ -196,7 +219,7 @@ static inline uint toArgb32(uint16x4_t v) static inline uint toArgb32(QRgba64 rgba64) { #if defined __SSE2__ - __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgba64)); v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2)); return toArgb32(v); #elif defined __ARM_NEON__ @@ -216,7 +239,7 @@ static inline uint toArgb32(QRgba64 rgba64) static inline uint toRgba8888(QRgba64 rgba64) { #if defined __SSE2__ - __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); + __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgba64)); return toArgb32(v); #elif defined __ARM_NEON__ uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); @@ -230,8 +253,8 @@ static inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) { QRgba64 blend; #if defined(__SSE2__) - __m128i vd = _mm_loadl_epi64((const __m128i *)&d); - __m128i vs = _mm_loadl_epi64((const __m128i *)&s); + __m128i vd = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&d)); + __m128i vs = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&s)); __m128i va = _mm_cvtsi32_si128(rgbAlpha); va = _mm_unpacklo_epi8(va, va); va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); @@ -243,9 +266,9 @@ static inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16)); vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000)); vd = _mm_srai_epi32(vd, 16); - vd = _mm_packs_epi32(vd, _mm_setzero_si128()); + vd = _mm_packs_epi32(vd, vd); - _mm_storel_epi64((__m128i *)&blend, vd); + _mm_storel_epi64(reinterpret_cast<__m128i *>(&blend), vd); #elif defined(__ARM_NEON__) uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d)); uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s)); @@ -276,8 +299,17 @@ static inline void blend_pixel(QRgba64 &dst, QRgba64 src) { if (src.isOpaque()) dst = src; - else if (!src.isTransparent()) + else if (!src.isTransparent()) { +#if defined(__SSE2__) + const __m128i vd = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&dst)); + const __m128i vs = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&src)); + const __m128i via = _mm_xor_si128(_mm_set1_epi16(-1), _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 3, 3, 3))); + const __m128i vr = _mm_add_epi16(vs, multiplyAlpha65535(vd, via)); + _mm_storel_epi64(reinterpret_cast<__m128i *>(&dst), vr); +#else dst = src + multiplyAlpha65535(dst, 65535 - src.alpha()); +#endif + } } static inline void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha) @@ -285,8 +317,17 @@ static inline void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha) if (const_alpha == 255) return blend_pixel(dst, src); if (!src.isTransparent()) { +#if defined(__SSE2__) + const __m128i vd = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&dst)); + __m128i vs = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&src)); + vs = multiplyAlpha255(vs, const_alpha); + const __m128i via = _mm_xor_si128(_mm_set1_epi16(-1), _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 3, 3, 3))); + const __m128i vr = _mm_add_epi16(vs, multiplyAlpha65535(vd, via)); + _mm_storel_epi64(reinterpret_cast<__m128i *>(&dst), vr); +#else src = multiplyAlpha255(src, const_alpha); dst = src + multiplyAlpha65535(dst, 65535 - src.alpha()); +#endif } } |