summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qdrawhelper_avx2.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/painting/qdrawhelper_avx2.cpp')
-rw-r--r--src/gui/painting/qdrawhelper_avx2.cpp93
1 files changed, 30 insertions, 63 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 3e7743e980..34de69ecf4 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -1,42 +1,6 @@
-/****************************************************************************
-**
-** Copyright (C) 2018 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtGui module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2018 The Qt Company Ltd.
+// Copyright (C) 2018 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qdrawhelper_p.h"
#include "qdrawhelper_x86_p.h"
@@ -359,7 +323,7 @@ void Q_DECL_VECTORCALL qt_memfillXX_avx2(uchar *dest, __m256i value256, qsizetyp
void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count)
{
-#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG)
// work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820
__m128i value64 = _mm_set_epi64x(0, value); // _mm_cvtsi64_si128(value);
# ifdef Q_PROCESSOR_X86_64
@@ -457,7 +421,7 @@ void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *dst, const QRgba64 *sr
#endif
#if QT_CONFIG(raster_fp)
-void QT_FASTCALL comp_func_SourceOver_rgbafp_avx2(QRgba32F *dst, const QRgba32F *src, int length, uint const_alpha)
+void QT_FASTCALL comp_func_SourceOver_rgbafp_avx2(QRgbaFloat32 *dst, const QRgbaFloat32 *src, int length, uint const_alpha)
{
Q_ASSERT(const_alpha < 256); // const_alpha is in [0-255]
@@ -478,14 +442,14 @@ void QT_FASTCALL comp_func_SourceOver_rgbafp_avx2(QRgba32F *dst, const QRgba32F
_mm256_storeu_ps((float *)(dst + x), dstVector);
}
if (x < length) {
- __m128 srcVector = _mm_load_ps((float *)(src + x));
- __m128 dstVector = _mm_load_ps((const float *)(dst + x));
+ __m128 srcVector = _mm_loadu_ps((const float *)&src[x]);
+ __m128 dstVector = _mm_loadu_ps((const float *)&dst[x]);
srcVector = _mm_mul_ps(srcVector, constAlphaVector);
__m128 alphaChannel = _mm_permute_ps(srcVector, _MM_SHUFFLE(3, 3, 3, 3));
alphaChannel = _mm_sub_ps(one, alphaChannel);
dstVector = _mm_mul_ps(dstVector, alphaChannel);
dstVector = _mm_add_ps(dstVector, srcVector);
- _mm_store_ps((float *)(dst + x), dstVector);
+ _mm_storeu_ps((float *)(dst + x), dstVector);
}
}
#endif
@@ -557,11 +521,11 @@ void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *dst, const QRgba64 *src, i
#endif
#if QT_CONFIG(raster_fp)
-void QT_FASTCALL comp_func_Source_rgbafp_avx2(QRgba32F *dst, const QRgba32F *src, int length, uint const_alpha)
+void QT_FASTCALL comp_func_Source_rgbafp_avx2(QRgbaFloat32 *dst, const QRgbaFloat32 *src, int length, uint const_alpha)
{
Q_ASSERT(const_alpha < 256); // const_alpha is in [0-255]
if (const_alpha == 255) {
- ::memcpy(dst, src, length * sizeof(QRgba32F));
+ ::memcpy(dst, src, length * sizeof(QRgbaFloat32));
} else {
const float ca = const_alpha / 255.f;
const float cia = 1.0f - ca;
@@ -580,12 +544,12 @@ void QT_FASTCALL comp_func_Source_rgbafp_avx2(QRgba32F *dst, const QRgba32F *src
_mm256_storeu_ps((float *)&dst[x], dstVector);
}
if (x < length) {
- __m128 srcVector = _mm_load_ps((const float *)&src[x]);
- __m128 dstVector = _mm_load_ps((const float *)&dst[x]);
+ __m128 srcVector = _mm_loadu_ps((const float *)&src[x]);
+ __m128 dstVector = _mm_loadu_ps((const float *)&dst[x]);
srcVector = _mm_mul_ps(srcVector, constAlphaVector);
dstVector = _mm_mul_ps(dstVector, oneMinusConstAlpha);
dstVector = _mm_add_ps(dstVector, srcVector);
- _mm_store_ps((float *)&dst[x], dstVector);
+ _mm_storeu_ps((float *)&dst[x], dstVector);
}
}
}
@@ -656,7 +620,7 @@ void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int
#endif
#if QT_CONFIG(raster_fp)
-void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgba32F *dst, int length, QRgba32F color, uint const_alpha)
+void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgbaFloat32 *dst, int length, QRgbaFloat32 color, uint const_alpha)
{
Q_ASSERT(const_alpha < 256); // const_alpha is in [0-255]
if (const_alpha == 255) {
@@ -666,7 +630,7 @@ void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgba32F *dst, int length, Q
const float a = const_alpha / 255.0f;
const __m128 alphaVector = _mm_set1_ps(a);
const __m128 minusAlphaVector = _mm_set1_ps(1.0f - a);
- __m128 colorVector = _mm_load_ps((const float *)&color);
+ __m128 colorVector = _mm_loadu_ps((const float *)&color);
colorVector = _mm_mul_ps(colorVector, alphaVector);
const __m256 colorVector256 = _mm256_insertf128_ps(_mm256_castps128_ps256(colorVector), colorVector, 1);
const __m256 minusAlphaVector256 = _mm256_set1_ps(1.0f - a);
@@ -678,22 +642,22 @@ void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgba32F *dst, int length, Q
_mm256_storeu_ps((float *)&dst[x], dstVector);
}
if (x < length) {
- __m128 dstVector = _mm_load_ps((const float *)&dst[x]);
+ __m128 dstVector = _mm_loadu_ps((const float *)&dst[x]);
dstVector = _mm_mul_ps(dstVector, minusAlphaVector);
dstVector = _mm_add_ps(dstVector, colorVector);
- _mm_store_ps((float *)&dst[x], dstVector);
+ _mm_storeu_ps((float *)&dst[x], dstVector);
}
}
}
-void QT_FASTCALL comp_func_solid_SourceOver_rgbafp_avx2(QRgba32F *dst, int length, QRgba32F color, uint const_alpha)
+void QT_FASTCALL comp_func_solid_SourceOver_rgbafp_avx2(QRgbaFloat32 *dst, int length, QRgbaFloat32 color, uint const_alpha)
{
Q_ASSERT(const_alpha < 256); // const_alpha is in [0-255]
if (const_alpha == 255 && color.a >= 1.0f) {
for (int i = 0; i < length; ++i)
dst[i] = color;
} else {
- __m128 colorVector = _mm_load_ps((const float *)&color);
+ __m128 colorVector = _mm_loadu_ps((const float *)&color);
if (const_alpha != 255)
colorVector = _mm_mul_ps(colorVector, _mm_set1_ps(const_alpha / 255.f));
__m128 minusAlphaOfColorVector =
@@ -709,10 +673,10 @@ void QT_FASTCALL comp_func_solid_SourceOver_rgbafp_avx2(QRgba32F *dst, int lengt
_mm256_storeu_ps((float *)&dst[x], dstVector);
}
if (x < length) {
- __m128 dstVector = _mm_load_ps((const float *)&dst[x]);
+ __m128 dstVector = _mm_loadu_ps((const float *)&dst[x]);
dstVector = _mm_mul_ps(dstVector, minusAlphaOfColorVector);
dstVector = _mm_add_ps(dstVector, colorVector);
- _mm_store_ps((float *)&dst[x], dstVector);
+ _mm_storeu_ps((float *)&dst[x], dstVector);
}
}
}
@@ -1381,13 +1345,16 @@ const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM_avx2(QRgba64 *buffer, const uch
vslo = _mm256_srli_epi32(vslo, 16);
vshi = _mm256_srli_epi32(vshi, 16);
vs256 = _mm256_packus_epi32(vslo, vshi);
+ vs256 = _mm256_blend_epi16(vs256, va256, 0x88);
_mm256_storeu_si256((__m256i *)(buffer + i), vs256);
}
for (; i < count; ++i) {
+ const auto a = s[i].alpha();
__m128i vs = _mm_loadl_epi64((const __m128i *)(s + i));
__m128i va = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 3, 3, 3));
vs = multiplyAlpha65535(vs, va);
_mm_storel_epi64((__m128i *)(buffer + i), vs);
+ buffer[i].setAlpha(a);
}
return buffer;
}
@@ -1573,7 +1540,7 @@ void QT_FASTCALL storeRGBA16FFromARGB32PM_avx2(uchar *dest, const uint *src, int
}
#if QT_CONFIG(raster_fp)
-const QRgba32F *QT_FASTCALL fetchRGBA16FToRGBA32F_avx2(QRgba32F *buffer, const uchar *src, int index, int count,
+const QRgbaFloat32 *QT_FASTCALL fetchRGBA16FToRGBA32F_avx2(QRgbaFloat32 *buffer, const uchar *src, int index, int count,
const QList<QRgb> *, QDitherInfo *)
{
const quint64 *s = reinterpret_cast<const quint64 *>(src) + index;
@@ -1590,19 +1557,19 @@ const QRgba32F *QT_FASTCALL fetchRGBA16FToRGBA32F_avx2(QRgba32F *buffer, const u
__m128 vsa = _mm_permute_ps(vsf, _MM_SHUFFLE(3, 3, 3, 3));
vsf = _mm_mul_ps(vsf, vsa);
vsf = _mm_insert_ps(vsf, vsa, 0x30);
- _mm_store_ps((float *)(buffer + i), vsf);
+ _mm_storeu_ps((float *)(buffer + i), vsf);
}
return buffer;
}
-void QT_FASTCALL storeRGBX16FFromRGBA32F_avx2(uchar *dest, const QRgba32F *src, int index, int count,
+void QT_FASTCALL storeRGBX16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count,
const QList<QRgb> *, QDitherInfo *)
{
quint64 *d = reinterpret_cast<quint64 *>(dest) + index;
const __m128 *s = reinterpret_cast<const __m128 *>(src);
const __m128 zero = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
for (int i = 0; i < count; ++i) {
- __m128 vsf = _mm_load_ps(reinterpret_cast<const float *>(s + i));
+ __m128 vsf = _mm_loadu_ps(reinterpret_cast<const float *>(s + i));
const __m128 vsa = _mm_permute_ps(vsf, _MM_SHUFFLE(3, 3, 3, 3));
const float a = _mm_cvtss_f32(vsa);
if (a == 1.0f)
@@ -1619,14 +1586,14 @@ void QT_FASTCALL storeRGBX16FFromRGBA32F_avx2(uchar *dest, const QRgba32F *src,
}
}
-void QT_FASTCALL storeRGBA16FFromRGBA32F_avx2(uchar *dest, const QRgba32F *src, int index, int count,
+void QT_FASTCALL storeRGBA16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count,
const QList<QRgb> *, QDitherInfo *)
{
quint64 *d = reinterpret_cast<quint64 *>(dest) + index;
const __m128 *s = reinterpret_cast<const __m128 *>(src);
const __m128 zero = _mm_set1_ps(0.0f);
for (int i = 0; i < count; ++i) {
- __m128 vsf = _mm_load_ps(reinterpret_cast<const float *>(s + i));
+ __m128 vsf = _mm_loadu_ps(reinterpret_cast<const float *>(s + i));
const __m128 vsa = _mm_permute_ps(vsf, _MM_SHUFFLE(3, 3, 3, 3));
const float a = _mm_cvtss_f32(vsa);
if (a == 1.0f)