summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2021-01-26 16:16:31 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2021-02-03 08:22:16 +0100
commit936d499ed4a27e6836e9525114830c57fbfd37f3 (patch)
treeaa489f3ac6772efd46e1f8996bddfc7bf594db20 /src/gui/painting
parentba6b29a3676bdb7c524c5cd7d3d8f294c22b1d89 (diff)
Optimize RGBA64->RGBA64PM for SSE2/AVX2
And remove the direct conversion so we can get both the SIMD optimization and threading applied.

Change-Id: Id032ea91cc40c1cbf1c8a1da0386de35aa36cfb5
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- src/gui/painting/qdrawhelper.cpp | 8
-rw-r--r-- src/gui/painting/qdrawhelper_avx2.cpp | 32
-rw-r--r-- src/gui/painting/qpixellayout.cpp | 9
3 files changed, 45 insertions, 4 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 61963a5e61..f28e8a6c59 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -5225,16 +5225,16 @@ static void qInitDrawhelperFunctions()
qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
-#if QT_CONFIG(raster_64bit)
- extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QList<QRgb> *, QDitherInfo *);
- extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QList<QRgb> *, QDitherInfo *);
+ extern const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QList<QRgb> *, QDitherInfo *);
+ extern const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QList<QRgb> *, QDitherInfo *);
extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
+ extern const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
-#endif
+ qPixelLayouts[QImage::Format_RGBA64].fetchToRGBA64PM = fetchRGBA64ToRGBA64PM_avx2;
}
#endif
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 7d85355003..e401a1463f 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -1229,6 +1229,38 @@ const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *buffer, const u
return buffer;
}
+/*
+    Reads \a count 64-bit pixels starting at pixel offset \a index of \a src,
+    multiplies the three colour components of each pixel by that pixel's own
+    alpha (scaled by 1/65535), and writes the result to \a buffer. The alpha
+    component itself is copied through unchanged, matching what the generic
+    path obtains from QRgba64::premultiplied().
+
+    The colour-table and dither arguments are unused. Returns \a buffer.
+
+    Alpha is taken from the fourth 16-bit component of every pixel (index 3
+    within each 64-bit group).
+*/
+const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count,
+                                                       const QList<QRgb> *, QDitherInfo *)
+{
+    const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
+    int i = 0;
+    // Rounding bias added before the final ">> 16".
+    const __m256i vh = _mm256_set1_epi32(0x8000);
+    // Main loop: four pixels (256 bits) per iteration.
+    for (; i < count - 3; i += 4) {
+        __m256i vs256 = _mm256_loadu_si256((const __m256i *)(s + i));
+        // Broadcast component 3 of every pixel across that pixel's four components.
+        __m256i va256 = _mm256_shufflelo_epi16(vs256, _MM_SHUFFLE(3, 3, 3, 3));
+        va256 = _mm256_shufflehi_epi16(va256, _MM_SHUFFLE(3, 3, 3, 3));
+        // Full 32-bit products p = component * alpha, assembled from low and high halves.
+        const __m256i vmullo = _mm256_mullo_epi16(vs256, va256);
+        const __m256i vmulhi = _mm256_mulhi_epu16(vs256, va256);
+        __m256i vslo = _mm256_unpacklo_epi16(vmullo, vmulhi);
+        __m256i vshi = _mm256_unpackhi_epi16(vmullo, vmulhi);
+        // p / 65535 computed as (p + (p >> 16) + 0x8000) >> 16.
+        vslo = _mm256_add_epi32(vslo, _mm256_srli_epi32(vslo, 16));
+        vshi = _mm256_add_epi32(vshi, _mm256_srli_epi32(vshi, 16));
+        vslo = _mm256_add_epi32(vslo, vh);
+        vshi = _mm256_add_epi32(vshi, vh);
+        vslo = _mm256_srli_epi32(vslo, 16);
+        vshi = _mm256_srli_epi32(vshi, 16);
+        vs256 = _mm256_packus_epi32(vslo, vshi);
+        // The multiply above also scaled alpha by itself (a * a / 65535).
+        // Restore the original alpha in 16-bit elements 3 and 7 of each
+        // 128-bit lane (mask 0x88) so only the colour channels are premultiplied.
+        vs256 = _mm256_blend_epi16(vs256, va256, 0x88);
+        _mm256_storeu_si256((__m256i *)(buffer + i), vs256);
+    }
+    // Tail: remaining 0-3 pixels, one at a time.
+    for (; i < count; ++i) {
+        __m128i vs = _mm_loadl_epi64((const __m128i *)(s + i));
+        __m128i va = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 3, 3, 3));
+        vs = multiplyAlpha65535(vs, va);
+        // Same correction as in the vector loop: put the source alpha back
+        // into element 3 instead of keeping the alpha-times-alpha product.
+        vs = _mm_blend_epi16(vs, va, 0x08);
+        _mm_storel_epi64((__m128i *)(buffer + i), vs);
+    }
+    return buffer;
+}
+
QT_END_NAMESPACE
#endif
diff --git a/src/gui/painting/qpixellayout.cpp b/src/gui/painting/qpixellayout.cpp
index acaee9f289..8dd62b0c00 100644
--- a/src/gui/painting/qpixellayout.cpp
+++ b/src/gui/painting/qpixellayout.cpp
@@ -1045,8 +1045,17 @@ static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const u
const QList<QRgb> *, QDitherInfo *)
{
const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
+#ifdef __SSE2__
+ for (int i = 0; i < count; ++i) {
+ __m128i vs = _mm_loadl_epi64((const __m128i *)(s + i));
+ __m128i va = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 3, 3, 3));
+ vs = multiplyAlpha65535(vs, va);
+ _mm_storel_epi64((__m128i *)(buffer + i), vs);
+ }
+#else
for (int i = 0; i < count; ++i)
buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied();
+#endif
return buffer;
}