summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2018-11-05 19:12:21 -0800
committerThiago Macieira <thiago.macieira@intel.com>2018-12-12 03:44:28 +0000
commit3df79b2953aa9142d66bd57676c6308acde98b47 (patch)
tree98aadd8455229e9ff25e8e8437187b7cbbe003d5 /src/gui/painting
parent33177b0456f50a1f3eae7a37b4ecc897aaf7125e (diff)
Add an SSSE3 implementation of qt_memfill24
Brought to you by the PSHUFB instruction, introduced in SSSE3 (implementation also uses PALIGNR just because we can). The tail functionality makes use of the fact that the low half of "mval2" ends in the correct content, so we overwrite up to 8 bytes close to the end. Change-Id: Iba4b5c183776497d8ee1fffd15646a620829c335 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/qdrawhelper.cpp9
-rw-r--r--src/gui/painting/qdrawhelper_ssse3.cpp68
2 files changed, 76 insertions, 1 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 95d14a4500..9ef210cc68 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6268,8 +6268,17 @@ void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
}
#endif
+#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
+__attribute__((optimize("no-tree-vectorize")))
+#endif
void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
{
+# ifdef QT_COMPILER_SUPPORTS_SSSE3
+ extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
+ if (qCpuHasFeature(SSSE3))
+ return qt_memfill24_ssse3(dest, color, count);
+# endif
+
const quint32 v = color;
quint24 *end = dest + count;
diff --git a/src/gui/painting/qdrawhelper_ssse3.cpp b/src/gui/painting/qdrawhelper_ssse3.cpp
index 0891cff8b8..35d61c3e6c 100644
--- a/src/gui/painting/qdrawhelper_ssse3.cpp
+++ b/src/gui/painting/qdrawhelper_ssse3.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2018 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
@@ -188,6 +189,71 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Ope
return buffer;
}
+void qt_memfill24_ssse3(quint24 *dest, quint24 color, qsizetype count)
+{
+ // LCM of 12 and 16 bytes is 48 bytes (16 px)
+ quint32 v = color;
+ __m128i m = _mm_cvtsi32_si128(v);
+ quint24 *end = dest + count;
+
+ constexpr uchar x = 2, y = 1, z = 0;
+ Q_DECL_ALIGN(__m128i) static const uchar
+ shuffleMask[16 + 1] = { x, y, z, x, y, z, x, y, z, x, y, z, x, y, z, x, y };
+
+ __m128i mval1 = _mm_shuffle_epi8(m, _mm_load_si128(reinterpret_cast<const __m128i *>(shuffleMask)));
+ __m128i mval2 = _mm_shuffle_epi8(m, _mm_loadu_si128(reinterpret_cast<const __m128i *>(shuffleMask + 1)));
+ __m128i mval3 = _mm_alignr_epi8(mval2, mval1, 2);
+
+ for ( ; dest + 16 <= end; dest += 16) {
+#ifdef __AVX__
+ // Store using 32-byte AVX instruction
+ __m256 mval12 = _mm256_castps128_ps256(_mm_castsi128_ps(mval1));
+ mval12 = _mm256_insertf128_ps(mval12, _mm_castsi128_ps(mval2), 1);
+ _mm256_storeu_ps(reinterpret_cast<float *>(dest), mval12);
+#else
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 0, mval1);
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1, mval2);
+#endif
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2, mval3);
+ }
+
+ if (count < 3) {
+ if (count > 1)
+ end[-2] = v;
+ if (count)
+ end[-1] = v;
+ return;
+ }
+
+ // less than 16px/48B left
+ uchar *ptr = reinterpret_cast<uchar *>(dest);
+ uchar *ptr_end = reinterpret_cast<uchar *>(end);
+ qptrdiff left = ptr_end - ptr;
+ if (left >= 24) {
+ // 8px/24B or more left
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr) + 0, mval1);
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr) + 1, mval2);
+ ptr += 24;
+ left -= 24;
+ }
+
+ // less than 8px/24B left
+
+ if (left >= 16) {
+ // but more than 5px/15B left
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr) , mval1);
+ } else if (left >= 8) {
+ // but more than 2px/6B left
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr), mval1);
+ }
+
+ if (left) {
+ // 1 or 2px left
+ // store 8 bytes ending with the right values (will overwrite a bit)
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(ptr_end - 8), mval2);
+ }
+}
+
QT_END_NAMESPACE
#endif // QT_COMPILER_SUPPORTS_SSSE3