summary | refs | log | tree | commit | diff | stats
path: root/src/gui/painting/qdrawhelper_sse2.cpp
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2018-11-02 19:38:29 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2018-12-11 19:04:59 +0000
commit: 40894d1a60d357bc46364ae038ede0159f32261b (patch)
tree: 9aaf83d5aae8e0207916a35bfdc80590b3e9d929 /src/gui/painting/qdrawhelper_sse2.cpp
parent: a440aada72f2ee78c5e27d70ecc79c0071673446 (diff)
Add AVX2 versions of qt_memfill32 and qt_memfill64
The implementation is almost the same 4-way-unrolled loop, but because of the wider registers, we fill 128 bytes per loop. Unlike the SSE2 implementation, the AVX2 version uses unaligned stores and won't try to align in the prologue, matching glibc's __memset_avx2 (also unaligned).

Change-Id: Iba4b5c183776497d8ee1fffd15637ccb2a7b83bc
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/gui/painting/qdrawhelper_sse2.cpp')
-rw-r--r-- src/gui/painting/qdrawhelper_sse2.cpp 6
1 file changed, 4 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index bf2e90f6af..0ac9508264 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -233,6 +233,7 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u
}
}
+#ifndef __AVX2__
static Q_NEVER_INLINE
void Q_DECL_VECTORCALL qt_memfillXX_aligned(void *dest, __m128i value128, quintptr bytecount)
{
@@ -255,7 +256,7 @@ void Q_DECL_VECTORCALL qt_memfillXX_aligned(void *dest, __m128i value128, quintp
}
}
-void qt_memfill64(quint64 *dest, quint64 value, qsizetype count)
+void qt_memfill64_sse2(quint64 *dest, quint64 value, qsizetype count)
{
quintptr misaligned = quintptr(dest) % sizeof(__m128i);
if (misaligned && count) {
@@ -285,7 +286,7 @@ void qt_memfill64(quint64 *dest, quint64 value, qsizetype count)
qt_memfillXX_aligned(dest, _mm_set1_epi64x(value), count * sizeof(quint64));
}
-void qt_memfill32(quint32 *dest, quint32 value, qsizetype count)
+void qt_memfill32_sse2(quint32 *dest, quint32 value, qsizetype count)
{
if (count < 4) {
// this simplifies the code below: the first switch can fall through
@@ -316,6 +317,7 @@ void qt_memfill32(quint32 *dest, quint32 value, qsizetype count)
qt_memfillXX_aligned(dest, _mm_set1_epi32(value), count * sizeof(quint32));
}
+#endif // !__AVX2__
void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
{