summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2018-11-11 18:57:12 -0800
committer: Thiago Macieira <thiago.macieira@intel.com> 2018-12-12 03:44:31 +0000
commit: 58f2aa907f63bd0be61b2b6e55511c0867b42683 (patch)
tree: dcc819fcb56cf3a1502575095952fbabad5599ac
parent: 3df79b2953aa9142d66bd57676c6308acde98b47 (diff)
Work around GCC bug in generating 64-bit population of SSE register
We know what code we want GCC to generate, so I just replaced the _mm256_set1_epi64x() call with the intrinsics that correspond to that code. Except that GCC sees through this and tries to "optimize" my code, so the empty asm() statement forces it to keep the two operations separate. This generates optimal code for both 32- and 64-bit.

64-bit:
    vmovq %rdi, %xmm0
    vpbroadcastq %xmm0, %ymm0

32-bit:
    vmovq 8(%esp), %xmm0
    vpbroadcastq %xmm0, %ymm0

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87976

Change-Id: I42a48bd64ccc41aebf84fffd15664109b97fe42b
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r--src/gui/painting/qdrawhelper_avx2.cpp13
1 files changed, 12 insertions, 1 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 3a37b85366..2e36f538bd 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -359,7 +359,18 @@ void Q_DECL_VECTORCALL qt_memfillXX_avx2(uchar *dest, __m256i value256, qsizetyp
void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count)
{
- qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), _mm256_set1_epi64x(value), count * sizeof(quint64));
+#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+ // work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820
+ __m128i value64 = _mm_set_epi64x(0, value); // _mm_cvtsi64_si128(value);
+# ifdef Q_PROCESSOR_X86_64
+ asm ("" : "+x" (value64));
+# endif
+ __m256i value256 = _mm256_broadcastq_epi64(value64);
+#else
+ __m256i value256 = _mm256_set1_epi64x(value);
+#endif
+
+ qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), value256, count * sizeof(quint64));
}
void qt_memfill32_avx2(quint32 *dest, quint32 value, qsizetype count)