diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2018-11-11 18:57:12 -0800 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2018-12-12 03:44:31 +0000 |
commit | 58f2aa907f63bd0be61b2b6e55511c0867b42683 (patch) | |
tree | dcc819fcb56cf3a1502575095952fbabad5599ac /src | |
parent | 3df79b2953aa9142d66bd57676c6308acde98b47 (diff) |
Work around GCC bug in generating 64-bit population of SSE register
We know what code we want it to generate, so I just replaced the
_mm256_set1_epi64x() with the code we want it to generate. Except that GCC
sees through it and tries to "optimize" my code... so an empty asm()
statement (x86-64 builds only) forces it to keep the two operations separate.
This generates optimal code for both 32- and 64-bit. 64-bit:
vmovq %rdi, %xmm0
vpbroadcastq %xmm0, %ymm0
32-bit:
vmovq 8(%esp), %xmm0
vpbroadcastq %xmm0, %ymm0
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820 and
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87976
Change-Id: I42a48bd64ccc41aebf84fffd15664109b97fe42b
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/painting/qdrawhelper_avx2.cpp | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 3a37b85366..2e36f538bd 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -359,7 +359,18 @@ void Q_DECL_VECTORCALL qt_memfillXX_avx2(uchar *dest, __m256i value256, qsizetyp
 
 void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count)
 {
-    qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), _mm256_set1_epi64x(value), count * sizeof(quint64));
+#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+    // work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820
+    __m128i value64 = _mm_set_epi64x(0, value); // _mm_cvtsi64_si128(value);
+# ifdef Q_PROCESSOR_X86_64
+    asm ("" : "+x" (value64));
+# endif
+    __m256i value256 = _mm256_broadcastq_epi64(value64);
+#else
+    __m256i value256 = _mm256_set1_epi64x(value);
+#endif
+
+    qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), value256, count * sizeof(quint64));
 }
 
 void qt_memfill32_avx2(quint32 *dest, quint32 value, qsizetype count)