diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2018-11-02 19:38:29 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2018-12-11 19:04:59 +0000 |
commit | 40894d1a60d357bc46364ae038ede0159f32261b (patch) | |
tree | 9aaf83d5aae8e0207916a35bfdc80590b3e9d929 /src/gui/painting/qdrawhelper.cpp | |
parent | a440aada72f2ee78c5e27d70ecc79c0071673446 (diff) |
Add AVX2 versions of qt_memfill32 and qt_memfill64
The implementation is almost the same 4-way-unrolled loop, but because
of the wider registers, we fill 128 bytes per loop. Unlike the SSE2
implementation, the AVX2 version uses unaligned stores and won't try to
align in the prologue, matching glibc's __memset_avx2 (also unaligned).
Change-Id: Iba4b5c183776497d8ee1fffd15637ccb2a7b83bc
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index d97ace7480..59b46b84ef 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6288,6 +6288,10 @@ void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
     qt_memfill_template<quint32>(dest, color, count);
 }
 #endif
+#ifdef __SSE2__
+decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr;
+decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr;
+#endif
 
 #ifdef QT_COMPILER_SUPPORTS_SSE4_1
 template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *);
@@ -6301,6 +6305,10 @@ static void qInitDrawhelperFunctions()
     qInitBlendFunctions();
 
 #ifdef __SSE2__
+# ifndef __AVX2__
+    qt_memfill32 = qt_memfill32_sse2;
+    qt_memfill64 = qt_memfill64_sse2;
+# endif
     qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
     qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
     qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
@@ -6407,6 +6415,8 @@ static void qInitDrawhelperFunctions()
 
 #if defined(QT_COMPILER_SUPPORTS_AVX2)
     if (qCpuHasFeature(ArchHaswell)) {
+        qt_memfill32 = qt_memfill32_avx2;
+        qt_memfill64 = qt_memfill64_avx2;
         extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
                                                  const uchar *srcPixels, int sbpl,
                                                  int w, int h, int const_alpha);