diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-12-20 16:17:58 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-01-31 00:14:11 +0000 |
commit | 85468f7bccb276c2be5801481a6ce10f07581cdb (patch) | |
tree | f96ef309303ed0caf91b0c37cabee4d295cb19d3 /src/gui/painting/qdrawhelper.cpp | |
parent | ad4f7b59ead6c4eb17e787bce25a7211b866063f (diff) |
Manually vectorize ARGB32toARGB32PM for SSE4.1 and NEON
Manually vectorizing is significantly faster because we can optimize
for common cases like long stretches of opaque or transparent pixels.
This is both smaller and faster than the auto-vectorized version; it is
also much faster than the auto-vectorized version for AVX2, which can then
be removed.
Change-Id: I0fa80ce273a8387cc6cd084879822ad9bade385c
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 23 |
1 files changed, 11 insertions, 12 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 9b5f15470e..4ea3b37d5f 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -6196,20 +6196,18 @@ static void qInitDrawhelperFunctions() #if defined(QT_COMPILER_SUPPORTS_SSE4_1) if (qCpuHasFeature(SSE4_1)) { -#if !defined(__SSE4_1__) extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); - qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; - qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; -#endif extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count, const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4; qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4; qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4; @@ -6220,14 +6218,6 @@ static void qInitDrawhelperFunctions() #if defined(QT_COMPILER_SUPPORTS_AVX2) if (qCpuHasFeature(AVX2)) { -#if !defined(__AVX2__) - extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, 
QDitherInfo *); - extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count, - const QVector<QRgb> *, QDitherInfo *); - qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2; - qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2; -#endif extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha); @@ -6277,6 +6267,15 @@ static void qInitDrawhelperFunctions() sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon; +#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN + extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count, + const QVector<QRgb> *, QDitherInfo *); + qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon; + qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon; +#endif + #if defined(ENABLE_PIXMAN_DRAWHELPERS) // The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon; |