summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qdrawhelper.cpp
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2016-12-20 16:17:58 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2017-01-31 00:14:11 +0000
commit85468f7bccb276c2be5801481a6ce10f07581cdb (patch)
treef96ef309303ed0caf91b0c37cabee4d295cb19d3 /src/gui/painting/qdrawhelper.cpp
parentad4f7b59ead6c4eb17e787bce25a7211b866063f (diff)
Manually vectorize ARGB32toARGB32PM for SSE4.1 and NEON
Manually vectorizing is significantly faster because we can optimize for common cases like long stretches of opaque or transparent pixels. This is both smaller and faster than the auto-vectorized version, it is also much faster than the autovectorized version for AVX2 which then can be removed. Change-Id: I0fa80ce273a8387cc6cd084879822ad9bade385c Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r--src/gui/painting/qdrawhelper.cpp23
1 files changed, 11 insertions, 12 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 9b5f15470e..4ea3b37d5f 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6196,20 +6196,18 @@ static void qInitDrawhelperFunctions()
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
if (qCpuHasFeature(SSE4_1)) {
-#if !defined(__SSE4_1__)
extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
- qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
- qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
-#endif
extern const uint *QT_FASTCALL convertARGB32FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBA8888FromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBXFromARGB32PM_sse4(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_ARGB32].convertFromARGB32PM = convertARGB32FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].convertFromARGB32PM = convertRGBA8888FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBX8888].convertFromARGB32PM = convertRGBXFromARGB32PM_sse4;
@@ -6220,14 +6218,6 @@ static void qInitDrawhelperFunctions()
#if defined(QT_COMPILER_SUPPORTS_AVX2)
if (qCpuHasFeature(AVX2)) {
-#if !defined(__AVX2__)
- extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *);
- extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, const uint *src, int count,
- const QVector<QRgb> *, QDitherInfo *);
- qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
- qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
-#endif
extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h, int const_alpha);
@@ -6277,6 +6267,15 @@ static void qInitDrawhelperFunctions()
sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
+#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+ extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *);
+ extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count,
+ const QVector<QRgb> *, QDitherInfo *);
+ qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
+ qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
+#endif
+
#if defined(ENABLE_PIXMAN_DRAWHELPERS)
// The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64
qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;