summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-11-24 14:32:53 +0100
committer: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2015-11-26 16:46:06 +0000
commit: 7cbbca586092944eab0f024e0c249044b7599c24 (patch)
tree: b4e3dc518bd004e96cf0005108cba0f74330a3eb /src
parent: 4a4b17805c976b40a404b48ba51984c9abdba633 (diff)
Add AArch64 ASM to qimage_neon.cpp
Since AArch64 NEON assembly is different from Arm32 NEON, we need to write a separate version. Assembly is used over intrinsics, as the intrinsics have trouble efficiently using the vstX and vldX instructions.

Change-Id: I5b67fc87acb2433b503e658099b742d57a9cff18
Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
Diffstat (limited to 'src')
-rw-r--r-- src/gui/image/qimage_conversions.cpp 2
-rw-r--r-- src/gui/image/qimage_neon.cpp 30
-rw-r--r-- src/gui/image/qjpeghandler.cpp 3
3 files changed, 30 insertions, 5 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index 8e0e53c58d..7d1fb23b15 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -2947,7 +2947,7 @@ void qInitImageConversions()
}
#endif
-#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
+#if defined(__ARM_NEON__)
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
qimage_converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon;
diff --git a/src/gui/image/qimage_neon.cpp b/src/gui/image/qimage_neon.cpp
index b51c43aa9d..5853510ee1 100644
--- a/src/gui/image/qimage_neon.cpp
+++ b/src/gui/image/qimage_neon.cpp
@@ -35,7 +35,7 @@
#include <private/qimage_p.h>
#include <private/qsimd_p.h>
-#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
+#if defined(__ARM_NEON__)
QT_BEGIN_NAMESPACE
@@ -55,6 +55,7 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons
if ((len - offsetToAlignOn8Bytes) >= 8) {
const quint32 *const simdEnd = end - 7;
+#if !defined(Q_PROCESSOR_ARM_64)
register uint8x8_t fullVector asm ("d3") = vdup_n_u8(0xff);
do {
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
@@ -76,6 +77,31 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons
);
#endif
} while (dst < simdEnd);
+#else
+ register uint8x8_t fullVector asm ("v3") = vdup_n_u8(0xff);
+ do {
+#if Q_BYTE_ORDER == Q_BIG_ENDIAN
+ asm volatile (
+ "ld3 { v4.8b, v5.8b, v6.8b }, [%[SRC]], #24 \n\t"
+ "st4 { v3.8b, v4.8b, v5.8b, v6.8b }, [%[DST]], #32 \n\t"
+ : [DST]"+r" (dst), [SRC]"+r" (src)
+ : "w"(fullVector)
+ : "memory", "v4", "v5", "v6"
+ );
+#else
+ asm volatile (
+ "ld3 { v0.8b, v1.8b, v2.8b }, [%[SRC]], #24 \n\t"
+ "mov v4.8b, v2.8b\n\t"
+ "mov v2.8b, v0.8b\n\t"
+ "mov v0.8b, v4.8b\n\t"
+ "st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [%[DST]], #32 \n\t"
+ : [DST]"+r" (dst), [SRC]"+r" (src)
+ : "w"(fullVector)
+ : "memory", "v0", "v1", "v2", "v4"
+ );
+#endif
+ } while (dst < simdEnd);
+#endif
}
while (dst != end) {
@@ -103,4 +129,4 @@ void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::I
QT_END_NAMESPACE
-#endif // defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
+#endif // defined(__ARM_NEON__)
diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp
index 7e9483e6f7..68709b708d 100644
--- a/src/gui/image/qjpeghandler.cpp
+++ b/src/gui/image/qjpeghandler.cpp
@@ -978,9 +978,8 @@ extern "C" void qt_convert_rgb888_to_rgb32_mips_dspr2_asm(quint32 *dst, const uc
QJpegHandler::QJpegHandler()
: d(new QJpegHandlerPrivate(this))
{
-#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64)
+#if defined(__ARM_NEON__)
// from qimage_neon.cpp
-
if (qCpuHasFeature(NEON))
d->rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon;
#endif