diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-11-24 14:32:53 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-11-26 16:46:06 +0000 |
commit | 7cbbca586092944eab0f024e0c249044b7599c24 (patch) | |
tree | b4e3dc518bd004e96cf0005108cba0f74330a3eb | |
parent | 4a4b17805c976b40a404b48ba51984c9abdba633 (diff) |
Add AArch64 ASM to qimage_neon.cpp
Since AArch64 NEON assembly is different from Arm32 NEON we need to
write a separate version.
Assembly is used over intrinsics as the intrinsics have trouble
efficiently using the vstX and vldX instructions.
Change-Id: I5b67fc87acb2433b503e658099b742d57a9cff18
Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
-rw-r--r-- | src/gui/image/qimage_conversions.cpp | 2 | ||||
-rw-r--r-- | src/gui/image/qimage_neon.cpp | 30 | ||||
-rw-r--r-- | src/gui/image/qjpeghandler.cpp | 3 |
3 files changed, 30 insertions, 5 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index 8e0e53c58d..7d1fb23b15 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -2947,7 +2947,7 @@ void qInitImageConversions() } #endif -#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) +#if defined(__ARM_NEON__) extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags); qimage_converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon; qimage_converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon; diff --git a/src/gui/image/qimage_neon.cpp b/src/gui/image/qimage_neon.cpp index b51c43aa9d..5853510ee1 100644 --- a/src/gui/image/qimage_neon.cpp +++ b/src/gui/image/qimage_neon.cpp @@ -35,7 +35,7 @@ #include <private/qimage_p.h> #include <private/qsimd_p.h> -#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) +#if defined(__ARM_NEON__) QT_BEGIN_NAMESPACE @@ -55,6 +55,7 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons if ((len - offsetToAlignOn8Bytes) >= 8) { const quint32 *const simdEnd = end - 7; +#if !defined(Q_PROCESSOR_ARM_64) register uint8x8_t fullVector asm ("d3") = vdup_n_u8(0xff); do { #if Q_BYTE_ORDER == Q_BIG_ENDIAN @@ -76,6 +77,31 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, cons ); #endif } while (dst < simdEnd); +#else + register uint8x8_t fullVector asm ("v3") = vdup_n_u8(0xff); + do { +#if Q_BYTE_ORDER == Q_BIG_ENDIAN + asm volatile ( + "ld3 { v4.8b, v5.8b, v6.8b }, [%[SRC]], #24 \n\t" + "st4 { v3.8b, v4.8b, v5.8b, v6.8b }, [%[DST]], #32 \n\t" + : [DST]"+r" (dst), [SRC]"+r" (src) + : "w"(fullVector) + : "memory", "v4", "v5", "v6" + ); +#else + asm volatile ( + "ld3 { v0.8b, v1.8b, v2.8b }, [%[SRC]], #24 \n\t" + "mov v4.8b, v2.8b\n\t" + "mov v2.8b, v0.8b\n\t" + "mov v0.8b, v4.8b\n\t" + "st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [%[DST]], #32 \n\t" + : [DST]"+r" (dst), [SRC]"+r" (src) + : "w"(fullVector) + : "memory", "v0", "v1", "v2", "v4" + ); +#endif + } while (dst < simdEnd); +#endif } while (dst != end) { @@ -103,4 +129,4 @@ void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::I QT_END_NAMESPACE -#endif // defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) +#endif // defined(__ARM_NEON__) diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp index 7e9483e6f7..68709b708d 100644 --- a/src/gui/image/qjpeghandler.cpp +++ b/src/gui/image/qjpeghandler.cpp @@ -978,9 +978,8 @@ extern "C" void qt_convert_rgb888_to_rgb32_mips_dspr2_asm(quint32 *dst, const uc QJpegHandler::QJpegHandler() : d(new QJpegHandlerPrivate(this)) { -#if defined(__ARM_NEON__) && !defined(Q_PROCESSOR_ARM_64) +#if defined(__ARM_NEON__) // from qimage_neon.cpp - if (qCpuHasFeature(NEON)) d->rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon; #endif |