summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2017-11-02 13:51:56 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2017-11-08 11:44:13 +0000
commitf8807b82207d7f4f41536f777473c8870673c186 (patch)
treef872ba0135fa878662fcb7ee34ec7846e7ca97de
parentb254b03dc3b1cfcfc5f17158d2470e0a9a946a0e (diff)
Expand ARGB32ToARGB32PM to also work on 32-bit ARM neon
Replaced two AArch64 specific instructions with 2-3 instruction replacements from ARM32. Change-Id: I5cbbda5afdaabea52babaaf8e5cc57262d897159 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
-rw-r--r--src/gui/painting/qdrawhelper.cpp2
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp40
2 files changed, 35 insertions, 7 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 5ec570a5db..149fa124f6 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -6443,7 +6443,7 @@ static void qInitDrawhelperFunctions()
sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
-#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
extern const uint *QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint *src, int count,
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index 4cbac009d8..0ec8ced2de 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -1080,23 +1080,41 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_neon(uint *buffer, const Oper
return buffer;
}
-#if defined(Q_PROCESSOR_ARM_64) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
template<bool RGBA>
static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int count)
{
int i = 0;
+#if defined(Q_PROCESSOR_ARM_64)
const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
+#else
+ const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
+#endif
const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
const uint32x4_t blendMask = vdupq_n_u32(0xff000000);
for (; i < count - 3; i += 4) {
uint32x4_t srcVector = vld1q_u32(src + i);
uint32x4_t alphaVector = vshrq_n_u32(srcVector, 24);
+#if defined(Q_PROCESSOR_ARM_64)
uint32_t alphaSum = vaddvq_u32(alphaVector);
+#else
+ // no vaddvq_u32
+ uint32x2_t tmp = vpadd_u32(vget_low_u32(alphaVector), vget_high_u32(alphaVector));
+ uint32_t alphaSum = vget_lane_u32(vpadd_u32(tmp, tmp), 0);
+#endif
if (alphaSum) {
if (alphaSum != 255 * 4) {
- if (RGBA)
+ if (RGBA) {
+#if defined(Q_PROCESSOR_ARM_64)
srcVector = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask));
+#else
+ // no vqtbl1q_u8
+ const uint8x8_t low = vtbl1_u8(vreinterpret_u8_u32(vget_low_u32(srcVector)), rgbaMask);
+ const uint8x8_t high = vtbl1_u8(vreinterpret_u8_u32(vget_high_u32(srcVector)), rgbaMask);
+ srcVector = vcombine_u32(vreinterpret_u32_u8(low), vreinterpret_u32_u8(high));
+#endif
+ }
const uint8x8_t s1 = vreinterpret_u8_u32(vget_low_u32(srcVector));
const uint8x8_t s2 = vreinterpret_u8_u32(vget_high_u32(srcVector));
const uint8x8_t alpha1 = vtbl1_u8(s1, shuffleMask);
@@ -1110,10 +1128,19 @@ static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int
const uint32x4_t d = vbslq_u32(blendMask, srcVector, vreinterpretq_u32_u8(vcombine_u8(d1, d2)));
vst1q_u32(buffer + i, d);
} else {
- if (RGBA)
- vst1q_u32(buffer + i, vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask)));
- else if (buffer != src)
+ if (RGBA) {
+#if defined(Q_PROCESSOR_ARM_64)
+ srcVector = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask))
+#else
+ // no vqtbl1q_u8
+ const uint8x8_t low = vtbl1_u8(vreinterpret_u8_u32(vget_low_u32(srcVector)), rgbaMask);
+ const uint8x8_t high = vtbl1_u8(vreinterpret_u8_u32(vget_high_u32(srcVector)), rgbaMask);
+ srcVector = vcombine_u32(vreinterpret_u32_u8(low), vreinterpret_u32_u8(high));
+#endif
+ vst1q_u32(buffer + i, srcVector);
+ } else if (buffer != src) {
vst1q_u32(buffer + i, srcVector);
+ }
}
} else {
vst1q_u32(buffer + i, vdupq_n_u32(0));
@@ -1139,7 +1166,8 @@ const uint *QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, const uint
convertARGBToARGB32PM_neon<true>(buffer, src, count);
return buffer;
}
-#endif
+
+#endif // Q_BYTE_ORDER == Q_LITTLE_ENDIAN
QT_END_NAMESPACE