summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-24 04:20:19 +0200
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-25 11:07:12 +0200
commitd28f3ffadec209511d8d992e3b35fae155b3d1c5 (patch)
treeae6de5aa97c85c05136dff9547bc7089bbe8c7bb
parent9f5457110f86fc11995efd93729c563e0713bebc (diff)
Implement qt_memfill32 with Neon.
This patch introduces an implementation of qt_memfill32 using the NEON instruction set of ARMv7. The loop is unrolled once to get better performance. This implementation of memfill is 330% faster on the N900. Reviewed-by: Samuel Rødal
-rw-r--r--src/gui/painting/qdrawhelper.cpp1
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp38
-rw-r--r--src/gui/painting/qdrawhelper_neon_p.h1
3 files changed, 40 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 0b74fc0e3a..be4275c83a 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7913,6 +7913,7 @@ void qInitDrawhelperAsm()
qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon;
qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon;
+ qt_memfill32 = qt_memfill32_neon;
}
#endif
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index c1f815d636..ed15c5cf8f 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -51,6 +51,44 @@
QT_BEGIN_NAMESPACE
+void qt_memfill32_neon(quint32 *dest, quint32 value, int count) // Stores `value` into `count` consecutive 32-bit words starting at `dest`: whole 16-word groups via NEON, the rest one word at a time.
+{
+ const int epilogueSize = count % 16; // words left over after the last whole 16-word group
+ if (count >= 16) { // the NEON loop is only entered when at least one whole group exists
+ quint32 *const neonEnd = dest + count - epilogueSize; // one past the last word written by the NEON loop
+ register uint32x4_t valueVector1 asm ("q0") = vdupq_n_u32(value); // pinned to q0 because the asm template names d0/d1 (the halves of q0) literally
+ register uint32x4_t valueVector2 asm ("q1") = valueVector1; // same fill pattern pinned to q1, i.e. d2/d3 in the template
+ while (dest != neonEnd) { // terminates only because each pass of the asm advances dest by exactly 16 words
+ asm volatile (
+ "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" // store d0-d3 at [dest]; `!` writes the advanced address back. vst2 interleaves lanes, which is harmless here since every lane holds `value`
+ "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" // second store of the same registers: the loop body is unrolled once
+ : [DST]"+r" (dest) // "+r": dest is both read and updated by the asm
+ : [VALUE1]"w"(valueVector1), [VALUE2]"w"(valueVector2) // not referenced in the template; presumably listed so the compiler keeps q0/q1 loaded with these variables (see GCC local register variable rules)
+ : "memory" // the asm writes memory the compiler cannot see through the operands
+ );
+ }
+ }
+
+ switch (epilogueSize) // scalar tail: every case falls through on purpose, so entering at case k performs k stores
+ {
+ case 15: *dest++ = value;
+ case 14: *dest++ = value;
+ case 13: *dest++ = value;
+ case 12: *dest++ = value;
+ case 11: *dest++ = value;
+ case 10: *dest++ = value;
+ case 9: *dest++ = value;
+ case 8: *dest++ = value;
+ case 7: *dest++ = value;
+ case 6: *dest++ = value;
+ case 5: *dest++ = value;
+ case 4: *dest++ = value;
+ case 3: *dest++ = value;
+ case 2: *dest++ = value;
+ case 1: *dest++ = value;
+ } // epilogueSize == 0 matches no case, so nothing more is written
+}
+
static inline uint16x8_t qvdiv_255_u16(uint16x8_t x, uint16x8_t half)
{
// result = (x + (x >> 8) + 0x80) >> 8
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index 182c936627..451edbc36b 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -120,6 +120,7 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
const QTransform &targetRectTransform,
int const_alpha);
+void qt_memfill32_neon(quint32 *dest, quint32 value, int count);
void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);
void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);