summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qdrawhelper_neon.cpp
diff options
context:
space:
mode:
authorSamuel Rødal <samuel.rodal@nokia.com>2011-04-13 10:15:06 +0200
committerOlivier Goffart <olivier.goffart@nokia.com>2011-05-10 12:54:45 +0200
commite05443367f60e591556ae8854ecb634a7cf6ea33 (patch)
treedbc1a06cb4d7e76f25c0b1ca6f7d79ea7daf4f5e /src/gui/painting/qdrawhelper_neon.cpp
parentdb5803959182f891259b457b5bac2ed54785b709 (diff)
Improved gradient table generation performance for two-stop gradients.
Two stops is a fairly common case so we gain quite a bit by special casing it. Improves performance by 10 % in parcycle benchmark, and by 90 % in a synthetic benchmark. Reviewed-by: Andreas Kling (cherry picked from commit 5b74a70ac630073582be56f8a0539624a1080185)
Diffstat (limited to 'src/gui/painting/qdrawhelper_neon.cpp')
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp38
1 files changed, 38 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index debca37486..7eb2f09d7a 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -955,6 +955,44 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h,
}
}
+class QSimdNeon
+{
+public:
+ typedef int32x4_t Int32x4;
+ typedef float32x4_t Float32x4;
+
+ union Vect_buffer_i { Int32x4 v; int i[4]; };
+ union Vect_buffer_f { Float32x4 v; float f[4]; };
+
+ static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); }
+ static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); }
+ static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); }
+
+ static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); }
+ static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); }
+
+ static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); }
+ static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); }
+ static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); }
+
+ static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); }
+
+ static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); }
+ static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); }
+
+ static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); }
+
+ static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); }
+
+ static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); }
+};
+
+const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
+ int y, int x, int length)
+{
+ return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdNeon> >(buffer, op, data, y, x, length);
+}
+
QT_END_NAMESPACE
#endif // QT_HAVE_NEON