Improved gradient table generation performance for two-stop gradients.

Two stops is a fairly common case, so we gain quite a bit by special-casing it. Improves performance by 10% in the parcycle benchmark, and by 90% in a synthetic benchmark.

Reviewed-by: Andreas Kling
(cherry picked from commit 5b74a70ac630073582be56f8a0539624a1080185)
author: Samuel Rødal <samuel.rodal@nokia.com> 2011-04-13 10:15:06 +0200
committer: Olivier Goffart <olivier.goffart@nokia.com> 2011-05-10 12:54:45 +0200
commit: e05443367f60e591556ae8854ecb634a7cf6ea33 (patch)
tree: dbc1a06cb4d7e76f25c0b1ca6f7d79ea7daf4f5e /src/gui/painting/qdrawhelper_neon.cpp
parent: db5803959182f891259b457b5bac2ed54785b709 (diff)
1 files changed, 38 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index debca37486..7eb2f09d7a 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -955,6 +955,44 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h,
     }
 }
 
+class QSimdNeon
+{
+public:
+    typedef int32x4_t Int32x4;
+    typedef float32x4_t Float32x4;
+
+    union Vect_buffer_i { Int32x4 v; int i[4]; };
+    union Vect_buffer_f { Float32x4 v; float f[4]; };
+
+    static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); }
+    static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); }
+    static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); }
+
+    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); }
+    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); }
+
+    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); }
+    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); }
+    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); }
+
+    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); }
+
+    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); }
+    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); }
+
+    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); }
+
+    static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); }
+
+    static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); }
+};
+
+const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
+                                                       int y, int x, int length)
+{
+    return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdNeon> >(buffer, op, data, y, x, length);
+}
+
 QT_END_NAMESPACE
 
 #endif // QT_HAVE_NEON
author	Samuel Rødal <samuel.rodal@nokia.com>	2011-04-13 10:15:06 +0200
committer	Olivier Goffart <olivier.goffart@nokia.com>	2011-05-10 12:54:45 +0200
commit	e05443367f60e591556ae8854ecb634a7cf6ea33 (patch)
tree	dbc1a06cb4d7e76f25c0b1ca6f7d79ea7daf4f5e /src/gui/painting/qdrawhelper_neon.cpp
parent	db5803959182f891259b457b5bac2ed54785b709 (diff)