From e05443367f60e591556ae8854ecb634a7cf6ea33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B8dal?= Date: Wed, 13 Apr 2011 10:15:06 +0200 Subject: Improved gradient table generation performance for two-stop gradients. Two stops is a fairly common case so we gain quite a bit by special casing it. Improves performance by 10 % in parcycle benchmark, and by 90 % in a synthetic benchmark. Reviewed-by: Andreas Kling (cherry picked from commit 5b74a70ac630073582be56f8a0539624a1080185) --- src/gui/painting/qdrawhelper_neon.cpp | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/gui/painting/qdrawhelper_neon.cpp') diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index debca37486..7eb2f09d7a 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -955,6 +955,44 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, } } +class QSimdNeon +{ +public: + typedef int32x4_t Int32x4; + typedef float32x4_t Float32x4; + + union Vect_buffer_i { Int32x4 v; int i[4]; }; + union Vect_buffer_f { Float32x4 v; float f[4]; }; + + static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); } + static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); } + static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); } + + static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); } + static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); } + + static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); } + static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); } + static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); } + + static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); } + + static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); } + static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); } + + static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); } + + static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); } + + static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); } +}; + +const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data, + int y, int x, int length) +{ + return qt_fetch_radial_gradient_template >(buffer, op, data, y, x, length); +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON -- cgit v1.2.3