diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-03-23 10:19:11 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-04-11 07:53:53 +0000 |
commit | 4b6542c9ffa59eba6e82c0ecbb04dab361e3632f (patch) | |
tree | f8b7f4c8eb3448dd9050566a9a66e387445c67f5 /src/gui/painting | |
parent | a5e2ec5cdb35dddf71c39633133c4eaf8ccc12f7 (diff) |
Remove last uses of interpolate_4_pixels_16 on SSE2 and NEON
With SSE2 or NEON, interpolate_4_pixels is faster than
interpolate_4_pixels_16, and using it saves a branch of duplicated code.
Similar changes had already been made in the other places where it was used;
those have been updated to follow similar logic.
Change-Id: I040d96480f7f925f659602f66f931d28b59312a5
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 96 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 9 |
2 files changed, 52 insertions, 53 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index f08038e34b..f3df62b855 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -2249,17 +2249,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint * #endif while (b < boundedEnd) { int x = (fx >> 16); -#if defined(__SSE2__) || defined(__ARM_NEON__) - int distx8 = (fx & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8); -#else - uint tl = s1[x]; - uint tr = s1[x + 1]; - uint bl = s2[x]; - uint br = s2[x + 1]; - int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); -#endif + if (hasFastInterpolate4()) { + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8); + } else { + int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx4, disty4); + } fx += fdx; ++b; } @@ -2273,14 +2269,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint * uint tr = s1[x2]; uint bl = s2[x1]; uint br = s2[x2]; -#if defined(__SSE2__) || defined(__ARM_NEON__) - // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. 
- int distx8 = (fx & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8); -#else - int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); -#endif + if (hasFastInterpolate4()) { + int distx8 = (fx & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8); + } else { + int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4); + } fx += fdx; ++b; } @@ -2345,15 +2340,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint uint tr = s1[x2]; uint bl = s2[x1]; uint br = s2[x2]; -#if defined(__SSE2__) || defined(__ARM_NEON__) - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); -#else - int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; - int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); -#endif + if (hasFastInterpolate4()) { + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + } else { + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); + } fx += fdx; fy += fdy; ++b; @@ -2495,19 +2490,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint const uint *s1 = (const uint *)image.scanLine(y); const uint *s2 = (const uint *)image.scanLine(y + 1); -#if defined(__SSE2__) || defined(__ARM_NEON__) - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty); -#else - uint tl = s1[x]; - uint tr = s1[x + 1]; - uint bl = s2[x]; - uint br = s2[x + 1]; - int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; - int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; - *b = 
interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); -#endif + if (hasFastInterpolate4()) { + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty); + } else { + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx, disty); + } fx += fdx; fy += fdy; @@ -2532,16 +2523,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint uint bl = s2[x1]; uint br = s2[x2]; -#if defined(__SSE2__) || defined(__ARM_NEON__) - // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16. - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; - *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); -#else - int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; - int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); -#endif + if (hasFastInterpolate4()) { + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); + } else { + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); + } fx += fdx; fy += fdy; @@ -2939,7 +2929,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); - if (qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) + if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y) int disty = (fy & 0x0000ffff) >> 8; for (int i = 0; i < len; ++i) { int distx = (fx & 0x0000ffff) >> 8; @@ -2974,7 +2964,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper 
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); - if (qAbs(data->m11) < qreal(1./8.)|| qAbs(data->m22) < qreal(1./8.)) { + if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) { // If we are zooming more than 8 times, we use 8bit precision for the position. for (int i = 0; i < len; ++i) { int distx = (fx & 0x0000ffff) >> 8; diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index ebf215a3eb..b94fd34b51 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -685,6 +685,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis __m128i vb = _mm_loadl_epi64((const __m128i*)b); return interpolate_4_pixels_sse2(vt, vb, distx, disty); } + +static constexpr inline bool hasFastInterpolate4() { return true; } + #elif defined(__ARM_NEON__) static Q_ALWAYS_INLINE uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty) { @@ -717,6 +720,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis uint32x2_t vb32 = vld1_u32(b); return interpolate_4_pixels_neon(vt32, vb32, distx, disty); } + +static constexpr inline bool hasFastInterpolate4() { return true; } + #else static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty) { @@ -731,6 +737,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis { return interpolate_4_pixels(t[0], t[1], b[0], b[1], distx, disty); } + +static constexpr inline bool hasFastInterpolate4() { return false; } + #endif #if Q_BYTE_ORDER == Q_BIG_ENDIAN |