summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2018-03-23 10:19:11 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2018-04-11 07:53:53 +0000
commit4b6542c9ffa59eba6e82c0ecbb04dab361e3632f (patch)
treef8b7f4c8eb3448dd9050566a9a66e387445c67f5 /src
parenta5e2ec5cdb35dddf71c39633133c4eaf8ccc12f7 (diff)
Remove last uses of interpolate_4_pixels_16 on SSE2 and NEON
With SSE2 or NEON interpolate_4_pixels is faster than interpolate_4_pixels_16, and using it saves a branch of duplicated code. Similar changes had already been done other places it was used, those have been updated to follow a similar logic. Change-Id: I040d96480f7f925f659602f66f931d28b59312a5 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src')
-rw-r--r--src/gui/painting/qdrawhelper.cpp96
-rw-r--r--src/gui/painting/qdrawhelper_p.h9
2 files changed, 52 insertions, 53 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index f08038e34b..f3df62b855 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -2249,17 +2249,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *
#endif
while (b < boundedEnd) {
int x = (fx >> 16);
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- int distx8 = (fx & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
-#else
- uint tl = s1[x];
- uint tr = s1[x + 1];
- uint bl = s2[x];
- uint br = s2[x + 1];
- int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
-#endif
+ if (hasFastInterpolate4()) {
+ int distx8 = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
+ } else {
+ int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx4, disty4);
+ }
fx += fdx;
++b;
}
@@ -2273,14 +2269,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *
uint tr = s1[x2];
uint bl = s2[x1];
uint br = s2[x2];
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
- int distx8 = (fx & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
-#else
- int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
-#endif
+ if (hasFastInterpolate4()) {
+ int distx8 = (fx & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
+ } else {
+ int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
+ }
fx += fdx;
++b;
}
@@ -2345,15 +2340,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint
uint tr = s1[x2];
uint bl = s2[x1];
uint br = s2[x2];
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- int distx = (fx & 0x0000ffff) >> 8;
- int disty = (fy & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
-#else
- int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
- int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
-#endif
+ if (hasFastInterpolate4()) {
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+ } else {
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
+ }
fx += fdx;
fy += fdy;
++b;
@@ -2495,19 +2490,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint
const uint *s1 = (const uint *)image.scanLine(y);
const uint *s2 = (const uint *)image.scanLine(y + 1);
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- int distx = (fx & 0x0000ffff) >> 8;
- int disty = (fy & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
-#else
- uint tl = s1[x];
- uint tr = s1[x + 1];
- uint bl = s2[x];
- uint br = s2[x + 1];
- int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
- int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
-#endif
+ if (hasFastInterpolate4()) {
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
+ } else {
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(s1[x], s1[x + 1], s2[x], s2[x + 1], distx, disty);
+ }
fx += fdx;
fy += fdy;
@@ -2532,16 +2523,15 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint
uint bl = s2[x1];
uint br = s2[x2];
-#if defined(__SSE2__) || defined(__ARM_NEON__)
- // The optimized interpolate_4_pixels are faster than interpolate_4_pixels_16.
- int distx = (fx & 0x0000ffff) >> 8;
- int disty = (fy & 0x0000ffff) >> 8;
- *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
-#else
- int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
- int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
-#endif
+ if (hasFastInterpolate4()) {
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
+ *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
+ } else {
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
+ }
fx += fdx;
fy += fdy;
@@ -2939,7 +2929,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
- if (qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
+ if (hasFastInterpolate4() || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
int disty = (fy & 0x0000ffff) >> 8;
for (int i = 0; i < len; ++i) {
int distx = (fx & 0x0000ffff) >> 8;
@@ -2974,7 +2964,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
- if (qAbs(data->m11) < qreal(1./8.)|| qAbs(data->m22) < qreal(1./8.)) {
+ if (hasFastInterpolate4() || qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.)) {
// If we are zooming more than 8 times, we use 8bit precision for the position.
for (int i = 0; i < len; ++i) {
int distx = (fx & 0x0000ffff) >> 8;
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index ebf215a3eb..b94fd34b51 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -685,6 +685,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis
__m128i vb = _mm_loadl_epi64((const __m128i*)b);
return interpolate_4_pixels_sse2(vt, vb, distx, disty);
}
+
+static constexpr inline bool hasFastInterpolate4() { return true; }
+
#elif defined(__ARM_NEON__)
static Q_ALWAYS_INLINE uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
{
@@ -717,6 +720,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis
uint32x2_t vb32 = vld1_u32(b);
return interpolate_4_pixels_neon(vt32, vb32, distx, disty);
}
+
+static constexpr inline bool hasFastInterpolate4() { return true; }
+
#else
static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
@@ -731,6 +737,9 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis
{
return interpolate_4_pixels(t[0], t[1], b[0], b[1], distx, disty);
}
+
+static constexpr inline bool hasFastInterpolate4() { return false; }
+
#endif
#if Q_BYTE_ORDER == Q_BIG_ENDIAN