summaryrefslogtreecommitdiffstats
path: root/src/gui/painting/qdrawhelper.cpp
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2016-08-16 13:37:49 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2016-08-18 15:48:59 +0000
commit: e231581f1ffce1bb6d9ee0dcba32d0fe8d771e1d (patch)
tree: 2e631023f669bf8bf9759d87b5531026ad03b8af /src/gui/painting/qdrawhelper.cpp
parent: 1075f6c7644e098530b317d3ac271328c7ddb481 (diff)
Remove type-punned unions
Type punning, even through a union, is not legal C++, and it also causes compilers to produce poorly performing code. This has already been fixed for the SSE2 code in bilinear sampling, but not for the NEON code.
Change-Id: Id5e184051e0bd78db730d83ef0dda56ac3206e5b
Reviewed-by: Erik Verbruggen <erik.verbruggen@qt.io>
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 63
1 files changed, 33 insertions, 30 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index f0d0ac0283..fa4470a486 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -2243,45 +2243,48 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
int32x4_t v_fdx = vdupq_n_s32(fdx*4);
- ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
-
- union Vect_buffer { int32x4_t vect; quint32 i[4]; };
- Vect_buffer v_fx;
-
- for (int i = 0; i < 4; i++) {
- v_fx.i[i] = fx;
- fx += fdx;
- }
+ int32x4_t v_fx = vmovq_n_s32(fx);
+ fx += fdx;
+ v_fx = vsetq_lane_s32(fx, v_fx, 1);
+ fx += fdx;
+ v_fx = vsetq_lane_s32(fx, v_fx, 2);
+ fx += fdx;
+ v_fx = vsetq_lane_s32(fx, v_fx, 3);
+ fx += fdx;
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
while (b < boundedEnd) {
-
- Vect_buffer tl, tr, bl, br;
-
- Vect_buffer v_fx_shifted;
- v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
-
- int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12);
-
- for (int i = 0; i < 4; i++) {
- int x1 = v_fx_shifted.i[i];
- const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
- const uint *addr_tr = addr_tl + 1;
- tl.i[i] = *addr_tl;
- tr.i[i] = *addr_tr;
- bl.i[i] = *(addr_tl+secondLine);
- br.i[i] = *(addr_tr+secondLine);
- }
-
+ uint32x4x2_t v_top, v_bot;
+
+ int32x4_t v_fx_shifted = vshrq_n_s32(v_fx, 16);
+
+ int x1 = vgetq_lane_s32(v_fx_shifted, 0);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
+ x1 = vgetq_lane_s32(v_fx_shifted, 1);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
+ x1 = vgetq_lane_s32(v_fx_shifted, 2);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
+ x1 = vgetq_lane_s32(v_fx_shifted, 3);
+ v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
+ v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
+
+ int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
- interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
+ interpolate_4_pixels_16_neon(
+ vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
+ vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
+ vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
+ colorMask, invColorMask, v_256, b);
b+=4;
- v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
+ v_fx = vaddq_s32(v_fx, v_fdx);
}
- fx = v_fx.i[0];
+ fx = vgetq_lane_s32(v_fx, 0);
#endif
}