summary refs log tree commit diff stats
path: root/src/3rdparty/libwebp/src/dsp/enc_neon.c
diff options
context:
space:
mode:
author Liang Qi <liang.qi@qt.io> 2016-12-13 06:35:54 +0100
committer Jani Heikkinen <jani.heikkinen@qt.io> 2016-12-13 11:05:49 +0000
commit a4125f0c4e8988012fe2bf5b9f933ed63c3c97d0 (patch)
tree 7b183f687bd0ba111ec50d406226283c980e03a5 /src/3rdparty/libwebp/src/dsp/enc_neon.c
parent 5976c46685b1335c86ce702e3af69262de97096c (diff)
Bundled libwebp updated to version 0.5.1
This commit imports libwebp 0.5.1, including AUTHORS, COPYING, ChangeLog, NEWS, PATENTS, README and src directories. In src, only includes header and source files. Upstream changes since 0.5.0 have been merged in. Also updated version in qt_attribution.json.

Conflicts:
    src/3rdparty/libwebp.pri
    src/3rdparty/libwebp/qt_attribution.json
    src/3rdparty/libwebp/src/webp/config.h

Change-Id: I7d0c15400154c3b4ee8ff37665303307c4b84f9f
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@qt.io>
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc_neon.c')
-rw-r--r-- src/3rdparty/libwebp/src/dsp/enc_neon.c 78
1 files changed, 32 insertions, 46 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/enc_neon.c b/src/3rdparty/libwebp/src/dsp/enc_neon.c
index c2aef58..46f6bf9 100644
--- a/src/3rdparty/libwebp/src/dsp/enc_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/enc_neon.c
@@ -560,21 +560,6 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
// a 26ae, b 26ae
// a 37bf, b 37bf
//
-static WEBP_INLINE uint8x8x4_t DistoTranspose4x4U8(uint8x8x4_t d4_in) {
- const uint8x8x2_t d2_tmp0 = vtrn_u8(d4_in.val[0], d4_in.val[1]);
- const uint8x8x2_t d2_tmp1 = vtrn_u8(d4_in.val[2], d4_in.val[3]);
- const uint16x4x2_t d2_tmp2 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[0]),
- vreinterpret_u16_u8(d2_tmp1.val[0]));
- const uint16x4x2_t d2_tmp3 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[1]),
- vreinterpret_u16_u8(d2_tmp1.val[1]));
-
- d4_in.val[0] = vreinterpret_u8_u16(d2_tmp2.val[0]);
- d4_in.val[2] = vreinterpret_u8_u16(d2_tmp2.val[1]);
- d4_in.val[1] = vreinterpret_u8_u16(d2_tmp3.val[0]);
- d4_in.val[3] = vreinterpret_u8_u16(d2_tmp3.val[1]);
- return d4_in;
-}
-
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
@@ -589,41 +574,40 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
return q4_in;
}
-static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const uint8x8x4_t d4_in) {
+static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
- const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[0],
- d4_in.val[2]));
- const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[1],
- d4_in.val[3]));
- const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[0],
- d4_in.val[2]));
- const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[1],
- d4_in.val[3]));
+ const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
+ const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
+ const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
+ const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
int16x8x4_t q4_out;
// tmp[0] = a0 + a1
// tmp[1] = a3 + a2
// tmp[2] = a3 - a2
// tmp[3] = a0 - a1
INIT_VECTOR4(q4_out,
- vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
- vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
+ vabsq_s16(vaddq_s16(q_a0, q_a1)),
+ vabsq_s16(vaddq_s16(q_a3, q_a2)),
+ vabdq_s16(q_a3, q_a2), vabdq_s16(q_a0, q_a1));
return q4_out;
}
-static WEBP_INLINE int16x8x4_t DistoVerticalPass(int16x8x4_t q4_in) {
- const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
- const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
- const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
- const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
+static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
+ const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
+ q4_in.val[2]));
+ const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
+ q4_in.val[3]));
+ const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1],
+ q4_in.val[3]));
+ const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0],
+ q4_in.val[2]));
+ int16x8x4_t q4_out;
- q4_in.val[0] = vaddq_s16(q_a0, q_a1);
- q4_in.val[1] = vaddq_s16(q_a3, q_a2);
- q4_in.val[2] = vabdq_s16(q_a3, q_a2);
- q4_in.val[3] = vabdq_s16(q_a0, q_a1);
- q4_in.val[0] = vabsq_s16(q4_in.val[0]);
- q4_in.val[1] = vabsq_s16(q4_in.val[1]);
- return q4_in;
+ INIT_VECTOR4(q4_out,
+ vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
+ vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
+ return q4_out;
}
static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
@@ -667,6 +651,7 @@ static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
+// w[] contains a row-major 4 by 4 symmetric matrix.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
@@ -691,18 +676,19 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
vreinterpret_u8_u32(d_in_ab_cdef));
{
- // horizontal pass
- const uint8x8x4_t d4_t = DistoTranspose4x4U8(d4_in);
- const int16x8x4_t q4_h = DistoHorizontalPass(d4_t);
+ // Vertical pass first: it works directly on the rows as loaded, so the
+ // initial 8-bit transpose is avoided and only the transpose between passes
+ // remains. Swapping the passes is valid because w/kWeightY is symmetric.
+ const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
const int16x4x4_t d4_w = DistoLoadW(w);
- // vertical pass
- const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_h);
- const int16x8x4_t q4_v = DistoVerticalPass(q4_t);
- int32x2_t d_sum = DistoSum(q4_v, d4_w);
+ // horizontal pass
+ const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
+ const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
+ int32x2_t d_sum = DistoSum(q4_h, d4_w);
// abs(sum2 - sum1) >> 5
d_sum = vabs_s32(d_sum);
- d_sum = vshr_n_s32(d_sum, 5);
+ d_sum = vshr_n_s32(d_sum, 5);
return vget_lane_s32(d_sum, 0);
}
}