summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
diff options
context:
space:
mode:
authorLiang Qi <liang.qi@qt.io>2017-03-07 13:05:21 +0100
committerLiang Qi <liang.qi@qt.io>2017-03-13 10:47:45 +0000
commitb7ec9e78633d8f2c75a8b02e17e169497bb103e2 (patch)
treee4be04af4dbcf8cd635715efdf4e769281183746 /src/3rdparty/libwebp/src/dsp/upsampling_neon.c
parentf2dbc67c2b032a5f27d0224e020fb6dfcd3fd142 (diff)
Bundled libwebp updated to version 0.6.0
This commit imports libwebp 0.6.0, including AUTHORS, COPYING, ChangeLog, NEWS, PATENTS, README and src directories. In src, only includes header and source files. Upstream changes since 0.5.1 have been merged in. Also updated version in qt_attribution.json. Conflicts: src/3rdparty/libwebp.pri src/3rdparty/libwebp/qt_attribution.json src/3rdparty/libwebp/src/webp/config.h Change-Id: I001aa7a3fabf0130b54f9005c23aa822bc1d0ec1 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/upsampling_neon.c')
-rw-r--r--src/3rdparty/libwebp/src/dsp/upsampling_neon.c77
1 files changed, 29 insertions, 48 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
index 2b0c99b..d371a83 100644
--- a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
@@ -28,47 +28,34 @@
// U/V upsampling
// Loads 9 pixels each from rows r1 and r2 and generates 16 pixels.
-#define UPSAMPLE_16PIXELS(r1, r2, out) { \
- uint8x8_t a = vld1_u8(r1); \
- uint8x8_t b = vld1_u8(r1 + 1); \
- uint8x8_t c = vld1_u8(r2); \
- uint8x8_t d = vld1_u8(r2 + 1); \
- \
- uint16x8_t al = vshll_n_u8(a, 1); \
- uint16x8_t bl = vshll_n_u8(b, 1); \
- uint16x8_t cl = vshll_n_u8(c, 1); \
- uint16x8_t dl = vshll_n_u8(d, 1); \
- \
- uint8x8_t diag1, diag2; \
- uint16x8_t sl; \
- \
+#define UPSAMPLE_16PIXELS(r1, r2, out) do { \
+ const uint8x8_t a = vld1_u8(r1 + 0); \
+ const uint8x8_t b = vld1_u8(r1 + 1); \
+ const uint8x8_t c = vld1_u8(r2 + 0); \
+ const uint8x8_t d = vld1_u8(r2 + 1); \
/* a + b + c + d */ \
- sl = vaddl_u8(a, b); \
- sl = vaddw_u8(sl, c); \
- sl = vaddw_u8(sl, d); \
- \
- al = vaddq_u16(sl, al); /* 3a + b + c + d */ \
- bl = vaddq_u16(sl, bl); /* a + 3b + c + d */ \
- \
- al = vaddq_u16(al, dl); /* 3a + b + c + 3d */ \
- bl = vaddq_u16(bl, cl); /* a + 3b + 3c + d */ \
+ const uint16x8_t ad = vaddl_u8(a, d); \
+ const uint16x8_t bc = vaddl_u8(b, c); \
+ const uint16x8_t abcd = vaddq_u16(ad, bc); \
+ /* 3a + b + c + 3d */ \
+ const uint16x8_t al = vaddq_u16(abcd, vshlq_n_u16(ad, 1)); \
+ /* a + 3b + 3c + d */ \
+ const uint16x8_t bl = vaddq_u16(abcd, vshlq_n_u16(bc, 1)); \
\
- diag2 = vshrn_n_u16(al, 3); \
- diag1 = vshrn_n_u16(bl, 3); \
+ const uint8x8_t diag2 = vshrn_n_u16(al, 3); \
+ const uint8x8_t diag1 = vshrn_n_u16(bl, 3); \
\
- a = vrhadd_u8(a, diag1); \
- b = vrhadd_u8(b, diag2); \
- c = vrhadd_u8(c, diag2); \
- d = vrhadd_u8(d, diag1); \
+ const uint8x8_t A = vrhadd_u8(a, diag1); \
+ const uint8x8_t B = vrhadd_u8(b, diag2); \
+ const uint8x8_t C = vrhadd_u8(c, diag2); \
+ const uint8x8_t D = vrhadd_u8(d, diag1); \
\
- { \
- uint8x8x2_t a_b, c_d; \
- INIT_VECTOR2(a_b, a, b); \
- INIT_VECTOR2(c_d, c, d); \
- vst2_u8(out, a_b); \
- vst2_u8(out + 32, c_d); \
- } \
-}
+ uint8x8x2_t A_B, C_D; \
+ INIT_VECTOR2(A_B, A, B); \
+ INIT_VECTOR2(C_D, C, D); \
+ vst2_u8(out + 0, A_B); \
+ vst2_u8(out + 32, C_D); \
+} while (0)
// Turn the macro into a function for reducing code-size when non-critical
static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
@@ -93,7 +80,6 @@ static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
#define v255 vdup_n_u8(255)
-#define v_0x0f vdup_n_u8(15)
#define STORE_Rgb(out, r, g, b) do { \
uint8x8x3_t r_g_b; \
@@ -132,21 +118,16 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
#endif
#define STORE_Rgba4444(out, r, g, b) do { \
- const uint8x8_t r1 = vshl_n_u8(vshr_n_u8(r, 4), 4); /* 4bits */ \
- const uint8x8_t g1 = vshr_n_u8(g, 4); \
- const uint8x8_t ba = vorr_u8(b, v_0x0f); \
- const uint8x8_t rg = vorr_u8(r1, g1); \
+ const uint8x8_t rg = vsri_n_u8(r, g, 4); /* shift g, insert r */ \
+ const uint8x8_t ba = vsri_n_u8(b, v255, 4); /* shift a, insert b */ \
const uint8x8x2_t rgba4444 = ZIP_U8(rg, ba); \
vst1q_u8(out, vcombine_u8(rgba4444.val[0], rgba4444.val[1])); \
} while (0)
#define STORE_Rgb565(out, r, g, b) do { \
- const uint8x8_t r1 = vshl_n_u8(vshr_n_u8(r, 3), 3); /* 5bits */ \
- const uint8x8_t g1 = vshr_n_u8(g, 5); /* upper 3bits */\
- const uint8x8_t g2 = vshl_n_u8(vshr_n_u8(g, 2), 5); /* lower 3bits */\
- const uint8x8_t b1 = vshr_n_u8(b, 3); /* 5bits */ \
- const uint8x8_t rg = vorr_u8(r1, g1); \
- const uint8x8_t gb = vorr_u8(g2, b1); \
+ const uint8x8_t rg = vsri_n_u8(r, g, 5); /* shift g and insert r */ \
+ const uint8x8_t g1 = vshl_n_u8(g, 3); /* pre-shift g: 3bits */ \
+ const uint8x8_t gb = vsri_n_u8(g1, b, 3); /* shift b and insert g */ \
const uint8x8x2_t rgb565 = ZIP_U8(rg, gb); \
vst1q_u8(out, vcombine_u8(rgb565.val[0], rgb565.val[1])); \
} while (0)