summary | refs | log | tree | commit | diff | stats
path: root/src/3rdparty/libwebp/src/dsp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp')
-rw-r--r-- src/3rdparty/libwebp/src/dsp/dec_neon.c 9
-rw-r--r-- src/3rdparty/libwebp/src/dsp/dsp.h 4
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless.c 11
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless_common.h 2
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c 3
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless_sse2.c 3
-rw-r--r-- src/3rdparty/libwebp/src/dsp/upsampling_msa.c 4
-rw-r--r-- src/3rdparty/libwebp/src/dsp/upsampling_neon.c 14
8 files changed, 33 insertions, 17 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/dec_neon.c b/src/3rdparty/libwebp/src/dsp/dec_neon.c
index ffa697f..239ec41 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_neon.c
@@ -1361,7 +1361,8 @@ static void RD4_NEON(uint8_t* dst) { // Down-right
const uint32_t J = dst[-1 + 1 * BPS];
const uint32_t K = dst[-1 + 2 * BPS];
const uint32_t L = dst[-1 + 3 * BPS];
- const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24));
+ const uint64x1_t LKJI____ =
+ vcreate_u64((uint64_t)L | (K << 8) | (J << 16) | (I << 24));
const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
@@ -1427,10 +1428,16 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) {
const uint8x8_t A = vld1_u8(dst - BPS); // top row
+#if defined(__aarch64__)
+ const uint16x8_t B = vmovl_u8(A);
+ const uint16_t p2 = vaddvq_u16(B);
+ sum_top = vdupq_n_u16(p2);
+#else
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
const uint16x4_t p1 = vpadd_u16(p0, p0);
const uint16x4_t p2 = vpadd_u16(p1, p1);
sum_top = vcombine_u16(p2, p2);
+#endif
}
if (do_left) {
diff --git a/src/3rdparty/libwebp/src/dsp/dsp.h b/src/3rdparty/libwebp/src/dsp/dsp.h
index 0d7f3fb..7c75b26 100644
--- a/src/3rdparty/libwebp/src/dsp/dsp.h
+++ b/src/3rdparty/libwebp/src/dsp/dsp.h
@@ -246,9 +246,9 @@ extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top and *left can be NULL.
-typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
+typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
const uint8_t* top);
-typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;
diff --git a/src/3rdparty/libwebp/src/dsp/lossless.c b/src/3rdparty/libwebp/src/dsp/lossless.c
index d05af84..aad5f43 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless.c
@@ -81,7 +81,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined.
-#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
+#if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
@@ -167,15 +167,20 @@ static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
return pred;
}
-GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
+static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
+ int num_pixels, uint32_t* out) {
+ int x;
+ (void)upper;
+ for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
+}
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
uint32_t left = out[-1];
+ (void)upper;
for (i = 0; i < num_pixels; ++i) {
out[i] = left = VP8LAddPixels(in[i], left);
}
- (void)upper;
}
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_common.h b/src/3rdparty/libwebp/src/dsp/lossless_common.h
index a2648d1..9c2ebe6 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_common.h
+++ b/src/3rdparty/libwebp/src/dsp/lossless_common.h
@@ -177,6 +177,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
+ assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
@@ -189,6 +190,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
+ assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \
out[x] = VP8LSubPixels(in[x], pred); \
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
index 8adc521..e676f6f 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -455,8 +455,9 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res);
}
if (i != num_pixels) {
- VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i);
+ VP8LPredictorsSub_C[0](in + i, NULL, num_pixels - i, out + i);
}
+ (void)upper;
}
#define GENERATE_PREDICTOR_1(X, IN) \
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c
index 17d7576..aef0cee 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c
@@ -191,8 +191,9 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res);
}
if (i != num_pixels) {
- VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i);
+ VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i);
}
+ (void)upper;
}
// Predictor1: left.
diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_msa.c b/src/3rdparty/libwebp/src/dsp/upsampling_msa.c
index 99eea70..f2e03e8 100644
--- a/src/3rdparty/libwebp/src/dsp/upsampling_msa.c
+++ b/src/3rdparty/libwebp/src/dsp/upsampling_msa.c
@@ -576,9 +576,9 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16)); \
const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
const uint8_t* ptop_y = &top_y[1]; \
- uint8_t *ptop_dst = top_dst + XSTEP; \
+ uint8_t* ptop_dst = top_dst + XSTEP; \
const uint8_t* pbot_y = &bot_y[1]; \
- uint8_t *pbot_dst = bot_dst + XSTEP; \
+ uint8_t* pbot_dst = bot_dst + XSTEP; \
\
FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \
if (bot_y != NULL) { \
diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
index 17cbc9f..6ba71a7 100644
--- a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c
@@ -58,8 +58,8 @@
} while (0)
// Turn the macro into a function for reducing code-size when non-critical
-static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2,
- uint8_t *out) {
+static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
+ uint8_t* out) {
UPSAMPLE_16PIXELS(r1, r2, out);
}
@@ -190,14 +190,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
}
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
-static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
- const uint8_t *top_u, const uint8_t *top_v, \
- const uint8_t *cur_u, const uint8_t *cur_v, \
- uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
+ const uint8_t* top_u, const uint8_t* top_v, \
+ const uint8_t* cur_u, const uint8_t* cur_v, \
+ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[2 * 32 + 15]; \
- uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
+ uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
const int uv_len = (len + 1) >> 1; \
/* 9 pixels must be read-able for each block */ \
const int num_blocks = (uv_len - 1) >> 3; \