Update bundled libwebp to version 1.4.0

[ChangeLog][Third-Party Code] Update bundled libwebp to version 1.4.0 Pick-to: 6.7 6.5 6.2 5.15 Change-Id: I34bc162e3b64be75da0f82a0a7329eb1a0356239 Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@qt.io>
author: Eirik Aavitsland <eirik.aavitsland@qt.io> 2024-04-15 18:38:04 +0200
committer: Eirik Aavitsland <eirik.aavitsland@qt.io> 2024-04-16 08:03:23 +0200
commit: a5e0b01214ffdbac811b89c6e19f657d3a0cf34b (patch)
tree: c1b62f9f7969971fef48c6801e3cec6a8f9d2292 /src/3rdparty/libwebp/src/dsp
parent: ff565219a7724e13b769f63d3a483167046bd83d (diff)
30 files changed, 346 insertions, 263 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
index f0843d0..aa0cc28 100644
--- a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
@@ -144,6 +144,46 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
   return (alpha_and == 0xff);
 }
 
+static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
+                              uint8_t* WEBP_RESTRICT alpha, int size) {
+  int i;
+  const __m128i mask = _mm_set1_epi32(0xff);
+  const __m128i* src = (const __m128i*)argb;
+
+  for (i = 0; i + 16 <= size; i += 16, src += 4) {
+    const __m128i a0 = _mm_loadu_si128(src + 0);
+    const __m128i a1 = _mm_loadu_si128(src + 1);
+    const __m128i a2 = _mm_loadu_si128(src + 2);
+    const __m128i a3 = _mm_loadu_si128(src + 3);
+    const __m128i b0 = _mm_srli_epi32(a0, 8);
+    const __m128i b1 = _mm_srli_epi32(a1, 8);
+    const __m128i b2 = _mm_srli_epi32(a2, 8);
+    const __m128i b3 = _mm_srli_epi32(a3, 8);
+    const __m128i c0 = _mm_and_si128(b0, mask);
+    const __m128i c1 = _mm_and_si128(b1, mask);
+    const __m128i c2 = _mm_and_si128(b2, mask);
+    const __m128i c3 = _mm_and_si128(b3, mask);
+    const __m128i d0 = _mm_packs_epi32(c0, c1);
+    const __m128i d1 = _mm_packs_epi32(c2, c3);
+    const __m128i e = _mm_packus_epi16(d0, d1);
+    // store
+    _mm_storeu_si128((__m128i*)&alpha[i], e);
+  }
+  if (i + 8 <= size) {
+    const __m128i a0 = _mm_loadu_si128(src + 0);
+    const __m128i a1 = _mm_loadu_si128(src + 1);
+    const __m128i b0 = _mm_srli_epi32(a0, 8);
+    const __m128i b1 = _mm_srli_epi32(a1, 8);
+    const __m128i c0 = _mm_and_si128(b0, mask);
+    const __m128i c1 = _mm_and_si128(b1, mask);
+    const __m128i d = _mm_packs_epi32(c0, c1);
+    const __m128i e = _mm_packus_epi16(d, d);
+    _mm_storel_epi64((__m128i*)&alpha[i], e);
+    i += 8;
+  }
+  for (; i < size; ++i) alpha[i] = argb[i] >> 8;
+}
+
 //------------------------------------------------------------------------------
 // Non-dither premultiplied modes
 
@@ -354,6 +394,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
   WebPDispatchAlpha = DispatchAlpha_SSE2;
   WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
   WebPExtractAlpha = ExtractAlpha_SSE2;
+  WebPExtractGreen = ExtractGreen_SSE2;
 
   WebPHasAlpha8b = HasAlpha8b_SSE2;
   WebPHasAlpha32b = HasAlpha32b_SSE2;
diff --git a/src/3rdparty/libwebp/src/dsp/cpu.c b/src/3rdparty/libwebp/src/dsp/cpu.c
index 2234c77..8ba8f68 100644
--- a/src/3rdparty/libwebp/src/dsp/cpu.c
+++ b/src/3rdparty/libwebp/src/dsp/cpu.c
@@ -36,18 +36,6 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     : "a"(info_type), "c"(0));
 }
-#elif defined(__x86_64__) && \
-      (defined(__code_model_medium__) || defined(__code_model_large__)) && \
-      defined(__PIC__)
-static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "xchg{q}\t{%%rbx}, %q1\n"
-    "cpuid\n"
-    "xchg{q}\t{%%rbx}, %q1\n"
-    : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
-      "=d"(cpu_info[3])
-    : "a"(info_type), "c"(0));
-}
 #elif defined(__i386__) || defined(__x86_64__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
   __asm__ volatile (
diff --git a/src/3rdparty/libwebp/src/dsp/dec.c b/src/3rdparty/libwebp/src/dsp/dec.c
index 33d8df8..451d649 100644
--- a/src/3rdparty/libwebp/src/dsp/dec.c
+++ b/src/3rdparty/libwebp/src/dsp/dec.c
@@ -37,9 +37,6 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
   STORE(3, y, DC - (d));            \
 } while (0)
 
-#define MUL1(a) ((((a) * 20091) >> 16) + (a))
-#define MUL2(a) (((a) * 35468) >> 16)
-
 #if !WEBP_NEON_OMIT_C_CODE
 static void TransformOne_C(const int16_t* in, uint8_t* dst) {
   int C[4 * 4], *tmp;
@@ -48,8 +45,10 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
   for (i = 0; i < 4; ++i) {    // vertical pass
     const int a = in[0] + in[8];    // [-4096, 4094]
     const int b = in[0] - in[8];    // [-4095, 4095]
-    const int c = MUL2(in[4]) - MUL1(in[12]);   // [-3783, 3783]
-    const int d = MUL1(in[4]) + MUL2(in[12]);   // [-3785, 3781]
+    const int c = WEBP_TRANSFORM_AC3_MUL2(in[4]) -
+                  WEBP_TRANSFORM_AC3_MUL1(in[12]);  // [-3783, 3783]
+    const int d = WEBP_TRANSFORM_AC3_MUL1(in[4]) +
+                  WEBP_TRANSFORM_AC3_MUL2(in[12]);  // [-3785, 3781]
     tmp[0] = a + d;   // [-7881, 7875]
     tmp[1] = b + c;   // [-7878, 7878]
     tmp[2] = b - c;   // [-7878, 7878]
@@ -69,8 +68,10 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
     const int dc = tmp[0] + 4;
     const int a =  dc +  tmp[8];
     const int b =  dc -  tmp[8];
-    const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
-    const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
+    const int c =
+        WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]);
+    const int d =
+        WEBP_TRANSFORM_AC3_MUL1(tmp[4]) + WEBP_TRANSFORM_AC3_MUL2(tmp[12]);
     STORE(0, 0, a + d);
     STORE(1, 0, b + c);
     STORE(2, 0, b - c);
@@ -83,17 +84,15 @@ static void TransformOne_C(const int16_t* in, uint8_t* dst) {
 // Simplified transform when only in[0], in[1] and in[4] are non-zero
 static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
   const int a = in[0] + 4;
-  const int c4 = MUL2(in[4]);
-  const int d4 = MUL1(in[4]);
-  const int c1 = MUL2(in[1]);
-  const int d1 = MUL1(in[1]);
+  const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
+  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
+  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
+  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
   STORE2(0, a + d4, d1, c1);
   STORE2(1, a + c4, d1, c1);
   STORE2(2, a - c4, d1, c1);
   STORE2(3, a - d4, d1, c1);
 }
-#undef MUL1
-#undef MUL2
 #undef STORE2
 
 static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
diff --git a/src/3rdparty/libwebp/src/dsp/dec_mips32.c b/src/3rdparty/libwebp/src/dsp/dec_mips32.c
index e4e7096..f0e7de4 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_mips32.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_mips32.c
@@ -18,8 +18,8 @@
 
 #include "src/dsp/mips_macro.h"
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
+static const int kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 
 static WEBP_INLINE int abs_mips32(int x) {
   const int sign = x >> 31;
@@ -219,7 +219,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
   int temp0, temp1, temp2, temp3, temp4;
   int temp5, temp6, temp7, temp8, temp9;
   int temp10, temp11, temp12, temp13, temp14;
-  int temp15, temp16, temp17, temp18;
+  int temp15, temp16, temp17, temp18, temp19;
   int16_t* p_in = (int16_t*)in;
 
   // loops unrolled and merged to avoid usage of tmp buffer
@@ -233,16 +233,14 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
     "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
     "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
-    "mul      %[temp17], %[temp12], %[kC1]             \n\t"
-    "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp17, temp12)
+    MUL_SHIFT_C1_IO(temp4, temp19)
     "mul      %[temp12], %[temp12], %[kC2]             \n\t"
     "lh       %[temp1],  2(%[in])                      \n\t"
     "lh       %[temp5],  10(%[in])                     \n\t"
     "lh       %[temp9],  18(%[in])                     \n\t"
     "lh       %[temp13], 26(%[in])                     \n\t"
     "sra      %[temp8],  %[temp8],  16                 \n\t"
-    "sra      %[temp17], %[temp17], 16                 \n\t"
-    "sra      %[temp4],  %[temp4],  16                 \n\t"
     "sra      %[temp12], %[temp12], 16                 \n\t"
     "lh       %[temp2],  4(%[in])                      \n\t"
     "lh       %[temp6],  12(%[in])                     \n\t"
@@ -261,49 +259,43 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
     "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
     "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
-    "mul      %[temp17], %[temp13], %[kC1]             \n\t"
-    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp17, temp13)
+    MUL_SHIFT_C1_IO(temp5, temp19)
     "mul      %[temp13], %[temp13], %[kC2]             \n\t"
     "sra      %[temp9],  %[temp9],  16                 \n\t"
-    "sra      %[temp17], %[temp17], 16                 \n\t"
     "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
-    "sra      %[temp5],  %[temp5],  16                 \n\t"
     "sra      %[temp13], %[temp13], 16                 \n\t"
     "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
     "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
     "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
-    "mul      %[temp17], %[temp14], %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp17, temp14)
     "mul      %[temp14], %[temp14], %[kC2]             \n\t"
     "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
     "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
     "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
     "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
     "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
-    "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
-    "sra      %[temp17], %[temp17], 16                 \n\t"
+    MUL_SHIFT_C1_IO(temp6, temp19)
     "sra      %[temp14], %[temp14], 16                 \n\t"
     "sra      %[temp10], %[temp10], 16                 \n\t"
-    "sra      %[temp6],  %[temp6],  16                 \n\t"
     "subu     %[temp17], %[temp10], %[temp17]          \n\t"
     "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
     "addu     %[temp10], %[temp16], %[temp6]           \n\t"
     "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
     "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
     "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
-    "mul      %[temp17], %[temp15], %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp17, temp15)
     "mul      %[temp15], %[temp15], %[kC2]             \n\t"
     "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
     "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
     "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
-    "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
+    MUL_SHIFT_C1_IO(temp7, temp19)
     "addiu    %[temp8],  %[temp8],  4                  \n\t"
     "addiu    %[temp12], %[temp12], 4                  \n\t"
     "addiu    %[temp0],  %[temp0],  4                  \n\t"
     "addiu    %[temp4],  %[temp4],  4                  \n\t"
-    "sra      %[temp17], %[temp17], 16                 \n\t"
     "sra      %[temp15], %[temp15], 16                 \n\t"
     "sra      %[temp11], %[temp11], 16                 \n\t"
-    "sra      %[temp7],  %[temp7],  16                 \n\t"
     "subu     %[temp17], %[temp11], %[temp17]          \n\t"
     "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
     "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
@@ -313,48 +305,40 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     "addu     %[temp16], %[temp8],  %[temp10]          \n\t"
     "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
     "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
-    "mul      %[temp17], %[temp11], %[kC1]             \n\t"
-    "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp17, temp11)
+    MUL_SHIFT_C1_IO(temp9, temp19)
     "mul      %[temp11], %[temp11], %[kC2]             \n\t"
     "sra      %[temp10], %[temp10], 16                 \n\t"
-    "sra      %[temp17], %[temp17], 16                 \n\t"
-    "sra      %[temp9],  %[temp9],  16                 \n\t"
     "sra      %[temp11], %[temp11], 16                 \n\t"
     "subu     %[temp17], %[temp10], %[temp17]          \n\t"
     "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
     "addu     %[temp10], %[temp12], %[temp14]          \n\t"
     "subu     %[temp12], %[temp12], %[temp14]          \n\t"
     "mul      %[temp14], %[temp13], %[kC2]             \n\t"
-    "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
-    "mul      %[temp13], %[temp13], %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp9, temp15)
+    MUL_SHIFT_C1_IO(temp13, temp19)
     "mul      %[temp15], %[temp15], %[kC2]             \n\t"
     "sra      %[temp14], %[temp14], 16                 \n\t"
-    "sra      %[temp9],  %[temp9],  16                 \n\t"
-    "sra      %[temp13], %[temp13], 16                 \n\t"
     "sra      %[temp15], %[temp15], 16                 \n\t"
     "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
     "addu     %[temp15], %[temp13], %[temp15]          \n\t"
     "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
     "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
     "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
-    "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
-    "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp13, temp3)
+    MUL_SHIFT_C1_IO(temp1, temp19)
     "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
     "sra      %[temp2],  %[temp2],  16                 \n\t"
-    "sra      %[temp13], %[temp13], 16                 \n\t"
-    "sra      %[temp1],  %[temp1],  16                 \n\t"
     "sra      %[temp3],  %[temp3],  16                 \n\t"
     "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
     "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
     "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
     "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
     "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
-    "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
-    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    MUL_SHIFT_C1(temp1, temp7)
+    MUL_SHIFT_C1_IO(temp5, temp19)
     "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
     "sra      %[temp6],  %[temp6],  16                 \n\t"
-    "sra      %[temp1],  %[temp1],  16                 \n\t"
-    "sra      %[temp5],  %[temp5],  16                 \n\t"
     "sra      %[temp7],  %[temp7],  16                 \n\t"
     "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
     "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
@@ -542,7 +526,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
       [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
       [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
       [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
-      [temp18]"=&r"(temp18)
+      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19)
     : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
     : "memory", "hi", "lo"
   );
diff --git a/src/3rdparty/libwebp/src/dsp/dec_mips_dsp_r2.c b/src/3rdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
index b0936bc..0ba706a 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_mips_dsp_r2.c
@@ -18,10 +18,8 @@
 
 #include "src/dsp/mips_macro.h"
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
-
-#define MUL(a, b) (((a) * (b)) >> 16)
+static const int kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 
 static void TransformDC(const int16_t* in, uint8_t* dst) {
   int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
@@ -49,10 +47,10 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
 
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
   const int a = in[0] + 4;
-  int c4 = MUL(in[4], kC2);
-  const int d4 = MUL(in[4], kC1);
-  const int c1 = MUL(in[1], kC2);
-  const int d1 = MUL(in[1], kC1);
+  int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
+  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
+  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
+  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
   int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
   int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
 
@@ -479,8 +477,6 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
   FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
 }
 
-#undef MUL
-
 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
diff --git a/src/3rdparty/libwebp/src/dsp/dec_msa.c b/src/3rdparty/libwebp/src/dsp/dec_msa.c
index 8090622..58d1730 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_msa.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_msa.c
@@ -37,8 +37,6 @@
   d1_m = d_tmp1_m + d_tmp2_m;                                    \
   BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3);   \
 }
-#define MULT1(a) ((((a) * 20091) >> 16) + (a))
-#define MULT2(a) (((a) * 35468) >> 16)
 
 static void TransformOne(const int16_t* in, uint8_t* dst) {
   v8i16 input0, input1;
@@ -124,10 +122,10 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
 
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
   const int a = in[0] + 4;
-  const int c4 = MULT2(in[4]);
-  const int d4 = MULT1(in[4]);
-  const int in2 = MULT2(in[1]);
-  const int in3 = MULT1(in[1]);
+  const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
+  const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
+  const int in2 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
+  const int in3 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
   v4i32 tmp0 = { 0 };
   v4i32 out0 = __msa_fill_w(a + d4);
   v4i32 out1 = __msa_fill_w(a + c4);
diff --git a/src/3rdparty/libwebp/src/dsp/dec_neon.c b/src/3rdparty/libwebp/src/dsp/dec_neon.c
index 22784cf..83b3a1f 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_neon.c
@@ -1000,8 +1000,9 @@ static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
 // libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
 // same issue with kC1 and vqdmulh that we work around by down shifting kC2
 
-static const int16_t kC1 = 20091;
-static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
+static const int16_t kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int16_t kC2 =
+    WEBP_TRANSFORM_AC3_C2 / 2;  // half of kC2, actually. See comment above.
 
 #if defined(WEBP_USE_INTRINSICS)
 static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
@@ -1255,15 +1256,12 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
 
 //------------------------------------------------------------------------------
 
-#define MUL(a, b) (((a) * (b)) >> 16)
 static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
-  static const int kC1_full = 20091 + (1 << 16);
-  static const int kC2_full = 35468;
   const int16x4_t A = vld1_dup_s16(in);
-  const int16x4_t c4 = vdup_n_s16(MUL(in[4], kC2_full));
-  const int16x4_t d4 = vdup_n_s16(MUL(in[4], kC1_full));
-  const int c1 = MUL(in[1], kC2_full);
-  const int d1 = MUL(in[1], kC1_full);
+  const int16x4_t c4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
+  const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
+  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
+  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
   const uint64_t cd = (uint64_t)( d1 & 0xffff) <<  0 |
                       (uint64_t)( c1 & 0xffff) << 16 |
                       (uint64_t)(-c1 & 0xffff) << 32 |
@@ -1274,7 +1272,6 @@ static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
   const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4));
   Add4x4_NEON(m0_m1, m2_m3, dst);
 }
-#undef MUL
 
 //------------------------------------------------------------------------------
 // 4x4
diff --git a/src/3rdparty/libwebp/src/dsp/dec_sse2.c b/src/3rdparty/libwebp/src/dsp/dec_sse2.c
index 01e6bcb..ff3a285 100644
--- a/src/3rdparty/libwebp/src/dsp/dec_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/dec_sse2.c
@@ -196,15 +196,13 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
 }
 
 #if (USE_TRANSFORM_AC3 == 1)
-#define MUL(a, b) (((a) * (b)) >> 16)
+
 static void TransformAC3(const int16_t* in, uint8_t* dst) {
-  static const int kC1 = 20091 + (1 << 16);
-  static const int kC2 = 35468;
   const __m128i A = _mm_set1_epi16(in[0] + 4);
-  const __m128i c4 = _mm_set1_epi16(MUL(in[4], kC2));
-  const __m128i d4 = _mm_set1_epi16(MUL(in[4], kC1));
-  const int c1 = MUL(in[1], kC2);
-  const int d1 = MUL(in[1], kC1);
+  const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
+  const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
+  const int c1 = WEBP_TRANSFORM_AC3_MUL2(in[1]);
+  const int d1 = WEBP_TRANSFORM_AC3_MUL1(in[1]);
   const __m128i CD = _mm_set_epi16(0, 0, 0, 0, -d1, -c1, c1, d1);
   const __m128i B = _mm_adds_epi16(A, CD);
   const __m128i m0 = _mm_adds_epi16(B, d4);
@@ -238,7 +236,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
   WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
   WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
 }
-#undef MUL
+
 #endif   // USE_TRANSFORM_AC3
 
 //------------------------------------------------------------------------------
@@ -259,15 +257,15 @@ static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
   *x = _mm_packs_epi16(lo_1, hi_1);
 }
 
-#define FLIP_SIGN_BIT2(a, b) {                                                 \
+#define FLIP_SIGN_BIT2(a, b) do {                                              \
   (a) = _mm_xor_si128(a, sign_bit);                                            \
   (b) = _mm_xor_si128(b, sign_bit);                                            \
-}
+} while (0)
 
-#define FLIP_SIGN_BIT4(a, b, c, d) {                                           \
+#define FLIP_SIGN_BIT4(a, b, c, d) do {                                        \
   FLIP_SIGN_BIT2(a, b);                                                        \
   FLIP_SIGN_BIT2(c, d);                                                        \
-}
+} while (0)
 
 // input/output is uint8_t
 static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
@@ -645,12 +643,12 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
   (m) = _mm_max_epu8(m, MM_ABS(p2, p1));                                       \
 } while (0)
 
-#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) do {                          \
   (e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]);                        \
   (e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]);                        \
   (e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]);                        \
   (e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]);                        \
-}
+} while (0)
 
 #define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
   const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
@@ -658,18 +656,18 @@ static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
   (p) = _mm_unpacklo_epi64(U, V);                                              \
 } while (0)
 
-#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) do {                     \
   LOADUV_H_EDGE(e1, u, v, 0 * (stride));                                       \
   LOADUV_H_EDGE(e2, u, v, 1 * (stride));                                       \
   LOADUV_H_EDGE(e3, u, v, 2 * (stride));                                       \
   LOADUV_H_EDGE(e4, u, v, 3 * (stride));                                       \
-}
+} while (0)
 
-#define STOREUV(p, u, v, stride) {                                             \
+#define STOREUV(p, u, v, stride) do {                                          \
   _mm_storel_epi64((__m128i*)&(u)[(stride)], p);                               \
   (p) = _mm_srli_si128(p, 8);                                                  \
   _mm_storel_epi64((__m128i*)&(v)[(stride)], p);                               \
-}
+} while (0)
 
 static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
                                          const __m128i* const p0,
diff --git a/src/3rdparty/libwebp/src/dsp/dsp.h b/src/3rdparty/libwebp/src/dsp/dsp.h
index d2000b8..23bc296 100644
--- a/src/3rdparty/libwebp/src/dsp/dsp.h
+++ b/src/3rdparty/libwebp/src/dsp/dsp.h
@@ -203,6 +203,11 @@ extern VP8DecIdct VP8TransformDC;
 extern VP8DecIdct VP8TransformDCUV;
 extern VP8WHT VP8TransformWHT;
 
+#define WEBP_TRANSFORM_AC3_C1 20091
+#define WEBP_TRANSFORM_AC3_C2 35468
+#define WEBP_TRANSFORM_AC3_MUL1(a) ((((a) * WEBP_TRANSFORM_AC3_C1) >> 16) + (a))
+#define WEBP_TRANSFORM_AC3_MUL2(a) (((a) * WEBP_TRANSFORM_AC3_C2) >> 16)
+
 // *dst is the destination block, with stride BPS. Boundary samples are
 // assumed accessible when needed.
 typedef void (*VP8PredFunc)(uint8_t* dst);
diff --git a/src/3rdparty/libwebp/src/dsp/enc.c b/src/3rdparty/libwebp/src/dsp/enc.c
index 2ba97ba..395ad05 100644
--- a/src/3rdparty/libwebp/src/dsp/enc.c
+++ b/src/3rdparty/libwebp/src/dsp/enc.c
@@ -109,10 +109,6 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
 #define STORE(x, y, v) \
   dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
-#define MUL(a, b) (((a) * (b)) >> 16)
-
 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
                                       uint8_t* dst) {
   int C[4 * 4], *tmp;
@@ -121,8 +117,10 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   for (i = 0; i < 4; ++i) {    // vertical pass
     const int a = in[0] + in[8];
     const int b = in[0] - in[8];
-    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
-    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
+    const int c =
+        WEBP_TRANSFORM_AC3_MUL2(in[4]) - WEBP_TRANSFORM_AC3_MUL1(in[12]);
+    const int d =
+        WEBP_TRANSFORM_AC3_MUL1(in[4]) + WEBP_TRANSFORM_AC3_MUL2(in[12]);
     tmp[0] = a + d;
     tmp[1] = b + c;
     tmp[2] = b - c;
@@ -134,10 +132,12 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
   tmp = C;
   for (i = 0; i < 4; ++i) {    // horizontal pass
     const int dc = tmp[0] + 4;
-    const int a =  dc +  tmp[8];
-    const int b =  dc -  tmp[8];
-    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
-    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    const int a = dc + tmp[8];
+    const int b = dc - tmp[8];
+    const int c =
+        WEBP_TRANSFORM_AC3_MUL2(tmp[4]) - WEBP_TRANSFORM_AC3_MUL1(tmp[12]);
+    const int d =
+        WEBP_TRANSFORM_AC3_MUL1(tmp[4]) + WEBP_TRANSFORM_AC3_MUL2(tmp[12]);
     STORE(0, i, a + d);
     STORE(1, i, b + c);
     STORE(2, i, b - c);
@@ -222,7 +222,6 @@ static void FTransformWHT_C(const int16_t* in, int16_t* out) {
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
 
-#undef MUL
 #undef STORE
 
 //------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/dsp/enc_mips32.c b/src/3rdparty/libwebp/src/dsp/enc_mips32.c
index 618f0fc..50518a5 100644
--- a/src/3rdparty/libwebp/src/dsp/enc_mips32.c
+++ b/src/3rdparty/libwebp/src/dsp/enc_mips32.c
@@ -21,8 +21,8 @@
 #include "src/enc/vp8i_enc.h"
 #include "src/enc/cost_enc.h"
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
+static const int kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 
 // macro for one vertical pass in ITransformOne
 // MUL macro inlined
@@ -30,7 +30,7 @@ static const int kC2 = 35468;
 // A..D - offsets in bytes to load from in buffer
 // TEMP0..TEMP3 - registers for corresponding tmp elements
 // TEMP4..TEMP5 - temporary registers
-#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3)        \
+#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
   "lh      %[temp16],      " #A "(%[temp20])                 \n\t"          \
   "lh      %[temp18],      " #B "(%[temp20])                 \n\t"          \
   "lh      %[temp17],      " #C "(%[temp20])                 \n\t"          \
@@ -38,12 +38,10 @@ static const int kC2 = 35468;
   "addu    %[" #TEMP4 "],    %[temp16],      %[temp18]       \n\t"          \
   "subu    %[temp16],      %[temp16],      %[temp18]         \n\t"          \
   "mul     %[" #TEMP0 "],    %[temp17],      %[kC2]          \n\t"          \
-  "mul     %[temp18],      %[temp19],      %[kC1]            \n\t"          \
-  "mul     %[temp17],      %[temp17],      %[kC1]            \n\t"          \
+  MUL_SHIFT_C1_IO(temp17, temp18)                                           \
+  MUL_SHIFT_C1(temp18, temp19)                                              \
   "mul     %[temp19],      %[temp19],      %[kC2]            \n\t"          \
   "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16            \n\n"          \
-  "sra     %[temp18],      %[temp18],      16                \n\n"          \
-  "sra     %[temp17],      %[temp17],      16                \n\n"          \
   "sra     %[temp19],      %[temp19],      16                \n\n"          \
   "subu    %[" #TEMP2 "],    %[" #TEMP0 "],    %[temp18]     \n\t"          \
   "addu    %[" #TEMP3 "],    %[temp17],      %[temp19]       \n\t"          \
@@ -58,17 +56,15 @@ static const int kC2 = 35468;
 // temp0..temp15 holds tmp[0]..tmp[15]
 // A - offset in bytes to load from ref and store to dst buffer
 // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
-#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12)                       \
+#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
   "addiu   %[" #TEMP0 "],    %[" #TEMP0 "],    4               \n\t"          \
   "addu    %[temp16],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
   "subu    %[temp17],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
   "mul     %[" #TEMP0 "],    %[" #TEMP4 "],    %[kC2]          \n\t"          \
-  "mul     %[" #TEMP8 "],    %[" #TEMP12 "],   %[kC1]          \n\t"          \
-  "mul     %[" #TEMP4 "],    %[" #TEMP4 "],    %[kC1]          \n\t"          \
+  MUL_SHIFT_C1_IO(TEMP4, TEMP8)                                               \
+  MUL_SHIFT_C1(TEMP8, TEMP12)                                                 \
   "mul     %[" #TEMP12 "],   %[" #TEMP12 "],   %[kC2]          \n\t"          \
   "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16              \n\t"          \
-  "sra     %[" #TEMP8 "],    %[" #TEMP8 "],    16              \n\t"          \
-  "sra     %[" #TEMP4 "],    %[" #TEMP4 "],    16              \n\t"          \
   "sra     %[" #TEMP12 "],   %[" #TEMP12 "],   16              \n\t"          \
   "subu    %[temp18],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
   "addu    %[temp19],      %[" #TEMP4 "],    %[" #TEMP12 "]    \n\t"          \
diff --git a/src/3rdparty/libwebp/src/dsp/enc_mips_dsp_r2.c b/src/3rdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
index 9ddd895..e1431f3 100644
--- a/src/3rdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
+++ b/src/3rdparty/libwebp/src/dsp/enc_mips_dsp_r2.c
@@ -20,8 +20,8 @@
 #include "src/enc/cost_enc.h"
 #include "src/enc/vp8i_enc.h"
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
+static const int kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int kC2 = WEBP_TRANSFORM_AC3_C2;
 
 // O - output
 // I - input (macro doesn't change it)
diff --git a/src/3rdparty/libwebp/src/dsp/enc_neon.c b/src/3rdparty/libwebp/src/dsp/enc_neon.c
index 7148003..6f641c9 100644
--- a/src/3rdparty/libwebp/src/dsp/enc_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/enc_neon.c
@@ -27,8 +27,9 @@
 // This code is pretty much the same as TransformOne in the dec_neon.c, except
 // for subtraction to *ref. See the comments there for algorithmic explanations.
 
-static const int16_t kC1 = 20091;
-static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
+static const int16_t kC1 = WEBP_TRANSFORM_AC3_C1;
+static const int16_t kC2 =
+    WEBP_TRANSFORM_AC3_C2 / 2;  // half of kC2, actually. See comment above.
 
 // This code works but is *slower* than the inlined-asm version below
 // (with gcc-4.6). So we disable it for now. Later, it'll be conditional to
diff --git a/src/3rdparty/libwebp/src/dsp/filters.c b/src/3rdparty/libwebp/src/dsp/filters.c
index 85eee50..c9232ff 100644
--- a/src/3rdparty/libwebp/src/dsp/filters.c
+++ b/src/3rdparty/libwebp/src/dsp/filters.c
@@ -19,14 +19,16 @@
 //------------------------------------------------------------------------------
 // Helpful macro.
 
-# define SANITY_CHECK(in, out)                                                 \
-  assert((in) != NULL);                                                        \
-  assert((out) != NULL);                                                       \
-  assert(width > 0);                                                           \
-  assert(height > 0);                                                          \
-  assert(stride >= width);                                                     \
-  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
-  (void)height;  // Silence unused warning.
+#define DCHECK(in, out)                                                        \
+  do {                                                                         \
+    assert((in) != NULL);                                                      \
+    assert((out) != NULL);                                                     \
+    assert(width > 0);                                                         \
+    assert(height > 0);                                                        \
+    assert(stride >= width);                                                   \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
+    (void)height;  /* Silence unused warning. */                               \
+  } while (0)
 
 #if !WEBP_NEON_OMIT_C_CODE
 static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
@@ -49,7 +51,7 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = inverse ? out : in;
@@ -86,7 +88,7 @@ static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = inverse ? out : in;
@@ -131,7 +133,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = inverse ? out : in;
@@ -165,7 +167,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
 }
 #endif  // !WEBP_NEON_OMIT_C_CODE
 
-#undef SANITY_CHECK
+#undef DCHECK
 
 //------------------------------------------------------------------------------
 
@@ -189,6 +191,12 @@ static void GradientFilter_C(const uint8_t* data, int width, int height,
 
 //------------------------------------------------------------------------------
 
+static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
+                           uint8_t* out, int width) {
+  (void)prev;
+  if (out != in) memcpy(out, in, width * sizeof(*out));
+}
+
 static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
                                  uint8_t* out, int width) {
   uint8_t pred = (prev == NULL) ? 0 : prev[0];
@@ -240,7 +248,7 @@ extern void VP8FiltersInitNEON(void);
 extern void VP8FiltersInitSSE2(void);
 
 WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
-  WebPUnfilters[WEBP_FILTER_NONE] = NULL;
+  WebPUnfilters[WEBP_FILTER_NONE] = NoneUnfilter_C;
 #if !WEBP_NEON_OMIT_C_CODE
   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
@@ -279,6 +287,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
   }
 #endif
 
+  assert(WebPUnfilters[WEBP_FILTER_NONE] != NULL);
   assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
   assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
   assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
diff --git a/src/3rdparty/libwebp/src/dsp/filters_mips_dsp_r2.c b/src/3rdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
index 9382b12..eca866f 100644
--- a/src/3rdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
+++ b/src/3rdparty/libwebp/src/dsp/filters_mips_dsp_r2.c
@@ -24,14 +24,16 @@
 //------------------------------------------------------------------------------
 // Helpful macro.
 
-# define SANITY_CHECK(in, out)                                                 \
-  assert(in != NULL);                                                          \
-  assert(out != NULL);                                                         \
-  assert(width > 0);                                                           \
-  assert(height > 0);                                                          \
-  assert(stride >= width);                                                     \
-  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
-  (void)height;  // Silence unused warning.
+#define DCHECK(in, out)                                                        \
+  do {                                                                         \
+    assert(in != NULL);                                                        \
+    assert(out != NULL);                                                       \
+    assert(width > 0);                                                         \
+    assert(height > 0);                                                        \
+    assert(stride >= width);                                                   \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
+    (void)height;  /* Silence unused warning. */                               \
+  } while (0)
 
 #define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do {                        \
     const uint8_t* psrc = (uint8_t*)(SRC);                                     \
@@ -200,7 +202,7 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = in;
@@ -248,7 +250,7 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = in;
@@ -316,7 +318,7 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
   const uint8_t* preds;
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
   preds = in;
@@ -378,7 +380,7 @@ static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
 #undef DO_PREDICT_LINE_VERTICAL
 #undef PREDICT_LINE_ONE_PASS
 #undef DO_PREDICT_LINE
-#undef SANITY_CHECK
+#undef DCHECK
 
 //------------------------------------------------------------------------------
 // Entry point
diff --git a/src/3rdparty/libwebp/src/dsp/filters_msa.c b/src/3rdparty/libwebp/src/dsp/filters_msa.c
index 14c437d..33a1b20 100644
--- a/src/3rdparty/libwebp/src/dsp/filters_msa.c
+++ b/src/3rdparty/libwebp/src/dsp/filters_msa.c
@@ -56,12 +56,14 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
 //------------------------------------------------------------------------------
 // Helpful macro.
 
-#define SANITY_CHECK(in, out)  \
-  assert(in != NULL);          \
-  assert(out != NULL);         \
-  assert(width > 0);           \
-  assert(height > 0);          \
-  assert(stride >= width);
+#define DCHECK(in, out)        \
+  do {                         \
+    assert(in != NULL);        \
+    assert(out != NULL);       \
+    assert(width > 0);         \
+    assert(height > 0);        \
+    assert(stride >= width);   \
+  } while (0)
 
 //------------------------------------------------------------------------------
 // Horrizontal filter
@@ -72,7 +74,7 @@ static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
   const uint8_t* in = data;
   uint8_t* out = filtered_data;
   int row = 1;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
 
   // Leftmost pixel is the same as input for topmost scanline.
   out[0] = in[0];
@@ -135,7 +137,7 @@ static void GradientFilter_MSA(const uint8_t* data, int width, int height,
   const uint8_t* preds = data;
   uint8_t* out = filtered_data;
   int row = 1;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
 
   // left prediction for top scan-line
   out[0] = in[0];
@@ -163,7 +165,7 @@ static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
   const uint8_t* preds = data;
   uint8_t* out = filtered_data;
   int row = 1;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
 
   // Very first top-left pixel is copied.
   out[0] = in[0];
@@ -182,7 +184,7 @@ static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
   }
 }
 
-#undef SANITY_CHECK
+#undef DCHECK
 
 //------------------------------------------------------------------------------
 // Entry point
diff --git a/src/3rdparty/libwebp/src/dsp/filters_neon.c b/src/3rdparty/libwebp/src/dsp/filters_neon.c
index 3e6a578..b49e515 100644
--- a/src/3rdparty/libwebp/src/dsp/filters_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/filters_neon.c
@@ -21,14 +21,16 @@
 //------------------------------------------------------------------------------
 // Helpful macros.
 
-# define SANITY_CHECK(in, out)                                                 \
-  assert(in != NULL);                                                          \
-  assert(out != NULL);                                                         \
-  assert(width > 0);                                                           \
-  assert(height > 0);                                                          \
-  assert(stride >= width);                                                     \
-  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
-  (void)height;  // Silence unused warning.
+#define DCHECK(in, out)                                                        \
+  do {                                                                         \
+    assert(in != NULL);                                                        \
+    assert(out != NULL);                                                       \
+    assert(width > 0);                                                         \
+    assert(height > 0);                                                        \
+    assert(stride >= width);                                                   \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
+    (void)height;  /* Silence unused warning. */                               \
+  } while (0)
 
 // load eight u8 and widen to s16
 #define U8_TO_S16(A) vreinterpretq_s16_u16(vmovl_u8(A))
@@ -71,7 +73,7 @@ static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
                                                 uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -110,7 +112,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
                                               uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -172,7 +174,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
                                               uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -201,7 +203,7 @@ static void GradientFilter_NEON(const uint8_t* data, int width, int height,
                         filtered_data);
 }
 
-#undef SANITY_CHECK
+#undef DCHECK
 
 //------------------------------------------------------------------------------
 // Inverse transforms
diff --git a/src/3rdparty/libwebp/src/dsp/filters_sse2.c b/src/3rdparty/libwebp/src/dsp/filters_sse2.c
index 5c33ec1..bb4b5d5 100644
--- a/src/3rdparty/libwebp/src/dsp/filters_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/filters_sse2.c
@@ -23,14 +23,16 @@
 //------------------------------------------------------------------------------
 // Helpful macro.
 
-# define SANITY_CHECK(in, out)                                                 \
-  assert((in) != NULL);                                                        \
-  assert((out) != NULL);                                                       \
-  assert(width > 0);                                                           \
-  assert(height > 0);                                                          \
-  assert(stride >= width);                                                     \
-  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
-  (void)height;  // Silence unused warning.
+#define DCHECK(in, out)                                                        \
+  do {                                                                         \
+    assert((in) != NULL);                                                      \
+    assert((out) != NULL);                                                     \
+    assert(width > 0);                                                         \
+    assert(height > 0);                                                        \
+    assert(stride >= width);                                                   \
+    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
+    (void)height;  /* Silence unused warning. */                               \
+  } while (0)
 
 static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
                                 uint8_t* dst, int length) {
@@ -78,7 +80,7 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
                                                 uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -111,7 +113,7 @@ static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
                                               uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -174,7 +176,7 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
                                               uint8_t* out) {
   const size_t start_offset = row * stride;
   const int last_row = row + num_rows;
-  SANITY_CHECK(in, out);
+  DCHECK(in, out);
   in += start_offset;
   out += start_offset;
 
@@ -197,7 +199,7 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
   }
 }
 
-#undef SANITY_CHECK
+#undef DCHECK
 
 //------------------------------------------------------------------------------
 
diff --git a/src/3rdparty/libwebp/src/dsp/lossless.h b/src/3rdparty/libwebp/src/dsp/lossless.h
index de60d95..0bf10a1 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless.h
+++ b/src/3rdparty/libwebp/src/dsp/lossless.h
@@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.
 
-typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
-typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
-                                      int length);
+typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                         int length);
 typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
                                                 const int Y[256]);
 
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_common.h b/src/3rdparty/libwebp/src/dsp/lossless_common.h
index 6a2f736..d6139b2 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_common.h
+++ b/src/3rdparty/libwebp/src/dsp/lossless_common.h
@@ -16,9 +16,9 @@
 #ifndef WEBP_DSP_LOSSLESS_COMMON_H_
 #define WEBP_DSP_LOSSLESS_COMMON_H_
 
-#include "src/webp/types.h"
-
+#include "src/dsp/cpu.h"
 #include "src/utils/utils.h"
+#include "src/webp/types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -166,7 +166,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
 }
 
 //------------------------------------------------------------------------------
-// Transform-related functions use din both encoding and decoding.
+// Transform-related functions used in both encoding and decoding.
 
 // Macros used to create a batch predictor that iteratively uses a
 // one-pixel predictor.
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc.c b/src/3rdparty/libwebp/src/dsp/lossless_enc.c
index cde1280..997d56c 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_enc.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_enc.c
@@ -636,20 +636,25 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
 
 //------------------------------------------------------------------------------
 
-static float ExtraCost_C(const uint32_t* population, int length) {
+static uint32_t ExtraCost_C(const uint32_t* population, int length) {
   int i;
-  float cost = 0.f;
-  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
+  uint32_t cost = population[4] + population[5];
+  assert(length % 2 == 0);
+  for (i = 2; i < length / 2 - 1; ++i) {
+    cost += i * (population[2 * i + 2] + population[2 * i + 3]);
+  }
   return cost;
 }
 
-static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
-                                  int length) {
+static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
+                                    int length) {
   int i;
-  float cost = 0.f;
-  for (i = 2; i < length - 2; ++i) {
-    const int xy = X[i + 2] + Y[i + 2];
-    cost += (i >> 1) * xy;
+  uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
+  assert(length % 2 == 0);
+  for (i = 2; i < length / 2 - 1; ++i) {
+    const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
+    const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
+    cost += i * (xy0 + xy1);
   }
   return cost;
 }
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
index 639f786..e10f12d 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
 //     cost += i * *(pop + 1);
 //     pop += 2;
 //   }
-//   return (float)cost;
-static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
+//   return cost;
+static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
   int i, temp0, temp1;
   const uint32_t* pop = &population[4];
   const uint32_t* const LoopEnd = &population[length];
@@ -130,7 +130,7 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
     : "memory", "hi", "lo"
   );
 
-  return (float)((int64_t)temp0 << 32 | temp1);
+  return ((int64_t)temp0 << 32 | temp1);
 }
 
 // C version of this function:
@@ -148,9 +148,9 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
 //     pX += 2;
 //     pY += 2;
 //   }
-//   return (float)cost;
-static float ExtraCostCombined_MIPS32(const uint32_t* const X,
-                                      const uint32_t* const Y, int length) {
+//   return cost;
+static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
+                                         const uint32_t* const Y, int length) {
   int i, temp0, temp1, temp2, temp3;
   const uint32_t* pX = &X[4];
   const uint32_t* pY = &Y[4];
@@ -183,7 +183,7 @@ static float ExtraCostCombined_MIPS32(const uint32_t* const X,
     : "memory", "hi", "lo"
   );
 
-  return (float)((int64_t)temp0 << 32 | temp1);
+  return ((int64_t)temp0 << 32 | temp1);
 }
 
 #define HUFFMAN_COST_PASS                                 \
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
index ad358a6..7ab83c2 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
@@ -18,8 +18,53 @@
 #include <smmintrin.h>
 #include "src/dsp/lossless.h"
 
-// For sign-extended multiplying constants, pre-shifted by 5:
-#define CST_5b(X)  (((int16_t)((uint16_t)(X) << 8)) >> 5)
+//------------------------------------------------------------------------------
+// Cost operations.
+
+static WEBP_INLINE uint32_t HorizontalSum_SSE41(__m128i cost) {
+  cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 8));
+  cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 4));
+  return _mm_cvtsi128_si32(cost);
+}
+
+static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
+  int i;
+  __m128i cost = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]);
+  assert(length % 8 == 0);
+
+  for (i = 8; i + 8 <= length; i += 8) {
+    const int j = (i - 2) >> 1;
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
+    const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
+    const __m128i a2 = _mm_hadd_epi32(a0, a1);
+    const __m128i mul = _mm_mullo_epi32(a2, w);
+    cost = _mm_add_epi32(mul, cost);
+  }
+  return HorizontalSum_SSE41(cost);
+}
+
+static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
+                                        const uint32_t* const b, int length) {
+  int i;
+  __m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
+                               _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
+  assert(length % 8 == 0);
+
+  for (i = 8; i + 8 <= length; i += 8) {
+    const int j = (i - 2) >> 1;
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
+    const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
+    const __m128i a2 = _mm_hadd_epi32(a0, a1);
+    const __m128i b2 = _mm_hadd_epi32(b0, b1);
+    const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w);
+    cost = _mm_add_epi32(mul, cost);
+  }
+  return HorizontalSum_SSE41(cost);
+}
 
 //------------------------------------------------------------------------------
 // Subtract-Green Transform
@@ -44,6 +89,9 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
 //------------------------------------------------------------------------------
 // Color Transform
 
+// For sign-extended multiplying constants, pre-shifted by 5:
+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
+
 #define MK_CST_16(HI, LO) \
   _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
 
@@ -143,6 +191,8 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
 extern void VP8LEncDspInitSSE41(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
+  VP8LExtraCost = ExtraCost_SSE41;
+  VP8LExtraCostCombined = ExtraCostCombined_SSE41;
   VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
   VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
   VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_neon.c b/src/3rdparty/libwebp/src/dsp/lossless_neon.c
index ddc9b61..e9960db 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_neon.c
@@ -146,9 +146,9 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src,
 #define LOAD_U32P_AS_U8(IN) vreinterpret_u8_u32(vld1_u32((IN)))
 #define LOADQ_U32_AS_U8(IN) vreinterpretq_u8_u32(vdupq_n_u32((IN)))
 #define LOADQ_U32P_AS_U8(IN) vreinterpretq_u8_u32(vld1q_u32((IN)))
-#define GET_U8_AS_U32(IN) vget_lane_u32(vreinterpret_u32_u8((IN)), 0);
-#define GETQ_U8_AS_U32(IN) vgetq_lane_u32(vreinterpretq_u32_u8((IN)), 0);
-#define STOREQ_U8_AS_U32P(OUT, IN) vst1q_u32((OUT), vreinterpretq_u32_u8((IN)));
+#define GET_U8_AS_U32(IN) vget_lane_u32(vreinterpret_u32_u8((IN)), 0)
+#define GETQ_U8_AS_U32(IN) vgetq_lane_u32(vreinterpretq_u32_u8((IN)), 0)
+#define STOREQ_U8_AS_U32P(OUT, IN) vst1q_u32((OUT), vreinterpretq_u32_u8((IN)))
 #define ROTATE32_LEFT(L) vextq_u8((L), (L), 12)    // D|C|B|A -> C|B|A|D
 
 static WEBP_INLINE uint8x8_t Average2_u8_NEON(uint32_t a0, uint32_t a1) {
diff --git a/src/3rdparty/libwebp/src/dsp/mips_macro.h b/src/3rdparty/libwebp/src/dsp/mips_macro.h
index 44aba9b..e810d3d 100644
--- a/src/3rdparty/libwebp/src/dsp/mips_macro.h
+++ b/src/3rdparty/libwebp/src/dsp/mips_macro.h
@@ -45,28 +45,38 @@
   "ulw    %[" #O2 "],    " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "])       \n\t"  \
   "ulw    %[" #O3 "],    " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "])       \n\t"
 
+
+// O - output
+// I - input (macro doesn't change it so it should be different from I)
+#define MUL_SHIFT_C1(O, I)                                                     \
+  "mul              %[" #O "],    %[" #I "],    %[kC1]        \n\t"            \
+  "sra              %[" #O "],    %[" #O "],    16            \n\t"            \
+  "addu             %[" #O "],    %[" #O "],    %[" #I "]     \n\t"
+#define MUL_SHIFT_C2(O, I) \
+  "mul              %[" #O "],    %[" #I "],    %[kC2]        \n\t"            \
+  "sra              %[" #O "],    %[" #O "],    16            \n\t"
+
+// Same as #define MUL_SHIFT_C1 but I and O are the same. It stores the
+// intermediary result in TMP.
+#define MUL_SHIFT_C1_IO(IO, TMP)                                               \
+  "mul              %[" #TMP "],  %[" #IO  "], %[kC1]     \n\t"                \
+  "sra              %[" #TMP "],  %[" #TMP "], 16         \n\t"                \
+  "addu             %[" #IO  "],  %[" #TMP "], %[" #IO "] \n\t"
+
 // O - output
 // IO - input/output
 // I - input (macro doesn't change it)
 #define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7,                          \
                       IO0, IO1, IO2, IO3,                                      \
                       I0, I1, I2, I3, I4, I5, I6, I7)                          \
-  "mul              %[" #O0 "],   %[" #I0 "],   %[kC2]        \n\t"            \
-  "mul              %[" #O1 "],   %[" #I0 "],   %[kC1]        \n\t"            \
-  "mul              %[" #O2 "],   %[" #I1 "],   %[kC2]        \n\t"            \
-  "mul              %[" #O3 "],   %[" #I1 "],   %[kC1]        \n\t"            \
-  "mul              %[" #O4 "],   %[" #I2 "],   %[kC2]        \n\t"            \
-  "mul              %[" #O5 "],   %[" #I2 "],   %[kC1]        \n\t"            \
-  "mul              %[" #O6 "],   %[" #I3 "],   %[kC2]        \n\t"            \
-  "mul              %[" #O7 "],   %[" #I3 "],   %[kC1]        \n\t"            \
-  "sra              %[" #O0 "],   %[" #O0 "],   16            \n\t"            \
-  "sra              %[" #O1 "],   %[" #O1 "],   16            \n\t"            \
-  "sra              %[" #O2 "],   %[" #O2 "],   16            \n\t"            \
-  "sra              %[" #O3 "],   %[" #O3 "],   16            \n\t"            \
-  "sra              %[" #O4 "],   %[" #O4 "],   16            \n\t"            \
-  "sra              %[" #O5 "],   %[" #O5 "],   16            \n\t"            \
-  "sra              %[" #O6 "],   %[" #O6 "],   16            \n\t"            \
-  "sra              %[" #O7 "],   %[" #O7 "],   16            \n\t"            \
+  MUL_SHIFT_C2(O0, I0)                                                         \
+  MUL_SHIFT_C1(O1, I0)                                                         \
+  MUL_SHIFT_C2(O2, I1)                                                         \
+  MUL_SHIFT_C1(O3, I1)                                                         \
+  MUL_SHIFT_C2(O4, I2)                                                         \
+  MUL_SHIFT_C1(O5, I2)                                                         \
+  MUL_SHIFT_C2(O6, I3)                                                         \
+  MUL_SHIFT_C1(O7, I3)                                                         \
   "addu             %[" #IO0 "],  %[" #IO0 "],  %[" #I4 "]    \n\t"            \
   "addu             %[" #IO1 "],  %[" #IO1 "],  %[" #I5 "]    \n\t"            \
   "subu             %[" #IO2 "],  %[" #IO2 "],  %[" #I6 "]    \n\t"            \
diff --git a/src/3rdparty/libwebp/src/dsp/msa_macro.h b/src/3rdparty/libwebp/src/dsp/msa_macro.h
index 51f6c64..90adbbc 100644
--- a/src/3rdparty/libwebp/src/dsp/msa_macro.h
+++ b/src/3rdparty/libwebp/src/dsp/msa_macro.h
@@ -73,27 +73,25 @@
 #define ST_UW(...) ST_W(v4u32, __VA_ARGS__)
 #define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
 
-#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME)             \
-  static inline TYPE FUNC_NAME(const void* const psrc) {  \
-    const uint8_t* const psrc_m = (const uint8_t*)psrc;   \
-    TYPE val_m;                                           \
-    asm volatile (                                        \
-      "" #INSTR " %[val_m], %[psrc_m]  \n\t"              \
-      : [val_m] "=r" (val_m)                              \
-      : [psrc_m] "m" (*psrc_m));                          \
-    return val_m;                                         \
+#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME)               \
+  static inline TYPE FUNC_NAME(const void* const psrc) {    \
+    const uint8_t* const psrc_m = (const uint8_t*)psrc;     \
+    TYPE val_m;                                             \
+    __asm__ volatile("" #INSTR " %[val_m], %[psrc_m]  \n\t" \
+                     : [val_m] "=r"(val_m)                  \
+                     : [psrc_m] "m"(*psrc_m));              \
+    return val_m;                                           \
   }
 
 #define MSA_LOAD(psrc, FUNC_NAME)  FUNC_NAME(psrc)
 
-#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME)               \
-  static inline void FUNC_NAME(TYPE val, void* const pdst) { \
-    uint8_t* const pdst_m = (uint8_t*)pdst;                  \
-    TYPE val_m = val;                                        \
-    asm volatile (                                           \
-      " " #INSTR "  %[val_m],  %[pdst_m]  \n\t"              \
-      : [pdst_m] "=m" (*pdst_m)                              \
-      : [val_m] "r" (val_m));                                \
+#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME)                 \
+  static inline void FUNC_NAME(TYPE val, void* const pdst) {   \
+    uint8_t* const pdst_m = (uint8_t*)pdst;                    \
+    TYPE val_m = val;                                          \
+    __asm__ volatile(" " #INSTR "  %[val_m],  %[pdst_m]  \n\t" \
+                     : [pdst_m] "=m"(*pdst_m)                  \
+                     : [val_m] "r"(val_m));                    \
   }
 
 #define MSA_STORE(val, pdst, FUNC_NAME)  FUNC_NAME(val, pdst)
diff --git a/src/3rdparty/libwebp/src/dsp/quant.h b/src/3rdparty/libwebp/src/dsp/quant.h
index bf7734c..dcbc11c 100644
--- a/src/3rdparty/libwebp/src/dsp/quant.h
+++ b/src/3rdparty/libwebp/src/dsp/quant.h
@@ -36,8 +36,9 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
                               int thresh) {
   const int16x8_t tst_ones = vdupq_n_s16(-1);
   uint32x4_t sum = vdupq_n_u32(0);
+  int i;
 
-  for (int i = 0; i < num_blocks; ++i) {
+  for (i = 0; i < num_blocks; ++i) {
     // Set DC to zero.
     const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
     const int16x8_t a_1 = vld1q_s16(levels + 8);
diff --git a/src/3rdparty/libwebp/src/dsp/rescaler_neon.c b/src/3rdparty/libwebp/src/dsp/rescaler_neon.c
index b976a85..957a92d 100644
--- a/src/3rdparty/libwebp/src/dsp/rescaler_neon.c
+++ b/src/3rdparty/libwebp/src/dsp/rescaler_neon.c
@@ -32,7 +32,7 @@
 #define STORE_32x8(SRC0, SRC1, DST) do {                              \
     vst1q_u32((DST) + 0, SRC0);                                       \
     vst1q_u32((DST) + 4, SRC1);                                       \
-} while (0);
+} while (0)
 
 #if (WEBP_RESCALER_RFIX == 32)
 #define MAKE_HALF_CST(C) vdupq_n_s32((int32_t)((C) >> 1))
diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c b/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c
index 08b6d0b..77b4f72 100644
--- a/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c
+++ b/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c
@@ -58,7 +58,7 @@
 } while (0)
 
 // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
-#define UPSAMPLE_32PIXELS(r1, r2, out) {                                       \
+#define UPSAMPLE_32PIXELS(r1, r2, out) do {                                    \
   const __m128i one = _mm_set1_epi8(1);                                        \
   const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]);                 \
   const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]);                 \
@@ -85,7 +85,7 @@
   /* pack the alternate pixels */                                              \
   PACK_AND_STORE(a, b, diag1, diag2, (out) +      0);  /* store top */         \
   PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32);  /* store bottom */      \
-}
+} while (0)
 
 // Turn the macro into a function for reducing code-size when non-critical
 static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
@@ -229,11 +229,11 @@ static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
   }                                                                            \
 }
 
-YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4);
-YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4);
+YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4)
+YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4)
 #if !defined(WEBP_REDUCE_CSP)
-YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3);
-YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3);
+YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3)
+YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3)
 YUV444_FUNC(Yuv444ToArgb_SSE2, VP8YuvToArgb32_SSE2, WebPYuv444ToArgb_C, 4)
 YUV444_FUNC(Yuv444ToRgba4444_SSE2, VP8YuvToRgba444432_SSE2, \
             WebPYuv444ToRgba4444_C, 2)
diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_sse41.c b/src/3rdparty/libwebp/src/dsp/upsampling_sse41.c
index 648d456..e38c88d 100644
--- a/src/3rdparty/libwebp/src/dsp/upsampling_sse41.c
+++ b/src/3rdparty/libwebp/src/dsp/upsampling_sse41.c
@@ -60,7 +60,7 @@
 } while (0)
 
 // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
-#define UPSAMPLE_32PIXELS(r1, r2, out) {                                       \
+#define UPSAMPLE_32PIXELS(r1, r2, out) do {                                    \
   const __m128i one = _mm_set1_epi8(1);                                        \
   const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]);                 \
   const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]);                 \
@@ -87,7 +87,7 @@
   /* pack the alternate pixels */                                              \
   PACK_AND_STORE(a, b, diag1, diag2, (out) +      0);  /* store top */         \
   PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32);  /* store bottom */      \
-}
+} while (0)
 
 // Turn the macro into a function for reducing code-size when non-critical
 static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
@@ -217,8 +217,8 @@ static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
 }
 
 #if !defined(WEBP_REDUCE_CSP)
-YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3);
-YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3);
+YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3)
+YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3)
 #endif  // WEBP_REDUCE_CSP
 
 WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE41(void) {
author	Eirik Aavitsland <eirik.aavitsland@qt.io>	2024-04-15 18:38:04 +0200
committer	Eirik Aavitsland <eirik.aavitsland@qt.io>	2024-04-16 08:03:23 +0200
commit	a5e0b01214ffdbac811b89c6e19f657d3a0cf34b (patch)
tree	c1b62f9f7969971fef48c6801e3cec6a8f9d2292 /src/3rdparty/libwebp/src/dsp
parent	ff565219a7724e13b769f63d3a483167046bd83d (diff)