17 files changed, 715 insertions, 425 deletions
diff --git a/src/3rdparty/libwebp/src/dec/alpha.c b/src/3rdparty/libwebp/src/dec/alpha.c
deleted file mode 100644
index 52216fc..0000000
--- a/src/3rdparty/libwebp/src/dec/alpha.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Alpha-plane decompression.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include <stdlib.h>
-#include "./alphai.h"
-#include "./vp8i.h"
-#include "./vp8li.h"
-#include "../dsp/dsp.h"
-#include "../utils/quant_levels_dec.h"
-#include "../utils/utils.h"
-#include "../webp/format_constants.h"
-
-//------------------------------------------------------------------------------
-// ALPHDecoder object.
-
-ALPHDecoder* ALPHNew(void) {
-  ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
-  return dec;
-}
-
-void ALPHDelete(ALPHDecoder* const dec) {
-  if (dec != NULL) {
-    VP8LDelete(dec->vp8l_dec_);
-    dec->vp8l_dec_ = NULL;
-    WebPSafeFree(dec);
-  }
-}
-
-//------------------------------------------------------------------------------
-// Decoding.
-
-// Initialize alpha decoding by parsing the alpha header and decoding the image
-// header for alpha data stored using lossless compression.
-// Returns false in case of error in alpha header (data too short, invalid
-// compression method or filter, error in lossless header data etc).
-static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
-                    size_t data_size, int width, int height, uint8_t* output) {
-  int ok = 0;
-  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
-  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
-  int rsrv;
-
-  assert(width > 0 && height > 0);
-  assert(data != NULL && output != NULL);
-
-  dec->width_ = width;
-  dec->height_ = height;
-
-  if (data_size <= ALPHA_HEADER_LEN) {
-    return 0;
-  }
-
-  dec->method_ = (data[0] >> 0) & 0x03;
-  dec->filter_ = (data[0] >> 2) & 0x03;
-  dec->pre_processing_ = (data[0] >> 4) & 0x03;
-  rsrv = (data[0] >> 6) & 0x03;
-  if (dec->method_ < ALPHA_NO_COMPRESSION ||
-      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
-      dec->filter_ >= WEBP_FILTER_LAST ||
-      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
-      rsrv != 0) {
-    return 0;
-  }
-
-  if (dec->method_ == ALPHA_NO_COMPRESSION) {
-    const size_t alpha_decoded_size = dec->width_ * dec->height_;
-    ok = (alpha_data_size >= alpha_decoded_size);
-  } else {
-    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
-    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
-  }
-  VP8FiltersInit();
-  return ok;
-}
-
-// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
-// starting from row number 'row'. It assumes that rows up to (row - 1) have
-// already been decoded.
-// Returns false in case of bitstream error.
-static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
-  ALPHDecoder* const alph_dec = dec->alph_dec_;
-  const int width = alph_dec->width_;
-  const int height = alph_dec->height_;
-  WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];
-  uint8_t* const output = dec->alpha_plane_;
-  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
-    const size_t offset = row * width;
-    const size_t num_pixels = num_rows * width;
-    assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);
-    memcpy(dec->alpha_plane_ + offset,
-           dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);
-  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
-    assert(alph_dec->vp8l_dec_ != NULL);
-    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
-      return 0;
-    }
-  }
-
-  if (unfilter_func != NULL) {
-    unfilter_func(width, height, width, row, num_rows, output);
-  }
-
-  if (row + num_rows == dec->pic_hdr_.height_) {
-    dec->is_alpha_decoded_ = 1;
-  }
-  return 1;
-}
-
-//------------------------------------------------------------------------------
-// Main entry point.
-
-const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
-                                      int row, int num_rows) {
-  const int width = dec->pic_hdr_.width_;
-  const int height = dec->pic_hdr_.height_;
-
-  if (row < 0 || num_rows <= 0 || row + num_rows > height) {
-    return NULL;    // sanity check.
-  }
-
-  if (row == 0) {
-    // Initialize decoding.
-    assert(dec->alpha_plane_ != NULL);
-    dec->alph_dec_ = ALPHNew();
-    if (dec->alph_dec_ == NULL) return NULL;
-    if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
-                  width, height, dec->alpha_plane_)) {
-      ALPHDelete(dec->alph_dec_);
-      dec->alph_dec_ = NULL;
-      return NULL;
-    }
-    // if we allowed use of alpha dithering, check whether it's needed at all
-    if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
-      dec->alpha_dithering_ = 0;  // disable dithering
-    } else {
-      num_rows = height;          // decode everything in one pass
-    }
-  }
-
-  if (!dec->is_alpha_decoded_) {
-    int ok = 0;
-    assert(dec->alph_dec_ != NULL);
-    ok = ALPHDecode(dec, row, num_rows);
-    if (ok && dec->alpha_dithering_ > 0) {
-      ok = WebPDequantizeLevels(dec->alpha_plane_, width, height,
-                                dec->alpha_dithering_);
-    }
-    if (!ok || dec->is_alpha_decoded_) {
-      ALPHDelete(dec->alph_dec_);
-      dec->alph_dec_ = NULL;
-    }
-    if (!ok) return NULL;  // Error.
-  }
-
-  // Return a pointer to the current decoded row.
-  return dec->alpha_plane_ + row * width;
-}
diff --git a/src/3rdparty/libwebp/src/dec/alpha_dec.c b/src/3rdparty/libwebp/src/dec/alpha_dec.c
new file mode 100644
index 0000000..83ffd4b
--- /dev/null
+++ b/src/3rdparty/libwebp/src/dec/alpha_dec.c
@@ -0,0 +1,232 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane decompression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./alphai_dec.h"
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "../dsp/dsp.h"
+#include "../utils/quant_levels_dec_utils.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+//------------------------------------------------------------------------------
+// ALPHDecoder object.
+
+// Allocates a new alpha decoder instance.
+static ALPHDecoder* ALPHNew(void) {
+  ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  return dec;
+}
+
+// Clears and deallocates an alpha decoder instance.
+static void ALPHDelete(ALPHDecoder* const dec) {
+  if (dec != NULL) {
+    VP8LDelete(dec->vp8l_dec_);
+    dec->vp8l_dec_ = NULL;
+    WebPSafeFree(dec);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Decoding.
+
+// Initialize alpha decoding by parsing the alpha header and decoding the image
+// header for alpha data stored using lossless compression.
+// Returns false in case of error in alpha header (data too short, invalid
+// compression method or filter, error in lossless header data etc).
+static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
+                    size_t data_size, const VP8Io* const src_io,
+                    uint8_t* output) {
+  int ok = 0;
+  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
+  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
+  int rsrv;
+  VP8Io* const io = &dec->io_;
+
+  assert(data != NULL && output != NULL && src_io != NULL);
+
+  VP8FiltersInit();
+  dec->output_ = output;
+  dec->width_ = src_io->width;
+  dec->height_ = src_io->height;
+  assert(dec->width_ > 0 && dec->height_ > 0);
+
+  if (data_size <= ALPHA_HEADER_LEN) {
+    return 0;
+  }
+
+  dec->method_ = (data[0] >> 0) & 0x03;
+  dec->filter_ = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
+  dec->pre_processing_ = (data[0] >> 4) & 0x03;
+  rsrv = (data[0] >> 6) & 0x03;
+  if (dec->method_ < ALPHA_NO_COMPRESSION ||
+      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
+      dec->filter_ >= WEBP_FILTER_LAST ||
+      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
+      rsrv != 0) {
+    return 0;
+  }
+
+  // Copy the necessary parameters from src_io to io
+  VP8InitIo(io);
+  WebPInitCustomIo(NULL, io);
+  io->opaque = dec;
+  io->width = src_io->width;
+  io->height = src_io->height;
+
+  io->use_cropping = src_io->use_cropping;
+  io->crop_left = src_io->crop_left;
+  io->crop_right = src_io->crop_right;
+  io->crop_top = src_io->crop_top;
+  io->crop_bottom = src_io->crop_bottom;
+  // No need to copy the scaling parameters.
+
+  if (dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t alpha_decoded_size = dec->width_ * dec->height_;
+    ok = (alpha_data_size >= alpha_decoded_size);
+  } else {
+    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size);
+  }
+
+  return ok;
+}
+
+// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
+// starting from row number 'row'. It assumes that rows up to (row - 1) have
+// already been decoded.
+// Returns false in case of bitstream error.
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
+  ALPHDecoder* const alph_dec = dec->alph_dec_;
+  const int width = alph_dec->width_;
+  const int height = alph_dec->io_.crop_bottom;
+  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
+    int y;
+    const uint8_t* prev_line = dec->alpha_prev_line_;
+    const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
+    uint8_t* dst = dec->alpha_plane_ + row * width;
+    assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
+    if (alph_dec->filter_ != WEBP_FILTER_NONE) {
+      assert(WebPUnfilters[alph_dec->filter_] != NULL);
+      for (y = 0; y < num_rows; ++y) {
+        WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
+        prev_line = dst;
+        dst += width;
+        deltas += width;
+      }
+    } else {
+      for (y = 0; y < num_rows; ++y) {
+        memcpy(dst, deltas, width * sizeof(*dst));
+        prev_line = dst;
+        dst += width;
+        deltas += width;
+      }
+    }
+    dec->alpha_prev_line_ = prev_line;
+  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
+    assert(alph_dec->vp8l_dec_ != NULL);
+    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
+      return 0;
+    }
+  }
+
+  if (row + num_rows >= height) {
+    dec->is_alpha_decoded_ = 1;
+  }
+  return 1;
+}
+
+static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) {
+  const int stride = io->width;
+  const int height = io->crop_bottom;
+  const uint64_t alpha_size = (uint64_t)stride * height;
+  assert(dec->alpha_plane_mem_ == NULL);
+  dec->alpha_plane_mem_ =
+      (uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
+  if (dec->alpha_plane_mem_ == NULL) {
+    return 0;
+  }
+  dec->alpha_plane_ = dec->alpha_plane_mem_;
+  dec->alpha_prev_line_ = NULL;
+  return 1;
+}
+
+void WebPDeallocateAlphaMemory(VP8Decoder* const dec) {
+  assert(dec != NULL);
+  WebPSafeFree(dec->alpha_plane_mem_);
+  dec->alpha_plane_mem_ = NULL;
+  dec->alpha_plane_ = NULL;
+  ALPHDelete(dec->alph_dec_);
+  dec->alph_dec_ = NULL;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point.
+
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      const VP8Io* const io,
+                                      int row, int num_rows) {
+  const int width = io->width;
+  const int height = io->crop_bottom;
+
+  assert(dec != NULL && io != NULL);
+
+  if (row < 0 || num_rows <= 0 || row + num_rows > height) {
+    return NULL;    // sanity check.
+  }
+
+  if (!dec->is_alpha_decoded_) {
+    if (dec->alph_dec_ == NULL) {    // Initialize decoder.
+      dec->alph_dec_ = ALPHNew();
+      if (dec->alph_dec_ == NULL) return NULL;
+      if (!AllocateAlphaPlane(dec, io)) goto Error;
+      if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
+                    io, dec->alpha_plane_)) {
+        goto Error;
+      }
+      // if we allowed use of alpha dithering, check whether it's needed at all
+      if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
+        dec->alpha_dithering_ = 0;   // disable dithering
+      } else {
+        num_rows = height - row;     // decode everything in one pass
+      }
+    }
+
+    assert(dec->alph_dec_ != NULL);
+    assert(row + num_rows <= height);
+    if (!ALPHDecode(dec, row, num_rows)) goto Error;
+
+    if (dec->is_alpha_decoded_) {   // finished?
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+      if (dec->alpha_dithering_ > 0) {
+        uint8_t* const alpha = dec->alpha_plane_ + io->crop_top * width
+                             + io->crop_left;
+        if (!WebPDequantizeLevels(alpha,
+                                  io->crop_right - io->crop_left,
+                                  io->crop_bottom - io->crop_top,
+                                  width, dec->alpha_dithering_)) {
+          goto Error;
+        }
+      }
+    }
+  }
+
+  // Return a pointer to the current decoded row.
+  return dec->alpha_plane_ + row * width;
+
+ Error:
+  WebPDeallocateAlphaMemory(dec);
+  return NULL;
+}
diff --git a/src/3rdparty/libwebp/src/dec/alphai.h b/src/3rdparty/libwebp/src/dec/alphai_dec.h
index 5fa230c..561e815 100644
--- a/src/3rdparty/libwebp/src/dec/alphai.h
+++ b/src/3rdparty/libwebp/src/dec/alphai_dec.h
@@ -14,8 +14,8 @@
 #ifndef WEBP_DEC_ALPHAI_H_
 #define WEBP_DEC_ALPHAI_H_
 
-#include "./webpi.h"
-#include "../utils/filters.h"
+#include "./webpi_dec.h"
+#include "../utils/filters_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -32,19 +32,18 @@ struct ALPHDecoder {
   int pre_processing_;
   struct VP8LDecoder* vp8l_dec_;
   VP8Io io_;
-  int use_8b_decode;  // Although alpha channel requires only 1 byte per
-                      // pixel, sometimes VP8LDecoder may need to allocate
-                      // 4 bytes per pixel internally during decode.
+  int use_8b_decode_;  // Although alpha channel requires only 1 byte per
+                       // pixel, sometimes VP8LDecoder may need to allocate
+                       // 4 bytes per pixel internally during decode.
+  uint8_t* output_;
+  const uint8_t* prev_line_;   // last output row (or NULL)
 };
 
 //------------------------------------------------------------------------------
 // internal functions. Not public.
 
-// Allocates a new alpha decoder instance.
-ALPHDecoder* ALPHNew(void);
-
-// Clears and deallocates an alpha decoder instance.
-void ALPHDelete(ALPHDecoder* const dec);
+// Deallocate memory associated to dec->alpha_plane_ decoding
+void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
 
 //------------------------------------------------------------------------------
 
diff --git a/src/3rdparty/libwebp/src/dec/buffer.c b/src/3rdparty/libwebp/src/dec/buffer_dec.c
index 9ed2b3f..c685fd5 100644
--- a/src/3rdparty/libwebp/src/dec/buffer.c
+++ b/src/3rdparty/libwebp/src/dec/buffer_dec.c
@@ -13,8 +13,8 @@
 
 #include <stdlib.h>
 
-#include "./vp8i.h"
-#include "./webpi.h"
+#include "./vp8i_dec.h"
+#include "./webpi_dec.h"
 #include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
@@ -92,7 +92,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
     return VP8_STATUS_INVALID_PARAM;
   }
 
-  if (!buffer->is_external_memory && buffer->private_memory == NULL) {
+  if (buffer->is_external_memory <= 0 && buffer->private_memory == NULL) {
     uint8_t* output;
     int uv_stride = 0, a_stride = 0;
     uint64_t uv_size = 0, a_size = 0, total_size;
@@ -227,7 +227,7 @@ int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
 
 void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
   if (buffer != NULL) {
-    if (!buffer->is_external_memory) {
+    if (buffer->is_external_memory <= 0) {
       WebPSafeFree(buffer->private_memory);
     }
     buffer->private_memory = NULL;
@@ -256,5 +256,45 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
   }
 }
 
-//------------------------------------------------------------------------------
+VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
+                                      WebPDecBuffer* const dst_buf) {
+  assert(src_buf != NULL && dst_buf != NULL);
+  assert(src_buf->colorspace == dst_buf->colorspace);
+
+  dst_buf->width = src_buf->width;
+  dst_buf->height = src_buf->height;
+  if (CheckDecBuffer(dst_buf) != VP8_STATUS_OK) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  if (WebPIsRGBMode(src_buf->colorspace)) {
+    const WebPRGBABuffer* const src = &src_buf->u.RGBA;
+    const WebPRGBABuffer* const dst = &dst_buf->u.RGBA;
+    WebPCopyPlane(src->rgba, src->stride, dst->rgba, dst->stride,
+                  src_buf->width * kModeBpp[src_buf->colorspace],
+                  src_buf->height);
+  } else {
+    const WebPYUVABuffer* const src = &src_buf->u.YUVA;
+    const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
+    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
+                  src_buf->width, src_buf->height);
+    WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
+                  (src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
+    WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,
+                  (src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
+    if (WebPIsAlphaMode(src_buf->colorspace)) {
+      WebPCopyPlane(src->a, src->a_stride, dst->a, dst->a_stride,
+                    src_buf->width, src_buf->height);
+    }
+  }
+  return VP8_STATUS_OK;
+}
 
+int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
+                        const WebPBitstreamFeatures* const features) {
+  assert(output != NULL);
+  return (output->is_external_memory >= 2) &&
+         WebPIsPremultipliedMode(output->colorspace) &&
+         (features != NULL && features->has_alpha);
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/dec/common.h b/src/3rdparty/libwebp/src/dec/common_dec.h
index 6961e22..6961e22 100644
--- a/src/3rdparty/libwebp/src/dec/common.h
+++ b/src/3rdparty/libwebp/src/dec/common_dec.h
diff --git a/src/3rdparty/libwebp/src/dec/frame.c b/src/3rdparty/libwebp/src/dec/frame_dec.c
index b882133..f91e27f 100644
--- a/src/3rdparty/libwebp/src/dec/frame.c
+++ b/src/3rdparty/libwebp/src/dec/frame_dec.c
@@ -12,7 +12,7 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
-#include "./vp8i.h"
+#include "./vp8i_dec.h"
 #include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
@@ -316,6 +316,9 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
 //------------------------------------------------------------------------------
 // Dithering
 
+// minimal amp that will provide a non-zero dithering effect
+#define MIN_DITHER_AMP 4
+
 #define DITHER_AMP_TAB_SIZE 12
 static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
   // roughly, it's dqm->uv_mat_[1]
@@ -356,27 +359,14 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
   }
 }
 
-// minimal amp that will provide a non-zero dithering effect
-#define MIN_DITHER_AMP 4
-#define DITHER_DESCALE 4
-#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
-#define DITHER_AMP_BITS 8
-#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
-
+// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
 static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
-  int i, j;
-  for (j = 0; j < 8; ++j) {
-    for (i = 0; i < 8; ++i) {
-      // TODO: could be made faster with SSE2
-      const int bits =
-          VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
-      // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
-      const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
-      const int v = (int)dst[i] + delta;
-      dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
-    }
-    dst += bps;
+  uint8_t dither[64];
+  int i;
+  for (i = 0; i < 8 * 8; ++i) {
+    dither[i] = VP8RandomBits2(rg, VP8_DITHER_AMP_BITS + 1, amp);
   }
+  VP8DitherCombine8x8(dither, dst, bps);
 }
 
 static void DitherRow(VP8Decoder* const dec) {
@@ -462,7 +452,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
     if (dec->alpha_data_ != NULL && y_start < y_end) {
       // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
       // good idea.
-      io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
+      io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
       if (io->a == NULL) {
         return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                            "Could not decode alpha data.");
@@ -733,7 +723,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
       return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                          "no memory during frame initialization.");
     }
-    // down-cast is ok, thanks to WebPSafeAlloc() above.
+    // down-cast is ok, thanks to WebPSafeMalloc() above.
     dec->mem_size_ = (size_t)needed;
   }
 
diff --git a/src/3rdparty/libwebp/src/dec/idec.c b/src/3rdparty/libwebp/src/dec/idec_dec.c
index e0cf0c9..78fb2e7 100644
--- a/src/3rdparty/libwebp/src/dec/idec.c
+++ b/src/3rdparty/libwebp/src/dec/idec_dec.c
@@ -15,9 +15,9 @@
 #include <string.h>
 #include <stdlib.h>
 
-#include "./alphai.h"
-#include "./webpi.h"
-#include "./vp8i.h"
+#include "./alphai_dec.h"
+#include "./webpi_dec.h"
+#include "./vp8i_dec.h"
 #include "../utils/utils.h"
 
 // In append mode, buffer allocations increase as multiples of this value.
@@ -70,7 +70,9 @@ struct WebPIDecoder {
   VP8Io io_;
 
   MemBuffer mem_;          // input memory buffer.
-  WebPDecBuffer output_;   // output buffer (when no external one is supplied)
+  WebPDecBuffer output_;   // output buffer (when no external one is supplied,
+                           // or if the external one has slow-memory)
+  WebPDecBuffer* final_output_;  // Slow-memory output to copy to eventually.
   size_t chunk_size_;      // Compressed VP8/VP8L size extracted from Header.
 
   int last_mb_y_;          // last row reached for intra-mode decoding
@@ -118,9 +120,9 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
   if (idec->dec_ != NULL) {
     if (!idec->is_lossless_) {
       VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
-      const int last_part = dec->num_parts_ - 1;
+      const uint32_t last_part = dec->num_parts_minus_one_;
       if (offset != 0) {
-        int p;
+        uint32_t p;
         for (p = 0; p <= last_part; ++p) {
           VP8RemapBitReader(dec->parts_ + p, offset);
         }
@@ -132,7 +134,6 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
       }
       {
         const uint8_t* const last_start = dec->parts_[last_part].buf_;
-        assert(last_part >= 0);
         VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
                               mem->buf_ + mem->end_ - last_start);
       }
@@ -249,10 +250,16 @@ static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
 
   idec->state_ = STATE_DONE;
   if (options != NULL && options->flip) {
-    return WebPFlipBuffer(output);
-  } else {
-    return VP8_STATUS_OK;
+    const VP8StatusCode status = WebPFlipBuffer(output);
+    if (status != VP8_STATUS_OK) return status;
+  }
+  if (idec->final_output_ != NULL) {
+    WebPCopyDecBufferPixels(output, idec->final_output_);  // do the slow-copy
+    WebPFreeDecBuffer(&idec->output_);
+    *output = *idec->final_output_;
+    idec->final_output_ = NULL;
   }
+  return VP8_STATUS_OK;
 }
 
 //------------------------------------------------------------------------------
@@ -457,19 +464,20 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
     }
     for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
       VP8BitReader* const token_br =
-          &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+          &dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
       MBContext context;
       SaveContext(dec, token_br, &context);
       if (!VP8DecodeMB(dec, token_br)) {
         // We shouldn't fail when MAX_MB data was available
-        if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
+        if (dec->num_parts_minus_one_ == 0 &&
+            MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
           return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
         }
         RestoreContext(&context, dec, token_br);
         return VP8_STATUS_SUSPENDED;
       }
       // Release buffer only if there is only one partition
-      if (dec->num_parts_ == 1) {
+      if (dec->num_parts_minus_one_ == 0) {
         idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
         assert(idec->mem_.start_ <= idec->mem_.end_);
       }
@@ -575,9 +583,10 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
 }
 
 //------------------------------------------------------------------------------
-// Public functions
+// Internal constructor
 
-WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
+static WebPIDecoder* NewDecoder(WebPDecBuffer* const output_buffer,
+                                const WebPBitstreamFeatures* const features) {
   WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
   if (idec == NULL) {
     return NULL;
@@ -593,25 +602,46 @@ WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
   VP8InitIo(&idec->io_);
 
   WebPResetDecParams(&idec->params_);
-  idec->params_.output = (output_buffer != NULL) ? output_buffer
-                                                 : &idec->output_;
+  if (output_buffer == NULL || WebPAvoidSlowMemory(output_buffer, features)) {
+    idec->params_.output = &idec->output_;
+    idec->final_output_ = output_buffer;
+    if (output_buffer != NULL) {
+      idec->params_.output->colorspace = output_buffer->colorspace;
+    }
+  } else {
+    idec->params_.output = output_buffer;
+    idec->final_output_ = NULL;
+  }
   WebPInitCustomIo(&idec->params_, &idec->io_);  // Plug the I/O functions.
 
   return idec;
 }
 
+//------------------------------------------------------------------------------
+// Public functions
+
+WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
+  return NewDecoder(output_buffer, NULL);
+}
+
 WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
                           WebPDecoderConfig* config) {
   WebPIDecoder* idec;
+  WebPBitstreamFeatures tmp_features;
+  WebPBitstreamFeatures* const features =
+      (config == NULL) ? &tmp_features : &config->input;
+  memset(&tmp_features, 0, sizeof(tmp_features));
 
   // Parse the bitstream's features, if requested:
-  if (data != NULL && data_size > 0 && config != NULL) {
-    if (WebPGetFeatures(data, data_size, &config->input) != VP8_STATUS_OK) {
+  if (data != NULL && data_size > 0) {
+    if (WebPGetFeatures(data, data_size, features) != VP8_STATUS_OK) {
       return NULL;
     }
   }
+
   // Create an instance of the incremental decoder
-  idec = WebPINewDecoder(config ? &config->output : NULL);
+  idec = (config != NULL) ? NewDecoder(&config->output, features)
+                          : NewDecoder(NULL, features);
   if (idec == NULL) {
     return NULL;
   }
@@ -645,11 +675,11 @@ void WebPIDelete(WebPIDecoder* idec) {
 
 WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
                           size_t output_buffer_size, int output_stride) {
-  const int is_external_memory = (output_buffer != NULL);
+  const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
   WebPIDecoder* idec;
 
   if (mode >= MODE_YUV) return NULL;
-  if (!is_external_memory) {    // Overwrite parameters to sane values.
+  if (is_external_memory == 0) {    // Overwrite parameters to sane values.
     output_buffer_size = 0;
     output_stride = 0;
   } else {  // A buffer was passed. Validate the other params.
@@ -671,11 +701,11 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
                            uint8_t* u, size_t u_size, int u_stride,
                            uint8_t* v, size_t v_size, int v_stride,
                            uint8_t* a, size_t a_size, int a_stride) {
-  const int is_external_memory = (luma != NULL);
+  const int is_external_memory = (luma != NULL) ? 1 : 0;
   WebPIDecoder* idec;
   WEBP_CSP_MODE colorspace;
 
-  if (!is_external_memory) {    // Overwrite parameters to sane values.
+  if (is_external_memory == 0) {    // Overwrite parameters to sane values.
     luma_size = u_size = v_size = a_size = 0;
     luma_stride = u_stride = v_stride = a_stride = 0;
     u = v = a = NULL;
@@ -783,6 +813,9 @@ static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
   if (idec->state_ <= STATE_VP8_PARTS0) {
     return NULL;
   }
+  if (idec->final_output_ != NULL) {
+    return NULL;   // not yet slow-copied
+  }
   return idec->params_.output;
 }
 
@@ -792,7 +825,7 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
   const WebPDecBuffer* const src = GetOutputBuffer(idec);
   if (left != NULL) *left = 0;
   if (top != NULL) *top = 0;
-  if (src) {
+  if (src != NULL) {
     if (width != NULL) *width = src->width;
     if (height != NULL) *height = idec->params_.last_y;
   } else {
diff --git a/src/3rdparty/libwebp/src/dec/io.c b/src/3rdparty/libwebp/src/dec/io_dec.c
index 13e469a..8bfab86 100644
--- a/src/3rdparty/libwebp/src/dec/io.c
+++ b/src/3rdparty/libwebp/src/dec/io_dec.c
@@ -13,8 +13,8 @@
 
 #include <assert.h>
 #include <stdlib.h>
-#include "../dec/vp8i.h"
-#include "./webpi.h"
+#include "../dec/vp8i_dec.h"
+#include "./webpi_dec.h"
 #include "../dsp/dsp.h"
 #include "../dsp/yuv.h"
 #include "../utils/utils.h"
@@ -119,6 +119,14 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
 
 //------------------------------------------------------------------------------
 
+static void FillAlphaPlane(uint8_t* dst, int w, int h, int stride) {
+  int j;
+  for (j = 0; j < h; ++j) {
+    memset(dst, 0xff, w * sizeof(*dst));
+    dst += stride;
+  }
+}
+
 static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
                         int expected_num_lines_out) {
   const uint8_t* alpha = io->a;
@@ -137,10 +145,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
     }
   } else if (buf->a != NULL) {
     // the user requested alpha, but there is none, set it to opaque.
-    for (j = 0; j < mb_h; ++j) {
-      memset(dst, 0xff, mb_w * sizeof(*dst));
-      dst += buf->a_stride;
-    }
+    FillAlphaPlane(dst, mb_w, mb_h, buf->a_stride);
   }
   return 0;
 }
@@ -251,7 +256,7 @@ static int Rescale(const uint8_t* src, int src_stride,
 static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
   const int mb_h = io->mb_h;
   const int uv_mb_h = (mb_h + 1) >> 1;
-  WebPRescaler* const scaler = &p->scaler_y;
+  WebPRescaler* const scaler = p->scaler_y;
   int num_lines_out = 0;
   if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
     // Before rescaling, we premultiply the luma directly into the io->y
@@ -262,24 +267,28 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
                  io->a, io->width, io->mb_w, mb_h, 0);
   }
   num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
-  Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);
-  Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
+  Rescale(io->u, io->uv_stride, uv_mb_h, p->scaler_u);
+  Rescale(io->v, io->uv_stride, uv_mb_h, p->scaler_v);
   return num_lines_out;
 }
 
 static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
                                 int expected_num_lines_out) {
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  uint8_t* const dst_a = buf->a + p->last_y * buf->a_stride;
   if (io->a != NULL) {
-    const WebPYUVABuffer* const buf = &p->output->u.YUVA;
-    uint8_t* dst_y = buf->y + p->last_y * buf->y_stride;
-    const uint8_t* src_a = buf->a + p->last_y * buf->a_stride;
-    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
-    (void)expected_num_lines_out;
+    uint8_t* const dst_y = buf->y + p->last_y * buf->y_stride;
+    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
     assert(expected_num_lines_out == num_lines_out);
     if (num_lines_out > 0) {   // unmultiply the Y
-      WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride,
-                   p->scaler_a.dst_width, num_lines_out, 1);
+      WebPMultRows(dst_y, buf->y_stride, dst_a, buf->a_stride,
+                   p->scaler_a->dst_width, num_lines_out, 1);
     }
+  } else if (buf->a != NULL) {
+    // the user requested alpha, but there is none, set it to opaque.
+    assert(p->last_y + expected_num_lines_out <= io->scaled_height);
+    FillAlphaPlane(dst_a, io->scaled_width, expected_num_lines_out,
+                   buf->a_stride);
   }
   return 0;
 }
@@ -295,31 +304,42 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
   const int uv_in_height = (io->mb_h + 1) >> 1;
   const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
   const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
-  size_t tmp_size;
+  size_t tmp_size, rescaler_size;
   rescaler_t* work;
+  WebPRescaler* scalers;
+  const int num_rescalers = has_alpha ? 4 : 3;
 
   tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
   if (has_alpha) {
     tmp_size += work_size * sizeof(*work);
   }
-  p->memory = WebPSafeMalloc(1ULL, tmp_size);
+  rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
+
+  p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size);
   if (p->memory == NULL) {
     return 0;   // memory error
   }
   work = (rescaler_t*)p->memory;
-  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size);
+  p->scaler_y = &scalers[0];
+  p->scaler_u = &scalers[1];
+  p->scaler_v = &scalers[2];
+  p->scaler_a = has_alpha ? &scalers[3] : NULL;
+
+  WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
                    buf->y, out_width, out_height, buf->y_stride, 1,
                    work);
-  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+  WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
                    buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
                    work + work_size);
-  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+  WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
                    buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
                    work + work_size + uv_work_size);
   p->emit = EmitRescaledYUV;
 
   if (has_alpha) {
-    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+    WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
                      buf->a, out_width, out_height, buf->a_stride, 1,
                      work + work_size + 2 * uv_work_size);
     p->emit_alpha = EmitRescaledAlphaYUV;
@@ -339,15 +359,15 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
   int num_lines_out = 0;
   // For RGB rescaling, because of the YUV420, current scan position
   // U/V can be +1/-1 line from the Y one.  Hence the double test.
-  while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
-         WebPRescalerHasPendingOutput(&p->scaler_u)) {
+  while (WebPRescalerHasPendingOutput(p->scaler_y) &&
+         WebPRescalerHasPendingOutput(p->scaler_u)) {
     assert(y_pos + num_lines_out < p->output->height);
-    assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
-    WebPRescalerExportRow(&p->scaler_y);
-    WebPRescalerExportRow(&p->scaler_u);
-    WebPRescalerExportRow(&p->scaler_v);
-    convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
-            dst, p->scaler_y.dst_width);
+    assert(p->scaler_u->y_accum == p->scaler_v->y_accum);
+    WebPRescalerExportRow(p->scaler_y);
+    WebPRescalerExportRow(p->scaler_u);
+    WebPRescalerExportRow(p->scaler_v);
+    convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst,
+            dst, p->scaler_y->dst_width);
     dst += buf->stride;
     ++num_lines_out;
   }
@@ -361,15 +381,15 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
   int num_lines_out = 0;
   while (j < mb_h) {
     const int y_lines_in =
-        WebPRescalerImport(&p->scaler_y, mb_h - j,
+        WebPRescalerImport(p->scaler_y, mb_h - j,
                            io->y + j * io->y_stride, io->y_stride);
     j += y_lines_in;
-    if (WebPRescaleNeededLines(&p->scaler_u, uv_mb_h - uv_j)) {
+    if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) {
       const int u_lines_in =
-          WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
+          WebPRescalerImport(p->scaler_u, uv_mb_h - uv_j,
                              io->u + uv_j * io->uv_stride, io->uv_stride);
       const int v_lines_in =
-          WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
+          WebPRescalerImport(p->scaler_v, uv_mb_h - uv_j,
                              io->v + uv_j * io->uv_stride, io->uv_stride);
       (void)v_lines_in;   // remove a gcc warning
       assert(u_lines_in == v_lines_in);
@@ -390,13 +410,13 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
   int num_lines_out = 0;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
   uint32_t non_opaque = 0;
-  const int width = p->scaler_a.dst_width;
+  const int width = p->scaler_a->dst_width;
 
-  while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
+  while (WebPRescalerHasPendingOutput(p->scaler_a) &&
          num_lines_out < max_lines_out) {
     assert(y_pos + num_lines_out < p->output->height);
-    WebPRescalerExportRow(&p->scaler_a);
-    non_opaque |= WebPDispatchAlpha(p->scaler_a.dst, 0, width, 1, dst, 0);
+    WebPRescalerExportRow(p->scaler_a);
+    non_opaque |= WebPDispatchAlpha(p->scaler_a->dst, 0, width, 1, dst, 0);
     dst += buf->stride;
     ++num_lines_out;
   }
@@ -418,18 +438,18 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
 #endif
   int num_lines_out = 0;
   const WEBP_CSP_MODE colorspace = p->output->colorspace;
-  const int width = p->scaler_a.dst_width;
+  const int width = p->scaler_a->dst_width;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
   uint32_t alpha_mask = 0x0f;
 
-  while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
+  while (WebPRescalerHasPendingOutput(p->scaler_a) &&
          num_lines_out < max_lines_out) {
     int i;
     assert(y_pos + num_lines_out < p->output->height);
-    WebPRescalerExportRow(&p->scaler_a);
+    WebPRescalerExportRow(p->scaler_a);
     for (i = 0; i < width; ++i) {
       // Fill in the alpha value (converted to 4 bits).
-      const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
+      const uint32_t alpha_value = p->scaler_a->dst[i] >> 4;
       alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
       alpha_mask &= alpha_value;
     }
@@ -445,7 +465,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
 static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
                                 int expected_num_out_lines) {
   if (io->a != NULL) {
-    WebPRescaler* const scaler = &p->scaler_a;
+    WebPRescaler* const scaler = p->scaler_a;
     int lines_left = expected_num_out_lines;
     const int y_end = p->last_y + lines_left;
     while (lines_left > 0) {
@@ -467,7 +487,9 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
   const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
   rescaler_t* work;  // rescalers work area
   uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
-  size_t tmp_size1, tmp_size2, total_size;
+  size_t tmp_size1, tmp_size2, total_size, rescaler_size;
+  WebPRescaler* scalers;
+  const int num_rescalers = has_alpha ? 4 : 3;
 
   tmp_size1 = 3 * work_size;
   tmp_size2 = 3 * out_width;
@@ -476,26 +498,35 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
     tmp_size2 += out_width;
   }
   total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
-  p->memory = WebPSafeMalloc(1ULL, total_size);
+  rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
+
+  p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size);
   if (p->memory == NULL) {
     return 0;   // memory error
   }
   work = (rescaler_t*)p->memory;
   tmp = (uint8_t*)(work + tmp_size1);
-  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+
+  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size);
+  p->scaler_y = &scalers[0];
+  p->scaler_u = &scalers[1];
+  p->scaler_v = &scalers[2];
+  p->scaler_a = has_alpha ? &scalers[3] : NULL;
+
+  WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
                    tmp + 0 * out_width, out_width, out_height, 0, 1,
                    work + 0 * work_size);
-  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+  WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
                    tmp + 1 * out_width, out_width, out_height, 0, 1,
                    work + 1 * work_size);
-  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+  WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
                    tmp + 2 * out_width, out_width, out_height, 0, 1,
                    work + 2 * work_size);
   p->emit = EmitRescaledRGB;
   WebPInitYUV444Converters();
 
   if (has_alpha) {
-    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+    WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
                      tmp + 3 * out_width, out_width, out_height, 0, 1,
                      work + 3 * work_size);
     p->emit_alpha = EmitRescaledAlphaRGB;
diff --git a/src/3rdparty/libwebp/src/dec/quant.c b/src/3rdparty/libwebp/src/dec/quant_dec.c
index 5b648f9..14e3198 100644
--- a/src/3rdparty/libwebp/src/dec/quant.c
+++ b/src/3rdparty/libwebp/src/dec/quant_dec.c
@@ -11,7 +11,7 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./vp8i.h"
+#include "./vp8i_dec.h"
 
 static WEBP_INLINE int clip(int v, int M) {
   return v < 0 ? 0 : v > M ? M : v;
diff --git a/src/3rdparty/libwebp/src/dec/tree.c b/src/3rdparty/libwebp/src/dec/tree_dec.c
index c2007ea..9e805f6 100644
--- a/src/3rdparty/libwebp/src/dec/tree.c
+++ b/src/3rdparty/libwebp/src/dec/tree_dec.c
@@ -11,10 +11,13 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./vp8i.h"
-#include "../utils/bit_reader_inl.h"
+#include "./vp8i_dec.h"
+#include "../utils/bit_reader_inl_utils.h"
 
+#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
+// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
 #define USE_GENERIC_TREE
+#endif
 
 #ifdef USE_GENERIC_TREE
 static const int8_t kYModesIntra4[18] = {
diff --git a/src/3rdparty/libwebp/src/dec/vp8.c b/src/3rdparty/libwebp/src/dec/vp8_dec.c
index d89eb1c..fad8d9c 100644
--- a/src/3rdparty/libwebp/src/dec/vp8.c
+++ b/src/3rdparty/libwebp/src/dec/vp8_dec.c
@@ -13,11 +13,11 @@
 
 #include <stdlib.h>
 
-#include "./alphai.h"
-#include "./vp8i.h"
-#include "./vp8li.h"
-#include "./webpi.h"
-#include "../utils/bit_reader_inl.h"
+#include "./alphai_dec.h"
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "./webpi_dec.h"
+#include "../utils/bit_reader_inl_utils.h"
 #include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
@@ -27,6 +27,16 @@ int WebPGetDecoderVersion(void) {
 }
 
 //------------------------------------------------------------------------------
+// Signature and pointer-to-function for GetCoeffs() variants below.
+
+typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
+                             const VP8BandProbas* const prob[],
+                             int ctx, const quant_t dq, int n, int16_t* out);
+static volatile GetCoeffsFunc GetCoeffs = NULL;
+
+static void InitGetCoeffs(void);
+
+//------------------------------------------------------------------------------
 // VP8Decoder
 
 static void SetOk(VP8Decoder* const dec) {
@@ -50,7 +60,8 @@ VP8Decoder* VP8New(void) {
     SetOk(dec);
     WebPGetWorkerInterface()->Init(&dec->worker_);
     dec->ready_ = 0;
-    dec->num_parts_ = 1;
+    dec->num_parts_minus_one_ = 0;
+    InitGetCoeffs();
   }
   return dec;
 }
@@ -194,8 +205,8 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
   size_t last_part;
   size_t p;
 
-  dec->num_parts_ = 1 << VP8GetValue(br, 2);
-  last_part = dec->num_parts_ - 1;
+  dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2)) - 1;
+  last_part = dec->num_parts_minus_one_;
   if (size < 3 * last_part) {
     // we can't even read the sizes with sz[]! That's a failure.
     return VP8_STATUS_NOT_ENOUGH_DATA;
@@ -273,12 +284,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
     frm_hdr->profile_ = (bits >> 1) & 7;
     frm_hdr->show_ = (bits >> 4) & 1;
     frm_hdr->partition_length_ = (bits >> 5);
-    if (frm_hdr->profile_ > 3)
+    if (frm_hdr->profile_ > 3) {
       return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                          "Incorrect keyframe parameters.");
-    if (!frm_hdr->show_)
+    }
+    if (!frm_hdr->show_) {
       return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
                          "Frame not displayable.");
+    }
     buf += 3;
     buf_size -= 3;
   }
@@ -303,15 +316,22 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 
     dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
     dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
+
     // Setup default output area (can be later modified during io->setup())
     io->width = pic_hdr->width_;
     io->height = pic_hdr->height_;
-    io->use_scaling  = 0;
+    // IMPORTANT! use some sane dimensions in crop_* and scaled_* fields.
+    // So they can be used interchangeably without always testing for
+    // 'use_cropping'.
     io->use_cropping = 0;
     io->crop_top  = 0;
     io->crop_left = 0;
     io->crop_right  = io->width;
     io->crop_bottom = io->height;
+    io->use_scaling  = 0;
+    io->scaled_width = io->width;
+    io->scaled_height = io->height;
+
     io->mb_w = io->width;   // sanity check
     io->mb_h = io->height;  // ditto
 
@@ -413,8 +433,9 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
 }
 
 // Returns the position of the last non-zero coeff plus one
-static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
-                     int ctx, const quant_t dq, int n, int16_t* out) {
+static int GetCoeffsFast(VP8BitReader* const br,
+                         const VP8BandProbas* const prob[],
+                         int ctx, const quant_t dq, int n, int16_t* out) {
   const uint8_t* p = prob[n]->probas_[ctx];
   for (; n < 16; ++n) {
     if (!VP8GetBit(br, p[0])) {
@@ -440,6 +461,46 @@ static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
   return 16;
 }
 
+// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
+// of VP8GetBitAlt() targeting specific platforms.
+static int GetCoeffsAlt(VP8BitReader* const br,
+                        const VP8BandProbas* const prob[],
+                        int ctx, const quant_t dq, int n, int16_t* out) {
+  const uint8_t* p = prob[n]->probas_[ctx];
+  for (; n < 16; ++n) {
+    if (!VP8GetBitAlt(br, p[0])) {
+      return n;  // previous coeff was last non-zero coeff
+    }
+    while (!VP8GetBitAlt(br, p[1])) {       // sequence of zero coeffs
+      p = prob[++n]->probas_[0];
+      if (n == 16) return 16;
+    }
+    {        // non zero coeff
+      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
+      int v;
+      if (!VP8GetBitAlt(br, p[2])) {
+        v = 1;
+        p = p_ctx[1];
+      } else {
+        v = GetLargeValue(br, p);
+        p = p_ctx[2];
+      }
+      out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
+    }
+  }
+  return 16;
+}
+
+WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) {
+  if (GetCoeffs == NULL) {
+    if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
+      GetCoeffs = GetCoeffsAlt;
+    } else {
+      GetCoeffs = GetCoeffsFast;
+    }
+  }
+}
+
 static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
   nz_coeffs <<= 2;
   nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
@@ -579,7 +640,7 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
   for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
     // Parse bitstream for this row.
     VP8BitReader* const token_br =
-        &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+        &dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
     if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
       return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                          "Premature end-of-partition0 encountered.");
@@ -649,8 +710,7 @@ void VP8Clear(VP8Decoder* const dec) {
     return;
   }
   WebPGetWorkerInterface()->End(&dec->worker_);
-  ALPHDelete(dec->alph_dec_);
-  dec->alph_dec_ = NULL;
+  WebPDeallocateAlphaMemory(dec);
   WebPSafeFree(dec->mem_);
   dec->mem_ = NULL;
   dec->mem_size_ = 0;
@@ -659,4 +719,3 @@ void VP8Clear(VP8Decoder* const dec) {
 }
 
 //------------------------------------------------------------------------------
-
diff --git a/src/3rdparty/libwebp/src/dec/decode_vp8.h b/src/3rdparty/libwebp/src/dec/vp8_dec.h
index b9337bb..b9337bb 100644
--- a/src/3rdparty/libwebp/src/dec/decode_vp8.h
+++ b/src/3rdparty/libwebp/src/dec/vp8_dec.h
diff --git a/src/3rdparty/libwebp/src/dec/vp8i.h b/src/3rdparty/libwebp/src/dec/vp8i_dec.h
index 0104f25..555853e 100644
--- a/src/3rdparty/libwebp/src/dec/vp8i.h
+++ b/src/3rdparty/libwebp/src/dec/vp8i_dec.h
@@ -15,11 +15,11 @@
 #define WEBP_DEC_VP8I_H_
 
 #include <string.h>     // for memcpy()
-#include "./common.h"
-#include "./vp8li.h"
-#include "../utils/bit_reader.h"
-#include "../utils/random.h"
-#include "../utils/thread.h"
+#include "./common_dec.h"
+#include "./vp8li_dec.h"
+#include "../utils/bit_reader_utils.h"
+#include "../utils/random_utils.h"
+#include "../utils/thread_utils.h"
 #include "../dsp/dsp.h"
 
 #ifdef __cplusplus
@@ -31,7 +31,7 @@ extern "C" {
 
 // version numbers
 #define DEC_MAJ_VERSION 0
-#define DEC_MIN_VERSION 5
+#define DEC_MIN_VERSION 6
 #define DEC_REV_VERSION 0
 
 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
@@ -209,8 +209,8 @@ struct VP8Decoder {
   int tl_mb_x_, tl_mb_y_;  // top-left MB that must be in-loop filtered
   int br_mb_x_, br_mb_y_;  // last bottom-right MB that must be decoded
 
-  // number of partitions.
-  int num_parts_;
+  // number of partitions minus one.
+  uint32_t num_parts_minus_one_;
   // per-partition boolean decoders.
   VP8BitReader parts_[MAX_NUM_PARTITIONS];
 
@@ -258,9 +258,11 @@ struct VP8Decoder {
   struct ALPHDecoder* alph_dec_;  // alpha-plane decoder object
   const uint8_t* alpha_data_;     // compressed alpha data (if present)
   size_t alpha_data_size_;
-  int is_alpha_decoded_;  // true if alpha_data_ is decoded in alpha_plane_
-  uint8_t* alpha_plane_;  // output. Persistent, contains the whole data.
-  int alpha_dithering_;   // derived from decoding options (0=off, 100=full).
+  int is_alpha_decoded_;      // true if alpha_data_ is decoded in alpha_plane_
+  uint8_t* alpha_plane_mem_;  // memory allocated for alpha_plane_
+  uint8_t* alpha_plane_;      // output. Persistent, contains the whole data.
+  const uint8_t* alpha_prev_line_;  // last decoded alpha row (or NULL)
+  int alpha_dithering_;       // derived from decoding options (0=off, 100=full)
 };
 
 //------------------------------------------------------------------------------
@@ -306,6 +308,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
 
 // in alpha.c
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      const VP8Io* const io,
                                       int row, int num_rows);
 
 //------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/dec/vp8l.c b/src/3rdparty/libwebp/src/dec/vp8l_dec.c
index a76ad6a..ef359a9 100644
--- a/src/3rdparty/libwebp/src/dec/vp8l.c
+++ b/src/3rdparty/libwebp/src/dec/vp8l_dec.c
@@ -14,13 +14,14 @@
 
 #include <stdlib.h>
 
-#include "./alphai.h"
-#include "./vp8li.h"
+#include "./alphai_dec.h"
+#include "./vp8li_dec.h"
 #include "../dsp/dsp.h"
 #include "../dsp/lossless.h"
+#include "../dsp/lossless_common.h"
 #include "../dsp/yuv.h"
-#include "../utils/endian_inl.h"
-#include "../utils/huffman.h"
+#include "../utils/endian_inl_utils.h"
+#include "../utils/huffman_utils.h"
 #include "../utils/utils.h"
 
 #define NUM_ARGB_CACHE_ROWS          16
@@ -547,11 +548,14 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
     uint8_t* const row_out = out + num_lines_out * out_stride;
     const int lines_left = mb_h - num_lines_in;
     const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    int lines_imported;
     assert(needed_lines > 0 && needed_lines <= lines_left);
     WebPMultARGBRows(row_in, in_stride,
                      dec->rescaler->src_width, needed_lines, 0);
-    WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
-    num_lines_in += needed_lines;
+    lines_imported =
+        WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
+    assert(lines_imported == needed_lines);
+    num_lines_in += lines_imported;
     num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out);
   }
   return num_lines_out;
@@ -623,9 +627,12 @@ static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
   while (num_lines_in < mb_h) {
     const int lines_left = mb_h - num_lines_in;
     const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    int lines_imported;
     WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0);
-    WebPRescalerImport(dec->rescaler, lines_left, in, in_stride);
-    num_lines_in += needed_lines;
+    lines_imported =
+        WebPRescalerImport(dec->rescaler, lines_left, in, in_stride);
+    assert(lines_imported == needed_lines);
+    num_lines_in += lines_imported;
     in += needed_lines * in_stride;
     y_pos += ExportYUVA(dec, y_pos);
   }
@@ -705,27 +712,15 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
   uint32_t* const rows_out = dec->argb_cache_;
 
   // Inverse transforms.
-  // TODO: most transforms only need to operate on the cropped region only.
-  memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
   while (n-- > 0) {
     VP8LTransform* const transform = &dec->transforms_[n];
     VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
     rows_in = rows_out;
   }
-}
-
-// Special method for paletted alpha data.
-static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
-                                        const uint8_t* const rows) {
-  const int start_row = dec->last_row_;
-  const int end_row = start_row + num_rows;
-  const uint8_t* rows_in = rows;
-  uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
-  VP8LTransform* const transform = &dec->transforms_[0];
-  assert(dec->next_transform_ == 1);
-  assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
-  VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
-                                      rows_out);
+  if (rows_in != rows_out) {
+    // No transform called, hence just copy.
+    memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
+  }
 }
 
 // Processes (transforms, scales & color-converts) the rows decoded after the
@@ -734,14 +729,16 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
   const int num_rows = row - dec->last_row_;
 
-  if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyInverseTransforms(dec, num_rows, rows);
-
-  // Emit output.
-  {
+  assert(row <= dec->io_->crop_bottom);
+  // We can't process more than NUM_ARGB_CACHE_ROWS at a time (that's the size
+  // of argb_cache_), but we currently don't need more than that.
+  assert(num_rows <= NUM_ARGB_CACHE_ROWS);
+  if (num_rows > 0) {    // Emit output.
     VP8Io* const io = dec->io_;
     uint8_t* rows_data = (uint8_t*)dec->argb_cache_;
     const int in_stride = io->width * sizeof(uint32_t);  // in unit of RGBA
+
+    ApplyInverseTransforms(dec, num_rows, rows);
     if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
       // Nothing to output (this time).
     } else {
@@ -786,14 +783,46 @@ static int Is8bOptimizable(const VP8LMetadata* const hdr) {
   return 1;
 }
 
-static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
-  const int num_rows = row - dec->last_row_;
-  const uint8_t* const in =
-      (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
-  if (num_rows > 0) {
-    ApplyInverseTransformsAlpha(dec, num_rows, in);
+static void AlphaApplyFilter(ALPHDecoder* const alph_dec,
+                             int first_row, int last_row,
+                             uint8_t* out, int stride) {
+  if (alph_dec->filter_ != WEBP_FILTER_NONE) {
+    int y;
+    const uint8_t* prev_line = alph_dec->prev_line_;
+    assert(WebPUnfilters[alph_dec->filter_] != NULL);
+    for (y = first_row; y < last_row; ++y) {
+      WebPUnfilters[alph_dec->filter_](prev_line, out, out, stride);
+      prev_line = out;
+      out += stride;
+    }
+    alph_dec->prev_line_ = prev_line;
   }
-  dec->last_row_ = dec->last_out_row_ = row;
+}
+
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
+  // For vertical and gradient filtering, we need to decode the part above the
+  // crop_top row, in order to have the correct spatial predictors.
+  ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io_->opaque;
+  const int top_row =
+      (alph_dec->filter_ == WEBP_FILTER_NONE ||
+       alph_dec->filter_ == WEBP_FILTER_HORIZONTAL) ? dec->io_->crop_top
+                                                    : dec->last_row_;
+  const int first_row = (dec->last_row_ < top_row) ? top_row : dec->last_row_;
+  assert(last_row <= dec->io_->crop_bottom);
+  if (last_row > first_row) {
+    // Special method for paletted alpha data. We only process the cropped area.
+    const int width = dec->io_->width;
+    uint8_t* out = alph_dec->output_ + width * first_row;
+    const uint8_t* const in =
+      (uint8_t*)dec->pixels_ + dec->width_ * first_row;
+    VP8LTransform* const transform = &dec->transforms_[0];
+    assert(dec->next_transform_ == 1);
+    assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
+    VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row,
+                                        in, out);
+    AlphaApplyFilter(alph_dec, first_row, last_row, out, width);
+  }
+  dec->last_row_ = dec->last_out_row_ = last_row;
 }
 
 //------------------------------------------------------------------------------
@@ -922,14 +951,14 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
   int col = dec->last_pixel_ % width;
   VP8LBitReader* const br = &dec->br_;
   VP8LMetadata* const hdr = &dec->hdr_;
-  const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
   int pos = dec->last_pixel_;         // current position
   const int end = width * height;     // End of data
   const int last = width * last_row;  // Last pixel to decode
   const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
   const int mask = hdr->huffman_mask_;
-  assert(htree_group != NULL);
-  assert(pos < end);
+  const HTreeGroup* htree_group =
+      (pos < last) ? GetHtreeGroupForPos(hdr, col, row) : NULL;
+  assert(pos <= end);
   assert(last_row <= height);
   assert(Is8bOptimizable(hdr));
 
@@ -939,6 +968,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
     if ((col & mask) == 0) {
       htree_group = GetHtreeGroupForPos(hdr, col, row);
     }
+    assert(htree_group != NULL);
     VP8LFillBitWindow(br);
     code = ReadSymbol(htree_group->htrees[GREEN], br);
     if (code < NUM_LITERAL_CODES) {  // Literal
@@ -948,7 +978,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
       if (col >= width) {
         col = 0;
         ++row;
-        if (row % NUM_ARGB_CACHE_ROWS == 0) {
+        if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
           ExtractPalettedAlphaRows(dec, row);
         }
       }
@@ -971,7 +1001,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
       while (col >= width) {
         col -= width;
         ++row;
-        if (row % NUM_ARGB_CACHE_ROWS == 0) {
+        if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
           ExtractPalettedAlphaRows(dec, row);
         }
       }
@@ -985,7 +1015,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
     assert(br->eos_ == VP8LIsEndOfStream(br));
   }
   // Process the remaining rows corresponding to last row-block.
-  ExtractPalettedAlphaRows(dec, row);
+  ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
 
  End:
   if (!ok || (br->eos_ && pos < end)) {
@@ -1025,7 +1055,6 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
   int col = dec->last_pixel_ % width;
   VP8LBitReader* const br = &dec->br_;
   VP8LMetadata* const hdr = &dec->hdr_;
-  HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
   uint32_t* src = data + dec->last_pixel_;
   uint32_t* last_cached = src;
   uint32_t* const src_end = data + width * height;     // End of data
@@ -1036,8 +1065,9 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
   VP8LColorCache* const color_cache =
       (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
   const int mask = hdr->huffman_mask_;
-  assert(htree_group != NULL);
-  assert(src < src_end);
+  const HTreeGroup* htree_group =
+      (src < src_last) ? GetHtreeGroupForPos(hdr, col, row) : NULL;
+  assert(dec->last_row_ < last_row);
   assert(src_last <= src_end);
 
   while (src < src_last) {
@@ -1049,7 +1079,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
     // Only update when changing tile. Note we could use this test:
     // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
     // but that's actually slower and needs storing the previous col/row.
-    if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row);
+    if ((col & mask) == 0) {
+      htree_group = GetHtreeGroupForPos(hdr, col, row);
+    }
+    assert(htree_group != NULL);
     if (htree_group->is_trivial_code) {
       *src = htree_group->literal_arb;
       goto AdvanceByOne;
@@ -1080,8 +1113,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
       if (col >= width) {
         col = 0;
         ++row;
-        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
-          process_func(dec, row);
+        if (process_func != NULL) {
+          if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+            process_func(dec, row);
+          }
         }
         if (color_cache != NULL) {
           while (last_cached < src) {
@@ -1108,8 +1143,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
       while (col >= width) {
         col -= width;
         ++row;
-        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
-          process_func(dec, row);
+        if (process_func != NULL) {
+          if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+            process_func(dec, row);
+          }
         }
       }
       // Because of the check done above (before 'src' was incremented by
@@ -1140,7 +1177,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
   } else if (!br->eos_) {
     // Process the remaining rows corresponding to last row-block.
     if (process_func != NULL) {
-      process_func(dec, row);
+      process_func(dec, row > last_row ? last_row : row);
     }
     dec->status_ = VP8_STATUS_OK;
     dec->last_pixel_ = (int)(src - data);  // end-of-scan marker
@@ -1182,8 +1219,9 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
       // Equivalent to AddPixelEq(), on a byte-basis.
       new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
     }
-    for (; i < 4 * final_num_colors; ++i)
+    for (; i < 4 * final_num_colors; ++i) {
       new_data[i] = 0;  // black tail.
+    }
     WebPSafeFree(transform->data_);
     transform->data_ = new_color_map;
   }
@@ -1438,46 +1476,50 @@ static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
 //------------------------------------------------------------------------------
 
 // Special row-processing that only stores the alpha data.
-static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
-  const int num_rows = row - dec->last_row_;
-  const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
-
-  if (num_rows <= 0) return;  // Nothing to be done.
-  ApplyInverseTransforms(dec, num_rows, in);
-
-  // Extract alpha (which is stored in the green plane).
-  {
+static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
+  int cur_row = dec->last_row_;
+  int num_rows = last_row - cur_row;
+  const uint32_t* in = dec->pixels_ + dec->width_ * cur_row;
+
+  assert(last_row <= dec->io_->crop_bottom);
+  while (num_rows > 0) {
+    const int num_rows_to_process =
+        (num_rows > NUM_ARGB_CACHE_ROWS) ? NUM_ARGB_CACHE_ROWS : num_rows;
+    // Extract alpha (which is stored in the green plane).
+    ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io_->opaque;
+    uint8_t* const output = alph_dec->output_;
     const int width = dec->io_->width;      // the final width (!= dec->width_)
-    const int cache_pixs = width * num_rows;
-    uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
+    const int cache_pixs = width * num_rows_to_process;
+    uint8_t* const dst = output + width * cur_row;
     const uint32_t* const src = dec->argb_cache_;
-    int i;
-    for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
-  }
-  dec->last_row_ = dec->last_out_row_ = row;
+    ApplyInverseTransforms(dec, num_rows_to_process, in);
+    WebPExtractGreen(src, dst, cache_pixs);
+    AlphaApplyFilter(alph_dec,
+                     cur_row, cur_row + num_rows_to_process, dst, width);
+    num_rows -= num_rows_to_process;
+    in += num_rows_to_process * dec->width_;
+    cur_row += num_rows_to_process;
+  }
+  assert(cur_row == last_row);
+  dec->last_row_ = dec->last_out_row_ = last_row;
 }
 
 int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
-                          const uint8_t* const data, size_t data_size,
-                          uint8_t* const output) {
+                          const uint8_t* const data, size_t data_size) {
   int ok = 0;
-  VP8LDecoder* dec;
-  VP8Io* io;
+  VP8LDecoder* dec = VP8LNew();
+
+  if (dec == NULL) return 0;
+
   assert(alph_dec != NULL);
-  alph_dec->vp8l_dec_ = VP8LNew();
-  if (alph_dec->vp8l_dec_ == NULL) return 0;
-  dec = alph_dec->vp8l_dec_;
+  alph_dec->vp8l_dec_ = dec;
 
   dec->width_ = alph_dec->width_;
   dec->height_ = alph_dec->height_;
   dec->io_ = &alph_dec->io_;
-  io = dec->io_;
-
-  VP8InitIo(io);
-  WebPInitCustomIo(NULL, io);  // Just a sanity Init. io won't be used.
-  io->opaque = output;
-  io->width = alph_dec->width_;
-  io->height = alph_dec->height_;
+  dec->io_->opaque = alph_dec;
+  dec->io_->width = alph_dec->width_;
+  dec->io_->height = alph_dec->height_;
 
   dec->status_ = VP8_STATUS_OK;
   VP8LInitBitReader(&dec->br_, data, data_size);
@@ -1492,11 +1534,11 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
   if (dec->next_transform_ == 1 &&
       dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
       Is8bOptimizable(&dec->hdr_)) {
-    alph_dec->use_8b_decode = 1;
+    alph_dec->use_8b_decode_ = 1;
     ok = AllocateInternalBuffers8b(dec);
   } else {
     // Allocate internal buffers (note that dec->width_ may have changed here).
-    alph_dec->use_8b_decode = 0;
+    alph_dec->use_8b_decode_ = 0;
     ok = AllocateInternalBuffers32b(dec, alph_dec->width_);
   }
 
@@ -1515,12 +1557,14 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
   assert(dec != NULL);
   assert(last_row <= dec->height_);
 
-  if (dec->last_pixel_ == dec->width_ * dec->height_) {
+  if (dec->last_row_ >= last_row) {
     return 1;  // done
   }
 
+  if (!alph_dec->use_8b_decode_) WebPInitAlphaProcessing();
+
   // Decode (with special row processing).
-  return alph_dec->use_8b_decode ?
+  return alph_dec->use_8b_decode_ ?
       DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
                       last_row) :
       DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
@@ -1611,7 +1655,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
 
   // Decode.
   if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
-                       dec->height_, ProcessRows)) {
+                       io->crop_bottom, ProcessRows)) {
     goto Err;
   }
 
diff --git a/src/3rdparty/libwebp/src/dec/vp8li.h b/src/3rdparty/libwebp/src/dec/vp8li_dec.h
index 8886e47..097a9d0 100644
--- a/src/3rdparty/libwebp/src/dec/vp8li.h
+++ b/src/3rdparty/libwebp/src/dec/vp8li_dec.h
@@ -16,10 +16,10 @@
 #define WEBP_DEC_VP8LI_H_
 
 #include <string.h>     // for memcpy()
-#include "./webpi.h"
-#include "../utils/bit_reader.h"
-#include "../utils/color_cache.h"
-#include "../utils/huffman.h"
+#include "./webpi_dec.h"
+#include "../utils/bit_reader_utils.h"
+#include "../utils/color_cache_utils.h"
+#include "../utils/huffman_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -100,8 +100,7 @@ struct ALPHDecoder;  // Defined in dec/alphai.h.
 // Decodes image header for alpha data stored using lossless compression.
 // Returns false in case of error.
 int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
-                          const uint8_t* const data, size_t data_size,
-                          uint8_t* const output);
+                          const uint8_t* const data, size_t data_size);
 
 // Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
 // already decoded in previous call(s), it will resume decoding from where it
diff --git a/src/3rdparty/libwebp/src/dec/webp.c b/src/3rdparty/libwebp/src/dec/webp_dec.c
index 952178f..a8e9c2c 100644
--- a/src/3rdparty/libwebp/src/dec/webp.c
+++ b/src/3rdparty/libwebp/src/dec/webp_dec.c
@@ -13,9 +13,9 @@
 
 #include <stdlib.h>
 
-#include "./vp8i.h"
-#include "./vp8li.h"
-#include "./webpi.h"
+#include "./vp8i_dec.h"
+#include "./vp8li_dec.h"
+#include "./webpi_dec.h"
 #include "../utils/utils.h"
 #include "../webp/mux_types.h"  // ALPHA_FLAG
 
@@ -39,8 +39,8 @@
 //   20..23  VP8X flags bit-map corresponding to the chunk-types present.
 //   24..26  Width of the Canvas Image.
 //   27..29  Height of the Canvas Image.
-// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
-// VP8L, XMP, EXIF  ...)
+// There can be extra chunks after the "VP8X" chunk (ICCP, ANMF, VP8, VP8L,
+// XMP, EXIF  ...)
 // All sizes are in little-endian order.
 // Note: chunk data size must be padded to multiple of 2 when written.
 
@@ -289,7 +289,6 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
   int found_riff = 0;
   int found_vp8x = 0;
   int animation_present = 0;
-  int fragments_present = 0;
   const int have_all_data = (headers != NULL) ? headers->have_all_data : 0;
 
   VP8StatusCode status;
@@ -318,7 +317,6 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
       return status;  // Wrong VP8X / insufficient data.
     }
     animation_present = !!(flags & ANIMATION_FLAG);
-    fragments_present = !!(flags & FRAGMENTS_FLAG);
     if (!found_riff && found_vp8x) {
       // Note: This restriction may be removed in the future, if it becomes
       // necessary to send VP8X chunk to the decoder.
@@ -330,8 +328,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
 
     image_width = canvas_width;
     image_height = canvas_height;
-    if (found_vp8x && (animation_present || fragments_present) &&
-        headers == NULL) {
+    if (found_vp8x && animation_present && headers == NULL) {
       status = VP8_STATUS_OK;
       goto ReturnWidthHeight;  // Just return features from VP8X header.
     }
@@ -362,7 +359,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
     return VP8_STATUS_BITSTREAM_ERROR;
   }
 
-  if (format != NULL && !(animation_present || fragments_present)) {
+  if (format != NULL && !animation_present) {
     *format = hdrs.is_lossless ? 2 : 1;
   }
 
@@ -415,7 +412,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
 }
 
 VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
-  VP8StatusCode status;
+  // status is marked volatile as a workaround for a clang-3.8 (aarch64) bug
+  volatile VP8StatusCode status;
   int has_animation = 0;
   assert(headers != NULL);
   // fill out headers, ignore width/height/has_alpha.
@@ -512,10 +510,12 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
 
   if (status != VP8_STATUS_OK) {
     WebPFreeDecBuffer(params->output);
-  }
-
-  if (params->options != NULL && params->options->flip) {
-    status = WebPFlipBuffer(params->output);
+  } else {
+    if (params->options != NULL && params->options->flip) {
+      // This restores the original stride values if options->flip was used
+      // during the call to WebPAllocateDecBuffer above.
+      status = WebPFlipBuffer(params->output);
+    }
   }
   return status;
 }
@@ -758,9 +758,24 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
   }
 
   WebPResetDecParams(&params);
-  params.output = &config->output;
   params.options = &config->options;
-  status = DecodeInto(data, data_size, &params);
+  params.output = &config->output;
+  if (WebPAvoidSlowMemory(params.output, &config->input)) {
+    // decoding to slow memory: use a temporary in-mem buffer to decode into.
+    WebPDecBuffer in_mem_buffer;
+    WebPInitDecBuffer(&in_mem_buffer);
+    in_mem_buffer.colorspace = config->output.colorspace;
+    in_mem_buffer.width = config->input.width;
+    in_mem_buffer.height = config->input.height;
+    params.output = &in_mem_buffer;
+    status = DecodeInto(data, data_size, &params);
+    if (status == VP8_STATUS_OK) {  // do the slow-copy
+      status = WebPCopyDecBufferPixels(&in_mem_buffer, &config->output);
+    }
+    WebPFreeDecBuffer(&in_mem_buffer);
+  } else {
+    status = DecodeInto(data, data_size, &params);
+  }
 
   return status;
 }
@@ -809,7 +824,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
   }
 
   // Filter
-  io->bypass_filtering = options && options->bypass_filtering;
+  io->bypass_filtering = (options != NULL) && options->bypass_filtering;
 
   // Fancy upsampler
 #ifdef FANCY_UPSAMPLING
@@ -826,4 +841,3 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
 }
 
 //------------------------------------------------------------------------------
-
diff --git a/src/3rdparty/libwebp/src/dec/webpi.h b/src/3rdparty/libwebp/src/dec/webpi_dec.h
index c75a2e4..696abc1 100644
--- a/src/3rdparty/libwebp/src/dec/webpi.h
+++ b/src/3rdparty/libwebp/src/dec/webpi_dec.h
@@ -18,8 +18,8 @@
 extern "C" {
 #endif
 
-#include "../utils/rescaler.h"
-#include "./decode_vp8.h"
+#include "../utils/rescaler_utils.h"
+#include "./vp8_dec.h"
 
 //------------------------------------------------------------------------------
 // WebPDecParams: Decoding output parameters. Transient internal object.
@@ -38,8 +38,8 @@ struct WebPDecParams {
 
   int last_y;                 // coordinate of the line that was last output
   const WebPDecoderOptions* options;  // if not NULL, use alt decoding features
-  // rescalers
-  WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a;
+
+  WebPRescaler* scaler_y, *scaler_u, *scaler_v, *scaler_a;  // rescalers
   void* memory;                  // overall scratch memory for the output work.
 
   OutputFunc emit;               // output RGB or YUV samples
@@ -107,13 +107,23 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
 VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
 
 // Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
-// memory (still held by 'src').
+// memory (still held by 'src'). No pixels are copied.
 void WebPCopyDecBuffer(const WebPDecBuffer* const src,
                        WebPDecBuffer* const dst);
 
 // Copy and transfer ownership from src to dst (beware of parameter order!)
 void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
 
+// Copy pixels from 'src' into a *preallocated* 'dst' buffer. Returns
+// VP8_STATUS_INVALID_PARAM if the 'dst' is not set up correctly for the copy.
+VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src,
+                                      WebPDecBuffer* const dst);
+
+// Returns true if decoding will be slow with the current configuration
+// and bitstream features.
+int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
+                        const WebPBitstreamFeatures* const features);
+
 //------------------------------------------------------------------------------
 
 #ifdef __cplusplus