1 files changed, 89 insertions, 135 deletions
diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h
index 20f58c6..b2cc8d1 100644
--- a/src/3rdparty/libwebp/src/enc/vp8enci.h
+++ b/src/3rdparty/libwebp/src/enc/vp8enci.h
@@ -15,10 +15,16 @@
 #define WEBP_ENC_VP8ENCI_H_
 
 #include <string.h>     // for memcpy()
-#include "../webp/encode.h"
+#include "../dec/common.h"
 #include "../dsp/dsp.h"
 #include "../utils/bit_writer.h"
 #include "../utils/thread.h"
+#include "../utils/utils.h"
+#include "../webp/encode.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "./vp8li.h"
+#endif  // WEBP_EXPERIMENTAL_FEATURES
 
 #ifdef __cplusplus
 extern "C" {
@@ -29,35 +35,10 @@ extern "C" {
 
 // version numbers
 #define ENC_MAJ_VERSION 0
-#define ENC_MIN_VERSION 4
-#define ENC_REV_VERSION 4
-
-// intra prediction modes
-enum { B_DC_PRED = 0,   // 4x4 modes
-       B_TM_PRED = 1,
-       B_VE_PRED = 2,
-       B_HE_PRED = 3,
-       B_RD_PRED = 4,
-       B_VR_PRED = 5,
-       B_LD_PRED = 6,
-       B_VL_PRED = 7,
-       B_HD_PRED = 8,
-       B_HU_PRED = 9,
-       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
-
-       // Luma16 or UV modes
-       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
-       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
-       NUM_PRED_MODES = 4
-     };
+#define ENC_MIN_VERSION 5
+#define ENC_REV_VERSION 0
 
-enum { NUM_MB_SEGMENTS = 4,
-       MAX_NUM_PARTITIONS = 8,
-       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
-       NUM_BANDS = 8,
-       NUM_CTX = 3,
-       NUM_PROBAS = 11,
-       MAX_LF_LEVELS = 64,       // Maximum loop filter level
+enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
        MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
        MAX_LEVEL = 2047          // max level (note: max codable is 2047 + 67)
      };
@@ -69,66 +50,34 @@ typedef enum {   // Rate-distortion optimization levels
   RD_OPT_TRELLIS_ALL = 3   // trellis-quant for every scoring (much slower)
 } VP8RDLevel;
 
-// YUV-cache parameters. Cache is 16-pixels wide.
-// The original or reconstructed samples can be accessed using VP8Scan[]
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// The original or reconstructed samples can be accessed using VP8Scan[].
 // The predicted blocks can be accessed using offsets to yuv_p_ and
-// the arrays VP8*ModeOffsets[];
-//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
-//  Y_OFF  |YYYY| <- original samples  ('yuv_in_')
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
-//         |UUVV|
-//         +----+
-//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
-//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
-//         |YYYY|
-//         |YYYY|
-//  U_OFF  |UUVV| V_OFF
-//         |UUVV|
-//          x2 (for yuv_out2_)
-//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
-// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16TM16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16VE16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16HE16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         +----+  Chroma U/V predictions (16x8 block each)
-// C8DC8   |UUVV|
-//         |UUVV|
-// C8TM8   |UUVV|
-//         |UUVV|
-// C8VE8   |UUVV|
-//         |UUVV|
-// C8HE8   |UUVV|
-//         |UUVV|
-//         +----+  Intra 4x4 predictions (4x4 block each)
-//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
-//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
-//         |YY..| I4HD4 I4HU4 I4TMP
-//         +----+
-#define BPS       16   // this is the common stride
-#define Y_SIZE   (BPS * 16)
-#define UV_SIZE  (BPS * 8)
-#define YUV_SIZE (Y_SIZE + UV_SIZE)
-#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
-#define Y_OFF    (0)
-#define U_OFF    (Y_SIZE)
-#define V_OFF    (U_OFF + 8)
-#define ALIGN_CST 15
-#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
+// the arrays VP8*ModeOffsets[].
+// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
+//   (see VP8Scan[] for accessing the blocks, along with
+//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
+//             +----+----+
+//  Y_OFF_ENC  |YYYY|UUVV|
+//  U_OFF_ENC  |YYYY|UUVV|
+//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
+//             |YYYY|....|
+//             +----+----+
+// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
+//   Intra16 predictions (16x16 block each, two per row):
+//         |I16DC16|I16TM16|
+//         |I16VE16|I16HE16|
+//   Chroma U/V predictions (16x8 block each, two per row):
+//         |C8DC8|C8TM8|
+//         |C8VE8|C8HE8|
+//   Intra 4x4 predictions (4x4 block each)
+//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
+//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
+#define YUV_SIZE_ENC (BPS * 16)
+#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS)   // I16+Chroma+I4 preds
+#define Y_OFF_ENC    (0)
+#define U_OFF_ENC    (16)
+#define V_OFF_ENC    (16 + 8)
 
 extern const int VP8Scan[16];           // in quant.c
 extern const int VP8UVModeOffsets[4];   // in analyze.c
@@ -138,26 +87,26 @@ extern const int VP8I4ModeOffsets[NUM_BMODES];
 // Layout of prediction blocks
 // intra 16x16
 #define I16DC16 (0 * 16 * BPS)
-#define I16TM16 (1 * 16 * BPS)
-#define I16VE16 (2 * 16 * BPS)
-#define I16HE16 (3 * 16 * BPS)
+#define I16TM16 (I16DC16 + 16)
+#define I16VE16 (1 * 16 * BPS)
+#define I16HE16 (I16VE16 + 16)
 // chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
-#define C8DC8 (4 * 16 * BPS)
-#define C8TM8 (4 * 16 * BPS + 8 * BPS)
-#define C8VE8 (5 * 16 * BPS)
-#define C8HE8 (5 * 16 * BPS + 8 * BPS)
+#define C8DC8 (2 * 16 * BPS)
+#define C8TM8 (C8DC8 + 1 * 16)
+#define C8VE8 (2 * 16 * BPS + 8 * BPS)
+#define C8HE8 (C8VE8 + 1 * 16)
 // intra 4x4
-#define I4DC4 (6 * 16 * BPS +  0)
-#define I4TM4 (6 * 16 * BPS +  4)
-#define I4VE4 (6 * 16 * BPS +  8)
-#define I4HE4 (6 * 16 * BPS + 12)
-#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
-#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
-#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
-#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
-#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
-#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
-#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)
+#define I4DC4 (3 * 16 * BPS +  0)
+#define I4TM4 (I4DC4 +  4)
+#define I4VE4 (I4DC4 +  8)
+#define I4HE4 (I4DC4 + 12)
+#define I4RD4 (I4DC4 + 16)
+#define I4VR4 (I4DC4 + 20)
+#define I4LD4 (I4DC4 + 24)
+#define I4VL4 (I4DC4 + 28)
+#define I4HD4 (3 * 16 * BPS + 4 * BPS)
+#define I4HU4 (I4HD4 + 4)
+#define I4TMP (I4HD4 + 8)
 
 typedef int64_t score_t;     // type used for scores, rate, distortion
 // Note that MAX_COST is not the maximum allowed by sizeof(score_t),
@@ -172,14 +121,6 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
   return (int)((n * iQ + B) >> QFIX);
 }
 
-// size of histogram used by CollectHistogram.
-#define MAX_COEFF_THRESH   31
-typedef struct VP8Histogram VP8Histogram;
-struct VP8Histogram {
-  // TODO(skal): we only need to store the max_value and last_non_zero actually.
-  int distribution[MAX_COEFF_THRESH + 1];
-};
-
 // Uncomment the following to remove token-buffer code:
 // #define DISABLE_TOKEN_BUFFER
 
@@ -190,6 +131,8 @@ typedef uint32_t proba_t;   // 16b + 16b
 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
 typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef const uint16_t* (*CostArrayPtr)[NUM_CTX];   // for easy casting
+typedef const uint16_t* CostArrayMap[16][NUM_CTX];
 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
 
 typedef struct VP8Encoder VP8Encoder;
@@ -200,7 +143,7 @@ typedef struct {
   int update_map_;        // whether to update the segment map or not.
                           // must be 0 if there's only 1 segment.
   int size_;              // bit-cost for transmitting the segment map
-} VP8SegmentHeader;
+} VP8EncSegmentHeader;
 
 // Struct collecting all frame-persistent probabilities.
 typedef struct {
@@ -209,10 +152,11 @@ typedef struct {
   ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 1056 bytes
   StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 4224 bytes
   CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 13056 bytes
+  CostArrayMap remapped_costs_[NUM_TYPES];       // 1536 bytes
   int dirty_;               // if true, need to call VP8CalculateLevelCosts()
   int use_skip_proba_;      // Note: we always use skip_proba for now.
   int nb_skip_;             // number of skipped blocks
-} VP8Proba;
+} VP8EncProba;
 
 // Filter parameters. Not actually used in the code (we don't perform
 // the in-loop filtering), but filled from user's config
@@ -221,7 +165,7 @@ typedef struct {
   int level_;              // base filter level [0..63]
   int sharpness_;          // [0..7]
   int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
-} VP8FilterHeader;
+} VP8EncFilterHeader;
 
 //------------------------------------------------------------------------------
 // Informations about the macroblocks.
@@ -307,9 +251,10 @@ typedef struct {
   uint8_t* y_top_;     // top luma samples at position 'x_'
   uint8_t* uv_top_;    // top u/v samples at position 'x_', packed as 16 bytes
 
-  // memory for storing y/u/v_left_ and yuv_in_/out_*
-  uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST];     // memory for *_left_
-  uint8_t yuv_mem_[3 * YUV_SIZE + PRED_SIZE + ALIGN_CST];  // memory for yuv_*
+  // memory for storing y/u/v_left_
+  uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
+  // memory for yuv_*
+  uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
 } VP8EncIterator;
 
   // in iterator.c
@@ -381,7 +326,8 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
                   const uint8_t* const probas, int final_pass);
 
 // record the coding of coefficients without knowing the probabilities yet
-int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
+int VP8RecordCoeffTokens(const int ctx, const int coeff_type,
+                         int first, int last,
                          const int16_t* const coeffs,
                          VP8TBuffer* const tokens);
 
@@ -401,8 +347,8 @@ struct VP8Encoder {
   WebPPicture* pic_;            // input / output picture
 
   // headers
-  VP8FilterHeader   filter_hdr_;     // filtering information
-  VP8SegmentHeader  segment_hdr_;    // segment information
+  VP8EncFilterHeader   filter_hdr_;     // filtering information
+  VP8EncSegmentHeader  segment_hdr_;    // segment information
 
   int profile_;                      // VP8's profile, deduced from Config.
 
@@ -438,12 +384,12 @@ struct VP8Encoder {
   int dq_uv_dc_, dq_uv_ac_;
 
   // probabilities and statistics
-  VP8Proba proba_;
-  uint64_t sse_[4];        // sum of Y/U/V/A squared errors for all macroblocks
-  uint64_t sse_count_;     // pixel count for the sse_[] stats
-  int      coded_size_;
-  int      residual_bytes_[3][4];
-  int      block_count_[3];
+  VP8EncProba proba_;
+  uint64_t    sse_[4];      // sum of Y/U/V/A squared errors for all macroblocks
+  uint64_t    sse_count_;   // pixel count for the sse_[] stats
+  int         coded_size_;
+  int         residual_bytes_[3][4];
+  int         block_count_[3];
 
   // quality/speed settings
   int method_;               // 0=fastest, 6=best/slowest.
@@ -473,7 +419,7 @@ extern const uint8_t
 // Reset the token probabilities to their initial (default) values
 void VP8DefaultProbas(VP8Encoder* const enc);
 // Write the token probabilities
-void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
 // Writes the partition #0 modes (that is: all intra modes)
 void VP8CodeIntraModes(VP8Encoder* const enc);
 
@@ -486,7 +432,6 @@ int VP8EncWrite(VP8Encoder* const enc);
 void VP8EncFreeBitWriters(VP8Encoder* const enc);
 
   // in frame.c
-extern const uint8_t VP8EncBands[16 + 1];
 extern const uint8_t VP8Cat3[];
 extern const uint8_t VP8Cat4[];
 extern const uint8_t VP8Cat5[];
@@ -569,12 +514,21 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
 // Returns false in case of error (invalid param, out-of-memory).
 int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
 
+// Clean-up the RGB samples under fully transparent area, to help lossless
+// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
+
+  // in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless adjustment for predictors.
+void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits,
+                                 const uint32_t* argb_orig,
+                                 uint32_t* argb, uint32_t* argb_scratch,
+                                 const uint32_t* const transform_data,
+                                 int quality, int subtract_green);
 //------------------------------------------------------------------------------
 
-#if WEBP_ENCODER_ABI_VERSION <= 0x0203
-void WebPMemoryWriterClear(WebPMemoryWriter* writer);
-#endif
-
 #ifdef __cplusplus
 }    // extern "C"
 #endif