summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/libwebp/src/enc
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/libwebp/src/enc')
-rw-r--r--src/3rdparty/libwebp/src/enc/alpha.c61
-rw-r--r--src/3rdparty/libwebp/src/enc/analysis.c51
-rw-r--r--src/3rdparty/libwebp/src/enc/backward_references.c898
-rw-r--r--src/3rdparty/libwebp/src/enc/backward_references.h22
-rw-r--r--src/3rdparty/libwebp/src/enc/config.c19
-rw-r--r--src/3rdparty/libwebp/src/enc/cost.c399
-rw-r--r--src/3rdparty/libwebp/src/enc/cost.h29
-rw-r--r--src/3rdparty/libwebp/src/enc/delta_palettization.c455
-rw-r--r--src/3rdparty/libwebp/src/enc/delta_palettization.h25
-rw-r--r--src/3rdparty/libwebp/src/enc/filter.c19
-rw-r--r--src/3rdparty/libwebp/src/enc/frame.c30
-rw-r--r--src/3rdparty/libwebp/src/enc/histogram.c564
-rw-r--r--src/3rdparty/libwebp/src/enc/histogram.h37
-rw-r--r--src/3rdparty/libwebp/src/enc/iterator.c26
-rw-r--r--src/3rdparty/libwebp/src/enc/near_lossless.c160
-rw-r--r--src/3rdparty/libwebp/src/enc/picture.c1
-rw-r--r--src/3rdparty/libwebp/src/enc/picture_csp.c350
-rw-r--r--src/3rdparty/libwebp/src/enc/picture_psnr.c149
-rw-r--r--src/3rdparty/libwebp/src/enc/picture_rescale.c65
-rw-r--r--src/3rdparty/libwebp/src/enc/picture_tools.c20
-rw-r--r--src/3rdparty/libwebp/src/enc/quant.c277
-rw-r--r--src/3rdparty/libwebp/src/enc/syntax.c40
-rw-r--r--src/3rdparty/libwebp/src/enc/token.c65
-rw-r--r--src/3rdparty/libwebp/src/enc/tree.c8
-rw-r--r--src/3rdparty/libwebp/src/enc/vp8enci.h224
-rw-r--r--src/3rdparty/libwebp/src/enc/vp8l.c986
-rw-r--r--src/3rdparty/libwebp/src/enc/webpenc.c71
27 files changed, 3040 insertions, 2011 deletions
diff --git a/src/3rdparty/libwebp/src/enc/alpha.c b/src/3rdparty/libwebp/src/enc/alpha.c
index 79cb94d..3c970b0 100644
--- a/src/3rdparty/libwebp/src/enc/alpha.c
+++ b/src/3rdparty/libwebp/src/enc/alpha.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include "./vp8enci.h"
+#include "../dsp/dsp.h"
#include "../utils/filters.h"
#include "../utils/quant_levels.h"
#include "../utils/utils.h"
@@ -61,21 +62,16 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
if (!WebPPictureAlloc(&picture)) return 0;
// Transfer the alpha values to the green channel.
- {
- int i, j;
- uint32_t* dst = picture.argb;
- const uint8_t* src = data;
- for (j = 0; j < picture.height; ++j) {
- for (i = 0; i < picture.width; ++i) {
- dst[i] = src[i] << 8; // we leave A/R/B channels zero'd.
- }
- src += width;
- dst += picture.argb_stride;
- }
- }
+ WebPDispatchAlphaToGreen(data, width, picture.width, picture.height,
+ picture.argb, picture.argb_stride);
WebPConfigInit(&config);
config.lossless = 1;
+ // Enable exact, or it would alter RGB values of transparent alpha, which is
+ // normally OK but not here since we are not encoding the input image but an
+ // internal encoding-related image containing necessary exact information in
+ // RGB channels.
+ config.exact = 1;
config.method = effort_level; // impact is very small
// Set a low default quality for encoding alpha. Ensure that Alpha quality at
// lower methods (3 and below) is less than the threshold for triggering
@@ -87,11 +83,10 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
WebPPictureFree(&picture);
ok = ok && !bw->error_;
if (!ok) {
- VP8LBitWriterDestroy(bw);
+ VP8LBitWriterWipeOut(bw);
return 0;
}
return 1;
-
}
// -----------------------------------------------------------------------------
@@ -143,10 +138,10 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
if (output_size > data_size) {
// compressed size is larger than source! Revert to uncompressed mode.
method = ALPHA_NO_COMPRESSION;
- VP8LBitWriterDestroy(&tmp_bw);
+ VP8LBitWriterWipeOut(&tmp_bw);
}
} else {
- VP8LBitWriterDestroy(&tmp_bw);
+ VP8LBitWriterWipeOut(&tmp_bw);
return 0;
}
}
@@ -166,7 +161,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
if (method != ALPHA_NO_COMPRESSION) {
- VP8LBitWriterDestroy(&tmp_bw);
+ VP8LBitWriterWipeOut(&tmp_bw);
}
ok = ok && !result->bw.error_;
result->score = VP8BitWriterSize(&result->bw);
@@ -175,16 +170,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
// -----------------------------------------------------------------------------
-// TODO(skal): move to dsp/ ?
-static void CopyPlane(const uint8_t* src, int src_stride,
- uint8_t* dst, int dst_stride, int width, int height) {
- while (height-- > 0) {
- memcpy(dst, src, width);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
static int GetNumColors(const uint8_t* data, int width, int height,
int stride) {
int j;
@@ -218,8 +203,9 @@ static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height,
const int kMaxColorsForFilterNone = 192;
const int num_colors = GetNumColors(alpha, width, height, width);
// For low number of colors, NONE yields better compression.
- filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE :
- EstimateBestFilter(alpha, width, height, width);
+ filter = (num_colors <= kMinColorsForFilterNone)
+ ? WEBP_FILTER_NONE
+ : WebPEstimateBestFilter(alpha, width, height, width);
bit_map |= 1 << filter;
// For large number of colors, try FILTER_NONE in addition to the best
// filter as well.
@@ -250,6 +236,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
uint32_t try_map =
GetFilterMap(alpha, width, height, filter, effort_level);
InitFilterTrial(&best);
+
if (try_map != FILTER_TRY_NONE) {
uint8_t* filtered_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
if (filtered_alpha == NULL) return 0;
@@ -274,7 +261,16 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
reduce_levels, effort_level, NULL, &best);
}
if (ok) {
- if (stats != NULL) *stats = best.stats;
+ if (stats != NULL) {
+ stats->lossless_features = best.stats.lossless_features;
+ stats->histogram_bits = best.stats.histogram_bits;
+ stats->transform_bits = best.stats.transform_bits;
+ stats->cache_bits = best.stats.cache_bits;
+ stats->palette_size = best.stats.palette_size;
+ stats->lossless_size = best.stats.lossless_size;
+ stats->lossless_hdr_size = best.stats.lossless_hdr_size;
+ stats->lossless_data_size = best.stats.lossless_data_size;
+ }
*output_size = VP8BitWriterSize(&best.bw);
*output = VP8BitWriterBuf(&best.bw);
} else {
@@ -324,7 +320,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
}
// Extract alpha data (width x height) from raw_data (stride x height).
- CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+ WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
if (reduce_levels) { // No Quantization required for 'quality = 100'.
// 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
@@ -336,6 +332,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
}
if (ok) {
+ VP8FiltersInit();
ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
filter, reduce_levels, effort_level, output,
output_size, pic->stats);
@@ -376,6 +373,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
}
void VP8EncInitAlpha(VP8Encoder* const enc) {
+ WebPInitAlphaProcessing();
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
@@ -430,4 +428,3 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) {
enc->has_alpha_ = 0;
return ok;
}
-
diff --git a/src/3rdparty/libwebp/src/enc/analysis.c b/src/3rdparty/libwebp/src/enc/analysis.c
index e019465..b55128f 100644
--- a/src/3rdparty/libwebp/src/enc/analysis.c
+++ b/src/3rdparty/libwebp/src/enc/analysis.c
@@ -111,28 +111,28 @@ static int FinalAlphaValue(int alpha) {
}
static int GetAlpha(const VP8Histogram* const histo) {
- int max_value = 0, last_non_zero = 1;
- int k;
- int alpha;
- for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
- const int value = histo->distribution[k];
- if (value > 0) {
- if (value > max_value) max_value = value;
- last_non_zero = k;
- }
- }
// 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
// values which happen to be mostly noise. This leaves the maximum precision
// for handling the useful small values which contribute most.
- alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
+ const int max_value = histo->max_value;
+ const int last_non_zero = histo->last_non_zero;
+ const int alpha =
+ (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
return alpha;
}
+static void InitHistogram(VP8Histogram* const histo) {
+ histo->max_value = 0;
+ histo->last_non_zero = 1;
+}
+
static void MergeHistograms(const VP8Histogram* const in,
VP8Histogram* const out) {
- int i;
- for (i = 0; i <= MAX_COEFF_THRESH; ++i) {
- out->distribution[i] += in->distribution[i];
+ if (in->max_value > out->max_value) {
+ out->max_value = in->max_value;
+ }
+ if (in->last_non_zero > out->last_non_zero) {
+ out->last_non_zero = in->last_non_zero;
}
}
@@ -245,10 +245,11 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
VP8MakeLuma16Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
- VP8Histogram histo = { { 0 } };
+ VP8Histogram histo;
int alpha;
- VP8CollectHistogram(it->yuv_in_ + Y_OFF,
+ InitHistogram(&histo);
+ VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
it->yuv_p_ + VP8I16ModeOffsets[mode],
0, 16, &histo);
alpha = GetAlpha(&histo);
@@ -266,21 +267,22 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
uint8_t modes[16];
const int max_mode = MAX_INTRA4_MODE;
int i4_alpha;
- VP8Histogram total_histo = { { 0 } };
+ VP8Histogram total_histo;
int cur_histo = 0;
+ InitHistogram(&total_histo);
VP8IteratorStartI4(it);
do {
int mode;
int best_mode_alpha = DEFAULT_ALPHA;
VP8Histogram histos[2];
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
VP8MakeIntra4Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
int alpha;
- memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
+ InitHistogram(&histos[cur_histo]);
VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
0, 1, &histos[cur_histo]);
alpha = GetAlpha(&histos[cur_histo]);
@@ -293,7 +295,7 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
// accumulate best histogram
MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
// Note: we reuse the original samples for predictors
- } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
+ } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC));
i4_alpha = GetAlpha(&total_histo);
if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
@@ -311,9 +313,10 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
VP8MakeChroma8Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
- VP8Histogram histo = { { 0 } };
+ VP8Histogram histo;
int alpha;
- VP8CollectHistogram(it->yuv_in_ + U_OFF,
+ InitHistogram(&histo);
+ VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
it->yuv_p_ + VP8UVModeOffsets[mode],
16, 16 + 4 + 4, &histo);
alpha = GetAlpha(&histo);
@@ -402,8 +405,8 @@ typedef struct {
static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) {
int ok = 1;
if (!VP8IteratorIsDone(it)) {
- uint8_t tmp[32 + ALIGN_CST];
- uint8_t* const scratch = (uint8_t*)DO_ALIGN(tmp);
+ uint8_t tmp[32 + WEBP_ALIGN_CST];
+ uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
do {
// Let's pretend we have perfect lossless reconstruction.
VP8IteratorImport(it, scratch);
diff --git a/src/3rdparty/libwebp/src/enc/backward_references.c b/src/3rdparty/libwebp/src/enc/backward_references.c
index a3c30aa..c39437d 100644
--- a/src/3rdparty/libwebp/src/enc/backward_references.c
+++ b/src/3rdparty/libwebp/src/enc/backward_references.c
@@ -16,13 +16,12 @@
#include "./backward_references.h"
#include "./histogram.h"
#include "../dsp/lossless.h"
+#include "../dsp/dsp.h"
#include "../utils/color_cache.h"
#include "../utils/utils.h"
#define VALUES_IN_BYTE 256
-#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)
-
#define MIN_BLOCK_SIZE 256 // minimum block size for backward references
#define MAX_ENTROPY (1e30f)
@@ -58,10 +57,28 @@ static int DistanceToPlaneCode(int xsize, int dist) {
return dist + 120;
}
+// Returns the exact index where array1 and array2 are different if this
+// index is strictly superior to best_len_match. Otherwise, it returns 0.
+// If no two elements are the same, it returns max_limit.
static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
const uint32_t* const array2,
- const int max_limit) {
- int match_len = 0;
+ int best_len_match,
+ int max_limit) {
+ int match_len;
+
+ // Before 'expensive' linear match, check if the two arrays match at the
+ // current best length index.
+ if (array1[best_len_match] != array2[best_len_match]) return 0;
+
+#if defined(WEBP_USE_SSE2)
+ // Check if anything is different up to best_len_match excluded.
+ // memcmp seems to be slower on ARM so it is disabled for now.
+ if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0;
+ match_len = best_len_match + 1;
+#else
+ match_len = 0;
+#endif
+
while (match_len < max_limit && array1[match_len] == array2[match_len]) {
++match_len;
}
@@ -178,15 +195,12 @@ int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
// Hash chains
// initialize as empty
-static void HashChainInit(VP8LHashChain* const p) {
- int i;
+static void HashChainReset(VP8LHashChain* const p) {
assert(p != NULL);
- for (i = 0; i < p->size_; ++i) {
- p->chain_[i] = -1;
- }
- for (i = 0; i < HASH_SIZE; ++i) {
- p->hash_to_first_index_[i] = -1;
- }
+ // Set the int32_t arrays to -1.
+ memset(p->chain_, 0xff, p->size_ * sizeof(*p->chain_));
+ memset(p->hash_to_first_index_, 0xff,
+ HASH_SIZE * sizeof(*p->hash_to_first_index_));
}
int VP8LHashChainInit(VP8LHashChain* const p, int size) {
@@ -196,7 +210,7 @@ int VP8LHashChainInit(VP8LHashChain* const p, int size) {
p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
if (p->chain_ == NULL) return 0;
p->size_ = size;
- HashChainInit(p);
+ HashChainReset(p);
return 1;
}
@@ -209,209 +223,212 @@ void VP8LHashChainClear(VP8LHashChain* const p) {
// -----------------------------------------------------------------------------
-static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
- uint64_t key = ((uint64_t)argb[1] << 32) | argb[0];
- key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
+#define HASH_MULTIPLIER_HI (0xc6a4a793U)
+#define HASH_MULTIPLIER_LO (0x5bd1e996U)
+
+static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
+ uint32_t key;
+ key = argb[1] * HASH_MULTIPLIER_HI;
+ key += argb[0] * HASH_MULTIPLIER_LO;
+ key = key >> (32 - HASH_BITS);
return key;
}
// Insertion of two pixels at a time.
static void HashChainInsert(VP8LHashChain* const p,
const uint32_t* const argb, int pos) {
- const uint64_t hash_code = GetPixPairHash64(argb);
+ const uint32_t hash_code = GetPixPairHash64(argb);
p->chain_[pos] = p->hash_to_first_index_[hash_code];
p->hash_to_first_index_[hash_code] = pos;
}
-static void GetParamsForHashChainFindCopy(int quality, int xsize,
- int cache_bits, int* window_size,
- int* iter_pos, int* iter_limit) {
- const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
- const int iter_neg = -iter_mult * (quality >> 1);
- // Limit the backward-ref window size for lower qualities.
- const int max_window_size = (quality > 50) ? WINDOW_SIZE
- : (quality > 25) ? (xsize << 8)
+// Returns the maximum number of hash chain lookups to do for a
+// given compression quality. Return value in range [6, 86].
+static int GetMaxItersForQuality(int quality, int low_effort) {
+ return (low_effort ? 6 : 8) + (quality * quality) / 128;
+}
+
+static int GetWindowSizeForHashChain(int quality, int xsize) {
+ const int max_window_size = (quality > 75) ? WINDOW_SIZE
+ : (quality > 50) ? (xsize << 8)
+ : (quality > 25) ? (xsize << 6)
: (xsize << 4);
assert(xsize > 0);
- *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
- : max_window_size;
- *iter_pos = 8 + (quality >> 3);
- // For lower entropy images, the rigorous search loop in HashChainFindCopy
- // can be relaxed.
- *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
+ return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
+}
+
+static WEBP_INLINE int MaxFindCopyLength(int len) {
+ return (len < MAX_LENGTH) ? len : MAX_LENGTH;
+}
+
+static void HashChainFindOffset(const VP8LHashChain* const p, int base_position,
+ const uint32_t* const argb, int len,
+ int window_size, int* const distance_ptr) {
+ const uint32_t* const argb_start = argb + base_position;
+ const int min_pos =
+ (base_position > window_size) ? base_position - window_size : 0;
+ int pos;
+ assert(len <= MAX_LENGTH);
+ for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
+ pos >= min_pos;
+ pos = p->chain_[pos]) {
+ const int curr_length =
+ FindMatchLength(argb + pos, argb_start, len - 1, len);
+ if (curr_length == len) break;
+ }
+ *distance_ptr = base_position - pos;
}
static int HashChainFindCopy(const VP8LHashChain* const p,
- int base_position, int xsize_signed,
+ int base_position,
const uint32_t* const argb, int max_len,
- int window_size, int iter_pos, int iter_limit,
+ int window_size, int iter_max,
int* const distance_ptr,
int* const length_ptr) {
const uint32_t* const argb_start = argb + base_position;
- uint64_t best_val = 0;
- uint32_t best_length = 1;
- uint32_t best_distance = 0;
- const uint32_t xsize = (uint32_t)xsize_signed;
+ int iter = iter_max;
+ int best_length = 0;
+ int best_distance = 0;
const int min_pos =
(base_position > window_size) ? base_position - window_size : 0;
int pos;
- assert(xsize > 0);
- if (max_len > MAX_LENGTH) {
- max_len = MAX_LENGTH;
+ int length_max = 256;
+ if (max_len < length_max) {
+ length_max = max_len;
}
for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
pos >= min_pos;
pos = p->chain_[pos]) {
- uint64_t val;
- uint32_t curr_length;
- uint32_t distance;
- const uint32_t* const ptr1 = (argb + pos + best_length - 1);
- const uint32_t* const ptr2 = (argb_start + best_length - 1);
-
- if (iter_pos < 0) {
- if (iter_pos < iter_limit || best_val >= 0xff0000) {
- break;
- }
+ int curr_length;
+ int distance;
+ if (--iter < 0) {
+ break;
}
- --iter_pos;
-
- // Before 'expensive' linear match, check if the two arrays match at the
- // current best length index and also for the succeeding elements.
- if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue;
-
- curr_length = FindMatchLength(argb + pos, argb_start, max_len);
- if (curr_length < best_length) continue;
-
- distance = (uint32_t)(base_position - pos);
- val = curr_length << 16;
- // Favoring 2d locality here gives savings for certain images.
- if (distance < 9 * xsize) {
- const uint32_t y = distance / xsize;
- uint32_t x = distance % xsize;
- if (x > (xsize >> 1)) {
- x = xsize - x;
- }
- if (x <= 7) {
- val += 9 * 9 + 9 * 9;
- val -= y * y + x * x;
- }
- }
- if (best_val < val) {
- best_val = val;
+
+ curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len);
+ if (best_length < curr_length) {
+ distance = base_position - pos;
best_length = curr_length;
best_distance = distance;
- if (curr_length >= (uint32_t)max_len) {
- break;
- }
- if ((best_distance == 1 || distance == xsize) &&
- best_length >= 128) {
+ if (curr_length >= length_max) {
break;
}
}
}
- *distance_ptr = (int)best_distance;
+ *distance_ptr = best_distance;
*length_ptr = best_length;
return (best_length >= MIN_LENGTH);
}
-static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
- while (length >= MAX_LENGTH) {
- BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH));
- length -= MAX_LENGTH;
- }
- if (length > 0) {
- BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length));
+static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
+ VP8LColorCache* const hashers,
+ VP8LBackwardRefs* const refs) {
+ PixOrCopy v;
+ if (use_color_cache) {
+ const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
+ if (VP8LColorCacheLookup(hashers, key) == pixel) {
+ v = PixOrCopyCreateCacheIdx(key);
+ } else {
+ v = PixOrCopyCreateLiteral(pixel);
+ VP8LColorCacheSet(hashers, key, pixel);
+ }
+ } else {
+ v = PixOrCopyCreateLiteral(pixel);
}
+ BackwardRefsCursorAdd(refs, v);
}
static int BackwardReferencesRle(int xsize, int ysize,
const uint32_t* const argb,
- VP8LBackwardRefs* const refs) {
+ int cache_bits, VP8LBackwardRefs* const refs) {
const int pix_count = xsize * ysize;
- int match_len = 0;
- int i;
+ int i, k;
+ const int use_color_cache = (cache_bits > 0);
+ VP8LColorCache hashers;
+
+ if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
+ return 0;
+ }
ClearBackwardRefs(refs);
- PushBackCopy(refs, match_len); // i=0 case
- BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0]));
- for (i = 1; i < pix_count; ++i) {
- if (argb[i] == argb[i - 1]) {
- ++match_len;
+ // Add first pixel as literal.
+ AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
+ i = 1;
+ while (i < pix_count) {
+ const int max_len = MaxFindCopyLength(pix_count - i);
+ const int kMinLength = 4;
+ const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
+ const int prev_row_len = (i < xsize) ? 0 :
+ FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
+ if (rle_len >= prev_row_len && rle_len >= kMinLength) {
+ BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
+ // We don't need to update the color cache here since it is always the
+ // same pixel being copied, and that does not change the color cache
+ // state.
+ i += rle_len;
+ } else if (prev_row_len >= kMinLength) {
+ BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
+ if (use_color_cache) {
+ for (k = 0; k < prev_row_len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[i + k]);
+ }
+ }
+ i += prev_row_len;
} else {
- PushBackCopy(refs, match_len);
- match_len = 0;
- BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i]));
+ AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+ i++;
}
}
- PushBackCopy(refs, match_len);
+ if (use_color_cache) VP8LColorCacheClear(&hashers);
return !refs->error_;
}
-static int BackwardReferencesHashChain(int xsize, int ysize,
- const uint32_t* const argb,
- int cache_bits, int quality,
- VP8LHashChain* const hash_chain,
- VP8LBackwardRefs* const refs) {
+static int BackwardReferencesLz77(int xsize, int ysize,
+ const uint32_t* const argb, int cache_bits,
+ int quality, int low_effort,
+ VP8LHashChain* const hash_chain,
+ VP8LBackwardRefs* const refs) {
int i;
int ok = 0;
int cc_init = 0;
const int use_color_cache = (cache_bits > 0);
const int pix_count = xsize * ysize;
VP8LColorCache hashers;
- int window_size = WINDOW_SIZE;
- int iter_pos = 1;
- int iter_limit = -1;
+ int iter_max = GetMaxItersForQuality(quality, low_effort);
+ const int window_size = GetWindowSizeForHashChain(quality, xsize);
+ int min_matches = 32;
if (use_color_cache) {
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
if (!cc_init) goto Error;
}
-
ClearBackwardRefs(refs);
- GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
- &window_size, &iter_pos, &iter_limit);
- HashChainInit(hash_chain);
- for (i = 0; i < pix_count; ) {
+ HashChainReset(hash_chain);
+ for (i = 0; i < pix_count - 2; ) {
// Alternative#1: Code the pixels starting at 'i' using backward reference.
int offset = 0;
int len = 0;
- if (i < pix_count - 1) { // FindCopy(i,..) reads pixels at [i] and [i + 1].
- int max_len = pix_count - i;
- HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
- window_size, iter_pos, iter_limit,
- &offset, &len);
- }
- if (len >= MIN_LENGTH) {
- // Alternative#2: Insert the pixel at 'i' as literal, and code the
- // pixels starting at 'i + 1' using backward reference.
+ const int max_len = MaxFindCopyLength(pix_count - i);
+ HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
+ iter_max, &offset, &len);
+ if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) {
int offset2 = 0;
int len2 = 0;
int k;
+ min_matches = 8;
HashChainInsert(hash_chain, &argb[i], i);
- if (i < pix_count - 2) { // FindCopy(i+1,..) reads [i + 1] and [i + 2].
- int max_len = pix_count - (i + 1);
- HashChainFindCopy(hash_chain, i + 1, xsize, argb, max_len,
- window_size, iter_pos, iter_limit,
- &offset2, &len2);
+ if ((len < (max_len >> 2)) && !low_effort) {
+ // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code
+ // the pixels starting at 'i + 1' using backward reference.
+ HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1,
+ window_size, iter_max, &offset2,
+ &len2);
if (len2 > len + 1) {
- const uint32_t pixel = argb[i];
- // Alternative#2 is a better match. So push pixel at 'i' as literal.
- PixOrCopy v;
- if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
- const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
- v = PixOrCopyCreateCacheIdx(ix);
- } else {
- if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
- v = PixOrCopyCreateLiteral(pixel);
- }
- BackwardRefsCursorAdd(refs, v);
+ AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
i++; // Backward reference to be done for next pixel.
len = len2;
offset = offset2;
}
}
- if (len >= MAX_LENGTH) {
- len = MAX_LENGTH - 1;
- }
BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
if (use_color_cache) {
for (k = 0; k < len; ++k) {
@@ -419,33 +436,36 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
}
}
// Add to the hash_chain (but cannot add the last pixel).
- {
+ if (offset >= 3 && offset != xsize) {
const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
- for (k = 1; k < last; ++k) {
+ for (k = 2; k < last - 8; k += 2) {
+ HashChainInsert(hash_chain, &argb[i + k], i + k);
+ }
+ for (; k < last; ++k) {
HashChainInsert(hash_chain, &argb[i + k], i + k);
}
}
i += len;
} else {
- const uint32_t pixel = argb[i];
- PixOrCopy v;
- if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
- // push pixel as a PixOrCopyCreateCacheIdx pixel
- const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
- v = PixOrCopyCreateCacheIdx(ix);
- } else {
- if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
- v = PixOrCopyCreateLiteral(pixel);
- }
- BackwardRefsCursorAdd(refs, v);
- if (i + 1 < pix_count) {
+ AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+ HashChainInsert(hash_chain, &argb[i], i);
+ ++i;
+ --min_matches;
+ if (min_matches <= 0) {
+ AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
HashChainInsert(hash_chain, &argb[i], i);
+ ++i;
}
- ++i;
}
}
+ while (i < pix_count) {
+ // Handle the last pixel(s).
+ AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+ ++i;
+ }
+
ok = !refs->error_;
-Error:
+ Error:
if (cc_init) VP8LColorCacheClear(&hashers);
return ok;
}
@@ -455,15 +475,14 @@ Error:
typedef struct {
double alpha_[VALUES_IN_BYTE];
double red_[VALUES_IN_BYTE];
- double literal_[PIX_OR_COPY_CODES_MAX];
double blue_[VALUES_IN_BYTE];
double distance_[NUM_DISTANCE_CODES];
+ double* literal_;
} CostModel;
static int BackwardReferencesTraceBackwards(
- int xsize, int ysize, int recursive_cost_model,
- const uint32_t* const argb, int quality, int cache_bits,
- VP8LHashChain* const hash_chain,
+ int xsize, int ysize, const uint32_t* const argb, int quality,
+ int cache_bits, VP8LHashChain* const hash_chain,
VP8LBackwardRefs* const refs);
static void ConvertPopulationCountTableToBitEstimates(
@@ -487,28 +506,10 @@ static void ConvertPopulationCountTableToBitEstimates(
}
}
-static int CostModelBuild(CostModel* const m, int xsize, int ysize,
- int recursion_level, const uint32_t* const argb,
- int quality, int cache_bits,
- VP8LHashChain* const hash_chain,
+static int CostModelBuild(CostModel* const m, int cache_bits,
VP8LBackwardRefs* const refs) {
int ok = 0;
- VP8LHistogram* histo = NULL;
-
- ClearBackwardRefs(refs);
- if (recursion_level > 0) {
- if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
- argb, quality, cache_bits, hash_chain,
- refs)) {
- goto Error;
- }
- } else {
- if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
- hash_chain, refs)) {
- goto Error;
- }
- }
- histo = VP8LAllocateHistogram(cache_bits);
+ VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
if (histo == NULL) goto Error;
VP8LHistogramCreate(histo, refs, cache_bits);
@@ -557,10 +558,35 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
return m->distance_[code] + extra_bits;
}
+static void AddSingleLiteralWithCostModel(
+ const uint32_t* const argb, VP8LHashChain* const hash_chain,
+ VP8LColorCache* const hashers, const CostModel* const cost_model, int idx,
+ int is_last, int use_color_cache, double prev_cost, float* const cost,
+ uint16_t* const dist_array) {
+ double cost_val = prev_cost;
+ const uint32_t color = argb[0];
+ if (!is_last) {
+ HashChainInsert(hash_chain, argb, idx);
+ }
+ if (use_color_cache && VP8LColorCacheContains(hashers, color)) {
+ const double mul0 = 0.68;
+ const int ix = VP8LColorCacheGetIndex(hashers, color);
+ cost_val += GetCacheCost(cost_model, ix) * mul0;
+ } else {
+ const double mul1 = 0.82;
+ if (use_color_cache) VP8LColorCacheInsert(hashers, color);
+ cost_val += GetLiteralCost(cost_model, color) * mul1;
+ }
+ if (cost[idx] > cost_val) {
+ cost[idx] = (float)cost_val;
+ dist_array[idx] = 1; // only one is inserted.
+ }
+}
+
static int BackwardReferencesHashChainDistanceOnly(
- int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
+ int xsize, int ysize, const uint32_t* const argb,
int quality, int cache_bits, VP8LHashChain* const hash_chain,
- VP8LBackwardRefs* const refs, uint32_t* const dist_array) {
+ VP8LBackwardRefs* const refs, uint16_t* const dist_array) {
int i;
int ok = 0;
int cc_init = 0;
@@ -568,24 +594,27 @@ static int BackwardReferencesHashChainDistanceOnly(
const int use_color_cache = (cache_bits > 0);
float* const cost =
(float*)WebPSafeMalloc(pix_count, sizeof(*cost));
- CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model));
+ const size_t literal_array_size = sizeof(double) *
+ (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
+ ((cache_bits > 0) ? (1 << cache_bits) : 0));
+ const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
+ CostModel* const cost_model =
+ (CostModel*)WebPSafeMalloc(1ULL, cost_model_size);
VP8LColorCache hashers;
- const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
- const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
- const int min_distance_code = 2; // TODO(vikasa): tune as function of quality
- int window_size = WINDOW_SIZE;
- int iter_pos = 1;
- int iter_limit = -1;
+ const int skip_length = 32 + quality;
+ const int skip_min_distance_code = 2;
+ int iter_max = GetMaxItersForQuality(quality, 0);
+ const int window_size = GetWindowSizeForHashChain(quality, xsize);
if (cost == NULL || cost_model == NULL) goto Error;
+ cost_model->literal_ = (double*)(cost_model + 1);
if (use_color_cache) {
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
if (!cc_init) goto Error;
}
- if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
- quality, cache_bits, hash_chain, refs)) {
+ if (!CostModelBuild(cost_model, cache_bits, refs)) {
goto Error;
}
@@ -594,85 +623,80 @@ static int BackwardReferencesHashChainDistanceOnly(
// We loop one pixel at a time, but store all currently best points to
// non-processed locations from this point.
dist_array[0] = 0;
- GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
- &window_size, &iter_pos, &iter_limit);
- HashChainInit(hash_chain);
- for (i = 0; i < pix_count; ++i) {
- double prev_cost = 0.0;
- int shortmax;
- if (i > 0) {
- prev_cost = cost[i - 1];
- }
- for (shortmax = 0; shortmax < 2; ++shortmax) {
- int offset = 0;
- int len = 0;
- if (i < pix_count - 1) { // FindCopy reads pixels at [i] and [i + 1].
- int max_len = shortmax ? 2 : pix_count - i;
- HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
- window_size, iter_pos, iter_limit,
- &offset, &len);
+ HashChainReset(hash_chain);
+ // Add first pixel as literal.
+ AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0,
+ 0, use_color_cache, 0.0, cost, dist_array);
+ for (i = 1; i < pix_count - 1; ++i) {
+ int offset = 0;
+ int len = 0;
+ double prev_cost = cost[i - 1];
+ const int max_len = MaxFindCopyLength(pix_count - i);
+ HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
+ iter_max, &offset, &len);
+ if (len >= MIN_LENGTH) {
+ const int code = DistanceToPlaneCode(xsize, offset);
+ const double distance_cost =
+ prev_cost + GetDistanceCost(cost_model, code);
+ int k;
+ for (k = 1; k < len; ++k) {
+ const double cost_val = distance_cost + GetLengthCost(cost_model, k);
+ if (cost[i + k] > cost_val) {
+ cost[i + k] = (float)cost_val;
+ dist_array[i + k] = k + 1;
+ }
}
- if (len >= MIN_LENGTH) {
- const int code = DistanceToPlaneCode(xsize, offset);
- const double distance_cost =
- prev_cost + GetDistanceCost(cost_model, code);
- int k;
- for (k = 1; k < len; ++k) {
- const double cost_val = distance_cost + GetLengthCost(cost_model, k);
- if (cost[i + k] > cost_val) {
- cost[i + k] = (float)cost_val;
- dist_array[i + k] = k + 1;
+ // This if is for speedup only. It roughly doubles the speed, and
+ // makes compression worse by .1 %.
+ if (len >= skip_length && code <= skip_min_distance_code) {
+ // Long copy for short distances, let's skip the middle
+ // lookups for better copies.
+ // 1) insert the hashes.
+ if (use_color_cache) {
+ for (k = 0; k < len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[i + k]);
}
}
- // This if is for speedup only. It roughly doubles the speed, and
- // makes compression worse by .1 %.
- if (len >= 128 && code <= min_distance_code) {
- // Long copy for short distances, let's skip the middle
- // lookups for better copies.
- // 1) insert the hashes.
- if (use_color_cache) {
- for (k = 0; k < len; ++k) {
- VP8LColorCacheInsert(&hashers, argb[i + k]);
- }
- }
- // 2) Add to the hash_chain (but cannot add the last pixel)
- {
- const int last = (len + i < pix_count - 1) ? len + i
- : pix_count - 1;
- for (k = i; k < last; ++k) {
- HashChainInsert(hash_chain, &argb[k], k);
- }
+ // 2) Add to the hash_chain (but cannot add the last pixel)
+ {
+ const int last = (len + i < pix_count - 1) ? len + i
+ : pix_count - 1;
+ for (k = i; k < last; ++k) {
+ HashChainInsert(hash_chain, &argb[k], k);
}
- // 3) jump.
- i += len - 1; // for loop does ++i, thus -1 here.
- goto next_symbol;
}
+ // 3) jump.
+ i += len - 1; // for loop does ++i, thus -1 here.
+ goto next_symbol;
}
- }
- if (i < pix_count - 1) {
- HashChainInsert(hash_chain, &argb[i], i);
- }
- {
- // inserting a literal pixel
- double cost_val = prev_cost;
- if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
- const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
- cost_val += GetCacheCost(cost_model, ix) * mul0;
- } else {
- if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
- cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
- }
- if (cost[i] > cost_val) {
- cost[i] = (float)cost_val;
- dist_array[i] = 1; // only one is inserted.
+ if (len != MIN_LENGTH) {
+ int code_min_length;
+ double cost_total;
+ HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size,
+ &offset);
+ code_min_length = DistanceToPlaneCode(xsize, offset);
+ cost_total = prev_cost +
+ GetDistanceCost(cost_model, code_min_length) +
+ GetLengthCost(cost_model, 1);
+ if (cost[i + 1] > cost_total) {
+ cost[i + 1] = (float)cost_total;
+ dist_array[i + 1] = 2;
+ }
}
}
+ AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
+ 0, use_color_cache, prev_cost, cost,
+ dist_array);
next_symbol: ;
}
- // Last pixel still to do, it can only be a single step if not reached
- // through cheaper means already.
+ // Handle the last pixel.
+ if (i == (pix_count - 1)) {
+ AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
+ 1, use_color_cache, cost[pix_count - 2], cost,
+ dist_array);
+ }
ok = !refs->error_;
-Error:
+ Error:
if (cc_init) VP8LColorCacheClear(&hashers);
WebPSafeFree(cost_model);
WebPSafeFree(cost);
@@ -682,12 +706,12 @@ Error:
// We pack the path at the end of *dist_array and return
// a pointer to this part of the array. Example:
// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
-static void TraceBackwards(uint32_t* const dist_array,
+static void TraceBackwards(uint16_t* const dist_array,
int dist_array_size,
- uint32_t** const chosen_path,
+ uint16_t** const chosen_path,
int* const chosen_path_size) {
- uint32_t* path = dist_array + dist_array_size;
- uint32_t* cur = dist_array + dist_array_size - 1;
+ uint16_t* path = dist_array + dist_array_size;
+ uint16_t* cur = dist_array + dist_array_size - 1;
while (cur >= dist_array) {
const int k = *cur;
--path;
@@ -701,20 +725,16 @@ static void TraceBackwards(uint32_t* const dist_array,
static int BackwardReferencesHashChainFollowChosenPath(
int xsize, int ysize, const uint32_t* const argb,
int quality, int cache_bits,
- const uint32_t* const chosen_path, int chosen_path_size,
+ const uint16_t* const chosen_path, int chosen_path_size,
VP8LHashChain* const hash_chain,
VP8LBackwardRefs* const refs) {
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
- int size = 0;
- int i = 0;
- int k;
int ix;
+ int i = 0;
int ok = 0;
int cc_init = 0;
- int window_size = WINDOW_SIZE;
- int iter_pos = 1;
- int iter_limit = -1;
+ const int window_size = GetWindowSizeForHashChain(quality, xsize);
VP8LColorCache hashers;
if (use_color_cache) {
@@ -723,18 +743,13 @@ static int BackwardReferencesHashChainFollowChosenPath(
}
ClearBackwardRefs(refs);
- GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
- &window_size, &iter_pos, &iter_limit);
- HashChainInit(hash_chain);
- for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
+ HashChainReset(hash_chain);
+ for (ix = 0; ix < chosen_path_size; ++ix) {
int offset = 0;
- int len = 0;
- int max_len = chosen_path[ix];
- if (max_len != 1) {
- HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
- window_size, iter_pos, iter_limit,
- &offset, &len);
- assert(len == max_len);
+ const int len = chosen_path[ix];
+ if (len != 1) {
+ int k;
+ HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset);
BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
if (use_color_cache) {
for (k = 0; k < len; ++k) {
@@ -766,29 +781,28 @@ static int BackwardReferencesHashChainFollowChosenPath(
}
}
ok = !refs->error_;
-Error:
+ Error:
if (cc_init) VP8LColorCacheClear(&hashers);
return ok;
}
// Returns 1 on success.
static int BackwardReferencesTraceBackwards(int xsize, int ysize,
- int recursive_cost_model,
const uint32_t* const argb,
int quality, int cache_bits,
VP8LHashChain* const hash_chain,
VP8LBackwardRefs* const refs) {
int ok = 0;
const int dist_array_size = xsize * ysize;
- uint32_t* chosen_path = NULL;
+ uint16_t* chosen_path = NULL;
int chosen_path_size = 0;
- uint32_t* dist_array =
- (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
+ uint16_t* dist_array =
+ (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
if (dist_array == NULL) goto Error;
if (!BackwardReferencesHashChainDistanceOnly(
- xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain,
+ xsize, ysize, argb, quality, cache_bits, hash_chain,
refs, dist_array)) {
goto Error;
}
@@ -817,72 +831,10 @@ static void BackwardReferences2DLocality(int xsize,
}
}
-VP8LBackwardRefs* VP8LGetBackwardReferences(
- int width, int height, const uint32_t* const argb, int quality,
- int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
- VP8LBackwardRefs refs_array[2]) {
- int lz77_is_useful;
- const int num_pix = width * height;
- VP8LBackwardRefs* best = NULL;
- VP8LBackwardRefs* const refs_lz77 = &refs_array[0];
- VP8LBackwardRefs* const refs_rle = &refs_array[1];
-
- if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
- hash_chain, refs_lz77)) {
- return NULL;
- }
- if (!BackwardReferencesRle(width, height, argb, refs_rle)) {
- return NULL;
- }
-
- {
- double bit_cost_lz77, bit_cost_rle;
- VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
- if (histo == NULL) return NULL;
- // Evaluate LZ77 coding.
- VP8LHistogramCreate(histo, refs_lz77, cache_bits);
- bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
- // Evaluate RLE coding.
- VP8LHistogramCreate(histo, refs_rle, cache_bits);
- bit_cost_rle = VP8LHistogramEstimateBits(histo);
- // Decide if LZ77 is useful.
- lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
- VP8LFreeHistogram(histo);
- }
-
- // Choose appropriate backward reference.
- if (lz77_is_useful) {
- // TraceBackwards is costly. Don't execute it at lower quality.
- const int try_lz77_trace_backwards = (quality >= 25);
- best = refs_lz77; // default guess: lz77 is better
- if (try_lz77_trace_backwards) {
- // Set recursion level for large images using a color cache.
- const int recursion_level =
- (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
- VP8LBackwardRefs* const refs_trace = &refs_array[1];
- ClearBackwardRefs(refs_trace);
- if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
- quality, cache_bits, hash_chain,
- refs_trace)) {
- best = refs_trace;
- }
- }
- } else {
- best = refs_rle;
- }
-
- if (use_2d_locality) BackwardReferences2DLocality(width, best);
-
- return best;
-}
-
// Returns entropy for the given cache bits.
-static double ComputeCacheEntropy(const uint32_t* const argb,
- int xsize, int ysize,
+static double ComputeCacheEntropy(const uint32_t* argb,
const VP8LBackwardRefs* const refs,
int cache_bits) {
- int pixel_index = 0;
- uint32_t k;
const int use_color_cache = (cache_bits > 0);
int cc_init = 0;
double entropy = MAX_ENTROPY;
@@ -896,33 +848,40 @@ static double ComputeCacheEntropy(const uint32_t* const argb,
cc_init = VP8LColorCacheInit(&hashers, cache_bits);
if (!cc_init) goto Error;
}
-
- while (VP8LRefsCursorOk(&c)) {
- const PixOrCopy* const v = c.cur_pos;
- if (PixOrCopyIsLiteral(v)) {
- if (use_color_cache &&
- VP8LColorCacheContains(&hashers, argb[pixel_index])) {
- // push pixel as a cache index
- const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
- const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
- VP8LHistogramAddSinglePixOrCopy(histo, &token);
- } else {
- VP8LHistogramAddSinglePixOrCopy(histo, v);
- }
- } else {
- VP8LHistogramAddSinglePixOrCopy(histo, v);
+ if (!use_color_cache) {
+ while (VP8LRefsCursorOk(&c)) {
+ VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+ VP8LRefsCursorNext(&c);
}
- if (use_color_cache) {
- for (k = 0; k < PixOrCopyLength(v); ++k) {
- VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+ } else {
+ while (VP8LRefsCursorOk(&c)) {
+ const PixOrCopy* const v = c.cur_pos;
+ if (PixOrCopyIsLiteral(v)) {
+ const uint32_t pix = *argb++;
+ const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix);
+ if (VP8LColorCacheLookup(&hashers, key) == pix) {
+ ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
+ } else {
+ VP8LColorCacheSet(&hashers, key, pix);
+ ++histo->blue_[pix & 0xff];
+ ++histo->literal_[(pix >> 8) & 0xff];
+ ++histo->red_[(pix >> 16) & 0xff];
+ ++histo->alpha_[pix >> 24];
+ }
+ } else {
+ int len = PixOrCopyLength(v);
+ int code, extra_bits;
+ VP8LPrefixEncodeBits(len, &code, &extra_bits);
+ ++histo->literal_[NUM_LITERAL_CODES + code];
+ VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+ ++histo->distance_[code];
+ do {
+ VP8LColorCacheInsert(&hashers, *argb++);
+ } while(--len != 0);
}
+ VP8LRefsCursorNext(&c);
}
- pixel_index += PixOrCopyLength(v);
- VP8LRefsCursorNext(&c);
}
- assert(pixel_index == xsize * ysize);
- (void)xsize; // xsize is not used in non-debug compilations otherwise.
- (void)ysize; // ysize is not used in non-debug compilations otherwise.
entropy = VP8LHistogramEstimateBits(histo) +
kSmallPenaltyForLargeCache * cache_bits;
Error:
@@ -931,45 +890,204 @@ static double ComputeCacheEntropy(const uint32_t* const argb,
return entropy;
}
-// *best_cache_bits will contain how many bits are to be used for a color cache.
+// Evaluate optimal cache bits for the local color cache.
+// The input *best_cache_bits sets the maximum cache bits to use (passing 0
+// implies disabling the local color cache). The local color cache is also
+// disabled for the lower (<= 25) quality.
// Returns 0 in case of memory error.
-int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
- int xsize, int ysize, int quality,
- VP8LHashChain* const hash_chain,
- VP8LBackwardRefs* const refs,
- int* const best_cache_bits) {
+static int CalculateBestCacheSize(const uint32_t* const argb,
+ int xsize, int ysize, int quality,
+ VP8LHashChain* const hash_chain,
+ VP8LBackwardRefs* const refs,
+ int* const lz77_computed,
+ int* const best_cache_bits) {
int eval_low = 1;
int eval_high = 1;
double entropy_low = MAX_ENTROPY;
double entropy_high = MAX_ENTROPY;
+ const double cost_mul = 5e-4;
int cache_bits_low = 0;
- int cache_bits_high = MAX_COLOR_CACHE_BITS;
+ int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
- if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain,
- refs)) {
+ assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
+
+ *lz77_computed = 0;
+ if (cache_bits_high == 0) {
+ *best_cache_bits = 0;
+ // Local color cache is disabled.
+ return 1;
+ }
+ if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0,
+ hash_chain, refs)) {
return 0;
}
// Do a binary search to find the optimal entropy for cache_bits.
- while (cache_bits_high - cache_bits_low > 1) {
+ while (eval_low || eval_high) {
if (eval_low) {
- entropy_low =
- ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low);
+ entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low);
+ entropy_low += entropy_low * cache_bits_low * cost_mul;
eval_low = 0;
}
if (eval_high) {
- entropy_high =
- ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high);
+ entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high);
+ entropy_high += entropy_high * cache_bits_high * cost_mul;
eval_high = 0;
}
if (entropy_high < entropy_low) {
+ const int prev_cache_bits_low = cache_bits_low;
*best_cache_bits = cache_bits_high;
cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
- eval_low = 1;
+ if (cache_bits_low != prev_cache_bits_low) eval_low = 1;
} else {
*best_cache_bits = cache_bits_low;
cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
- eval_high = 1;
+ if (cache_bits_high != cache_bits_low) eval_high = 1;
}
}
+ *lz77_computed = 1;
return 1;
}
+
+// Update (in-place) backward references for specified cache_bits.
+static int BackwardRefsWithLocalCache(const uint32_t* const argb,
+ int cache_bits,
+ VP8LBackwardRefs* const refs) {
+ int pixel_index = 0;
+ VP8LColorCache hashers;
+ VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+ if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
+
+ while (VP8LRefsCursorOk(&c)) {
+ PixOrCopy* const v = c.cur_pos;
+ if (PixOrCopyIsLiteral(v)) {
+ const uint32_t argb_literal = v->argb_or_distance;
+ if (VP8LColorCacheContains(&hashers, argb_literal)) {
+ const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal);
+ *v = PixOrCopyCreateCacheIdx(ix);
+ } else {
+ VP8LColorCacheInsert(&hashers, argb_literal);
+ }
+ ++pixel_index;
+ } else {
+ // refs was created without local cache, so it can not have cache indexes.
+ int k;
+ assert(PixOrCopyIsCopy(v));
+ for (k = 0; k < v->len; ++k) {
+ VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
+ }
+ }
+ VP8LRefsCursorNext(&c);
+ }
+ VP8LColorCacheClear(&hashers);
+ return 1;
+}
+
+static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
+ int width, int height, const uint32_t* const argb, int quality,
+ int* const cache_bits, VP8LHashChain* const hash_chain,
+ VP8LBackwardRefs refs_array[2]) {
+ VP8LBackwardRefs* refs_lz77 = &refs_array[0];
+ *cache_bits = 0;
+ if (!BackwardReferencesLz77(width, height, argb, 0, quality,
+ 1 /* Low effort. */, hash_chain, refs_lz77)) {
+ return NULL;
+ }
+ BackwardReferences2DLocality(width, refs_lz77);
+ return refs_lz77;
+}
+
+static VP8LBackwardRefs* GetBackwardReferences(
+ int width, int height, const uint32_t* const argb, int quality,
+ int* const cache_bits, VP8LHashChain* const hash_chain,
+ VP8LBackwardRefs refs_array[2]) {
+ int lz77_is_useful;
+ int lz77_computed;
+ double bit_cost_lz77, bit_cost_rle;
+ VP8LBackwardRefs* best = NULL;
+ VP8LBackwardRefs* refs_lz77 = &refs_array[0];
+ VP8LBackwardRefs* refs_rle = &refs_array[1];
+ VP8LHistogram* histo = NULL;
+
+ if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain,
+ refs_lz77, &lz77_computed, cache_bits)) {
+ goto Error;
+ }
+
+ if (lz77_computed) {
+ // Transform refs_lz77 for the optimized cache_bits.
+ if (*cache_bits > 0) {
+ if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) {
+ goto Error;
+ }
+ }
+ } else {
+ if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality,
+ 0 /* Low effort. */, hash_chain, refs_lz77)) {
+ goto Error;
+ }
+ }
+
+ if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) {
+ goto Error;
+ }
+
+ histo = VP8LAllocateHistogram(*cache_bits);
+ if (histo == NULL) goto Error;
+
+ {
+ // Evaluate LZ77 coding.
+ VP8LHistogramCreate(histo, refs_lz77, *cache_bits);
+ bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+ // Evaluate RLE coding.
+ VP8LHistogramCreate(histo, refs_rle, *cache_bits);
+ bit_cost_rle = VP8LHistogramEstimateBits(histo);
+ // Decide if LZ77 is useful.
+ lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+ }
+
+ // Choose appropriate backward reference.
+ if (lz77_is_useful) {
+ // TraceBackwards is costly. Don't execute it at lower quality.
+ const int try_lz77_trace_backwards = (quality >= 25);
+ best = refs_lz77; // default guess: lz77 is better
+ if (try_lz77_trace_backwards) {
+ VP8LBackwardRefs* const refs_trace = refs_rle;
+ if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) {
+ best = NULL;
+ goto Error;
+ }
+ if (BackwardReferencesTraceBackwards(width, height, argb, quality,
+ *cache_bits, hash_chain,
+ refs_trace)) {
+ double bit_cost_trace;
+ // Evaluate LZ77 coding.
+ VP8LHistogramCreate(histo, refs_trace, *cache_bits);
+ bit_cost_trace = VP8LHistogramEstimateBits(histo);
+ if (bit_cost_trace < bit_cost_lz77) {
+ best = refs_trace;
+ }
+ }
+ }
+ } else {
+ best = refs_rle;
+ }
+
+ BackwardReferences2DLocality(width, best);
+
+ Error:
+ VP8LFreeHistogram(histo);
+ return best;
+}
+
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+ int width, int height, const uint32_t* const argb, int quality,
+ int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
+ VP8LBackwardRefs refs_array[2]) {
+ if (low_effort) {
+ return GetBackwardReferencesLowEffort(width, height, argb, quality,
+ cache_bits, hash_chain, refs_array);
+ } else {
+ return GetBackwardReferences(width, height, argb, quality, cache_bits,
+ hash_chain, refs_array);
+ }
+}
diff --git a/src/3rdparty/libwebp/src/enc/backward_references.h b/src/3rdparty/libwebp/src/enc/backward_references.h
index c2c81c5..daa084d 100644
--- a/src/3rdparty/libwebp/src/enc/backward_references.h
+++ b/src/3rdparty/libwebp/src/enc/backward_references.h
@@ -22,13 +22,8 @@
extern "C" {
#endif
-// The spec allows 11, we use 9 bits to reduce memory consumption in encoding.
-// Having 9 instead of 11 only removes about 0.25 % of compression density.
-#define MAX_COLOR_CACHE_BITS 9
-
-// Max ever number of codes we'll use:
-#define PIX_OR_COPY_CODES_MAX \
- (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS))
+// The maximum allowed limit is 11.
+#define MAX_COLOR_CACHE_BITS 10
// -----------------------------------------------------------------------------
// PixOrCopy
@@ -190,21 +185,16 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
// Main entry points
// Evaluates best possible backward references for specified quality.
-// Further optimize for 2D locality if use_2d_locality flag is set.
+// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache
+// bits to use (passing 0 implies disabling the local color cache).
+// The optimal cache bits is evaluated and set for the *cache_bits parameter.
// The return value is the pointer to the best of the two backward refs viz,
// refs[0] or refs[1].
VP8LBackwardRefs* VP8LGetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
- int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
+ int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
VP8LBackwardRefs refs[2]);
-// Produce an estimate for a good color cache size for the image.
-int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
- int xsize, int ysize, int quality,
- VP8LHashChain* const hash_chain,
- VP8LBackwardRefs* const ref,
- int* const best_cache_bits);
-
#ifdef __cplusplus
}
#endif
diff --git a/src/3rdparty/libwebp/src/enc/config.c b/src/3rdparty/libwebp/src/enc/config.c
index 53a3bb2..f9f7961 100644
--- a/src/3rdparty/libwebp/src/enc/config.c
+++ b/src/3rdparty/libwebp/src/enc/config.c
@@ -43,10 +43,15 @@ int WebPConfigInitInternal(WebPConfig* config,
config->alpha_filtering = 1;
config->alpha_quality = 100;
config->lossless = 0;
+ config->exact = 0;
config->image_hint = WEBP_HINT_DEFAULT;
config->emulate_jpeg_size = 0;
config->thread_level = 0;
config->low_memory = 0;
+ config->near_lossless = 100;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+ config->delta_palettization = 0;
+#endif // WEBP_EXPERIMENTAL_FEATURES
// TODO(skal): tune.
switch (preset) {
@@ -111,11 +116,7 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0;
if (config->show_compressed < 0 || config->show_compressed > 1)
return 0;
-#if WEBP_ENCODER_ABI_VERSION > 0x0204
if (config->preprocessing < 0 || config->preprocessing > 7)
-#else
- if (config->preprocessing < 0 || config->preprocessing > 3)
-#endif
return 0;
if (config->partitions < 0 || config->partitions > 3)
return 0;
@@ -129,6 +130,8 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0;
if (config->lossless < 0 || config->lossless > 1)
return 0;
+ if (config->near_lossless < 0 || config->near_lossless > 100)
+ return 0;
if (config->image_hint >= WEBP_HINT_LAST)
return 0;
if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
@@ -137,12 +140,17 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0;
if (config->low_memory < 0 || config->low_memory > 1)
return 0;
+ if (config->exact < 0 || config->exact > 1)
+ return 0;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+ if (config->delta_palettization < 0 || config->delta_palettization > 1)
+ return 0;
+#endif // WEBP_EXPERIMENTAL_FEATURES
return 1;
}
//------------------------------------------------------------------------------
-#if WEBP_ENCODER_ABI_VERSION > 0x0202
#define MAX_LEVEL 9
// Mapping between -z level and -m / -q parameter settings.
@@ -161,6 +169,5 @@ int WebPConfigLosslessPreset(WebPConfig* config, int level) {
config->quality = kLosslessPresets[level].quality_;
return 1;
}
-#endif
//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/enc/cost.c b/src/3rdparty/libwebp/src/enc/cost.c
index 9d2cc01..ae7fe01 100644
--- a/src/3rdparty/libwebp/src/enc/cost.c
+++ b/src/3rdparty/libwebp/src/enc/cost.c
@@ -14,38 +14,6 @@
#include "./cost.h"
//------------------------------------------------------------------------------
-// Boolean-cost cost table
-
-const uint16_t VP8EntropyCost[256] = {
- 1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
- 1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951,
- 939, 911, 896, 878, 871, 854, 838, 820, 811, 794,
- 786, 768, 768, 752, 740, 732, 720, 709, 704, 690,
- 683, 672, 666, 655, 647, 640, 631, 622, 615, 607,
- 598, 592, 586, 576, 572, 564, 559, 555, 547, 541,
- 534, 528, 522, 512, 512, 504, 500, 494, 488, 483,
- 477, 473, 467, 461, 458, 452, 448, 443, 438, 434,
- 427, 424, 419, 415, 410, 406, 403, 399, 394, 390,
- 384, 384, 377, 374, 370, 366, 362, 359, 355, 351,
- 347, 342, 342, 336, 333, 330, 326, 323, 320, 316,
- 312, 308, 305, 302, 299, 296, 293, 288, 287, 283,
- 280, 277, 274, 272, 268, 266, 262, 256, 256, 256,
- 251, 248, 245, 242, 240, 237, 234, 232, 228, 226,
- 223, 221, 218, 216, 214, 211, 208, 205, 203, 201,
- 198, 196, 192, 191, 188, 187, 183, 181, 179, 176,
- 175, 171, 171, 168, 165, 163, 160, 159, 156, 154,
- 152, 150, 148, 146, 144, 142, 139, 138, 135, 133,
- 131, 128, 128, 125, 123, 121, 119, 117, 115, 113,
- 111, 110, 107, 105, 103, 102, 100, 98, 96, 94,
- 92, 91, 89, 86, 86, 83, 82, 80, 77, 76,
- 74, 73, 71, 69, 67, 66, 64, 63, 61, 59,
- 57, 55, 54, 52, 51, 49, 47, 46, 44, 43,
- 41, 40, 38, 36, 35, 33, 32, 30, 29, 27,
- 25, 24, 22, 21, 19, 18, 16, 15, 13, 12,
- 10, 9, 7, 6, 4, 3
-};
-
-//------------------------------------------------------------------------------
// Level cost tables
// For each given level, the following table gives the pattern of contexts to
@@ -71,267 +39,6 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
{0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}
};
-// fixed costs for coding levels, deduce from the coding tree.
-// This is only the part that doesn't depend on the probability state.
-const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
- 0, 256, 256, 256, 256, 432, 618, 630,
- 731, 640, 640, 828, 901, 948, 1021, 1101,
- 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
- 1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
- 1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
- 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
- 1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
- 1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
- 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
- 1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
- 1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
- 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
- 1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
- 2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
- 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
- 2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
- 2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
- 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
- 2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
- 2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
- 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
- 2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
- 2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
- 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
- 2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
- 2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
- 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
- 2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
- 2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
- 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
- 2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
- 3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
- 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
- 3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
- 3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
- 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
- 3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
- 3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
- 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
- 3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
- 3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
- 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
- 2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
- 3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
- 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
- 3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
- 3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
- 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
- 3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
- 3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
- 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
- 3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
- 3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
- 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
- 3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
- 3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
- 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
- 3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
- 3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
- 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
- 3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
- 3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
- 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
- 4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
- 4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
- 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
- 4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
- 4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
- 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
- 4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
- 4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
- 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
- 4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
- 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
- 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
- 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
- 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
- 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
- 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
- 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
- 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
- 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
- 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
- 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
- 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
- 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
- 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
- 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
- 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
- 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
- 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
- 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
- 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
- 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
- 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
- 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
- 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
- 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
- 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
- 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
- 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
- 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
- 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
- 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
- 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
- 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
- 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
- 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
- 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
- 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
- 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
- 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
- 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
- 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
- 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
- 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
- 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
- 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
- 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
- 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
- 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
- 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
- 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
- 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
- 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
- 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
- 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
- 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
- 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
- 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
- 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
- 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
- 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
- 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
- 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
- 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
- 6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
- 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
- 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
- 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
- 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
- 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
- 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
- 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
- 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
- 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
- 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
- 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
- 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
- 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
- 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
- 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
- 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
- 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
- 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
- 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
- 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
- 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
- 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
- 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
- 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
- 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
- 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
- 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
- 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
- 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
- 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
- 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
- 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
- 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
- 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
- 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
- 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
- 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
- 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
- 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
- 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
- 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
- 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
- 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
- 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
- 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
- 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
- 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
- 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
- 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
- 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
- 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
- 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
- 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
- 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
- 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
- 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
- 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
- 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
- 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
- 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
- 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
- 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
- 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
- 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
- 5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
- 5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
- 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
- 5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
- 5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
- 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
- 5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
- 5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
- 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
- 5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
- 5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
- 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
- 6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
- 6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
- 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
- 6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
- 6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
- 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
- 6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
- 6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
- 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
- 6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
- 6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
- 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
- 6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
- 6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
- 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
- 6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
- 6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
- 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
- 7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
- 7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
- 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
- 6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
- 6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
- 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
- 6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
- 6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
- 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
- 6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
- 6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
- 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
- 7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
- 7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
- 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
- 7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
- 7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
- 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
- 7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
- 7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
- 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
- 7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
- 7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
- 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
- 7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
-};
-
static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
int pattern = VP8LevelCodes[level - 1][0];
int bits = VP8LevelCodes[level - 1][1];
@@ -350,12 +57,13 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
//------------------------------------------------------------------------------
// Pre-calc level costs once for all
-void VP8CalculateLevelCosts(VP8Proba* const proba) {
+void VP8CalculateLevelCosts(VP8EncProba* const proba) {
int ctype, band, ctx;
if (!proba->dirty_) return; // nothing to do.
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
+ int n;
for (band = 0; band < NUM_BANDS; ++band) {
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
@@ -371,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
// actually constant.
}
}
+ for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel.
+ for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+ proba->remapped_costs_[ctype][n][ctx] =
+ proba->level_cost_[ctype][VP8EncBands[n]][ctx];
+ }
+ }
}
proba->dirty_ = 0;
}
@@ -487,66 +201,6 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
};
//------------------------------------------------------------------------------
-// Mode costs
-
-static int GetResidualCost(int ctx0, const VP8Residual* const res) {
- int n = res->first;
- // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
- const int p0 = res->prob[n][ctx0][0];
- const uint16_t* t = res->cost[n][ctx0];
- // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
- // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
- // be missing during the loop.
- int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
-
- if (res->last < 0) {
- return VP8BitCost(0, p0);
- }
- for (; n < res->last; ++n) {
- const int v = abs(res->coeffs[n]);
- const int b = VP8EncBands[n + 1];
- const int ctx = (v >= 2) ? 2 : v;
- cost += VP8LevelCost(t, v);
- t = res->cost[b][ctx];
- }
- // Last coefficient is always non-zero
- {
- const int v = abs(res->coeffs[n]);
- assert(v != 0);
- cost += VP8LevelCost(t, v);
- if (n < 15) {
- const int b = VP8EncBands[n + 1];
- const int ctx = (v == 1) ? 1 : 2;
- const int last_p0 = res->prob[b][ctx][0];
- cost += VP8BitCost(0, last_p0);
- }
- }
- return cost;
-}
-
-//------------------------------------------------------------------------------
-// init function
-
-#if defined(WEBP_USE_MIPS32)
-extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
-#endif // WEBP_USE_MIPS32
-
-// TODO(skal): this, and GetResidualCost(), should probably go somewhere
-// under src/dsp/ at some point.
-VP8GetResidualCostFunc VP8GetResidualCost;
-
-void VP8GetResidualCostInit(void) {
- VP8GetResidualCost = GetResidualCost;
- if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_MIPS32)
- if (VP8GetCPUInfo(kMIPS32)) {
- VP8GetResidualCost = VP8GetResidualCostMIPS32;
- }
-#endif
- }
-}
-
-//------------------------------------------------------------------------------
// helper functions for residuals struct VP8Residual.
void VP8InitResidual(int first, int coeff_type,
@@ -554,45 +208,10 @@ void VP8InitResidual(int first, int coeff_type,
res->coeff_type = coeff_type;
res->prob = enc->proba_.coeffs_[coeff_type];
res->stats = enc->proba_.stats_[coeff_type];
- res->cost = enc->proba_.level_cost_[coeff_type];
+ res->costs = enc->proba_.remapped_costs_[coeff_type];
res->first = first;
}
-static void SetResidualCoeffs(const int16_t* const coeffs,
- VP8Residual* const res) {
- int n;
- res->last = -1;
- assert(res->first == 0 || coeffs[0] == 0);
- for (n = 15; n >= 0; --n) {
- if (coeffs[n]) {
- res->last = n;
- break;
- }
- }
- res->coeffs = coeffs;
-}
-
-//------------------------------------------------------------------------------
-// init function
-
-#if defined(WEBP_USE_SSE2)
-extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
- VP8Residual* const res);
-#endif // WEBP_USE_SSE2
-
-VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
-
-void VP8SetResidualCoeffsInit(void) {
- VP8SetResidualCoeffs = SetResidualCoeffs;
- if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_SSE2)
- if (VP8GetCPUInfo(kSSE2)) {
- VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2;
- }
-#endif
- }
-}
-
//------------------------------------------------------------------------------
// Mode costs
diff --git a/src/3rdparty/libwebp/src/enc/cost.h b/src/3rdparty/libwebp/src/enc/cost.h
index 4e55895..20960d6 100644
--- a/src/3rdparty/libwebp/src/enc/cost.h
+++ b/src/3rdparty/libwebp/src/enc/cost.h
@@ -24,46 +24,31 @@ extern "C" {
// On-the-fly info about the current set of residuals. Handy to avoid
// passing zillions of params.
-typedef struct {
+typedef struct VP8Residual VP8Residual;
+struct VP8Residual {
int first;
int last;
const int16_t* coeffs;
int coeff_type;
- ProbaArray* prob;
- StatsArray* stats;
- CostArray* cost;
-} VP8Residual;
+ ProbaArray* prob;
+ StatsArray* stats;
+ CostArrayPtr costs;
+};
void VP8InitResidual(int first, int coeff_type,
VP8Encoder* const enc, VP8Residual* const res);
-typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
- VP8Residual* const res);
-extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
-
-void VP8SetResidualCoeffsInit(void); // must be called first
-
int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
-// approximate cost per level:
-extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
-extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
-
// Cost of coding one event with probability 'proba'.
static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
}
-// Cost calculation function.
-typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res);
-extern VP8GetResidualCostFunc VP8GetResidualCost;
-
-void VP8GetResidualCostInit(void); // must be called first
-
// Level cost calculations
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
-void VP8CalculateLevelCosts(VP8Proba* const proba);
+void VP8CalculateLevelCosts(VP8EncProba* const proba);
static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
return VP8LevelFixedCosts[level]
+ table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level];
diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization.c b/src/3rdparty/libwebp/src/enc/delta_palettization.c
new file mode 100644
index 0000000..062e588
--- /dev/null
+++ b/src/3rdparty/libwebp/src/enc/delta_palettization.c
@@ -0,0 +1,455 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#include "./delta_palettization.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "../webp/types.h"
+#include "../dsp/lossless.h"
+
+#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b))
+
+// Format allows palette up to 256 entries, but more palette entries produce
+// bigger entropy. In the future it will probably be useful to add more entries
+// that are far from the origin of the palette or choose remaining entries
+// dynamically.
+#define DELTA_PALETTE_SIZE 226
+
+// Palette used for delta_palettization. Entries are roughly sorted by distance
+// of their signed equivalents from the origin.
+static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = {
+ MK_COL(0u, 0u, 0u),
+ MK_COL(255u, 255u, 255u),
+ MK_COL(1u, 1u, 1u),
+ MK_COL(254u, 254u, 254u),
+ MK_COL(2u, 2u, 2u),
+ MK_COL(4u, 4u, 4u),
+ MK_COL(252u, 252u, 252u),
+ MK_COL(250u, 0u, 0u),
+ MK_COL(0u, 250u, 0u),
+ MK_COL(0u, 0u, 250u),
+ MK_COL(6u, 0u, 0u),
+ MK_COL(0u, 6u, 0u),
+ MK_COL(0u, 0u, 6u),
+ MK_COL(0u, 0u, 248u),
+ MK_COL(0u, 0u, 8u),
+ MK_COL(0u, 248u, 0u),
+ MK_COL(0u, 248u, 248u),
+ MK_COL(0u, 248u, 8u),
+ MK_COL(0u, 8u, 0u),
+ MK_COL(0u, 8u, 248u),
+ MK_COL(0u, 8u, 8u),
+ MK_COL(8u, 8u, 8u),
+ MK_COL(248u, 0u, 0u),
+ MK_COL(248u, 0u, 248u),
+ MK_COL(248u, 0u, 8u),
+ MK_COL(248u, 248u, 0u),
+ MK_COL(248u, 8u, 0u),
+ MK_COL(8u, 0u, 0u),
+ MK_COL(8u, 0u, 248u),
+ MK_COL(8u, 0u, 8u),
+ MK_COL(8u, 248u, 0u),
+ MK_COL(8u, 8u, 0u),
+ MK_COL(23u, 23u, 23u),
+ MK_COL(13u, 13u, 13u),
+ MK_COL(232u, 232u, 232u),
+ MK_COL(244u, 244u, 244u),
+ MK_COL(245u, 245u, 250u),
+ MK_COL(50u, 50u, 50u),
+ MK_COL(204u, 204u, 204u),
+ MK_COL(236u, 236u, 236u),
+ MK_COL(16u, 16u, 16u),
+ MK_COL(240u, 16u, 16u),
+ MK_COL(16u, 240u, 16u),
+ MK_COL(240u, 240u, 16u),
+ MK_COL(16u, 16u, 240u),
+ MK_COL(240u, 16u, 240u),
+ MK_COL(16u, 240u, 240u),
+ MK_COL(240u, 240u, 240u),
+ MK_COL(0u, 0u, 232u),
+ MK_COL(0u, 232u, 0u),
+ MK_COL(232u, 0u, 0u),
+ MK_COL(0u, 0u, 24u),
+ MK_COL(0u, 24u, 0u),
+ MK_COL(24u, 0u, 0u),
+ MK_COL(32u, 32u, 32u),
+ MK_COL(224u, 32u, 32u),
+ MK_COL(32u, 224u, 32u),
+ MK_COL(224u, 224u, 32u),
+ MK_COL(32u, 32u, 224u),
+ MK_COL(224u, 32u, 224u),
+ MK_COL(32u, 224u, 224u),
+ MK_COL(224u, 224u, 224u),
+ MK_COL(0u, 0u, 176u),
+ MK_COL(0u, 0u, 80u),
+ MK_COL(0u, 176u, 0u),
+ MK_COL(0u, 176u, 176u),
+ MK_COL(0u, 176u, 80u),
+ MK_COL(0u, 80u, 0u),
+ MK_COL(0u, 80u, 176u),
+ MK_COL(0u, 80u, 80u),
+ MK_COL(176u, 0u, 0u),
+ MK_COL(176u, 0u, 176u),
+ MK_COL(176u, 0u, 80u),
+ MK_COL(176u, 176u, 0u),
+ MK_COL(176u, 80u, 0u),
+ MK_COL(80u, 0u, 0u),
+ MK_COL(80u, 0u, 176u),
+ MK_COL(80u, 0u, 80u),
+ MK_COL(80u, 176u, 0u),
+ MK_COL(80u, 80u, 0u),
+ MK_COL(0u, 0u, 152u),
+ MK_COL(0u, 0u, 104u),
+ MK_COL(0u, 152u, 0u),
+ MK_COL(0u, 152u, 152u),
+ MK_COL(0u, 152u, 104u),
+ MK_COL(0u, 104u, 0u),
+ MK_COL(0u, 104u, 152u),
+ MK_COL(0u, 104u, 104u),
+ MK_COL(152u, 0u, 0u),
+ MK_COL(152u, 0u, 152u),
+ MK_COL(152u, 0u, 104u),
+ MK_COL(152u, 152u, 0u),
+ MK_COL(152u, 104u, 0u),
+ MK_COL(104u, 0u, 0u),
+ MK_COL(104u, 0u, 152u),
+ MK_COL(104u, 0u, 104u),
+ MK_COL(104u, 152u, 0u),
+ MK_COL(104u, 104u, 0u),
+ MK_COL(216u, 216u, 216u),
+ MK_COL(216u, 216u, 40u),
+ MK_COL(216u, 216u, 176u),
+ MK_COL(216u, 216u, 80u),
+ MK_COL(216u, 40u, 216u),
+ MK_COL(216u, 40u, 40u),
+ MK_COL(216u, 40u, 176u),
+ MK_COL(216u, 40u, 80u),
+ MK_COL(216u, 176u, 216u),
+ MK_COL(216u, 176u, 40u),
+ MK_COL(216u, 176u, 176u),
+ MK_COL(216u, 176u, 80u),
+ MK_COL(216u, 80u, 216u),
+ MK_COL(216u, 80u, 40u),
+ MK_COL(216u, 80u, 176u),
+ MK_COL(216u, 80u, 80u),
+ MK_COL(40u, 216u, 216u),
+ MK_COL(40u, 216u, 40u),
+ MK_COL(40u, 216u, 176u),
+ MK_COL(40u, 216u, 80u),
+ MK_COL(40u, 40u, 216u),
+ MK_COL(40u, 40u, 40u),
+ MK_COL(40u, 40u, 176u),
+ MK_COL(40u, 40u, 80u),
+ MK_COL(40u, 176u, 216u),
+ MK_COL(40u, 176u, 40u),
+ MK_COL(40u, 176u, 176u),
+ MK_COL(40u, 176u, 80u),
+ MK_COL(40u, 80u, 216u),
+ MK_COL(40u, 80u, 40u),
+ MK_COL(40u, 80u, 176u),
+ MK_COL(40u, 80u, 80u),
+ MK_COL(80u, 216u, 216u),
+ MK_COL(80u, 216u, 40u),
+ MK_COL(80u, 216u, 176u),
+ MK_COL(80u, 216u, 80u),
+ MK_COL(80u, 40u, 216u),
+ MK_COL(80u, 40u, 40u),
+ MK_COL(80u, 40u, 176u),
+ MK_COL(80u, 40u, 80u),
+ MK_COL(80u, 176u, 216u),
+ MK_COL(80u, 176u, 40u),
+ MK_COL(80u, 176u, 176u),
+ MK_COL(80u, 176u, 80u),
+ MK_COL(80u, 80u, 216u),
+ MK_COL(80u, 80u, 40u),
+ MK_COL(80u, 80u, 176u),
+ MK_COL(80u, 80u, 80u),
+ MK_COL(0u, 0u, 192u),
+ MK_COL(0u, 0u, 64u),
+ MK_COL(0u, 0u, 128u),
+ MK_COL(0u, 192u, 0u),
+ MK_COL(0u, 192u, 192u),
+ MK_COL(0u, 192u, 64u),
+ MK_COL(0u, 192u, 128u),
+ MK_COL(0u, 64u, 0u),
+ MK_COL(0u, 64u, 192u),
+ MK_COL(0u, 64u, 64u),
+ MK_COL(0u, 64u, 128u),
+ MK_COL(0u, 128u, 0u),
+ MK_COL(0u, 128u, 192u),
+ MK_COL(0u, 128u, 64u),
+ MK_COL(0u, 128u, 128u),
+ MK_COL(176u, 216u, 216u),
+ MK_COL(176u, 216u, 40u),
+ MK_COL(176u, 216u, 176u),
+ MK_COL(176u, 216u, 80u),
+ MK_COL(176u, 40u, 216u),
+ MK_COL(176u, 40u, 40u),
+ MK_COL(176u, 40u, 176u),
+ MK_COL(176u, 40u, 80u),
+ MK_COL(176u, 176u, 216u),
+ MK_COL(176u, 176u, 40u),
+ MK_COL(176u, 176u, 176u),
+ MK_COL(176u, 176u, 80u),
+ MK_COL(176u, 80u, 216u),
+ MK_COL(176u, 80u, 40u),
+ MK_COL(176u, 80u, 176u),
+ MK_COL(176u, 80u, 80u),
+ MK_COL(192u, 0u, 0u),
+ MK_COL(192u, 0u, 192u),
+ MK_COL(192u, 0u, 64u),
+ MK_COL(192u, 0u, 128u),
+ MK_COL(192u, 192u, 0u),
+ MK_COL(192u, 192u, 192u),
+ MK_COL(192u, 192u, 64u),
+ MK_COL(192u, 192u, 128u),
+ MK_COL(192u, 64u, 0u),
+ MK_COL(192u, 64u, 192u),
+ MK_COL(192u, 64u, 64u),
+ MK_COL(192u, 64u, 128u),
+ MK_COL(192u, 128u, 0u),
+ MK_COL(192u, 128u, 192u),
+ MK_COL(192u, 128u, 64u),
+ MK_COL(192u, 128u, 128u),
+ MK_COL(64u, 0u, 0u),
+ MK_COL(64u, 0u, 192u),
+ MK_COL(64u, 0u, 64u),
+ MK_COL(64u, 0u, 128u),
+ MK_COL(64u, 192u, 0u),
+ MK_COL(64u, 192u, 192u),
+ MK_COL(64u, 192u, 64u),
+ MK_COL(64u, 192u, 128u),
+ MK_COL(64u, 64u, 0u),
+ MK_COL(64u, 64u, 192u),
+ MK_COL(64u, 64u, 64u),
+ MK_COL(64u, 64u, 128u),
+ MK_COL(64u, 128u, 0u),
+ MK_COL(64u, 128u, 192u),
+ MK_COL(64u, 128u, 64u),
+ MK_COL(64u, 128u, 128u),
+ MK_COL(128u, 0u, 0u),
+ MK_COL(128u, 0u, 192u),
+ MK_COL(128u, 0u, 64u),
+ MK_COL(128u, 0u, 128u),
+ MK_COL(128u, 192u, 0u),
+ MK_COL(128u, 192u, 192u),
+ MK_COL(128u, 192u, 64u),
+ MK_COL(128u, 192u, 128u),
+ MK_COL(128u, 64u, 0u),
+ MK_COL(128u, 64u, 192u),
+ MK_COL(128u, 64u, 64u),
+ MK_COL(128u, 64u, 128u),
+ MK_COL(128u, 128u, 0u),
+ MK_COL(128u, 128u, 192u),
+ MK_COL(128u, 128u, 64u),
+ MK_COL(128u, 128u, 128u),
+};
+
+#undef MK_COL
+
+//------------------------------------------------------------------------------
+// TODO(skal): move the functions to dsp/lossless.c when the correct
+// granularity is found. For now, we'll just copy-paste some useful bits
+// here instead.
+
+// In-place sum of each component with mod 256.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+ const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
+ const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
+ *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+ if (a < 256) {
+ return a;
+ }
+ // return 0, when a is a negative integer.
+ // return 255, when a is positive.
+ return ~a >> 24;
+}
+
+// Delta palettization functions.
+static WEBP_INLINE int Square(int x) {
+ return x * x;
+}
+
+static WEBP_INLINE uint32_t Intensity(uint32_t a) {
+ return
+ 30 * ((a >> 16) & 0xff) +
+ 59 * ((a >> 8) & 0xff) +
+ 11 * ((a >> 0) & 0xff);
+}
+
+static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value,
+ uint32_t palette_entry) {
+ int i;
+ uint32_t distance = 0;
+ AddPixelsEq(&predicted_value, palette_entry);
+ for (i = 0; i < 32; i += 8) {
+ const int32_t av = (actual_value >> i) & 0xff;
+ const int32_t pv = (predicted_value >> i) & 0xff;
+ distance += Square(pv - av);
+ }
+ // We sum square of intensity difference with factor 10, but because Intensity
+ // returns 100 times real intensity we need to multiply differences of colors
+ // by 1000.
+ distance *= 1000u;
+ distance += Square(Intensity(predicted_value)
+ - Intensity(actual_value));
+ return distance;
+}
+
+static uint32_t Predict(int x, int y, uint32_t* image) {
+ const uint32_t t = (y == 0) ? ARGB_BLACK : image[x];
+ const uint32_t l = (x == 0) ? ARGB_BLACK : image[x - 1];
+ const uint32_t p =
+ (((((t >> 24) & 0xff) + ((l >> 24) & 0xff)) / 2) << 24) +
+ (((((t >> 16) & 0xff) + ((l >> 16) & 0xff)) / 2) << 16) +
+ (((((t >> 8) & 0xff) + ((l >> 8) & 0xff)) / 2) << 8) +
+ (((((t >> 0) & 0xff) + ((l >> 0) & 0xff)) / 2) << 0);
+ if (x == 0 && y == 0) return ARGB_BLACK;
+ if (x == 0) return t;
+ if (y == 0) return l;
+ return p;
+}
+
+static WEBP_INLINE int AddSubtractComponentFullWithCoefficient(
+ int a, int b, int c) {
+ return Clip255(a + ((b - c) >> 2));
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient(
+ uint32_t c0, uint32_t c1, uint32_t c2) {
+ const int a = AddSubtractComponentFullWithCoefficient(
+ c0 >> 24, c1 >> 24, c2 >> 24);
+ const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff,
+ (c1 >> 16) & 0xff,
+ (c2 >> 16) & 0xff);
+ const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff,
+ (c1 >> 8) & 0xff,
+ (c2 >> 8) & 0xff);
+ const int b = AddSubtractComponentFullWithCoefficient(
+ c0 & 0xff, c1 & 0xff, c2 & 0xff);
+ return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
+}
+
+//------------------------------------------------------------------------------
+
+// Find palette entry with minimum error from difference of actual pixel value
+// and predicted pixel value. Propagate error of pixel to its top and left pixel
+// in src array. Write predicted_value + palette_entry to new_image. Return
+// index of best palette entry.
+static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value,
+ const uint32_t palette[], int palette_size) {
+ int i;
+ int idx = 0;
+ uint32_t best_distance = CalcDist(predicted_value, src, palette[0]);
+ for (i = 1; i < palette_size; ++i) {
+ const uint32_t distance = CalcDist(predicted_value, src, palette[i]);
+ if (distance < best_distance) {
+ best_distance = distance;
+ idx = i;
+ }
+ }
+ return idx;
+}
+
+static void ApplyBestPaletteEntry(int x, int y,
+ uint32_t new_value, uint32_t palette_value,
+ uint32_t* src, int src_stride,
+ uint32_t* new_image) {
+ AddPixelsEq(&new_value, palette_value);
+ if (x > 0) {
+ src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1],
+ new_value, src[x]);
+ }
+ if (y > 0) {
+ src[x - src_stride] =
+ ClampedAddSubtractFullWithCoefficient(src[x - src_stride],
+ new_value, src[x]);
+ }
+ new_image[x] = new_value;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+static WebPEncodingError ApplyDeltaPalette(uint32_t* src, uint32_t* dst,
+ uint32_t src_stride,
+ uint32_t dst_stride,
+ const uint32_t* palette,
+ int palette_size,
+ int width, int height,
+ int num_passes) {
+ int x, y;
+ WebPEncodingError err = VP8_ENC_OK;
+ uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image));
+ uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row));
+ if (new_image == NULL || tmp_row == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
+ while (num_passes--) {
+ uint32_t* cur_src = src;
+ uint32_t* cur_dst = dst;
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ const uint32_t predicted_value = Predict(x, y, new_image);
+ tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value,
+ palette, palette_size);
+ ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]],
+ cur_src, src_stride, new_image);
+ }
+ for (x = 0; x < width; ++x) {
+ cur_dst[x] = palette[tmp_row[x]];
+ }
+ cur_src += src_stride;
+ cur_dst += dst_stride;
+ }
+ }
+ Error:
+ WebPSafeFree(new_image);
+ WebPSafeFree(tmp_row);
+ return err;
+}
+
+// replaces enc->argb_ by a palettizable approximation of it,
+// and generates optimal enc->palette_[]
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+ const WebPPicture* const pic = enc->pic_;
+ uint32_t* src = pic->argb;
+ uint32_t* dst = enc->argb_;
+ const int width = pic->width;
+ const int height = pic->height;
+
+ WebPEncodingError err = VP8_ENC_OK;
+ memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette));
+ enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u;
+ enc->palette_size_ = DELTA_PALETTE_SIZE;
+ err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_,
+ enc->palette_, enc->palette_size_,
+ width, height, 2);
+ if (err != VP8_ENC_OK) goto Error;
+
+ Error:
+ return err;
+}
+
+#else // !WEBP_EXPERIMENTAL_FEATURES
+
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+ (void)enc;
+ return VP8_ENC_ERROR_INVALID_CONFIGURATION;
+}
+
+#endif // WEBP_EXPERIMENTAL_FEATURES
diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization.h b/src/3rdparty/libwebp/src/enc/delta_palettization.h
new file mode 100644
index 0000000..e41c0c5
--- /dev/null
+++ b/src/3rdparty/libwebp/src/enc/delta_palettization.h
@@ -0,0 +1,25 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_
+#define WEBP_ENC_DELTA_PALETTIZATION_H_
+
+#include "../webp/encode.h"
+#include "../enc/vp8li.h"
+
+// Replaces enc->argb_[] input by a palettizable approximation of it,
+// and generates optimal enc->palette_[].
+// This function can revert enc->use_palette_ / enc->use_predict_ flag
+// if delta-palettization is not producing expected saving.
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc);
+
+#endif // WEBP_ENC_DELTA_PALETTIZATION_H_
diff --git a/src/3rdparty/libwebp/src/enc/filter.c b/src/3rdparty/libwebp/src/enc/filter.c
index 11db4bd..41813cf 100644
--- a/src/3rdparty/libwebp/src/enc/filter.c
+++ b/src/3rdparty/libwebp/src/enc/filter.c
@@ -85,12 +85,12 @@ static void DoFilter(const VP8EncIterator* const it, int level) {
const int ilevel = GetILevel(enc->config_->filter_sharpness, level);
const int limit = 2 * level + ilevel;
- uint8_t* const y_dst = it->yuv_out2_ + Y_OFF;
- uint8_t* const u_dst = it->yuv_out2_ + U_OFF;
- uint8_t* const v_dst = it->yuv_out2_ + V_OFF;
+ uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC;
+ uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC;
+ uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC;
// copy current block to yuv_out2_
- memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t));
+ memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t));
if (enc->filter_hdr_.simple_ == 1) { // simple
VP8SimpleHFilter16i(y_dst, BPS, limit);
@@ -195,13 +195,16 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
// compute SSIM in a 10 x 10 window
for (x = 3; x < 13; x++) {
for (y = 3; y < 13; y++) {
- VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
+ VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS,
+ x, y, 16, 16, &s);
}
}
for (x = 1; x < 7; x++) {
for (y = 1; y < 7; y++) {
- VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
- VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
+ VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS,
+ x, y, 8, 8, &s);
+ VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS,
+ x, y, 8, 8, &s);
}
}
return VP8SSIMGet(&s);
@@ -226,7 +229,7 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
int d;
VP8Encoder* const enc = it->enc_;
const int s = it->mb_->segment_;
- const int level0 = enc->dqm_[s].fstrength_; // TODO: ref_lf_delta[]
+ const int level0 = enc->dqm_[s].fstrength_;
// explore +/-quant range of values around level0
const int delta_min = -enc->dqm_[s].quant_;
diff --git a/src/3rdparty/libwebp/src/enc/frame.c b/src/3rdparty/libwebp/src/enc/frame.c
index cdf1dab..5b7a40b 100644
--- a/src/3rdparty/libwebp/src/enc/frame.c
+++ b/src/3rdparty/libwebp/src/enc/frame.c
@@ -14,8 +14,9 @@
#include <string.h>
#include <math.h>
-#include "./vp8enci.h"
#include "./cost.h"
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
#include "../webp/format_constants.h" // RIFF constants
#define SEGMENT_VISU 0
@@ -81,11 +82,6 @@ static float ComputeNextQ(PassStats* const s) {
//------------------------------------------------------------------------------
// Tables for level coding
-const uint8_t VP8EncBands[16 + 1] = {
- 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
- 0 // sentinel
-};
-
const uint8_t VP8Cat3[] = { 173, 148, 140 };
const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
@@ -96,7 +92,7 @@ const uint8_t VP8Cat6[] =
// Reset the statistics about: number of skips, token proba, level cost,...
static void ResetStats(VP8Encoder* const enc) {
- VP8Proba* const proba = &enc->proba_;
+ VP8EncProba* const proba = &enc->proba_;
VP8CalculateLevelCosts(proba);
proba->nb_skip_ = 0;
}
@@ -112,7 +108,7 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) {
// Returns the bit-cost for coding the skip probability.
static int FinalizeSkipProba(VP8Encoder* const enc) {
- VP8Proba* const proba = &enc->proba_;
+ VP8EncProba* const proba = &enc->proba_;
const int nb_mbs = enc->mb_w_ * enc->mb_h_;
const int nb_events = proba->nb_skip_;
int size;
@@ -140,11 +136,11 @@ static int BranchCost(int nb, int total, int proba) {
}
static void ResetTokenStats(VP8Encoder* const enc) {
- VP8Proba* const proba = &enc->proba_;
+ VP8EncProba* const proba = &enc->proba_;
memset(proba->stats_, 0, sizeof(proba->stats_));
}
-static int FinalizeTokenProbas(VP8Proba* const proba) {
+static int FinalizeTokenProbas(VP8EncProba* const proba) {
int has_changed = 0;
int size = 0;
int t, b, c, p;
@@ -476,9 +472,9 @@ static void StoreSSE(const VP8EncIterator* const it) {
const uint8_t* const in = it->yuv_in_;
const uint8_t* const out = it->yuv_out_;
// Note: not totally accurate at boundary. And doesn't include in-loop filter.
- enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF);
- enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF);
- enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF);
+ enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
+ enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
+ enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
enc->sse_count_ += 16 * 16;
}
@@ -511,9 +507,9 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
}
}
#if SEGMENT_VISU // visualize segments and prediction modes
- SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16);
- SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8);
- SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8);
+ SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
+ SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
+ SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
#endif
}
@@ -743,7 +739,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
int num_pass_left = enc->config_->pass;
const int do_search = enc->do_search_;
VP8EncIterator it;
- VP8Proba* const proba = &enc->proba_;
+ VP8EncProba* const proba = &enc->proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
PassStats stats;
diff --git a/src/3rdparty/libwebp/src/enc/histogram.c b/src/3rdparty/libwebp/src/enc/histogram.c
index a2266b4..869882d 100644
--- a/src/3rdparty/libwebp/src/enc/histogram.c
+++ b/src/3rdparty/libwebp/src/enc/histogram.c
@@ -20,9 +20,6 @@
#include "../dsp/lossless.h"
#include "../utils/utils.h"
-#define ALIGN_CST 15
-#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
-
#define MAX_COST 1.e38
// Number of partitions for the three dominant (literal, red and blue) symbol
@@ -30,6 +27,8 @@
#define NUM_PARTITIONS 4
// The size of the bin-hash corresponding to the three dominant costs.
#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
+// Maximum number of histograms allowed in greedy combining algorithm.
+#define MAX_HISTO_GREEDY 100
static void HistogramClear(VP8LHistogram* const p) {
uint32_t* const literal = p->literal_;
@@ -40,6 +39,13 @@ static void HistogramClear(VP8LHistogram* const p) {
p->literal_ = literal;
}
+// Swap two histogram pointers.
+static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {
+ VP8LHistogram* const tmp = *A;
+ *A = *B;
+ *B = tmp;
+}
+
static void HistogramCopy(const VP8LHistogram* const src,
VP8LHistogram* const dst) {
uint32_t* const dst_literal = dst->literal_;
@@ -106,7 +112,8 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
VP8LHistogramSet* set;
const int histo_size = VP8LGetHistogramSize(cache_bits);
const size_t total_size =
- sizeof(*set) + size * (sizeof(*set->histograms) + histo_size + ALIGN_CST);
+ sizeof(*set) + size * (sizeof(*set->histograms) +
+ histo_size + WEBP_ALIGN_CST);
uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
if (memory == NULL) return NULL;
@@ -117,7 +124,7 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
set->max_size = size;
set->size = size;
for (i = 0; i < size; ++i) {
- memory = (uint8_t*)DO_ALIGN(memory);
+ memory = (uint8_t*)WEBP_ALIGN(memory);
set->histograms[i] = (VP8LHistogram*)memory;
// literal_ won't necessary be aligned.
set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
@@ -149,24 +156,26 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
}
}
-static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
- double retval) {
+// -----------------------------------------------------------------------------
+// Entropy-related functions.
+
+static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
double mix;
- if (nonzeros < 5) {
- if (nonzeros <= 1) {
+ if (entropy->nonzeros < 5) {
+ if (entropy->nonzeros <= 1) {
return 0;
}
// Two symbols, they will be 0 and 1 in a Huffman code.
// Let's mix in a bit of entropy to favor good clustering when
// distributions of these are combined.
- if (nonzeros == 2) {
- return 0.99 * sum + 0.01 * retval;
+ if (entropy->nonzeros == 2) {
+ return 0.99 * entropy->sum + 0.01 * entropy->entropy;
}
// No matter what the entropy says, we cannot be better than min_limit
// with Huffman coding. I am mixing a bit of entropy into the
// min_limit since it produces much better (~0.5 %) compression results
// perhaps because of better entropy clustering.
- if (nonzeros == 3) {
+ if (entropy->nonzeros == 3) {
mix = 0.95;
} else {
mix = 0.7; // nonzeros == 4.
@@ -176,52 +185,22 @@ static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
}
{
- double min_limit = 2 * sum - max_val;
- min_limit = mix * min_limit + (1.0 - mix) * retval;
- return (retval < min_limit) ? min_limit : retval;
+ double min_limit = 2 * entropy->sum - entropy->max_val;
+ min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
+ return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
}
}
-static double BitsEntropy(const uint32_t* const array, int n) {
- double retval = 0.;
- uint32_t sum = 0;
- int nonzeros = 0;
- uint32_t max_val = 0;
- int i;
- for (i = 0; i < n; ++i) {
- if (array[i] != 0) {
- sum += array[i];
- ++nonzeros;
- retval -= VP8LFastSLog2(array[i]);
- if (max_val < array[i]) {
- max_val = array[i];
- }
- }
+double VP8LBitsEntropy(const uint32_t* const array, int n,
+ uint32_t* const trivial_symbol) {
+ VP8LBitEntropy entropy;
+ VP8LBitsEntropyUnrefined(array, n, &entropy);
+ if (trivial_symbol != NULL) {
+ *trivial_symbol =
+ (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM;
}
- retval += VP8LFastSLog2(sum);
- return BitsEntropyRefine(nonzeros, sum, max_val, retval);
-}
-static double BitsEntropyCombined(const uint32_t* const X,
- const uint32_t* const Y, int n) {
- double retval = 0.;
- int sum = 0;
- int nonzeros = 0;
- int max_val = 0;
- int i;
- for (i = 0; i < n; ++i) {
- const int xy = X[i] + Y[i];
- if (xy != 0) {
- sum += xy;
- ++nonzeros;
- retval -= VP8LFastSLog2(xy);
- if (max_val < xy) {
- max_val = xy;
- }
- }
- }
- retval += VP8LFastSLog2(sum);
- return BitsEntropyRefine(nonzeros, sum, max_val, retval);
+ return BitsEntropyRefine(&entropy);
}
static double InitialHuffmanCost(void) {
@@ -242,47 +221,40 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
return retval;
}
-// Trampolines
-static double HuffmanCost(const uint32_t* const population, int length) {
- const VP8LStreaks stats = VP8LHuffmanCostCount(population, length);
- return FinalHuffmanCost(&stats);
-}
+// Get the symbol entropy for the distribution 'population'.
+// Set 'trivial_sym', if there's only one symbol present in the distribution.
+static double PopulationCost(const uint32_t* const population, int length,
+ uint32_t* const trivial_sym) {
+ VP8LBitEntropy bit_entropy;
+ VP8LStreaks stats;
+ VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
+ if (trivial_sym != NULL) {
+ *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
+ : VP8L_NON_TRIVIAL_SYM;
+ }
-static double HuffmanCostCombined(const uint32_t* const X,
- const uint32_t* const Y, int length) {
- const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length);
- return FinalHuffmanCost(&stats);
+ return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
-// Aggregated costs
-static double PopulationCost(const uint32_t* const population, int length) {
- return BitsEntropy(population, length) + HuffmanCost(population, length);
-}
+static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
+ const uint32_t* const Y,
+ int length) {
+ VP8LBitEntropy bit_entropy;
+ VP8LStreaks stats;
+ VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
-static double GetCombinedEntropy(const uint32_t* const X,
- const uint32_t* const Y, int length) {
- return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
+ return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
return
- PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
- + PopulationCost(p->red_, NUM_LITERAL_CODES)
- + PopulationCost(p->blue_, NUM_LITERAL_CODES)
- + PopulationCost(p->alpha_, NUM_LITERAL_CODES)
- + PopulationCost(p->distance_, NUM_DISTANCE_CODES)
- + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
- + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
-}
-
-double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
- return
- BitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
- + BitsEntropy(p->red_, NUM_LITERAL_CODES)
- + BitsEntropy(p->blue_, NUM_LITERAL_CODES)
- + BitsEntropy(p->alpha_, NUM_LITERAL_CODES)
- + BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
+ PopulationCost(
+ p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
+ + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
+ + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
+ + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
+ + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
@@ -313,8 +285,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
- *cost += VP8LExtraCostCombined(a->distance_, b->distance_,
- NUM_DISTANCE_CODES);
+ *cost +=
+ VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0;
return 1;
@@ -338,6 +310,8 @@ static double HistogramAddEval(const VP8LHistogram* const a,
VP8LHistogramAdd(a, b, out);
out->bit_cost_ = cost;
out->palette_code_bits_ = a->palette_code_bits_;
+ out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ?
+ a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM;
}
return cost - sum_cost;
@@ -389,18 +363,26 @@ static void UpdateDominantCostRange(
}
static void UpdateHistogramCost(VP8LHistogram* const h) {
- const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES);
+ uint32_t alpha_sym, red_sym, blue_sym;
+ const double alpha_cost =
+ PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
const double distance_cost =
- PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
+ PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
- h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
+ h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
- h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES);
- h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES);
+ h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
+ h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost;
+ if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
+ h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
+ } else {
+ h->trivial_symbol_ =
+ ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0);
+ }
}
static int GetBinIdForEntropy(double min, double max, double val) {
@@ -409,7 +391,14 @@ static int GetBinIdForEntropy(double min, double max, double val) {
return (int)(NUM_PARTITIONS * delta / range);
}
-// TODO(vikasa): Evaluate, if there's any correlation between red & blue.
+static int GetHistoBinIndexLowEffort(
+ const VP8LHistogram* const h, const DominantCostRange* const c) {
+ const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
+ h->literal_cost_);
+ assert(bin_id < NUM_PARTITIONS);
+ return bin_id;
+}
+
static int GetHistoBinIndex(
const VP8LHistogram* const h, const DominantCostRange* const c) {
const int bin_id =
@@ -432,7 +421,6 @@ static void HistogramBuild(
VP8LHistogram** const histograms = image_histo->histograms;
VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
assert(histo_bits > 0);
- // Construct the Histo from a given backward references.
while (VP8LRefsCursorOk(&c)) {
const PixOrCopy* const v = c.cur_pos;
const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
@@ -463,8 +451,8 @@ static void HistogramCopyAndAnalyze(
// Partition histograms to different entropy bins for three dominant (literal,
// red and blue) symbol costs and compute the histogram aggregate bit_cost.
-static void HistogramAnalyzeEntropyBin(
- VP8LHistogramSet* const image_histo, int16_t* const bin_map) {
+static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
+ int16_t* const bin_map, int low_effort) {
int i;
VP8LHistogram** const histograms = image_histo->histograms;
const int histo_size = image_histo->size;
@@ -483,7 +471,9 @@ static void HistogramAnalyzeEntropyBin(
for (i = 0; i < histo_size; ++i) {
int num_histos;
VP8LHistogram* const histo = histograms[i];
- const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range);
+ const int16_t bin_id = low_effort ?
+ (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) :
+ (int16_t)GetHistoBinIndex(histo, &cost_range);
const int bin_offset = bin_id * bin_depth;
// bin_map[n][0] for every bin 'n' maintains the counter for the number of
// histograms in that bin.
@@ -495,64 +485,79 @@ static void HistogramAnalyzeEntropyBin(
}
}
-// Compact the histogram set by moving the valid one left in the set to the
-// head and moving the ones that have been merged to other histograms towards
-// the end.
-// TODO(vikasa): Evaluate if this method can be avoided by altering the code
-// logic of HistogramCombineEntropyBin main loop.
+// Compact the histogram set by removing unused entries.
static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
- int start = 0;
- int end = image_histo->size - 1;
VP8LHistogram** const histograms = image_histo->histograms;
- while (start < end) {
- while (start <= end && histograms[start] != NULL &&
- histograms[start]->bit_cost_ != 0.) {
- ++start;
- }
- while (start <= end && histograms[end]->bit_cost_ == 0.) {
- histograms[end] = NULL;
- --end;
- }
- if (start < end) {
- assert(histograms[start] != NULL);
- assert(histograms[end] != NULL);
- HistogramCopy(histograms[end], histograms[start]);
- histograms[end] = NULL;
- --end;
+ int i, j;
+
+ for (i = 0, j = 0; i < image_histo->size; ++i) {
+ if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) {
+ if (j < i) {
+ histograms[j] = histograms[i];
+ histograms[i] = NULL;
+ }
+ ++j;
}
}
- image_histo->size = end + 1;
+ image_histo->size = j;
}
-static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
- VP8LHistogram* const histos,
- int16_t* const bin_map, int bin_depth,
- double combine_cost_factor) {
+static VP8LHistogram* HistogramCombineEntropyBin(
+ VP8LHistogramSet* const image_histo,
+ VP8LHistogram* cur_combo,
+ int16_t* const bin_map, int bin_depth, int num_bins,
+ double combine_cost_factor, int low_effort) {
int bin_id;
- VP8LHistogram* cur_combo = histos;
VP8LHistogram** const histograms = image_histo->histograms;
- for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) {
+ for (bin_id = 0; bin_id < num_bins; ++bin_id) {
const int bin_offset = bin_id * bin_depth;
const int num_histos = bin_map[bin_offset];
const int idx1 = bin_map[bin_offset + 1];
+ int num_combine_failures = 0;
int n;
for (n = 2; n <= num_histos; ++n) {
const int idx2 = bin_map[bin_offset + n];
- const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
- if (bit_cost_idx2 > 0.) {
- const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
- const double curr_cost_diff =
- HistogramAddEval(histograms[idx1], histograms[idx2],
- cur_combo, bit_cost_thresh);
- if (curr_cost_diff < bit_cost_thresh) {
- HistogramCopy(cur_combo, histograms[idx1]);
- histograms[idx2]->bit_cost_ = 0.;
+ if (low_effort) {
+ // Merge all histograms with the same bin index, irrespective of cost of
+ // the merged histograms.
+ VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]);
+ histograms[idx2]->bit_cost_ = 0.;
+ } else {
+ const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
+ if (bit_cost_idx2 > 0.) {
+ const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
+ const double curr_cost_diff =
+ HistogramAddEval(histograms[idx1], histograms[idx2],
+ cur_combo, bit_cost_thresh);
+ if (curr_cost_diff < bit_cost_thresh) {
+ // Try to merge two histograms only if the combo is a trivial one or
+ // the two candidate histograms are already non-trivial.
+ // For some images, 'try_combine' turns out to be false for a lot of
+ // histogram pairs. In that case, we fallback to combining
+ // histograms as usual to avoid increasing the header size.
+ const int try_combine =
+ (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
+ ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
+ (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
+ const int max_combine_failures = 32;
+ if (try_combine || (num_combine_failures >= max_combine_failures)) {
+ HistogramSwap(&cur_combo, &histograms[idx1]);
+ histograms[idx2]->bit_cost_ = 0.;
+ } else {
+ ++num_combine_failures;
+ }
+ }
}
}
}
+ if (low_effort) {
+ // Update the bit_cost for the merged histograms (per bin index).
+ UpdateHistogramCost(histograms[idx1]);
+ }
}
HistogramCompactBins(image_histo);
+ return cur_combo;
}
static uint32_t MyRand(uint32_t *seed) {
@@ -563,8 +568,179 @@ static uint32_t MyRand(uint32_t *seed) {
return *seed;
}
-static void HistogramCombine(VP8LHistogramSet* const image_histo,
- VP8LHistogramSet* const histos, int quality) {
+// -----------------------------------------------------------------------------
+// Histogram pairs priority queue
+
+// Pair of histograms. Negative idx1 value means that pair is out-of-date.
+typedef struct {
+ int idx1;
+ int idx2;
+ double cost_diff;
+ double cost_combo;
+} HistogramPair;
+
+typedef struct {
+ HistogramPair* queue;
+ int size;
+ int max_size;
+} HistoQueue;
+
+static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
+ histo_queue->size = 0;
+ // max_index^2 for the queue size is safe. If you look at
+ // HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes
+ // data to the queue, you insert at most:
+ // - max_index*(max_index-1)/2 (the first two for loops)
+ // - max_index - 1 in the last for loop at the first iteration of the while
+ // loop, max_index - 2 at the second iteration ... therefore
+ // max_index*(max_index-1)/2 overall too
+ histo_queue->max_size = max_index * max_index;
+ // We allocate max_size + 1 because the last element at index "size" is
+ // used as temporary data (and it could be up to max_size).
+ histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1,
+ sizeof(*histo_queue->queue));
+ return histo_queue->queue != NULL;
+}
+
+static void HistoQueueClear(HistoQueue* const histo_queue) {
+ assert(histo_queue != NULL);
+ WebPSafeFree(histo_queue->queue);
+}
+
+static void SwapHistogramPairs(HistogramPair *p1,
+ HistogramPair *p2) {
+ const HistogramPair tmp = *p1;
+ *p1 = *p2;
+ *p2 = tmp;
+}
+
+// Given a valid priority queue in range [0, queue_size) this function checks
+// whether histo_queue[queue_size] should be accepted and swaps it with the
+// front if it is smaller. Otherwise, it leaves it as is.
+static void UpdateQueueFront(HistoQueue* const histo_queue) {
+ if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return;
+
+ if (histo_queue->queue[histo_queue->size].cost_diff <
+ histo_queue->queue[0].cost_diff) {
+ SwapHistogramPairs(histo_queue->queue,
+ histo_queue->queue + histo_queue->size);
+ }
+ ++histo_queue->size;
+
+ // We cannot add more elements than the capacity.
+ // The allocation adds an extra element to the official capacity so that
+ // histo_queue->queue[histo_queue->max_size] is read/written within bound.
+ assert(histo_queue->size <= histo_queue->max_size);
+}
+
+// -----------------------------------------------------------------------------
+
+static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
+ HistogramPair* const pair,
+ VP8LHistogram* const histos) {
+ if (idx1 > idx2) {
+ const int tmp = idx2;
+ idx2 = idx1;
+ idx1 = tmp;
+ }
+ pair->idx1 = idx1;
+ pair->idx2 = idx2;
+ pair->cost_diff =
+ HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0);
+ pair->cost_combo = histos->bit_cost_;
+}
+
+// Combines histograms by continuously choosing the one with the highest cost
+// reduction.
+static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
+ VP8LHistogram* const histos) {
+ int ok = 0;
+ int image_histo_size = image_histo->size;
+ int i, j;
+ VP8LHistogram** const histograms = image_histo->histograms;
+ // Indexes of remaining histograms.
+ int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters));
+ // Priority queue of histogram pairs.
+ HistoQueue histo_queue;
+
+ if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) {
+ goto End;
+ }
+
+ for (i = 0; i < image_histo_size; ++i) {
+ // Initialize clusters indexes.
+ clusters[i] = i;
+ for (j = i + 1; j < image_histo_size; ++j) {
+ // Initialize positions array.
+ PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size],
+ histos);
+ UpdateQueueFront(&histo_queue);
+ }
+ }
+
+ while (image_histo_size > 1 && histo_queue.size > 0) {
+ HistogramPair* copy_to;
+ const int idx1 = histo_queue.queue[0].idx1;
+ const int idx2 = histo_queue.queue[0].idx2;
+ VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
+ histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo;
+ // Remove merged histogram.
+ for (i = 0; i + 1 < image_histo_size; ++i) {
+ if (clusters[i] >= idx2) {
+ clusters[i] = clusters[i + 1];
+ }
+ }
+ --image_histo_size;
+
+ // Remove pairs intersecting the just combined best pair. This will
+ // therefore pop the head of the queue.
+ copy_to = histo_queue.queue;
+ for (i = 0; i < histo_queue.size; ++i) {
+ HistogramPair* const p = histo_queue.queue + i;
+ if (p->idx1 == idx1 || p->idx2 == idx1 ||
+ p->idx1 == idx2 || p->idx2 == idx2) {
+ // Do not copy the invalid pair.
+ continue;
+ }
+ if (p->cost_diff < histo_queue.queue[0].cost_diff) {
+ // Replace the top of the queue if we found better.
+ SwapHistogramPairs(histo_queue.queue, p);
+ }
+ SwapHistogramPairs(copy_to, p);
+ ++copy_to;
+ }
+ histo_queue.size = (int)(copy_to - histo_queue.queue);
+
+ // Push new pairs formed with combined histogram to the queue.
+ for (i = 0; i < image_histo_size; ++i) {
+ if (clusters[i] != idx1) {
+ PreparePair(histograms, idx1, clusters[i],
+ &histo_queue.queue[histo_queue.size], histos);
+ UpdateQueueFront(&histo_queue);
+ }
+ }
+ }
+ // Move remaining histograms to the beginning of the array.
+ for (i = 0; i < image_histo_size; ++i) {
+ if (i != clusters[i]) { // swap the two histograms
+ HistogramSwap(&histograms[i], &histograms[clusters[i]]);
+ }
+ }
+
+ image_histo->size = image_histo_size;
+ ok = 1;
+
+ End:
+ WebPSafeFree(clusters);
+ HistoQueueClear(&histo_queue);
+ return ok;
+}
+
+static VP8LHistogram* HistogramCombineStochastic(
+ VP8LHistogramSet* const image_histo,
+ VP8LHistogram* tmp_histo,
+ VP8LHistogram* best_combo,
+ int quality, int min_cluster_size) {
int iter;
uint32_t seed = 0;
int tries_with_no_success = 0;
@@ -573,12 +749,10 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo,
const int outer_iters = image_histo_size * iter_mult;
const int num_pairs = image_histo_size / 2;
const int num_tries_no_success = outer_iters / 2;
- const int min_cluster_size = 2;
VP8LHistogram** const histograms = image_histo->histograms;
- VP8LHistogram* cur_combo = histos->histograms[0]; // trial histogram
- VP8LHistogram* best_combo = histos->histograms[1]; // best histogram so far
// Collapse similar histograms in 'image_histo'.
+ ++min_cluster_size;
for (iter = 0;
iter < outer_iters && image_histo_size >= min_cluster_size;
++iter) {
@@ -602,13 +776,9 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo,
// Calculate cost reduction on combining.
curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
- cur_combo, best_cost_diff);
+ tmp_histo, best_cost_diff);
if (curr_cost_diff < best_cost_diff) { // found a better pair?
- { // swap cur/best combo histograms
- VP8LHistogram* const tmp_histo = cur_combo;
- cur_combo = best_combo;
- best_combo = tmp_histo;
- }
+ HistogramSwap(&best_combo, &tmp_histo);
best_cost_diff = curr_cost_diff;
best_idx1 = idx1;
best_idx2 = idx2;
@@ -616,11 +786,11 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo,
}
if (best_idx1 >= 0) {
- HistogramCopy(best_combo, histograms[best_idx1]);
+ HistogramSwap(&best_combo, &histograms[best_idx1]);
// swap best_idx2 slot with last one (which is now unused)
--image_histo_size;
if (best_idx2 != image_histo_size) {
- HistogramCopy(histograms[image_histo_size], histograms[best_idx2]);
+ HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
histograms[image_histo_size] = NULL;
}
tries_with_no_success = 0;
@@ -630,6 +800,7 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo,
}
}
image_histo->size = image_histo_size;
+ return best_combo;
}
// -----------------------------------------------------------------------------
@@ -643,28 +814,37 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
int i;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms;
- for (i = 0; i < orig_histo->size; ++i) {
- int best_out = 0;
- double best_bits =
- HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
- int k;
- for (k = 1; k < image_histo->size; ++k) {
- const double cur_bits =
- HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
- if (cur_bits < best_bits) {
- best_bits = cur_bits;
- best_out = k;
+ const int orig_histo_size = orig_histo->size;
+ const int image_histo_size = image_histo->size;
+ if (image_histo_size > 1) {
+ for (i = 0; i < orig_histo_size; ++i) {
+ int best_out = 0;
+ double best_bits =
+ HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
+ int k;
+ for (k = 1; k < image_histo_size; ++k) {
+ const double cur_bits =
+ HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
+ if (cur_bits < best_bits) {
+ best_bits = cur_bits;
+ best_out = k;
+ }
}
+ symbols[i] = best_out;
+ }
+ } else {
+ assert(image_histo_size == 1);
+ for (i = 0; i < orig_histo_size; ++i) {
+ symbols[i] = 0;
}
- symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
- for (i = 0; i < image_histo->size; ++i) {
+ for (i = 0; i < image_histo_size; ++i) {
HistogramClear(histograms[i]);
}
- for (i = 0; i < orig_histo->size; ++i) {
+ for (i = 0; i < orig_histo_size; ++i) {
const int idx = symbols[i];
VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]);
}
@@ -672,44 +852,48 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
static double GetCombineCostFactor(int histo_size, int quality) {
double combine_cost_factor = 0.16;
- if (histo_size > 256) combine_cost_factor /= 2.;
- if (histo_size > 512) combine_cost_factor /= 2.;
- if (histo_size > 1024) combine_cost_factor /= 2.;
- if (quality <= 50) combine_cost_factor /= 2.;
+ if (quality < 90) {
+ if (histo_size > 256) combine_cost_factor /= 2.;
+ if (histo_size > 512) combine_cost_factor /= 2.;
+ if (histo_size > 1024) combine_cost_factor /= 2.;
+ if (quality <= 50) combine_cost_factor /= 2.;
+ }
return combine_cost_factor;
}
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
- int quality, int histo_bits, int cache_bits,
+ int quality, int low_effort,
+ int histo_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
+ VP8LHistogramSet* const tmp_histos,
uint16_t* const histogram_symbols) {
int ok = 0;
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
const int image_histo_raw_size = histo_xsize * histo_ysize;
+ const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
// The bin_map for every bin follows following semantics:
// bin_map[n][0] = num_histo; // The number of histograms in that bin.
// bin_map[n][1] = index of first histogram in that bin;
// bin_map[n][num_histo] = index of last histogram in that bin;
- // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices.
+ // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices.
const int bin_depth = image_histo_raw_size + 1;
int16_t* bin_map = NULL;
- VP8LHistogramSet* const histos = VP8LAllocateHistogramSet(2, cache_bits);
VP8LHistogramSet* const orig_histo =
VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
+ VP8LHistogram* cur_combo;
+ const int entropy_combine =
+ (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
- if (orig_histo == NULL || histos == NULL) {
- goto Error;
- }
+ if (orig_histo == NULL) goto Error;
// Don't attempt linear bin-partition heuristic for:
// histograms of small sizes, as bin_map will be very sparse and;
- // Higher qualities (> 90), to preserve the compression gains at those
- // quality settings.
- if (orig_histo->size > 2 * BIN_SIZE && quality < 90) {
- const int bin_map_size = bin_depth * BIN_SIZE;
+ // Maximum quality (q==100), to preserve the compression gains at that level.
+ if (entropy_combine) {
+ const int bin_map_size = bin_depth * entropy_combine_num_bins;
bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
if (bin_map == NULL) goto Error;
}
@@ -719,18 +903,33 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// Copies the histograms and computes its bit_cost.
HistogramCopyAndAnalyze(orig_histo, image_histo);
- if (bin_map != NULL) {
+ cur_combo = tmp_histos->histograms[1]; // pick up working slot
+ if (entropy_combine) {
const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
- HistogramAnalyzeEntropyBin(orig_histo, bin_map);
+ HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
- HistogramCombineEntropyBin(image_histo, histos->histograms[0],
- bin_map, bin_depth, combine_cost_factor);
+ cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map,
+ bin_depth, entropy_combine_num_bins,
+ combine_cost_factor, low_effort);
}
- // Collapse similar histograms by random histogram-pair compares.
- HistogramCombine(image_histo, histos, quality);
+ // Don't combine the histograms using stochastic and greedy heuristics for
+ // low-effort compression mode.
+ if (!low_effort || !entropy_combine) {
+ const float x = quality / 100.f;
+ // cubic ramp between 1 and MAX_HISTO_GREEDY:
+ const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
+ cur_combo = HistogramCombineStochastic(image_histo,
+ tmp_histos->histograms[0],
+ cur_combo, quality, threshold_size);
+ if ((image_histo->size <= threshold_size) &&
+ !HistogramCombineGreedy(image_histo, cur_combo)) {
+ goto Error;
+ }
+ }
+ // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
// Find the optimal map from original histograms to the final ones.
HistogramRemap(orig_histo, image_histo, histogram_symbols);
@@ -739,6 +938,5 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
Error:
WebPSafeFree(bin_map);
VP8LFreeHistogramSet(orig_histo);
- VP8LFreeHistogramSet(histos);
return ok;
}
diff --git a/src/3rdparty/libwebp/src/enc/histogram.h b/src/3rdparty/libwebp/src/enc/histogram.h
index 1cf4c54..d303d1d 100644
--- a/src/3rdparty/libwebp/src/enc/histogram.h
+++ b/src/3rdparty/libwebp/src/enc/histogram.h
@@ -14,10 +14,6 @@
#ifndef WEBP_ENC_HISTOGRAM_H_
#define WEBP_ENC_HISTOGRAM_H_
-#include <assert.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
#include <string.h>
#include "./backward_references.h"
@@ -28,6 +24,9 @@
extern "C" {
#endif
+// Not a trivial literal symbol.
+#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
+
// A simple container for histograms of data.
typedef struct {
// literal_ contains green literal, palette-code and
@@ -39,9 +38,11 @@ typedef struct {
// Backward reference prefix-code histogram.
uint32_t distance_[NUM_DISTANCE_CODES];
int palette_code_bits_;
- double bit_cost_; // cached value of VP8LHistogramEstimateBits(this)
- double literal_cost_; // Cached values of dominant entropy costs:
- double red_cost_; // literal, red & blue.
+ uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha
+ // literal symbols are single valued.
+ double bit_cost_; // cached value of bit cost.
+ double literal_cost_; // Cached values of dominant entropy costs:
+ double red_cost_; // literal, red & blue.
double blue_cost_;
} VP8LHistogram;
@@ -91,14 +92,6 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
const PixOrCopy* const v);
-// Estimate how many bits the combined entropy of literals and distance
-// approximately maps to.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
-
-// This function estimates the cost in bits excluding the bits needed to
-// represent the entropy code itself.
-double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
-
static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
@@ -107,10 +100,22 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
// Builds the histogram image.
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
- int quality, int histogram_bits, int cache_bits,
+ int quality, int low_effort,
+ int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_in,
+ VP8LHistogramSet* const tmp_histos,
uint16_t* const histogram_symbols);
+// Returns the entropy for the symbols in the input array.
+// Also sets trivial_symbol to the code value, if the array has only one code
+// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM.
+double VP8LBitsEntropy(const uint32_t* const array, int n,
+ uint32_t* const trivial_symbol);
+
+// Estimate how many bits the combined entropy of literals and distance
+// approximately maps to.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/3rdparty/libwebp/src/enc/iterator.c b/src/3rdparty/libwebp/src/enc/iterator.c
index e42ad00..99d960a 100644
--- a/src/3rdparty/libwebp/src/enc/iterator.c
+++ b/src/3rdparty/libwebp/src/enc/iterator.c
@@ -70,13 +70,13 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
it->enc_ = enc;
it->y_stride_ = enc->pic_->y_stride;
it->uv_stride_ = enc->pic_->uv_stride;
- it->yuv_in_ = (uint8_t*)DO_ALIGN(it->yuv_mem_);
- it->yuv_out_ = it->yuv_in_ + YUV_SIZE;
- it->yuv_out2_ = it->yuv_out_ + YUV_SIZE;
- it->yuv_p_ = it->yuv_out2_ + YUV_SIZE;
+ it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
+ it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC;
+ it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
+ it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC;
it->lf_stats_ = enc->lf_stats_;
it->percent0_ = enc->percent_;
- it->y_left_ = (uint8_t*)DO_ALIGN(it->yuv_left_mem_ + 1);
+ it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
it->u_left_ = it->y_left_ + 16 + 16;
it->v_left_ = it->u_left_ + 16;
VP8IteratorReset(it);
@@ -136,9 +136,9 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) {
const int uv_w = (w + 1) >> 1;
const int uv_h = (h + 1) >> 1;
- ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF, w, h, 16);
- ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF, uv_w, uv_h, 8);
- ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF, uv_w, uv_h, 8);
+ ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16);
+ ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
+ ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);
if (tmp_32 == NULL) return;
@@ -185,9 +185,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
const VP8Encoder* const enc = it->enc_;
if (enc->config_->show_compressed) {
const int x = it->x_, y = it->y_;
- const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
- const uint8_t* const usrc = it->yuv_out_ + U_OFF;
- const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
+ const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+ const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
+ const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
const WebPPicture* const pic = enc->pic_;
uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
@@ -286,8 +286,8 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
VP8Encoder* const enc = it->enc_;
const int x = it->x_, y = it->y_;
- const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
- const uint8_t* const uvsrc = it->yuv_out_ + U_OFF;
+ const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+ const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
if (x < enc->mb_w_ - 1) { // left
int i;
for (i = 0; i < 16; ++i) {
diff --git a/src/3rdparty/libwebp/src/enc/near_lossless.c b/src/3rdparty/libwebp/src/enc/near_lossless.c
new file mode 100644
index 0000000..9bc0f0e
--- /dev/null
+++ b/src/3rdparty/libwebp/src/enc/near_lossless.c
@@ -0,0 +1,160 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Near-lossless image preprocessing adjusts pixel values to help
+// compressibility with a guarantee of maximum deviation between original and
+// resulting pixel values.
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+// Converted to C by Aleksander Kramarz (akramarz@google.com)
+
+#include <stdlib.h>
+
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+#include "./vp8enci.h"
+
+#define MIN_DIM_FOR_NEAR_LOSSLESS 64
+#define MAX_LIMIT_BITS 5
+
+// Computes quantized pixel value and distance from original value.
+static void GetValAndDistance(int a, int initial, int bits,
+ int* const val, int* const distance) {
+ const int mask = ~((1 << bits) - 1);
+ *val = (initial & mask) | (initial >> (8 - bits));
+ *distance = 2 * abs(a - *val);
+}
+
+// Clamps the value to range [0, 255].
+static int Clamp8b(int val) {
+ const int min_val = 0;
+ const int max_val = 0xff;
+ return (val < min_val) ? min_val : (val > max_val) ? max_val : val;
+}
+
+// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one.
+static int FindClosestDiscretized(int a, int bits) {
+ int best_val = a, i;
+ int min_distance = 256;
+
+ for (i = -1; i <= 1; ++i) {
+ int candidate, distance;
+ const int val = Clamp8b(a + i * (1 << bits));
+ GetValAndDistance(a, val, bits, &candidate, &distance);
+ if (i != 0) {
+ ++distance;
+ }
+ // Smallest distance but favor i == 0 over i == -1 and i == 1
+ // since that keeps the overall intensity more constant in the
+ // images.
+ if (distance < min_distance) {
+ min_distance = distance;
+ best_val = candidate;
+ }
+ }
+ return best_val;
+}
+
+// Applies FindClosestDiscretized to all channels of pixel.
+static uint32_t ClosestDiscretizedArgb(uint32_t a, int bits) {
+ return
+ (FindClosestDiscretized(a >> 24, bits) << 24) |
+ (FindClosestDiscretized((a >> 16) & 0xff, bits) << 16) |
+ (FindClosestDiscretized((a >> 8) & 0xff, bits) << 8) |
+ (FindClosestDiscretized(a & 0xff, bits));
+}
+
+// Checks if distance between corresponding channel values of pixels a and b
+// is within the given limit.
+static int IsNear(uint32_t a, uint32_t b, int limit) {
+ int k;
+ for (k = 0; k < 4; ++k) {
+ const int delta =
+ (int)((a >> (k * 8)) & 0xff) - (int)((b >> (k * 8)) & 0xff);
+ if (delta >= limit || delta <= -limit) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int IsSmooth(const uint32_t* const prev_row,
+ const uint32_t* const curr_row,
+ const uint32_t* const next_row,
+ int ix, int limit) {
+ // Check that all pixels in 4-connected neighborhood are smooth.
+ return (IsNear(curr_row[ix], curr_row[ix - 1], limit) &&
+ IsNear(curr_row[ix], curr_row[ix + 1], limit) &&
+ IsNear(curr_row[ix], prev_row[ix], limit) &&
+ IsNear(curr_row[ix], next_row[ix], limit));
+}
+
+// Adjusts pixel values of image with given maximum error.
+static void NearLossless(int xsize, int ysize, uint32_t* argb,
+ int limit_bits, uint32_t* copy_buffer) {
+ int x, y;
+ const int limit = 1 << limit_bits;
+ uint32_t* prev_row = copy_buffer;
+ uint32_t* curr_row = prev_row + xsize;
+ uint32_t* next_row = curr_row + xsize;
+ memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0]));
+
+ for (y = 1; y < ysize - 1; ++y) {
+ uint32_t* const curr_argb_row = argb + y * xsize;
+ uint32_t* const next_argb_row = curr_argb_row + xsize;
+ memcpy(next_row, next_argb_row, xsize * sizeof(argb[0]));
+ for (x = 1; x < xsize - 1; ++x) {
+ if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) {
+ curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits);
+ }
+ }
+ {
+ // Three-way swap.
+ uint32_t* const temp = prev_row;
+ prev_row = curr_row;
+ curr_row = next_row;
+ next_row = temp;
+ }
+ }
+}
+
+static int QualityToLimitBits(int quality) {
+ // quality mapping:
+ // 0..19 -> 5
+ // 0..39 -> 4
+ // 0..59 -> 3
+ // 0..79 -> 2
+ // 0..99 -> 1
+ // 100 -> 0
+ return MAX_LIMIT_BITS - quality / 20;
+}
+
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) {
+ int i;
+ uint32_t* const copy_buffer =
+ (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer));
+ const int limit_bits = QualityToLimitBits(quality);
+ assert(argb != NULL);
+ assert(limit_bits >= 0);
+ assert(limit_bits <= MAX_LIMIT_BITS);
+ if (copy_buffer == NULL) {
+ return 0;
+ }
+ // For small icon images, don't attempt to apply near-lossless compression.
+ if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) {
+ WebPSafeFree(copy_buffer);
+ return 1;
+ }
+
+ for (i = limit_bits; i != 0; --i) {
+ NearLossless(xsize, ysize, argb, i, copy_buffer);
+ }
+ WebPSafeFree(copy_buffer);
+ return 1;
+}
diff --git a/src/3rdparty/libwebp/src/enc/picture.c b/src/3rdparty/libwebp/src/enc/picture.c
index 9a66fbe..26679a7 100644
--- a/src/3rdparty/libwebp/src/enc/picture.c
+++ b/src/3rdparty/libwebp/src/enc/picture.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include "./vp8enci.h"
+#include "../dsp/dsp.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/enc/picture_csp.c b/src/3rdparty/libwebp/src/enc/picture_csp.c
index 7875f62..0ef5f9e 100644
--- a/src/3rdparty/libwebp/src/enc/picture_csp.c
+++ b/src/3rdparty/libwebp/src/enc/picture_csp.c
@@ -32,10 +32,6 @@ static const union {
} test_endian = { 0xff000000u };
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
-static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
- return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
-}
-
//------------------------------------------------------------------------------
// Detection of non-trivial transparency
@@ -89,9 +85,9 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
static int kLinearToGammaTab[kGammaTabSize + 1];
static uint16_t kGammaToLinearTab[256];
-static int kGammaTablesOk = 0;
+static volatile int kGammaTablesOk = 0;
-static void InitGammaTables(void) {
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
if (!kGammaTablesOk) {
int v;
const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
@@ -130,7 +126,7 @@ static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
#else
-static void InitGammaTables(void) {}
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {}
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
return (int)(base_value << shift);
@@ -162,19 +158,15 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) {
static const int kNumIterations = 6;
static const int kMinDimensionIterativeConversion = 4;
-// We use a-priori a different precision for storing RGB and Y/W components
-// We could use YFIX=0 and only uint8_t for fixed_y_t, but it produces some
+// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
// banding sometimes. Better use extra precision.
-// TODO(skal): cleanup once TFIX/YFIX values are fixed.
+#define SFIX 2 // fixed-point precision of RGB and Y/W
+typedef int16_t fixed_t; // signed type with extra SFIX precision for UV
+typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
-typedef int16_t fixed_t; // signed type with extra TFIX precision for UV
-typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W
-#define TFIX 6 // fixed-point precision of RGB
-#define YFIX 2 // fixed point precision for Y/W
-
-#define THALF ((1 << TFIX) >> 1)
-#define MAX_Y_T ((256 << YFIX) - 1)
-#define TROUNDER (1 << (YUV_FIX + TFIX - 1))
+#define SHALF (1 << SFIX >> 1)
+#define MAX_Y_T ((256 << SFIX) - 1)
+#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
#if defined(USE_GAMMA_COMPRESSION)
@@ -184,9 +176,9 @@ typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W
#define kGammaF 2.2
static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX
static float kLinearToGammaTabF[kGammaTabSize + 2];
-static int kGammaTablesFOk = 0;
+static volatile int kGammaTablesFOk = 0;
-static void InitGammaTablesF(void) {
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
if (!kGammaTablesFOk) {
int v;
const double norm = 1. / MAX_Y_T;
@@ -207,52 +199,31 @@ static WEBP_INLINE float GammaToLinearF(int v) {
return kGammaToLinearTabF[v];
}
-static WEBP_INLINE float LinearToGammaF(float value) {
+static WEBP_INLINE int LinearToGammaF(float value) {
const float v = value * kGammaTabSize;
const int tab_pos = (int)v;
const float x = v - (float)tab_pos; // fractional part
const float v0 = kLinearToGammaTabF[tab_pos + 0];
const float v1 = kLinearToGammaTabF[tab_pos + 1];
const float y = v1 * x + v0 * (1.f - x); // interpolate
- return y;
+ return (int)(y + .5);
}
#else
-static void InitGammaTablesF(void) {}
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {}
static WEBP_INLINE float GammaToLinearF(int v) {
const float norm = 1.f / MAX_Y_T;
return norm * v;
}
-static WEBP_INLINE float LinearToGammaF(float value) {
- return MAX_Y_T * value;
+static WEBP_INLINE int LinearToGammaF(float value) {
+ return (int)(MAX_Y_T * value + .5);
}
#endif // USE_GAMMA_COMPRESSION
//------------------------------------------------------------------------------
-// precision: YFIX -> TFIX
-static WEBP_INLINE int FixedYToW(int v) {
-#if TFIX == YFIX
- return v;
-#elif TFIX >= YFIX
- return v << (TFIX - YFIX);
-#else
- return v >> (YFIX - TFIX);
-#endif
-}
-
-static WEBP_INLINE int FixedWToY(int v) {
-#if TFIX == YFIX
- return v;
-#elif YFIX >= TFIX
- return v << (YFIX - TFIX);
-#else
- return v >> (TFIX - YFIX);
-#endif
-}
-
static uint8_t clip_8b(fixed_t v) {
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}
@@ -261,13 +232,6 @@ static fixed_y_t clip_y(int y) {
return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
}
-// precision: TFIX -> YFIX
-static fixed_y_t clip_fixed_t(fixed_t v) {
- const int y = FixedWToY(v);
- const fixed_y_t w = clip_y(y);
- return w;
-}
-
//------------------------------------------------------------------------------
static int RGBToGray(int r, int g, int b) {
@@ -279,7 +243,7 @@ static float RGBToGrayF(float r, float g, float b) {
return 0.299f * r + 0.587f * g + 0.114f * b;
}
-static float ScaleDown(int a, int b, int c, int d) {
+static int ScaleDown(int a, int b, int c, int d) {
const float A = GammaToLinearF(a);
const float B = GammaToLinearF(b);
const float C = GammaToLinearF(c);
@@ -293,30 +257,36 @@ static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) {
const float G = GammaToLinearF(src[1]);
const float B = GammaToLinearF(src[2]);
const float Y = RGBToGrayF(R, G, B);
- *dst++ = (fixed_y_t)(LinearToGammaF(Y) + .5);
+ *dst++ = (fixed_y_t)LinearToGammaF(Y);
src += 3;
}
}
-static WEBP_INLINE void UpdateChroma(const fixed_y_t* src1,
- const fixed_y_t* src2,
- fixed_t* dst, fixed_y_t* tmp, int len) {
+static int UpdateChroma(const fixed_y_t* src1,
+ const fixed_y_t* src2,
+ fixed_t* dst, fixed_y_t* tmp, int len) {
+ int diff = 0;
while (len--> 0) {
- const float r = ScaleDown(src1[0], src1[3], src2[0], src2[3]);
- const float g = ScaleDown(src1[1], src1[4], src2[1], src2[4]);
- const float b = ScaleDown(src1[2], src1[5], src2[2], src2[5]);
- const float W = RGBToGrayF(r, g, b);
- dst[0] = (fixed_t)FixedYToW((int)(r - W));
- dst[1] = (fixed_t)FixedYToW((int)(g - W));
- dst[2] = (fixed_t)FixedYToW((int)(b - W));
+ const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]);
+ const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]);
+ const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]);
+ const int W = RGBToGray(r, g, b);
+ const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2;
+ const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2;
+ const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2;
+ dst[0] = (fixed_t)(r - W);
+ dst[1] = (fixed_t)(g - W);
+ dst[2] = (fixed_t)(b - W);
dst += 3;
src1 += 6;
src2 += 6;
if (tmp != NULL) {
- tmp[0] = tmp[1] = clip_y((int)(W + .5));
+ tmp[0] = tmp[1] = clip_y(W);
tmp += 2;
}
+ diff += abs(RGBToGray(r_avg, g_avg, b_avg) - W);
}
+ return diff;
}
//------------------------------------------------------------------------------
@@ -336,9 +306,8 @@ static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; }
//------------------------------------------------------------------------------
-// 8bit -> YFIX
-static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {
- return ((fixed_y_t)a << YFIX) | (1 << (YFIX - 1));
+static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX
+ return ((fixed_y_t)a << SFIX) | SHALF;
}
static void ImportOneRow(const uint8_t* const r_ptr,
@@ -368,50 +337,48 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y,
fixed_y_t* const out2) {
int i, k;
{ // special boundary case for i==0
- const int W0 = FixedYToW(best_y[0]);
- const int W1 = FixedYToW(best_y[w]);
+ const int W0 = best_y[0];
+ const int W1 = best_y[w];
for (k = 0; k <= 2; ++k) {
- out1[k] = clip_fixed_t(Filter2(cur_uv[k], prev_uv[k]) + W0);
- out2[k] = clip_fixed_t(Filter2(cur_uv[k], next_uv[k]) + W1);
+ out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0);
+ out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1);
}
}
for (i = 1; i < w - 1; ++i) {
- const int W0 = FixedYToW(best_y[i + 0]);
- const int W1 = FixedYToW(best_y[i + w]);
+ const int W0 = best_y[i + 0];
+ const int W1 = best_y[i + w];
const int off = 3 * (i >> 1);
for (k = 0; k <= 2; ++k) {
const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1);
const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1);
- out1[3 * i + k] = clip_fixed_t(tmp0 + W0);
- out2[3 * i + k] = clip_fixed_t(tmp1 + W1);
+ out1[3 * i + k] = clip_y(tmp0 + W0);
+ out2[3 * i + k] = clip_y(tmp1 + W1);
}
}
{ // special boundary case for i == w - 1
- const int W0 = FixedYToW(best_y[i + 0]);
- const int W1 = FixedYToW(best_y[i + w]);
+ const int W0 = best_y[i + 0];
+ const int W1 = best_y[i + w];
const int off = 3 * (i >> 1);
for (k = 0; k <= 2; ++k) {
- out1[3 * i + k] =
- clip_fixed_t(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0);
- out2[3 * i + k] =
- clip_fixed_t(Filter2(cur_uv[off + k], next_uv[off + k]) + W1);
+ out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0);
+ out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1);
}
}
}
static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
- const int luma = 16839 * r + 33059 * g + 6420 * b + TROUNDER;
- return clip_8b(16 + (luma >> (YUV_FIX + TFIX)));
+ const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
+ return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
}
static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
- const int u = -9719 * r - 19081 * g + 28800 * b + TROUNDER;
- return clip_8b(128 + (u >> (YUV_FIX + TFIX)));
+ const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER;
+ return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
}
static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
- const int v = +28800 * r - 24116 * g - 4684 * b + TROUNDER;
- return clip_8b(128 + (v >> (YUV_FIX + TFIX)));
+ const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER;
+ return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
}
static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
@@ -426,7 +393,7 @@ static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
for (i = 0; i < picture->width; ++i) {
const int off = 3 * ((i >> 1) + (j >> 1) * uv_w);
const int off2 = i + j * picture->y_stride;
- const int W = FixedYToW(best_y[i + j * w]);
+ const int W = best_y[i + j * w];
const int r = best_uv[off + 0] + W;
const int g = best_uv[off + 1] + W;
const int b = best_uv[off + 2] + W;
@@ -475,6 +442,10 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
int ok;
+ int diff_sum = 0;
+ const int first_diff_threshold = (int)(2.5 * w * h);
+ const int min_improvement = 5; // stop if improvement is below this %
+ const int min_first_improvement = 80;
if (best_y == NULL || best_uv == NULL ||
target_y == NULL || target_uv == NULL ||
@@ -507,7 +478,7 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
}
UpdateW(src1, target_y + (j + 0) * w, w);
UpdateW(src2, target_y + (j + 1) * w, w);
- UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
+ diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv));
memcpy(dst_y + w, dst_y, w * sizeof(*dst_y));
}
@@ -517,10 +488,11 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
int k;
const fixed_t* cur_uv = best_uv;
const fixed_t* prev_uv = best_uv;
+ const int old_diff_sum = diff_sum;
+ diff_sum = 0;
for (j = 0; j < h; j += 2) {
fixed_y_t* const src1 = tmp_buffer;
fixed_y_t* const src2 = tmp_buffer + 3 * w;
-
{
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv,
@@ -531,7 +503,7 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
UpdateW(src1, best_rgb_y + 0 * w, w);
UpdateW(src2, best_rgb_y + 1 * w, w);
- UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
+ diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
// update two rows of Y and one row of RGB
for (i = 0; i < 2 * w; ++i) {
@@ -553,7 +525,23 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
}
}
}
- // TODO(skal): add early-termination criterion
+ // test exit condition
+ if (diff_sum > 0) {
+ const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum;
+ // Check if first iteration gave good result already, without a large
+ // jump of improvement (otherwise it means we need to try few extra
+ // iterations, just to be sure).
+ if (iter == 0 && diff_sum < first_diff_threshold &&
+ improvement < min_first_improvement) {
+ break;
+ }
+ // then, check if improvement is stalling.
+ if (improvement < min_improvement) {
+ break;
+ }
+ } else {
+ break;
+ }
}
// final reconstruction
@@ -762,23 +750,20 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
int width,
VP8Random* const rg) {
int i, j;
- for (i = 0, j = 0; i < width; ++i, j += step) {
+ for (i = 0, j = 0; i < width; i += 1, j += step) {
dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
}
}
-static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
- const uint8_t* const g_ptr,
- const uint8_t* const b_ptr,
- const uint8_t* const a_ptr,
- int rgb_stride,
- uint8_t* const dst_u,
- uint8_t* const dst_v,
- int width,
- VP8Random* const rg) {
+static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
+ const uint8_t* const g_ptr,
+ const uint8_t* const b_ptr,
+ const uint8_t* const a_ptr,
+ int rgb_stride,
+ uint16_t* dst, int width) {
int i, j;
- // we loop over 2x2 blocks and produce one U/V value for each.
- for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) {
+ // we loop over 2x2 blocks and produce one R/G/B/A value for each.
+ for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
const uint32_t a = SUM4ALPHA(a_ptr + j);
int r, g, b;
if (a == 4 * 0xff || a == 0) {
@@ -790,8 +775,10 @@ static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
}
- dst_u[i] = RGBToU(r, g, b, rg);
- dst_v[i] = RGBToV(r, g, b, rg);
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
}
if (width & 1) {
const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
@@ -805,31 +792,39 @@ static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
}
- dst_u[i] = RGBToU(r, g, b, rg);
- dst_v[i] = RGBToV(r, g, b, rg);
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
+ }
+}
+
+static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
+ const uint8_t* const g_ptr,
+ const uint8_t* const b_ptr,
+ int step, int rgb_stride,
+ uint16_t* dst, int width) {
+ int i, j;
+ for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
+ dst[0] = SUM4(r_ptr + j, step);
+ dst[1] = SUM4(g_ptr + j, step);
+ dst[2] = SUM4(b_ptr + j, step);
+ }
+ if (width & 1) {
+ dst[0] = SUM2(r_ptr + j);
+ dst[1] = SUM2(g_ptr + j);
+ dst[2] = SUM2(b_ptr + j);
}
}
-static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
- const uint8_t* const g_ptr,
- const uint8_t* const b_ptr,
- int step, int rgb_stride,
+static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
uint8_t* const dst_u,
uint8_t* const dst_v,
int width,
VP8Random* const rg) {
- int i, j;
- for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) {
- const int r = SUM4(r_ptr + j, step);
- const int g = SUM4(g_ptr + j, step);
- const int b = SUM4(b_ptr + j, step);
- dst_u[i] = RGBToU(r, g, b, rg);
- dst_v[i] = RGBToV(r, g, b, rg);
- }
- if (width & 1) {
- const int r = SUM2(r_ptr + j);
- const int g = SUM2(g_ptr + j);
- const int b = SUM2(b_ptr + j);
+ int i;
+ for (i = 0; i < width; i += 1, rgb += 4) {
+ const int r = rgb[0], g = rgb[1], b = rgb[2];
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
}
@@ -848,6 +843,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
const int width = picture->width;
const int height = picture->height;
const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+ const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr
picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
picture->use_argb = 0;
@@ -864,7 +860,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
if (has_alpha) {
WebPInitAlphaProcessing();
assert(step == 4);
-#if defined(USE_INVERSE_ALPHA_TABLE)
+#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
assert(kAlphaFix + kGammaFix <= 31);
#endif
}
@@ -879,6 +875,11 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
picture->a, picture->a_stride);
}
} else {
+ const int uv_width = (width + 1) >> 1;
+ int use_dsp = (step == 3); // use special function in this case
+ // temporary storage for accumulated R/G/B values during conversion to U/V
+ uint16_t* const tmp_rgb =
+ (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
uint8_t* dst_y = picture->y;
uint8_t* dst_u = picture->u;
uint8_t* dst_v = picture->v;
@@ -889,19 +890,32 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
if (dithering > 0.) {
VP8InitRandom(&base_rg, dithering);
rg = &base_rg;
+ use_dsp = 0; // can't use dsp in this case
}
-
+ WebPInitConvertARGBToYUV();
InitGammaTables();
+ if (tmp_rgb == NULL) return 0; // malloc error
+
// Downsample Y/U/V planes, two rows at a time
for (y = 0; y < (height >> 1); ++y) {
int rows_have_alpha = has_alpha;
const int off1 = (2 * y + 0) * rgb_stride;
const int off2 = (2 * y + 1) * rgb_stride;
- ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
- dst_y, width, rg);
- ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
- dst_y + picture->y_stride, width, rg);
+ if (use_dsp) {
+ if (is_rgb) {
+ WebPConvertRGB24ToY(r_ptr + off1, dst_y, width);
+ WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width);
+ } else {
+ WebPConvertBGR24ToY(b_ptr + off1, dst_y, width);
+ WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width);
+ }
+ } else {
+ ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
+ dst_y, width, rg);
+ ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
+ dst_y + picture->y_stride, width, rg);
+ }
dst_y += 2 * picture->y_stride;
if (has_alpha) {
rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
@@ -909,13 +923,19 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
dst_a, picture->a_stride);
dst_a += 2 * picture->a_stride;
}
+ // Collect averaged R/G/B(/A)
if (!rows_have_alpha) {
- ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
- step, rgb_stride, dst_u, dst_v, width, rg);
+ AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+ step, rgb_stride, tmp_rgb, width);
+ } else {
+ AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1,
+ rgb_stride, tmp_rgb, width);
+ }
+ // Convert to U/V
+ if (rg == NULL) {
+ WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
} else {
- ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1,
- a_ptr + off1, rgb_stride,
- dst_u, dst_v, width, rg);
+ ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
}
dst_u += picture->uv_stride;
dst_v += picture->uv_stride;
@@ -923,20 +943,35 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
if (height & 1) { // extra last row
const int off = 2 * y * rgb_stride;
int row_has_alpha = has_alpha;
- ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
- dst_y, width, rg);
+ if (use_dsp) {
+ if (r_ptr < b_ptr) {
+ WebPConvertRGB24ToY(r_ptr + off, dst_y, width);
+ } else {
+ WebPConvertBGR24ToY(b_ptr + off, dst_y, width);
+ }
+ } else {
+ ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
+ dst_y, width, rg);
+ }
if (row_has_alpha) {
row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
}
+ // Collect averaged R/G/B(/A)
if (!row_has_alpha) {
- ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
- step, 0, dst_u, dst_v, width, rg);
+ // Collect averaged R/G/B
+ AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off,
+ step, /* rgb_stride = */ 0, tmp_rgb, width);
+ } else {
+ AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off,
+ /* rgb_stride = */ 0, tmp_rgb, width);
+ }
+ if (rg == NULL) {
+ WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
} else {
- ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off,
- a_ptr + off, 0,
- dst_u, dst_v, width, rg);
+ ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
}
}
+ WebPSafeFree(tmp_rgb);
}
return 1;
}
@@ -978,11 +1013,9 @@ int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
}
-#if WEBP_ENCODER_ABI_VERSION > 0x0204
int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
}
-#endif
//------------------------------------------------------------------------------
// call for YUVA -> ARGB conversion
@@ -1066,14 +1099,23 @@ static int Import(WebPPicture* const picture,
}
if (!WebPPictureAlloc(picture)) return 0;
- assert(step >= (import_alpha ? 4 : 3));
- for (y = 0; y < height; ++y) {
- uint32_t* const dst = &picture->argb[y * picture->argb_stride];
- int x;
- for (x = 0; x < width; ++x) {
- const int offset = step * x + y * rgb_stride;
- dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff,
- r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+ VP8EncDspARGBInit();
+
+ if (import_alpha) {
+ assert(step == 4);
+ for (y = 0; y < height; ++y) {
+ uint32_t* const dst = &picture->argb[y * picture->argb_stride];
+ const int offset = y * rgb_stride;
+ VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset,
+ b_ptr + offset, width, dst);
+ }
+ } else {
+ assert(step >= 3);
+ for (y = 0; y < height; ++y) {
+ uint32_t* const dst = &picture->argb[y * picture->argb_stride];
+ const int offset = y * rgb_stride;
+ VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset,
+ width, step, dst);
}
}
return 1;
diff --git a/src/3rdparty/libwebp/src/enc/picture_psnr.c b/src/3rdparty/libwebp/src/enc/picture_psnr.c
index 2254b7e..40214ef 100644
--- a/src/3rdparty/libwebp/src/enc/picture_psnr.c
+++ b/src/3rdparty/libwebp/src/enc/picture_psnr.c
@@ -12,8 +12,10 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <math.h>
+#include <stdlib.h>
#include "./vp8enci.h"
+#include "../utils/utils.h"
//------------------------------------------------------------------------------
// local-min distortion
@@ -23,9 +25,9 @@
#define RADIUS 2 // search radius. Shouldn't be too large.
-static float AccumulateLSIM(const uint8_t* src, int src_stride,
- const uint8_t* ref, int ref_stride,
- int w, int h) {
+static void AccumulateLSIM(const uint8_t* src, int src_stride,
+ const uint8_t* ref, int ref_stride,
+ int w, int h, DistoStats* stats) {
int x, y;
double total_sse = 0.;
for (y = 0; y < h; ++y) {
@@ -38,16 +40,22 @@ static float AccumulateLSIM(const uint8_t* src, int src_stride,
const double value = (double)ref[y * ref_stride + x];
int i, j;
for (j = y_0; j < y_1; ++j) {
- const uint8_t* s = src + j * src_stride;
+ const uint8_t* const s = src + j * src_stride;
for (i = x_0; i < x_1; ++i) {
- const double sse = (double)(s[i] - value) * (s[i] - value);
+ const double diff = s[i] - value;
+ const double sse = diff * diff;
if (sse < best_sse) best_sse = sse;
}
}
total_sse += best_sse;
}
}
- return (float)total_sse;
+ stats->w = w * h;
+ stats->xm = 0;
+ stats->ym = 0;
+ stats->xxm = total_sse;
+ stats->yym = 0;
+ stats->xxm = 0;
}
#undef RADIUS
@@ -64,73 +72,90 @@ static float GetPSNR(const double v) {
int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
int type, float result[5]) {
DistoStats stats[5];
- int has_alpha;
- int uv_w, uv_h;
+ int w, h;
+
+ memset(stats, 0, sizeof(stats));
if (src == NULL || ref == NULL ||
src->width != ref->width || src->height != ref->height ||
- src->y == NULL || ref->y == NULL ||
- src->u == NULL || ref->u == NULL ||
- src->v == NULL || ref->v == NULL ||
- result == NULL) {
- return 0;
- }
- // TODO(skal): provide distortion for ARGB too.
- if (src->use_argb == 1 || src->use_argb != ref->use_argb) {
- return 0;
- }
-
- has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
- if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
- (has_alpha && (src->a == NULL || ref->a == NULL))) {
+ src->use_argb != ref->use_argb || result == NULL) {
return 0;
}
+ w = src->width;
+ h = src->height;
- memset(stats, 0, sizeof(stats));
+ if (src->use_argb == 1) {
+ if (src->argb == NULL || ref->argb == NULL) {
+ return 0;
+ } else {
+ int i, j, c;
+ uint8_t* tmp1, *tmp2;
+ uint8_t* const tmp_plane =
+ (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane));
+ if (tmp_plane == NULL) return 0;
+ tmp1 = tmp_plane;
+ tmp2 = tmp_plane + w * h;
+ for (c = 0; c < 4; ++c) {
+ for (j = 0; j < h; ++j) {
+ for (i = 0; i < w; ++i) {
+ tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8);
+ tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8);
+ }
+ }
+ if (type >= 2) {
+ AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]);
+ } else {
+ VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]);
+ }
+ }
+ free(tmp_plane);
+ }
+ } else {
+ int has_alpha, uv_w, uv_h;
+ if (src->y == NULL || ref->y == NULL ||
+ src->u == NULL || ref->u == NULL ||
+ src->v == NULL || ref->v == NULL) {
+ return 0;
+ }
+ has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
+ if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
+ (has_alpha && (src->a == NULL || ref->a == NULL))) {
+ return 0;
+ }
- uv_w = (src->width + 1) >> 1;
- uv_h = (src->height + 1) >> 1;
- if (type >= 2) {
- float sse[4];
- sse[0] = AccumulateLSIM(src->y, src->y_stride,
- ref->y, ref->y_stride, src->width, src->height);
- sse[1] = AccumulateLSIM(src->u, src->uv_stride,
- ref->u, ref->uv_stride, uv_w, uv_h);
- sse[2] = AccumulateLSIM(src->v, src->uv_stride,
- ref->v, ref->uv_stride, uv_w, uv_h);
- sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride,
- ref->a, ref->a_stride,
- src->width, src->height)
- : 0.f;
- result[0] = GetPSNR(sse[0] / (src->width * src->height));
- result[1] = GetPSNR(sse[1] / (uv_w * uv_h));
- result[2] = GetPSNR(sse[2] / (uv_w * uv_h));
- result[3] = GetPSNR(sse[3] / (src->width * src->height));
- {
- double total_sse = sse[0] + sse[1] + sse[2];
- int total_pixels = src->width * src->height + 2 * uv_w * uv_h;
+ uv_w = (src->width + 1) >> 1;
+ uv_h = (src->height + 1) >> 1;
+ if (type >= 2) {
+ AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride,
+ w, h, &stats[0]);
+ AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride,
+ uv_w, uv_h, &stats[1]);
+ AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride,
+ uv_w, uv_h, &stats[2]);
if (has_alpha) {
- total_pixels += src->width * src->height;
- total_sse += sse[3];
+ AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride,
+ w, h, &stats[3]);
+ }
+ } else {
+ VP8SSIMAccumulatePlane(src->y, src->y_stride,
+ ref->y, ref->y_stride,
+ w, h, &stats[0]);
+ VP8SSIMAccumulatePlane(src->u, src->uv_stride,
+ ref->u, ref->uv_stride,
+ uv_w, uv_h, &stats[1]);
+ VP8SSIMAccumulatePlane(src->v, src->uv_stride,
+ ref->v, ref->uv_stride,
+ uv_w, uv_h, &stats[2]);
+ if (has_alpha) {
+ VP8SSIMAccumulatePlane(src->a, src->a_stride,
+ ref->a, ref->a_stride,
+ w, h, &stats[3]);
}
- result[4] = GetPSNR(total_sse / total_pixels);
}
- } else {
+ }
+ // Final stat calculations.
+ {
int c;
- VP8SSIMAccumulatePlane(src->y, src->y_stride,
- ref->y, ref->y_stride,
- src->width, src->height, &stats[0]);
- VP8SSIMAccumulatePlane(src->u, src->uv_stride,
- ref->u, ref->uv_stride,
- uv_w, uv_h, &stats[1]);
- VP8SSIMAccumulatePlane(src->v, src->uv_stride,
- ref->v, ref->uv_stride,
- uv_w, uv_h, &stats[2]);
- if (has_alpha) {
- VP8SSIMAccumulatePlane(src->a, src->a_stride,
- ref->a, ref->a_stride,
- src->width, src->height, &stats[3]);
- }
for (c = 0; c <= 4; ++c) {
if (type == 1) {
const double v = VP8SSIMGet(&stats[c]);
diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale.c b/src/3rdparty/libwebp/src/enc/picture_rescale.c
index 9e45551..9f19e8e 100644
--- a/src/3rdparty/libwebp/src/enc/picture_rescale.c
+++ b/src/3rdparty/libwebp/src/enc/picture_rescale.c
@@ -30,16 +30,6 @@ static void PictureGrabSpecs(const WebPPicture* const src,
}
//------------------------------------------------------------------------------
-// Picture copying
-
-static void CopyPlane(const uint8_t* src, int src_stride,
- uint8_t* dst, int dst_stride, int width, int height) {
- while (height-- > 0) {
- memcpy(dst, src, width);
- src += src_stride;
- dst += dst_stride;
- }
-}
// Adjust top-left corner to chroma sample position.
static void SnapTopLeftPosition(const WebPPicture* const pic,
@@ -70,20 +60,20 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
if (!WebPPictureAlloc(dst)) return 0;
if (!src->use_argb) {
- CopyPlane(src->y, src->y_stride,
- dst->y, dst->y_stride, dst->width, dst->height);
- CopyPlane(src->u, src->uv_stride,
- dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
- CopyPlane(src->v, src->uv_stride,
- dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+ WebPCopyPlane(src->y, src->y_stride,
+ dst->y, dst->y_stride, dst->width, dst->height);
+ WebPCopyPlane(src->u, src->uv_stride, dst->u, dst->uv_stride,
+ HALVE(dst->width), HALVE(dst->height));
+ WebPCopyPlane(src->v, src->uv_stride, dst->v, dst->uv_stride,
+ HALVE(dst->width), HALVE(dst->height));
if (dst->a != NULL) {
- CopyPlane(src->a, src->a_stride,
- dst->a, dst->a_stride, dst->width, dst->height);
+ WebPCopyPlane(src->a, src->a_stride,
+ dst->a, dst->a_stride, dst->width, dst->height);
}
} else {
- CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
- (uint8_t*)dst->argb, 4 * dst->argb_stride,
- 4 * dst->width, dst->height);
+ WebPCopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
+ (uint8_t*)dst->argb, 4 * dst->argb_stride,
+ 4 * dst->width, dst->height);
}
return 1;
}
@@ -144,24 +134,23 @@ int WebPPictureCrop(WebPPicture* pic,
if (!pic->use_argb) {
const int y_offset = top * pic->y_stride + left;
const int uv_offset = (top / 2) * pic->uv_stride + left / 2;
- CopyPlane(pic->y + y_offset, pic->y_stride,
- tmp.y, tmp.y_stride, width, height);
- CopyPlane(pic->u + uv_offset, pic->uv_stride,
- tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
- CopyPlane(pic->v + uv_offset, pic->uv_stride,
- tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
+ WebPCopyPlane(pic->y + y_offset, pic->y_stride,
+ tmp.y, tmp.y_stride, width, height);
+ WebPCopyPlane(pic->u + uv_offset, pic->uv_stride,
+ tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
+ WebPCopyPlane(pic->v + uv_offset, pic->uv_stride,
+ tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
if (tmp.a != NULL) {
const int a_offset = top * pic->a_stride + left;
- CopyPlane(pic->a + a_offset, pic->a_stride,
- tmp.a, tmp.a_stride, width, height);
+ WebPCopyPlane(pic->a + a_offset, pic->a_stride,
+ tmp.a, tmp.a_stride, width, height);
}
} else {
const uint8_t* const src =
(const uint8_t*)(pic->argb + top * pic->argb_stride + left);
- CopyPlane(src, pic->argb_stride * 4,
- (uint8_t*)tmp.argb, tmp.argb_stride * 4,
- width * 4, height);
+ WebPCopyPlane(src, pic->argb_stride * 4, (uint8_t*)tmp.argb,
+ tmp.argb_stride * 4, width * 4, height);
}
WebPPictureFree(pic);
*pic = tmp;
@@ -210,16 +199,10 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
if (pic == NULL) return 0;
prev_width = pic->width;
prev_height = pic->height;
- // if width is unspecified, scale original proportionally to height ratio.
- if (width == 0) {
- width = (prev_width * height + prev_height / 2) / prev_height;
+ if (!WebPRescalerGetScaledDimensions(
+ prev_width, prev_height, &width, &height)) {
+ return 0;
}
- // if height is unspecified, scale original proportionally to width ratio.
- if (height == 0) {
- height = (prev_height * width + prev_width / 2) / prev_width;
- }
- // Check if the overall dimensions still make sense.
- if (width <= 0 || height <= 0) return 0;
PictureGrabSpecs(pic, &tmp);
tmp.width = width;
diff --git a/src/3rdparty/libwebp/src/enc/picture_tools.c b/src/3rdparty/libwebp/src/enc/picture_tools.c
index 7c73646..bf97af8 100644
--- a/src/3rdparty/libwebp/src/enc/picture_tools.c
+++ b/src/3rdparty/libwebp/src/enc/picture_tools.c
@@ -11,6 +11,8 @@
//
// Author: Skal (pascal.massimino@gmail.com)
+#include <assert.h>
+
#include "./vp8enci.h"
#include "../dsp/yuv.h"
@@ -120,6 +122,24 @@ void WebPCleanupTransparentArea(WebPPicture* pic) {
#undef SIZE
#undef SIZE2
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) {
+ int x, y, w, h;
+ uint32_t* argb;
+ assert(pic != NULL && pic->use_argb);
+ w = pic->width;
+ h = pic->height;
+ argb = pic->argb;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ if ((argb[x] & 0xff000000) == 0) {
+ argb[x] = 0x00000000;
+ }
+ }
+ argb += pic->argb_stride;
+ }
+}
+
//------------------------------------------------------------------------------
// Blend color and remove transparency info
diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c
index 9130a41..dd6885a 100644
--- a/src/3rdparty/libwebp/src/enc/quant.c
+++ b/src/3rdparty/libwebp/src/enc/quant.c
@@ -30,7 +30,7 @@
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
// power-law modulation. Must be strictly less than 1.
-#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
+#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision
// number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions)
@@ -41,6 +41,8 @@
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
+#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)
+
// #define DEBUG_BLOCK
//------------------------------------------------------------------------------
@@ -54,15 +56,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int i, j;
const int is_i16 = (it->mb_->type_ == 1);
+ const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
+ const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
+ const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
+ const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
printf("SOURCE / OUTPUT / ABS DELTA\n");
- for (j = 0; j < 24; ++j) {
- if (j == 16) printf("\n"); // newline before the U/V block
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
+ for (j = 0; j < 16; ++j) {
+ for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
printf(" ");
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
+ for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
printf(" ");
for (i = 0; i < 16; ++i) {
- printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
+ printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
+ }
+ printf("\n");
+ }
+ printf("\n"); // newline before the U/V block
+ for (j = 0; j < 8; ++j) {
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
+ printf(" ");
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
+ printf(" ");
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
+ printf(" ");
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
+ printf(" ");
+ for (i = 0; i < 8; ++i) {
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
+ }
+ printf(" ");
+ for (i = 8; i < 16; ++i) {
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
}
printf("\n");
}
@@ -444,15 +468,12 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
// Quantize
// Layout:
-// +----+
-// |YYYY| 0
-// |YYYY| 4
-// |YYYY| 8
-// |YYYY| 12
-// +----+
-// |UUVV| 16
-// |UUVV| 20
-// +----+
+// +----+----+
+// |YYYY|UUVV| 0
+// |YYYY|UUVV| 4
+// |YYYY|....| 8
+// |YYYY|....| 12
+// +----+----+
const int VP8Scan[16] = { // Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
@@ -538,13 +559,12 @@ typedef struct {
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
- // TODO: incorporate the "* 256" in the tables?
- rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
+ rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
}
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
score_t distortion) {
- return rate * lambda + 256 * distortion;
+ return rate * lambda + RD_DISTO_MULT * distortion;
}
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
@@ -553,7 +573,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
const VP8Matrix* const mtx,
int lambda) {
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
- const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
+ CostArrayPtr const costs =
+ (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
const int first = (coeff_type == 0) ? 1 : 0;
Node nodes[16][NUM_NODES];
ScoreState score_states[2][NUM_NODES];
@@ -590,7 +611,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
- ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
+ ss_cur[m].costs = costs[first][ctx0];
}
}
@@ -624,7 +645,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
int best_prev = 0; // default, in case
ss_cur[m].score = MAX_COST;
- ss_cur[m].costs = costs[band][ctx];
+ ss_cur[m].costs = costs[n + 1][ctx];
if (level > MAX_LEVEL || level < 0) { // node is dead?
continue;
}
@@ -719,14 +740,14 @@ static int ReconstructIntra16(VP8EncIterator* const it,
int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
int nz = 0;
int n;
int16_t tmp[16][16], dc_tmp[16];
- for (n = 0; n < 16; ++n) {
- VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+ for (n = 0; n < 16; n += 2) {
+ VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
}
VP8FTransformWHT(tmp[0], dc_tmp);
nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
@@ -746,12 +767,13 @@ static int ReconstructIntra16(VP8EncIterator* const it,
}
}
} else {
- for (n = 0; n < 16; ++n) {
+ for (n = 0; n < 16; n += 2) {
// Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
- tmp[n][0] = 0;
- nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
- assert(rd->y_ac_levels[n][0] == 0);
+ tmp[n][0] = tmp[n + 1][0] = 0;
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+ assert(rd->y_ac_levels[n + 0][0] == 0);
+ assert(rd->y_ac_levels[n + 1][0] == 0);
}
}
@@ -792,14 +814,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
uint8_t* const yuv_out, int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + U_OFF;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
int nz = 0;
int n;
int16_t tmp[8][16];
- for (n = 0; n < 8; ++n) {
- VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
+ for (n = 0; n < 8; n += 2) {
+ VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
}
if (DO_TRELLIS_UV && it->do_trellis_) {
int ch, x, y;
@@ -816,8 +838,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
}
}
} else {
- for (n = 0; n < 8; ++n) {
- nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+ for (n = 0; n < 8; n += 2) {
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
}
}
@@ -842,6 +864,12 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
}
+static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
+ VP8ModeScore* const tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
static void SwapPtr(uint8_t** a, uint8_t** b) {
uint8_t* const tmp = *a;
*a = *b;
@@ -865,46 +893,47 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
return 1;
}
-static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i16_;
const int tlambda = dqm->tlambda_;
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
- VP8ModeScore rd16;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+ VP8ModeScore rd_tmp;
+ VP8ModeScore* rd_cur = &rd_tmp;
+ VP8ModeScore* rd_best = rd;
int mode;
rd->mode_i16 = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
- uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
- int nz;
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
+ rd_cur->mode_i16 = mode;
// Reconstruct
- nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
+ rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
// Measure RD-score
- rd16.D = VP8SSE16x16(src, tmp_dst);
- rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
- : 0;
- rd16.H = VP8FixedCostsI16[mode];
- rd16.R = VP8GetCostLuma16(it, &rd16);
+ rd_cur->D = VP8SSE16x16(src, tmp_dst);
+ rd_cur->SD =
+ tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
+ rd_cur->H = VP8FixedCostsI16[mode];
+ rd_cur->R = VP8GetCostLuma16(it, rd_cur);
if (mode > 0 &&
- IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
+ IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
// penalty to avoid flat area to be mispredicted by complex mode
- rd16.R += FLATNESS_PENALTY * kNumBlocks;
+ rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
}
// Since we always examine Intra16 first, we can overwrite *rd directly.
- SetRDScore(lambda, &rd16);
- if (mode == 0 || rd16.score < rd->score) {
- CopyScore(rd, &rd16);
- rd->mode_i16 = mode;
- rd->nz = nz;
- memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
- memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
+ SetRDScore(lambda, rd_cur);
+ if (mode == 0 || rd_cur->score < rd_best->score) {
+ SwapModeScore(&rd_cur, &rd_best);
SwapOut(it);
}
}
+ if (rd_best != rd) {
+ memcpy(rd, rd_best, sizeof(*rd));
+ }
SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
VP8SetIntra16Mode(it, rd->mode_i16);
@@ -933,8 +962,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i4_;
const int tlambda = dqm->tlambda_;
- const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
- uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+ const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
+ uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
int total_header_bits = 0;
VP8ModeScore rd_best;
@@ -972,17 +1001,28 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
: 0;
rd_tmp.H = mode_costs[mode];
- rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
+
+ // Add flatness penalty
if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
- rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
+ rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
+ } else {
+ rd_tmp.R = 0;
}
+ // early-out check
SetRDScore(lambda, &rd_tmp);
+ if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
+
+ // finish computing score
+ rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
+ SetRDScore(lambda, &rd_tmp);
+
if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
CopyScore(&rd_i4, &rd_tmp);
best_mode = mode;
SwapPtr(&tmp_dst, &best_block);
- memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
+ memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
+ sizeof(rd_best.y_ac_levels[it->i4_]));
}
}
SetRDScore(dqm->lambda_mode_, &rd_i4);
@@ -1016,9 +1056,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
const int kNumBlocks = 8;
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_uv_;
- const uint8_t* const src = it->yuv_in_ + U_OFF;
- uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
- uint8_t* const dst0 = it->yuv_out_ + U_OFF;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+ uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
+ uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
+ uint8_t* dst = dst0;
VP8ModeScore rd_best;
int mode;
@@ -1032,7 +1073,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
// Compute RD-score
rd_uv.D = VP8SSE16x8(src, tmp_dst);
- rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
+ rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rd_uv.H = VP8FixedCostsUV[mode];
rd_uv.R = VP8GetCostUV(it, &rd_uv);
if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
@@ -1044,11 +1085,14 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
CopyScore(&rd_best, &rd_uv);
rd->mode_uv = mode;
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
- memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
+ SwapPtr(&dst, &tmp_dst);
}
}
VP8SetIntraUVMode(it, rd->mode_uv);
AddScore(rd, &rd_best);
+ if (dst != dst0) { // copy 16x8 block if needed
+ VP8Copy16x8(dst, dst0);
+ }
}
//------------------------------------------------------------------------------
@@ -1060,35 +1104,41 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
int nz = 0;
if (is_i16) {
- nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
} else {
VP8IteratorStartI4(it);
do {
const int mode =
it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
- uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+ uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
VP8MakeIntra4Preds(it);
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
src, dst, mode) << it->i4_;
- } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
+ } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
}
- nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
rd->nz = nz;
}
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
-static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
- const int is_i16 = (it->mb_->type_ == 1);
+static void RefineUsingDistortion(VP8EncIterator* const it,
+ int try_both_modes, int refine_uv_mode,
+ VP8ModeScore* const rd) {
score_t best_score = MAX_COST;
+ score_t score_i4 = (score_t)I4_PENALTY;
+ int16_t tmp_levels[16][16];
+ uint8_t modes_i4[16];
+ int nz = 0;
+ int mode;
+ int is_i16 = try_both_modes || (it->mb_->type_ == 1);
- if (try_both_i4_i16 || is_i16) {
- int mode;
+ if (is_i16) { // First, evaluate Intra16 distortion
int best_mode = -1;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
const score_t score = VP8SSE16x16(src, ref);
if (score < best_score) {
best_mode = mode;
@@ -1096,39 +1146,72 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
}
}
VP8SetIntra16Mode(it, best_mode);
+ // we'll reconstruct later, if i16 mode actually gets selected
}
- if (try_both_i4_i16 || !is_i16) {
- uint8_t modes_i4[16];
+
+ // Next, evaluate Intra4
+ if (try_both_modes || !is_i16) {
// We don't evaluate the rate here, but just account for it through a
// constant penalty (i4 mode usually needs more bits compared to i16).
- score_t score_i4 = (score_t)I4_PENALTY;
-
+ is_i16 = 0;
VP8IteratorStartI4(it);
do {
- int mode;
- int best_sub_mode = -1;
- score_t best_sub_score = MAX_COST;
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+ int best_i4_mode = -1;
+ score_t best_i4_score = MAX_COST;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
- // TODO(skal): we don't really need the prediction pixels here,
- // but just the distortion against 'src'.
VP8MakeIntra4Preds(it);
for (mode = 0; mode < NUM_BMODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
const score_t score = VP8SSE4x4(src, ref);
- if (score < best_sub_score) {
- best_sub_mode = mode;
- best_sub_score = score;
+ if (score < best_i4_score) {
+ best_i4_mode = mode;
+ best_i4_score = score;
}
}
- modes_i4[it->i4_] = best_sub_mode;
- score_i4 += best_sub_score;
- if (score_i4 >= best_score) break;
- } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
- if (score_i4 < best_score) {
- VP8SetIntra4Mode(it, modes_i4);
+ modes_i4[it->i4_] = best_i4_mode;
+ score_i4 += best_i4_score;
+ if (score_i4 >= best_score) {
+ // Intra4 won't be better than Intra16. Bail out and pick Intra16.
+ is_i16 = 1;
+ break;
+ } else { // reconstruct partial block inside yuv_out2_ buffer
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
+ nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
+ src, tmp_dst, best_i4_mode) << it->i4_;
+ }
+ } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
+ }
+
+ // Final reconstruction, depending on which mode is selected.
+ if (!is_i16) {
+ VP8SetIntra4Mode(it, modes_i4);
+ memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
+ SwapOut(it);
+ best_score = score_i4;
+ } else {
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+ }
+
+ // ... and UV!
+ if (refine_uv_mode) {
+ int best_mode = -1;
+ score_t best_uv_score = MAX_COST;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+ const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+ const score_t score = VP8SSE16x8(src, ref);
+ if (score < best_uv_score) {
+ best_mode = mode;
+ best_uv_score = score;
+ }
}
+ VP8SetIntraUVMode(it, best_mode);
}
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+
+ rd->nz = nz;
+ rd->score = best_score;
}
//------------------------------------------------------------------------------
@@ -1158,13 +1241,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
SimpleQuantize(it, rd);
}
} else {
- // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
- // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
- DistoRefine(it, (method >= 2));
- SimpleQuantize(it, rd);
+ // At this point we have heuristically decided intra16 / intra4.
+ // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
+ // For method <= 1, we don't re-examine the decision but just go ahead with
+ // quantization/reconstruction.
+ RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
}
is_skipped = (rd->nz == 0);
VP8SetSkip(it, is_skipped);
return is_skipped;
}
-
diff --git a/src/3rdparty/libwebp/src/enc/syntax.c b/src/3rdparty/libwebp/src/enc/syntax.c
index d1ff0a5..a0e79ef 100644
--- a/src/3rdparty/libwebp/src/enc/syntax.c
+++ b/src/3rdparty/libwebp/src/enc/syntax.c
@@ -186,8 +186,8 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
// Segmentation header
static void PutSegmentHeader(VP8BitWriter* const bw,
const VP8Encoder* const enc) {
- const VP8SegmentHeader* const hdr = &enc->segment_hdr_;
- const VP8Proba* const proba = &enc->proba_;
+ const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
+ const VP8EncProba* const proba = &enc->proba_;
if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
// We always 'update' the quant and filter strength values
const int update_data = 1;
@@ -197,16 +197,16 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
// we always use absolute values, not relative ones
VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.)
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
- VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7);
+ VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7);
}
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
- VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6);
+ VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6);
}
}
if (hdr->update_map_) {
for (s = 0; s < 3; ++s) {
if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
- VP8PutValue(bw, proba->segments_[s], 8);
+ VP8PutBits(bw, proba->segments_[s], 8);
}
}
}
@@ -215,20 +215,20 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
// Filtering parameters header
static void PutFilterHeader(VP8BitWriter* const bw,
- const VP8FilterHeader* const hdr) {
+ const VP8EncFilterHeader* const hdr) {
const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
VP8PutBitUniform(bw, hdr->simple_);
- VP8PutValue(bw, hdr->level_, 6);
- VP8PutValue(bw, hdr->sharpness_, 3);
+ VP8PutBits(bw, hdr->level_, 6);
+ VP8PutBits(bw, hdr->sharpness_, 3);
if (VP8PutBitUniform(bw, use_lf_delta)) {
// '0' is the default value for i4x4_lf_delta_ at frame #0.
const int need_update = (hdr->i4x4_lf_delta_ != 0);
if (VP8PutBitUniform(bw, need_update)) {
// we don't use ref_lf_delta => emit four 0 bits
- VP8PutValue(bw, 0, 4);
+ VP8PutBits(bw, 0, 4);
// we use mode_lf_delta for i4x4
- VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6);
- VP8PutValue(bw, 0, 3); // all others unused
+ VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6);
+ VP8PutBits(bw, 0, 3); // all others unused
}
}
}
@@ -236,12 +236,12 @@ static void PutFilterHeader(VP8BitWriter* const bw,
// Nominal quantization parameters
static void PutQuant(VP8BitWriter* const bw,
const VP8Encoder* const enc) {
- VP8PutValue(bw, enc->base_quant_, 7);
- VP8PutSignedValue(bw, enc->dq_y1_dc_, 4);
- VP8PutSignedValue(bw, enc->dq_y2_dc_, 4);
- VP8PutSignedValue(bw, enc->dq_y2_ac_, 4);
- VP8PutSignedValue(bw, enc->dq_uv_dc_, 4);
- VP8PutSignedValue(bw, enc->dq_uv_ac_, 4);
+ VP8PutBits(bw, enc->base_quant_, 7);
+ VP8PutSignedBits(bw, enc->dq_y1_dc_, 4);
+ VP8PutSignedBits(bw, enc->dq_y2_dc_, 4);
+ VP8PutSignedBits(bw, enc->dq_y2_ac_, 4);
+ VP8PutSignedBits(bw, enc->dq_uv_dc_, 4);
+ VP8PutSignedBits(bw, enc->dq_uv_ac_, 4);
}
// Partition sizes
@@ -277,9 +277,9 @@ static int GeneratePartition0(VP8Encoder* const enc) {
PutSegmentHeader(bw, enc);
PutFilterHeader(bw, &enc->filter_hdr_);
- VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
- enc->num_parts_ == 4 ? 2 :
- enc->num_parts_ == 2 ? 1 : 0, 2);
+ VP8PutBits(bw, enc->num_parts_ == 8 ? 3 :
+ enc->num_parts_ == 4 ? 2 :
+ enc->num_parts_ == 2 ? 1 : 0, 2);
PutQuant(bw, enc);
VP8PutBitUniform(bw, 0); // no proba update
VP8WriteProbas(bw, &enc->proba_);
diff --git a/src/3rdparty/libwebp/src/enc/token.c b/src/3rdparty/libwebp/src/enc/token.c
index 8af13a0..e73256b 100644
--- a/src/3rdparty/libwebp/src/enc/token.c
+++ b/src/3rdparty/libwebp/src/enc/token.c
@@ -30,15 +30,15 @@
#define MIN_PAGE_SIZE 8192 // minimum number of token per page
#define FIXED_PROBA_BIT (1u << 14)
-typedef uint16_t token_t; // bit#15: bit
- // bit #14: constant proba or idx
- // bits 0..13: slot or constant proba
+typedef uint16_t token_t; // bit #15: bit value
+ // bit #14: flags for constant proba or idx
+ // bits #0..13: slot or constant proba
struct VP8Tokens {
VP8Tokens* next_; // pointer to next page
};
// Token data is located in memory just after the next_ field.
// This macro is used to return their address and hide the trick.
-#define TOKEN_DATA(p) ((token_t*)&(p)[1])
+#define TOKEN_DATA(p) ((const token_t*)&(p)[1])
//------------------------------------------------------------------------------
@@ -53,10 +53,10 @@ void VP8TBufferInit(VP8TBuffer* const b, int page_size) {
void VP8TBufferClear(VP8TBuffer* const b) {
if (b != NULL) {
- const VP8Tokens* p = b->pages_;
+ VP8Tokens* p = b->pages_;
while (p != NULL) {
- const VP8Tokens* const next = p->next_;
- WebPSafeFree((void*)p);
+ VP8Tokens* const next = p->next_;
+ WebPSafeFree(p);
p = next;
}
VP8TBufferInit(b, b->page_size_);
@@ -65,8 +65,8 @@ void VP8TBufferClear(VP8TBuffer* const b) {
static int TBufferNewPage(VP8TBuffer* const b) {
VP8Tokens* page = NULL;
- const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t);
if (!b->error_) {
+ const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t);
page = (VP8Tokens*)WebPSafeMalloc(1ULL, size);
}
if (page == NULL) {
@@ -78,19 +78,19 @@ static int TBufferNewPage(VP8TBuffer* const b) {
*b->last_page_ = page;
b->last_page_ = &page->next_;
b->left_ = b->page_size_;
- b->tokens_ = TOKEN_DATA(page);
+ b->tokens_ = (token_t*)TOKEN_DATA(page);
return 1;
}
//------------------------------------------------------------------------------
-#define TOKEN_ID(t, b, ctx, p) \
- ((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
+#define TOKEN_ID(t, b, ctx) \
+ (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
-static WEBP_INLINE int AddToken(VP8TBuffer* const b,
- int bit, uint32_t proba_idx) {
+static WEBP_INLINE uint32_t AddToken(VP8TBuffer* const b,
+ uint32_t bit, uint32_t proba_idx) {
assert(proba_idx < FIXED_PROBA_BIT);
- assert(bit == 0 || bit == 1);
+ assert(bit <= 1);
if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | proba_idx;
@@ -99,20 +99,21 @@ static WEBP_INLINE int AddToken(VP8TBuffer* const b,
}
static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
- int bit, int proba) {
+ uint32_t bit, uint32_t proba) {
assert(proba < 256);
- assert(bit == 0 || bit == 1);
+ assert(bit <= 1);
if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
}
}
-int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
+int VP8RecordCoeffTokens(const int ctx, const int coeff_type,
+ int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens) {
int n = first;
- uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0);
+ uint32_t base_id = TOKEN_ID(coeff_type, n, ctx);
if (!AddToken(tokens, last >= 0, base_id + 0)) {
return 0;
}
@@ -120,14 +121,13 @@ int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
while (n < 16) {
const int c = coeffs[n++];
const int sign = c < 0;
- int v = sign ? -c : c;
+ const uint32_t v = sign ? -c : c;
if (!AddToken(tokens, v != 0, base_id + 1)) {
- ctx = 0;
- base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
+ base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0); // ctx=0
continue;
}
if (!AddToken(tokens, v > 1, base_id + 2)) {
- ctx = 1;
+ base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1); // ctx=1
} else {
if (!AddToken(tokens, v > 4, base_id + 3)) {
if (AddToken(tokens, v != 2, base_id + 4))
@@ -142,40 +142,40 @@ int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
} else {
int mask;
const uint8_t* tab;
- if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
+ uint32_t residue = v - 3;
+ if (residue < (8 << 1)) { // VP8Cat3 (3b)
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 0, base_id + 9);
- v -= 3 + (8 << 0);
+ residue -= (8 << 0);
mask = 1 << 2;
tab = VP8Cat3;
- } else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
+ } else if (residue < (8 << 2)) { // VP8Cat4 (4b)
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 1, base_id + 9);
- v -= 3 + (8 << 1);
+ residue -= (8 << 1);
mask = 1 << 3;
tab = VP8Cat4;
- } else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
+ } else if (residue < (8 << 3)) { // VP8Cat5 (5b)
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 0, base_id + 10);
- v -= 3 + (8 << 2);
+ residue -= (8 << 2);
mask = 1 << 4;
tab = VP8Cat5;
} else { // VP8Cat6 (11b)
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 1, base_id + 10);
- v -= 3 + (8 << 3);
+ residue -= (8 << 3);
mask = 1 << 10;
tab = VP8Cat6;
}
while (mask) {
- AddConstantToken(tokens, !!(v & mask), *tab++);
+ AddConstantToken(tokens, !!(residue & mask), *tab++);
mask >>= 1;
}
}
- ctx = 2;
+ base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2); // ctx=2
}
AddConstantToken(tokens, sign, 128);
- base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
return 1; // EOB
}
@@ -224,7 +224,6 @@ void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass) {
const VP8Tokens* p = b->pages_;
- (void)final_pass;
assert(!b->error_);
while (p != NULL) {
const VP8Tokens* const next = p->next_;
diff --git a/src/3rdparty/libwebp/src/enc/tree.c b/src/3rdparty/libwebp/src/enc/tree.c
index e5d05e5..f141006 100644
--- a/src/3rdparty/libwebp/src/enc/tree.c
+++ b/src/3rdparty/libwebp/src/enc/tree.c
@@ -154,7 +154,7 @@ const uint8_t
};
void VP8DefaultProbas(VP8Encoder* const enc) {
- VP8Proba* const probas = &enc->proba_;
+ VP8EncProba* const probas = &enc->proba_;
probas->use_skip_proba_ = 0;
memset(probas->segments_, 255u, sizeof(probas->segments_));
memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
@@ -482,7 +482,7 @@ const uint8_t
}
};
-void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) {
int t, b, c, p;
for (t = 0; t < NUM_TYPES; ++t) {
for (b = 0; b < NUM_BANDS; ++b) {
@@ -491,14 +491,14 @@ void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
const uint8_t p0 = probas->coeffs_[t][b][c][p];
const int update = (p0 != VP8CoeffsProba0[t][b][c][p]);
if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) {
- VP8PutValue(bw, p0, 8);
+ VP8PutBits(bw, p0, 8);
}
}
}
}
}
if (VP8PutBitUniform(bw, probas->use_skip_proba_)) {
- VP8PutValue(bw, probas->skip_proba_, 8);
+ VP8PutBits(bw, probas->skip_proba_, 8);
}
}
diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h
index 20f58c6..b2cc8d1 100644
--- a/src/3rdparty/libwebp/src/enc/vp8enci.h
+++ b/src/3rdparty/libwebp/src/enc/vp8enci.h
@@ -15,10 +15,16 @@
#define WEBP_ENC_VP8ENCI_H_
#include <string.h> // for memcpy()
-#include "../webp/encode.h"
+#include "../dec/common.h"
#include "../dsp/dsp.h"
#include "../utils/bit_writer.h"
#include "../utils/thread.h"
+#include "../utils/utils.h"
+#include "../webp/encode.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "./vp8li.h"
+#endif // WEBP_EXPERIMENTAL_FEATURES
#ifdef __cplusplus
extern "C" {
@@ -29,35 +35,10 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 0
-#define ENC_MIN_VERSION 4
-#define ENC_REV_VERSION 4
-
-// intra prediction modes
-enum { B_DC_PRED = 0, // 4x4 modes
- B_TM_PRED = 1,
- B_VE_PRED = 2,
- B_HE_PRED = 3,
- B_RD_PRED = 4,
- B_VR_PRED = 5,
- B_LD_PRED = 6,
- B_VL_PRED = 7,
- B_HD_PRED = 8,
- B_HU_PRED = 9,
- NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
-
- // Luma16 or UV modes
- DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
- H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
- NUM_PRED_MODES = 4
- };
+#define ENC_MIN_VERSION 5
+#define ENC_REV_VERSION 0
-enum { NUM_MB_SEGMENTS = 4,
- MAX_NUM_PARTITIONS = 8,
- NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
- NUM_BANDS = 8,
- NUM_CTX = 3,
- NUM_PROBAS = 11,
- MAX_LF_LEVELS = 64, // Maximum loop filter level
+enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)
};
@@ -69,66 +50,34 @@ typedef enum { // Rate-distortion optimization levels
RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
} VP8RDLevel;
-// YUV-cache parameters. Cache is 16-pixels wide.
-// The original or reconstructed samples can be accessed using VP8Scan[]
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// The original or reconstructed samples can be accessed using VP8Scan[].
// The predicted blocks can be accessed using offsets to yuv_p_ and
-// the arrays VP8*ModeOffsets[];
-// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
-// Y_OFF |YYYY| <- original samples ('yuv_in_')
-// |YYYY|
-// |YYYY|
-// |YYYY|
-// U_OFF |UUVV| V_OFF (=U_OFF + 8)
-// |UUVV|
-// +----+
-// Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_')
-// |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_')
-// |YYYY|
-// |YYYY|
-// U_OFF |UUVV| V_OFF
-// |UUVV|
-// x2 (for yuv_out2_)
-// +----+ Prediction area ('yuv_p_', size = PRED_SIZE)
-// I16DC16 |YYYY| Intra16 predictions (16x16 block each)
-// |YYYY|
-// |YYYY|
-// |YYYY|
-// I16TM16 |YYYY|
-// |YYYY|
-// |YYYY|
-// |YYYY|
-// I16VE16 |YYYY|
-// |YYYY|
-// |YYYY|
-// |YYYY|
-// I16HE16 |YYYY|
-// |YYYY|
-// |YYYY|
-// |YYYY|
-// +----+ Chroma U/V predictions (16x8 block each)
-// C8DC8 |UUVV|
-// |UUVV|
-// C8TM8 |UUVV|
-// |UUVV|
-// C8VE8 |UUVV|
-// |UUVV|
-// C8HE8 |UUVV|
-// |UUVV|
-// +----+ Intra 4x4 predictions (4x4 block each)
-// |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
-// |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
-// |YY..| I4HD4 I4HU4 I4TMP
-// +----+
-#define BPS 16 // this is the common stride
-#define Y_SIZE (BPS * 16)
-#define UV_SIZE (BPS * 8)
-#define YUV_SIZE (Y_SIZE + UV_SIZE)
-#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
-#define Y_OFF (0)
-#define U_OFF (Y_SIZE)
-#define V_OFF (U_OFF + 8)
-#define ALIGN_CST 15
-#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
+// the arrays VP8*ModeOffsets[].
+// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
+// (see VP8Scan[] for accessing the blocks, along with
+// Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
+// +----+----+
+// Y_OFF_ENC |YYYY|UUVV|
+// U_OFF_ENC |YYYY|UUVV|
+// V_OFF_ENC |YYYY|....| <- 25% wasted U/V area
+// |YYYY|....|
+// +----+----+
+// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
+// Intra16 predictions (16x16 block each, two per row):
+// |I16DC16|I16TM16|
+// |I16VE16|I16HE16|
+// Chroma U/V predictions (16x8 block each, two per row):
+// |C8DC8|C8TM8|
+// |C8VE8|C8HE8|
+// Intra 4x4 predictions (4x4 block each)
+// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
+// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
+#define YUV_SIZE_ENC (BPS * 16)
+#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds
+#define Y_OFF_ENC (0)
+#define U_OFF_ENC (16)
+#define V_OFF_ENC (16 + 8)
extern const int VP8Scan[16]; // in quant.c
extern const int VP8UVModeOffsets[4]; // in analyze.c
@@ -138,26 +87,26 @@ extern const int VP8I4ModeOffsets[NUM_BMODES];
// Layout of prediction blocks
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
-#define I16TM16 (1 * 16 * BPS)
-#define I16VE16 (2 * 16 * BPS)
-#define I16HE16 (3 * 16 * BPS)
+#define I16TM16 (I16DC16 + 16)
+#define I16VE16 (1 * 16 * BPS)
+#define I16HE16 (I16VE16 + 16)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
-#define C8DC8 (4 * 16 * BPS)
-#define C8TM8 (4 * 16 * BPS + 8 * BPS)
-#define C8VE8 (5 * 16 * BPS)
-#define C8HE8 (5 * 16 * BPS + 8 * BPS)
+#define C8DC8 (2 * 16 * BPS)
+#define C8TM8 (C8DC8 + 1 * 16)
+#define C8VE8 (2 * 16 * BPS + 8 * BPS)
+#define C8HE8 (C8VE8 + 1 * 16)
// intra 4x4
-#define I4DC4 (6 * 16 * BPS + 0)
-#define I4TM4 (6 * 16 * BPS + 4)
-#define I4VE4 (6 * 16 * BPS + 8)
-#define I4HE4 (6 * 16 * BPS + 12)
-#define I4RD4 (6 * 16 * BPS + 4 * BPS + 0)
-#define I4VR4 (6 * 16 * BPS + 4 * BPS + 4)
-#define I4LD4 (6 * 16 * BPS + 4 * BPS + 8)
-#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
-#define I4HD4 (6 * 16 * BPS + 8 * BPS + 0)
-#define I4HU4 (6 * 16 * BPS + 8 * BPS + 4)
-#define I4TMP (6 * 16 * BPS + 8 * BPS + 8)
+#define I4DC4 (3 * 16 * BPS + 0)
+#define I4TM4 (I4DC4 + 4)
+#define I4VE4 (I4DC4 + 8)
+#define I4HE4 (I4DC4 + 12)
+#define I4RD4 (I4DC4 + 16)
+#define I4VR4 (I4DC4 + 20)
+#define I4LD4 (I4DC4 + 24)
+#define I4VL4 (I4DC4 + 28)
+#define I4HD4 (3 * 16 * BPS + 4 * BPS)
+#define I4HU4 (I4HD4 + 4)
+#define I4TMP (I4HD4 + 8)
typedef int64_t score_t; // type used for scores, rate, distortion
// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
@@ -172,14 +121,6 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
return (int)((n * iQ + B) >> QFIX);
}
-// size of histogram used by CollectHistogram.
-#define MAX_COEFF_THRESH 31
-typedef struct VP8Histogram VP8Histogram;
-struct VP8Histogram {
- // TODO(skal): we only need to store the max_value and last_non_zero actually.
- int distribution[MAX_COEFF_THRESH + 1];
-};
-
// Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER
@@ -190,6 +131,8 @@ typedef uint32_t proba_t; // 16b + 16b
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
+typedef const uint16_t* CostArrayMap[16][NUM_CTX];
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
typedef struct VP8Encoder VP8Encoder;
@@ -200,7 +143,7 @@ typedef struct {
int update_map_; // whether to update the segment map or not.
// must be 0 if there's only 1 segment.
int size_; // bit-cost for transmitting the segment map
-} VP8SegmentHeader;
+} VP8EncSegmentHeader;
// Struct collecting all frame-persistent probabilities.
typedef struct {
@@ -209,10 +152,11 @@ typedef struct {
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
+ CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes
int dirty_; // if true, need to call VP8CalculateLevelCosts()
int use_skip_proba_; // Note: we always use skip_proba for now.
int nb_skip_; // number of skipped blocks
-} VP8Proba;
+} VP8EncProba;
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config
@@ -221,7 +165,7 @@ typedef struct {
int level_; // base filter level [0..63]
int sharpness_; // [0..7]
int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
-} VP8FilterHeader;
+} VP8EncFilterHeader;
//------------------------------------------------------------------------------
// Informations about the macroblocks.
@@ -307,9 +251,10 @@ typedef struct {
uint8_t* y_top_; // top luma samples at position 'x_'
uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes
- // memory for storing y/u/v_left_ and yuv_in_/out_*
- uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST]; // memory for *_left_
- uint8_t yuv_mem_[3 * YUV_SIZE + PRED_SIZE + ALIGN_CST]; // memory for yuv_*
+ // memory for storing y/u/v_left_
+ uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
+ // memory for yuv_*
+ uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
} VP8EncIterator;
// in iterator.c
@@ -381,7 +326,8 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass);
// record the coding of coefficients without knowing the probabilities yet
-int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
+int VP8RecordCoeffTokens(const int ctx, const int coeff_type,
+ int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens);
@@ -401,8 +347,8 @@ struct VP8Encoder {
WebPPicture* pic_; // input / output picture
// headers
- VP8FilterHeader filter_hdr_; // filtering information
- VP8SegmentHeader segment_hdr_; // segment information
+ VP8EncFilterHeader filter_hdr_; // filtering information
+ VP8EncSegmentHeader segment_hdr_; // segment information
int profile_; // VP8's profile, deduced from Config.
@@ -438,12 +384,12 @@ struct VP8Encoder {
int dq_uv_dc_, dq_uv_ac_;
// probabilities and statistics
- VP8Proba proba_;
- uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
- uint64_t sse_count_; // pixel count for the sse_[] stats
- int coded_size_;
- int residual_bytes_[3][4];
- int block_count_[3];
+ VP8EncProba proba_;
+ uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
+ uint64_t sse_count_; // pixel count for the sse_[] stats
+ int coded_size_;
+ int residual_bytes_[3][4];
+ int block_count_[3];
// quality/speed settings
int method_; // 0=fastest, 6=best/slowest.
@@ -473,7 +419,7 @@ extern const uint8_t
// Reset the token probabilities to their initial (default) values
void VP8DefaultProbas(VP8Encoder* const enc);
// Write the token probabilities
-void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
// Writes the partition #0 modes (that is: all intra modes)
void VP8CodeIntraModes(VP8Encoder* const enc);
@@ -486,7 +432,6 @@ int VP8EncWrite(VP8Encoder* const enc);
void VP8EncFreeBitWriters(VP8Encoder* const enc);
// in frame.c
-extern const uint8_t VP8EncBands[16 + 1];
extern const uint8_t VP8Cat3[];
extern const uint8_t VP8Cat4[];
extern const uint8_t VP8Cat5[];
@@ -569,12 +514,21 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
// Returns false in case of error (invalid param, out-of-memory).
int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+// Clean-up the RGB samples under fully transparent area, to help lossless
+// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
+void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
+
+ // in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless adjustment for predictors.
+void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits,
+ const uint32_t* argb_orig,
+ uint32_t* argb, uint32_t* argb_scratch,
+ const uint32_t* const transform_data,
+ int quality, int subtract_green);
//------------------------------------------------------------------------------
-#if WEBP_ENCODER_ABI_VERSION <= 0x0203
-void WebPMemoryWriterClear(WebPMemoryWriter* writer);
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/src/3rdparty/libwebp/src/enc/vp8l.c b/src/3rdparty/libwebp/src/enc/vp8l.c
index c2bb13d..db94e78 100644
--- a/src/3rdparty/libwebp/src/enc/vp8l.c
+++ b/src/3rdparty/libwebp/src/enc/vp8l.c
@@ -13,10 +13,10 @@
//
#include <assert.h>
-#include <stdio.h>
#include <stdlib.h>
#include "./backward_references.h"
+#include "./histogram.h"
#include "./vp8enci.h"
#include "./vp8li.h"
#include "../dsp/lossless.h"
@@ -25,23 +25,105 @@
#include "../utils/utils.h"
#include "../webp/format_constants.h"
+#include "./delta_palettization.h"
+
#define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer.
-#define MAX_HUFF_IMAGE_SIZE (16 * 1024 * 1024)
-#define MAX_COLORS_FOR_GRAPH 64
+// Maximum number of histogram images (sub-blocks).
+#define MAX_HUFF_IMAGE_SIZE 2600
-// -----------------------------------------------------------------------------
-// Palette
+// Palette reordering for smaller sum of deltas (and for smaller storage).
-static int CompareColors(const void* p1, const void* p2) {
- const uint32_t a = *(const uint32_t*)p1;
- const uint32_t b = *(const uint32_t*)p2;
+static int PaletteCompareColorsForQsort(const void* p1, const void* p2) {
+ const uint32_t a = WebPMemToUint32(p1);
+ const uint32_t b = WebPMemToUint32(p2);
assert(a != b);
return (a < b) ? -1 : 1;
}
+static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) {
+ return (v <= 128) ? v : (256 - v);
+}
+
+// Computes a value that is related to the entropy created by the
+// palette entry diff.
+//
+// Note that the last & 0xff is a no-operation in the next statement, but
+// removed by most compilers and is here only for regularity of the code.
+static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) {
+ const uint32_t diff = VP8LSubPixels(col1, col2);
+ const int kMoreWeightForRGBThanForAlpha = 9;
+ uint32_t score;
+ score = PaletteComponentDistance((diff >> 0) & 0xff);
+ score += PaletteComponentDistance((diff >> 8) & 0xff);
+ score += PaletteComponentDistance((diff >> 16) & 0xff);
+ score *= kMoreWeightForRGBThanForAlpha;
+ score += PaletteComponentDistance((diff >> 24) & 0xff);
+ return score;
+}
+
+static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) {
+ const uint32_t tmp = *col1;
+ *col1 = *col2;
+ *col2 = tmp;
+}
+
+static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) {
+ // Find greedily always the closest color of the predicted color to minimize
+ // deltas in the palette. This reduces storage needs since the
+ // palette is stored with delta encoding.
+ uint32_t predict = 0x00000000;
+ int i, k;
+ for (i = 0; i < num_colors; ++i) {
+ int best_ix = i;
+ uint32_t best_score = ~0U;
+ for (k = i; k < num_colors; ++k) {
+ const uint32_t cur_score = PaletteColorDistance(palette[k], predict);
+ if (best_score > cur_score) {
+ best_score = cur_score;
+ best_ix = k;
+ }
+ }
+ SwapColor(&palette[best_ix], &palette[i]);
+ predict = palette[i];
+ }
+}
+
+// The palette has been sorted by alpha. This function checks if the other
+// components of the palette have a monotonic development with regards to
+// position in the palette. If all have monotonic development, there is
+// no benefit to re-organize them greedily. A monotonic development
+// would be spotted in green-only situations (like lossy alpha) or gray-scale
+// images.
+static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) {
+ uint32_t predict = 0x000000;
+ int i;
+ uint8_t sign_found = 0x00;
+ for (i = 0; i < num_colors; ++i) {
+ const uint32_t diff = VP8LSubPixels(palette[i], predict);
+ const uint8_t rd = (diff >> 16) & 0xff;
+ const uint8_t gd = (diff >> 8) & 0xff;
+ const uint8_t bd = (diff >> 0) & 0xff;
+ if (rd != 0x00) {
+ sign_found |= (rd < 0x80) ? 1 : 2;
+ }
+ if (gd != 0x00) {
+ sign_found |= (gd < 0x80) ? 8 : 16;
+ }
+ if (bd != 0x00) {
+ sign_found |= (bd < 0x80) ? 64 : 128;
+ }
+ predict = palette[i];
+ }
+ return (sign_found & (sign_found << 1)) != 0; // two consequent signs.
+}
+
+// -----------------------------------------------------------------------------
+// Palette
+
// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
// creates a palette and returns true, else returns false.
static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+ int low_effort,
uint32_t palette[MAX_PALETTE_SIZE],
int* const palette_size) {
int i, x, y, key;
@@ -92,84 +174,240 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
++num_colors;
}
}
-
- qsort(palette, num_colors, sizeof(*palette), CompareColors);
*palette_size = num_colors;
+ qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
+ if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) {
+ GreedyMinimizeDeltas(palette, num_colors);
+ }
return 1;
}
-static int AnalyzeEntropy(const uint32_t* argb,
- int width, int height, int argb_stride,
- double* const nonpredicted_bits,
- double* const predicted_bits) {
- int x, y;
- const uint32_t* last_line = NULL;
- uint32_t last_pix = argb[0]; // so we're sure that pix_diff == 0
+// These five modes are evaluated and their respective entropy is computed.
+typedef enum {
+ kDirect = 0,
+ kSpatial = 1,
+ kSubGreen = 2,
+ kSpatialSubGreen = 3,
+ kPalette = 4,
+ kNumEntropyIx = 5
+} EntropyIx;
+
+typedef enum {
+ kHistoAlpha = 0,
+ kHistoAlphaPred,
+ kHistoGreen,
+ kHistoGreenPred,
+ kHistoRed,
+ kHistoRedPred,
+ kHistoBlue,
+ kHistoBluePred,
+ kHistoRedSubGreen,
+ kHistoRedPredSubGreen,
+ kHistoBlueSubGreen,
+ kHistoBluePredSubGreen,
+ kHistoPalette,
+ kHistoTotal // Must be last.
+} HistoIx;
+
+static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) {
+ const uint32_t green = p >> 8; // The upper bits are masked away later.
+ ++r[((p >> 16) - green) & 0xff];
+ ++b[(p - green) & 0xff];
+}
- VP8LHistogramSet* const histo_set = VP8LAllocateHistogramSet(2, 0);
- if (histo_set == NULL) return 0;
+static void AddSingle(uint32_t p,
+ uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) {
+ ++a[p >> 24];
+ ++r[(p >> 16) & 0xff];
+ ++g[(p >> 8) & 0xff];
+ ++b[(p & 0xff)];
+}
- for (y = 0; y < height; ++y) {
- for (x = 0; x < width; ++x) {
- const uint32_t pix = argb[x];
- const uint32_t pix_diff = VP8LSubPixels(pix, last_pix);
- if (pix_diff == 0) continue;
- if (last_line != NULL && pix == last_line[x]) {
- continue;
+static int AnalyzeEntropy(const uint32_t* argb,
+ int width, int height, int argb_stride,
+ int use_palette,
+ EntropyIx* const min_entropy_ix,
+ int* const red_and_blue_always_zero) {
+ // Allocate histogram set with cache_bits = 0.
+ uint32_t* const histo =
+ (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256);
+ if (histo != NULL) {
+ int i, x, y;
+ const uint32_t* prev_row = argb;
+ const uint32_t* curr_row = argb + argb_stride;
+ for (y = 1; y < height; ++y) {
+ uint32_t prev_pix = curr_row[0];
+ for (x = 1; x < width; ++x) {
+ const uint32_t pix = curr_row[x];
+ const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix);
+ if ((pix_diff == 0) || (pix == prev_row[x])) continue;
+ prev_pix = pix;
+ AddSingle(pix,
+ &histo[kHistoAlpha * 256],
+ &histo[kHistoRed * 256],
+ &histo[kHistoGreen * 256],
+ &histo[kHistoBlue * 256]);
+ AddSingle(pix_diff,
+ &histo[kHistoAlphaPred * 256],
+ &histo[kHistoRedPred * 256],
+ &histo[kHistoGreenPred * 256],
+ &histo[kHistoBluePred * 256]);
+ AddSingleSubGreen(pix,
+ &histo[kHistoRedSubGreen * 256],
+ &histo[kHistoBlueSubGreen * 256]);
+ AddSingleSubGreen(pix_diff,
+ &histo[kHistoRedPredSubGreen * 256],
+ &histo[kHistoBluePredSubGreen * 256]);
+ {
+ // Approximate the palette by the entropy of the multiplicative hash.
+ const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24;
+ ++histo[kHistoPalette * 256 + (hash & 0xff)];
+ }
+ }
+ prev_row = curr_row;
+ curr_row += argb_stride;
+ }
+ {
+ double entropy_comp[kHistoTotal];
+ double entropy[kNumEntropyIx];
+ EntropyIx k;
+ EntropyIx last_mode_to_analyze =
+ use_palette ? kPalette : kSpatialSubGreen;
+ int j;
+ // Let's add one zero to the predicted histograms. The zeros are removed
+ // too efficiently by the pix_diff == 0 comparison, at least one of the
+ // zeros is likely to exist.
+ ++histo[kHistoRedPredSubGreen * 256];
+ ++histo[kHistoBluePredSubGreen * 256];
+ ++histo[kHistoRedPred * 256];
+ ++histo[kHistoGreenPred * 256];
+ ++histo[kHistoBluePred * 256];
+ ++histo[kHistoAlphaPred * 256];
+
+ for (j = 0; j < kHistoTotal; ++j) {
+ entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL);
}
- last_pix = pix;
+ entropy[kDirect] = entropy_comp[kHistoAlpha] +
+ entropy_comp[kHistoRed] +
+ entropy_comp[kHistoGreen] +
+ entropy_comp[kHistoBlue];
+ entropy[kSpatial] = entropy_comp[kHistoAlphaPred] +
+ entropy_comp[kHistoRedPred] +
+ entropy_comp[kHistoGreenPred] +
+ entropy_comp[kHistoBluePred];
+ entropy[kSubGreen] = entropy_comp[kHistoAlpha] +
+ entropy_comp[kHistoRedSubGreen] +
+ entropy_comp[kHistoGreen] +
+ entropy_comp[kHistoBlueSubGreen];
+ entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] +
+ entropy_comp[kHistoRedPredSubGreen] +
+ entropy_comp[kHistoGreenPred] +
+ entropy_comp[kHistoBluePredSubGreen];
+ // Palette mode seems more efficient in a breakeven case. Bias with 1.0.
+ entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0;
+
+ *min_entropy_ix = kDirect;
+ for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
+ if (entropy[*min_entropy_ix] > entropy[k]) {
+ *min_entropy_ix = k;
+ }
+ }
+ *red_and_blue_always_zero = 1;
+ // Let's check if the histogram of the chosen entropy mode has
+ // non-zero red and blue values. If all are zero, we can later skip
+ // the cross color optimization.
{
- const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix);
- const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff);
- VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[0], &pix_token);
- VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[1],
- &pix_diff_token);
+ static const uint8_t kHistoPairs[5][2] = {
+ { kHistoRed, kHistoBlue },
+ { kHistoRedPred, kHistoBluePred },
+ { kHistoRedSubGreen, kHistoBlueSubGreen },
+ { kHistoRedPredSubGreen, kHistoBluePredSubGreen },
+ { kHistoRed, kHistoBlue }
+ };
+ const uint32_t* const red_histo =
+ &histo[256 * kHistoPairs[*min_entropy_ix][0]];
+ const uint32_t* const blue_histo =
+ &histo[256 * kHistoPairs[*min_entropy_ix][1]];
+ for (i = 1; i < 256; ++i) {
+ if ((red_histo[i] | blue_histo[i]) != 0) {
+ *red_and_blue_always_zero = 0;
+ break;
+ }
+ }
}
}
- last_line = argb;
- argb += argb_stride;
+ free(histo);
+ return 1;
+ } else {
+ return 0;
}
- *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[0]);
- *predicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[1]);
- VP8LFreeHistogramSet(histo_set);
- return 1;
}
-static int AnalyzeAndInit(VP8LEncoder* const enc, WebPImageHint image_hint) {
+static int GetHistoBits(int method, int use_palette, int width, int height) {
+ // Make tile size a function of encoding method (Range: 0 to 6).
+ int histo_bits = (use_palette ? 9 : 7) - method;
+ while (1) {
+ const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
+ VP8LSubSampleSize(height, histo_bits);
+ if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
+ ++histo_bits;
+ }
+ return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
+ (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
+}
+
+static int GetTransformBits(int method, int histo_bits) {
+ const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5;
+ return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits;
+}
+
+static int AnalyzeAndInit(VP8LEncoder* const enc) {
const WebPPicture* const pic = enc->pic_;
const int width = pic->width;
const int height = pic->height;
const int pix_cnt = width * height;
+ const WebPConfig* const config = enc->config_;
+ const int method = config->method;
+ const int low_effort = (config->method == 0);
// we round the block size up, so we're guaranteed to have
// at max MAX_REFS_BLOCK_PER_IMAGE blocks used:
int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
assert(pic != NULL && pic->argb != NULL);
+ enc->use_cross_color_ = 0;
+ enc->use_predict_ = 0;
+ enc->use_subtract_green_ = 0;
enc->use_palette_ =
- AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
+ AnalyzeAndCreatePalette(pic, low_effort,
+ enc->palette_, &enc->palette_size_);
- if (image_hint == WEBP_HINT_GRAPH) {
- if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
- enc->use_palette_ = 0;
- }
- }
+ // TODO(jyrki): replace the decision to be based on an actual estimate
+ // of entropy, or even spatial variance of entropy.
+ enc->histo_bits_ = GetHistoBits(method, enc->use_palette_,
+ pic->width, pic->height);
+ enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
- if (!enc->use_palette_) {
- if (image_hint == WEBP_HINT_PHOTO) {
- enc->use_predict_ = 1;
- enc->use_cross_color_ = 1;
- } else {
- double non_pred_entropy, pred_entropy;
- if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride,
- &non_pred_entropy, &pred_entropy)) {
- return 0;
- }
- if (pred_entropy < 0.95 * non_pred_entropy) {
- enc->use_predict_ = 1;
- enc->use_cross_color_ = 1;
- }
+ if (low_effort) {
+ // AnalyzeEntropy is somewhat slow.
+ enc->use_predict_ = !enc->use_palette_;
+ enc->use_subtract_green_ = !enc->use_palette_;
+ enc->use_cross_color_ = 0;
+ } else {
+ int red_and_blue_always_zero;
+ EntropyIx min_entropy_ix;
+ if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride,
+ enc->use_palette_, &min_entropy_ix,
+ &red_and_blue_always_zero)) {
+ return 0;
}
+ enc->use_palette_ = (min_entropy_ix == kPalette);
+ enc->use_subtract_green_ =
+ (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen);
+ enc->use_predict_ =
+ (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen);
+ enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
}
+
if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
// palette-friendly input typically uses less literals
@@ -271,9 +509,9 @@ static void StoreHuffmanTreeOfHuffmanTreeToBitMask(
break;
}
}
- VP8LWriteBits(bw, 4, codes_to_store - 4);
+ VP8LPutBits(bw, codes_to_store - 4, 4);
for (i = 0; i < codes_to_store; ++i) {
- VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]);
+ VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[i]], 3);
}
}
@@ -301,16 +539,16 @@ static void StoreHuffmanTreeToBitMask(
for (i = 0; i < num_tokens; ++i) {
const int ix = tokens[i].code;
const int extra_bits = tokens[i].extra_bits;
- VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]);
+ VP8LPutBits(bw, huffman_code->codes[ix], huffman_code->code_lengths[ix]);
switch (ix) {
case 16:
- VP8LWriteBits(bw, 2, extra_bits);
+ VP8LPutBits(bw, extra_bits, 2);
break;
case 17:
- VP8LWriteBits(bw, 3, extra_bits);
+ VP8LPutBits(bw, extra_bits, 3);
break;
case 18:
- VP8LWriteBits(bw, 7, extra_bits);
+ VP8LPutBits(bw, extra_bits, 7);
break;
}
}
@@ -330,7 +568,7 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
huffman_code.code_lengths = code_length_bitdepth;
huffman_code.codes = code_length_bitdepth_symbols;
- VP8LWriteBits(bw, 1, 0);
+ VP8LPutBits(bw, 0, 1);
num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
{
uint32_t histogram[CODE_LENGTH_CODES] = { 0 };
@@ -367,13 +605,13 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
}
write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
length = write_trimmed_length ? trimmed_length : num_tokens;
- VP8LWriteBits(bw, 1, write_trimmed_length);
+ VP8LPutBits(bw, write_trimmed_length, 1);
if (write_trimmed_length) {
const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
- VP8LWriteBits(bw, 3, nbitpairs - 1);
+ VP8LPutBits(bw, nbitpairs - 1, 3);
assert(trimmed_length >= 2);
- VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2);
+ VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2);
}
StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
}
@@ -400,31 +638,42 @@ static void StoreHuffmanCode(VP8LBitWriter* const bw,
if (count == 0) { // emit minimal tree for empty cases
// bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
- VP8LWriteBits(bw, 4, 0x01);
+ VP8LPutBits(bw, 0x01, 4);
} else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) {
- VP8LWriteBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols.
- VP8LWriteBits(bw, 1, count - 1);
+ VP8LPutBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols.
+ VP8LPutBits(bw, count - 1, 1);
if (symbols[0] <= 1) {
- VP8LWriteBits(bw, 1, 0); // Code bit for small (1 bit) symbol value.
- VP8LWriteBits(bw, 1, symbols[0]);
+ VP8LPutBits(bw, 0, 1); // Code bit for small (1 bit) symbol value.
+ VP8LPutBits(bw, symbols[0], 1);
} else {
- VP8LWriteBits(bw, 1, 1);
- VP8LWriteBits(bw, 8, symbols[0]);
+ VP8LPutBits(bw, 1, 1);
+ VP8LPutBits(bw, symbols[0], 8);
}
if (count == 2) {
- VP8LWriteBits(bw, 8, symbols[1]);
+ VP8LPutBits(bw, symbols[1], 8);
}
} else {
StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code);
}
}
-static void WriteHuffmanCode(VP8LBitWriter* const bw,
+static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw,
const HuffmanTreeCode* const code,
int code_index) {
const int depth = code->code_lengths[code_index];
const int symbol = code->codes[code_index];
- VP8LWriteBits(bw, depth, symbol);
+ VP8LPutBits(bw, symbol, depth);
+}
+
+static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
+ VP8LBitWriter* const bw,
+ const HuffmanTreeCode* const code,
+ int code_index,
+ int bits,
+ int n_bits) {
+ const int depth = code->code_lengths[code_index];
+ const int symbol = code->codes[code_index];
+ VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits);
}
static WebPEncodingError StoreImageToBitMask(
@@ -432,40 +681,51 @@ static WebPEncodingError StoreImageToBitMask(
VP8LBackwardRefs* const refs,
const uint16_t* histogram_symbols,
const HuffmanTreeCode* const huffman_codes) {
+ const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+ const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
// x and y trace the position in the image.
int x = 0;
int y = 0;
- const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+ int tile_x = x & tile_mask;
+ int tile_y = y & tile_mask;
+ int histogram_ix = histogram_symbols[0];
+ const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix;
VP8LRefsCursor c = VP8LRefsCursorInit(refs);
while (VP8LRefsCursorOk(&c)) {
const PixOrCopy* const v = c.cur_pos;
- const int histogram_ix = histogram_symbols[histo_bits ?
- (y >> histo_bits) * histo_xsize +
- (x >> histo_bits) : 0];
- const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix;
- if (PixOrCopyIsCacheIdx(v)) {
- const int code = PixOrCopyCacheIdx(v);
- const int literal_ix = 256 + NUM_LENGTH_CODES + code;
- WriteHuffmanCode(bw, codes, literal_ix);
- } else if (PixOrCopyIsLiteral(v)) {
+ if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) {
+ tile_x = x & tile_mask;
+ tile_y = y & tile_mask;
+ histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize +
+ (x >> histo_bits)];
+ codes = huffman_codes + 5 * histogram_ix;
+ }
+ if (PixOrCopyIsLiteral(v)) {
static const int order[] = { 1, 2, 0, 3 };
int k;
for (k = 0; k < 4; ++k) {
const int code = PixOrCopyLiteral(v, order[k]);
WriteHuffmanCode(bw, codes + k, code);
}
+ } else if (PixOrCopyIsCacheIdx(v)) {
+ const int code = PixOrCopyCacheIdx(v);
+ const int literal_ix = 256 + NUM_LENGTH_CODES + code;
+ WriteHuffmanCode(bw, codes, literal_ix);
} else {
int bits, n_bits;
- int code, distance;
+ int code;
+ const int distance = PixOrCopyDistance(v);
VP8LPrefixEncode(v->len, &code, &n_bits, &bits);
- WriteHuffmanCode(bw, codes, 256 + code);
- VP8LWriteBits(bw, n_bits, bits);
+ WriteHuffmanCodeWithExtraBits(bw, codes, 256 + code, bits, n_bits);
- distance = PixOrCopyDistance(v);
+ // Don't write the distance with the extra bits code since
+ // the distance can be up to 18 bits of extra bits, and the prefix
+ // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits.
+ // TODO(jyrki): optimize this further.
VP8LPrefixEncode(distance, &code, &n_bits, &bits);
WriteHuffmanCode(bw, codes + 4, code);
- VP8LWriteBits(bw, n_bits, bits);
+ VP8LPutBits(bw, bits, n_bits);
}
x += PixOrCopyLength(v);
while (x >= width) {
@@ -491,21 +751,28 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
HuffmanTreeToken* tokens = NULL;
HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };
const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol
- VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0);
+ int cache_bits = 0;
+ VP8LHistogramSet* histogram_image = NULL;
HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
- if (histogram_image == NULL || huff_tree == NULL) {
+ if (huff_tree == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Calculate backward references from ARGB image.
- refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, 1,
+ refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits,
hash_chain, refs_array);
if (refs == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
+ histogram_image = VP8LAllocateHistogramSet(1, cache_bits);
+ if (histogram_image == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
// Build histogram image and symbols from backward references.
VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);
@@ -517,7 +784,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
}
// No color cache, no Huffman image.
- VP8LWriteBits(bw, 1, 0);
+ VP8LPutBits(bw, 0, 1);
// Find maximum number of symbols for the huffman tree-set.
for (i = 0; i < 5; ++i) {
@@ -557,16 +824,17 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
VP8LHashChain* const hash_chain,
VP8LBackwardRefs refs_array[2],
int width, int height, int quality,
- int cache_bits,
- int histogram_bits) {
+ int low_effort, int* cache_bits,
+ int histogram_bits,
+ size_t init_byte_position,
+ int* const hdr_size,
+ int* const data_size) {
WebPEncodingError err = VP8_ENC_OK;
- const int use_2d_locality = 1;
- const int use_color_cache = (cache_bits > 0);
const uint32_t histogram_image_xysize =
VP8LSubSampleSize(width, histogram_bits) *
VP8LSubSampleSize(height, histogram_bits);
- VP8LHistogramSet* histogram_image =
- VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits);
+ VP8LHistogramSet* histogram_image = NULL;
+ VP8LHistogramSet* tmp_histos = NULL;
int histogram_image_size = 0;
size_t bit_array_size = 0;
HuffmanTree* huff_tree = NULL;
@@ -579,28 +847,39 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
sizeof(*histogram_symbols));
assert(histogram_bits >= MIN_HUFFMAN_BITS);
assert(histogram_bits <= MAX_HUFFMAN_BITS);
+ assert(hdr_size != NULL);
+ assert(data_size != NULL);
VP8LBackwardRefsInit(&refs, refs_array[0].block_size_);
- if (histogram_image == NULL || histogram_symbols == NULL) {
- VP8LFreeHistogramSet(histogram_image);
- WebPSafeFree(histogram_symbols);
- return 0;
+ if (histogram_symbols == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
}
+ *cache_bits = MAX_COLOR_CACHE_BITS;
// 'best_refs' is the reference to the best backward refs and points to one
// of refs_array[0] or refs_array[1].
// Calculate backward references from ARGB image.
best_refs = VP8LGetBackwardReferences(width, height, argb, quality,
- cache_bits, use_2d_locality,
- hash_chain, refs_array);
+ low_effort, cache_bits, hash_chain,
+ refs_array);
if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
+ histogram_image =
+ VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
+ tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits);
+ if (histogram_image == NULL || tmp_histos == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+
// Build histogram image and symbols from backward references.
- if (!VP8LGetHistoImageSymbols(width, height, &refs,
- quality, histogram_bits, cache_bits,
- histogram_image,
- histogram_symbols)) {
+ if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort,
+ histogram_bits, *cache_bits, histogram_image,
+ tmp_histos, histogram_symbols)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Create Huffman bit lengths and codes for each histogram image.
@@ -608,41 +887,53 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
bit_array_size = 5 * histogram_image_size;
huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
sizeof(*huffman_codes));
+ // Note: some histogram_image entries may point to tmp_histos[], so the latter
+ // need to outlive the following call to GetHuffBitLengthsAndCodes().
if (huffman_codes == NULL ||
!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Free combined histograms.
VP8LFreeHistogramSet(histogram_image);
histogram_image = NULL;
+ // Free scratch histograms.
+ VP8LFreeHistogramSet(tmp_histos);
+ tmp_histos = NULL;
+
// Color Cache parameters.
- VP8LWriteBits(bw, 1, use_color_cache);
- if (use_color_cache) {
- VP8LWriteBits(bw, 4, cache_bits);
+ if (*cache_bits > 0) {
+ VP8LPutBits(bw, 1, 1);
+ VP8LPutBits(bw, *cache_bits, 4);
+ } else {
+ VP8LPutBits(bw, 0, 1);
}
// Huffman image + meta huffman.
{
const int write_histogram_image = (histogram_image_size > 1);
- VP8LWriteBits(bw, 1, write_histogram_image);
+ VP8LPutBits(bw, write_histogram_image, 1);
if (write_histogram_image) {
uint32_t* const histogram_argb =
(uint32_t*)WebPSafeMalloc(histogram_image_xysize,
sizeof(*histogram_argb));
int max_index = 0;
uint32_t i;
- if (histogram_argb == NULL) goto Error;
+ if (histogram_argb == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
for (i = 0; i < histogram_image_xysize; ++i) {
const int symbol_index = histogram_symbols[i] & 0xffff;
- histogram_argb[i] = 0xff000000 | (symbol_index << 8);
+ histogram_argb[i] = (symbol_index << 8);
if (symbol_index >= max_index) {
max_index = symbol_index + 1;
}
}
histogram_image_size = max_index;
- VP8LWriteBits(bw, 3, histogram_bits - 2);
+ VP8LPutBits(bw, histogram_bits - 2, 3);
err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array,
VP8LSubSampleSize(width, histogram_bits),
VP8LSubSampleSize(height, histogram_bits),
@@ -658,7 +949,10 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
int max_tokens = 0;
huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES,
sizeof(*huff_tree));
- if (huff_tree == NULL) goto Error;
+ if (huff_tree == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
// Find maximum number of symbols for the huffman tree-set.
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
@@ -668,7 +962,10 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
}
tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens,
sizeof(*tokens));
- if (tokens == NULL) goto Error;
+ if (tokens == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
StoreHuffmanCode(bw, huff_tree, tokens, codes);
@@ -676,14 +973,18 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
}
}
+ *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
// Store actual literals.
err = StoreImageToBitMask(bw, width, histogram_bits, &refs,
histogram_symbols, huffman_codes);
+ *data_size =
+ (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
Error:
WebPSafeFree(tokens);
WebPSafeFree(huff_tree);
VP8LFreeHistogramSet(histogram_image);
+ VP8LFreeHistogramSet(tmp_histos);
VP8LBackwardRefsClear(&refs);
if (huffman_codes != NULL) {
WebPSafeFree(huffman_codes->codes);
@@ -696,59 +997,28 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
// -----------------------------------------------------------------------------
// Transforms
-// Check if it would be a good idea to subtract green from red and blue. We
-// only impact entropy in red/blue components, don't bother to look at others.
-static WebPEncodingError EvalAndApplySubtractGreen(VP8LEncoder* const enc,
- int width, int height,
- VP8LBitWriter* const bw) {
- if (!enc->use_palette_) {
- int i;
- const uint32_t* const argb = enc->argb_;
- double bit_cost_before, bit_cost_after;
- // Allocate histogram with cache_bits = 1.
- VP8LHistogram* const histo = VP8LAllocateHistogram(1);
- if (histo == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
- for (i = 0; i < width * height; ++i) {
- const uint32_t c = argb[i];
- ++histo->red_[(c >> 16) & 0xff];
- ++histo->blue_[(c >> 0) & 0xff];
- }
- bit_cost_before = VP8LHistogramEstimateBits(histo);
-
- VP8LHistogramInit(histo, 1);
- for (i = 0; i < width * height; ++i) {
- const uint32_t c = argb[i];
- const int green = (c >> 8) & 0xff;
- ++histo->red_[((c >> 16) - green) & 0xff];
- ++histo->blue_[((c >> 0) - green) & 0xff];
- }
- bit_cost_after = VP8LHistogramEstimateBits(histo);
- VP8LFreeHistogram(histo);
-
- // Check if subtracting green yields low entropy.
- enc->use_subtract_green_ = (bit_cost_after < bit_cost_before);
- if (enc->use_subtract_green_) {
- VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
- VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
- VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
- }
- }
- return VP8_ENC_OK;
+static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
+ VP8LBitWriter* const bw) {
+ VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+ VP8LPutBits(bw, SUBTRACT_GREEN, 2);
+ VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
}
static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
- int width, int height, int quality,
+ int width, int height,
+ int quality, int low_effort,
VP8LBitWriter* const bw) {
const int pred_bits = enc->transform_bits_;
const int transform_width = VP8LSubSampleSize(width, pred_bits);
const int transform_height = VP8LSubSampleSize(height, pred_bits);
- VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_,
- enc->transform_data_);
- VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
- VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM);
+ VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_,
+ enc->argb_scratch_, enc->transform_data_,
+ enc->config_->exact);
+ VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+ VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
assert(pred_bits >= 2);
- VP8LWriteBits(bw, 3, pred_bits - 2);
+ VP8LPutBits(bw, pred_bits - 2, 3);
return EncodeImageNoHuffman(bw, enc->transform_data_,
(VP8LHashChain*)&enc->hash_chain_,
(VP8LBackwardRefs*)enc->refs_, // cast const away
@@ -766,10 +1036,10 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
enc->argb_, enc->transform_data_);
- VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
- VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
+ VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+ VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
assert(ccolor_transform_bits >= 2);
- VP8LWriteBits(bw, 3, ccolor_transform_bits - 2);
+ VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
return EncodeImageNoHuffman(bw, enc->transform_data_,
(VP8LHashChain*)&enc->hash_chain_,
(VP8LBackwardRefs*)enc->refs_, // cast const away
@@ -799,14 +1069,14 @@ static int WriteImageSize(const WebPPicture* const pic,
const int height = pic->height - 1;
assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
- VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width);
- VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height);
+ VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS);
+ VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS);
return !bw->error_;
}
static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
- VP8LWriteBits(bw, 1, has_alpha);
- VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION);
+ VP8LPutBits(bw, has_alpha, 1);
+ VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS);
return !bw->error_;
}
@@ -846,39 +1116,107 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic,
// Allocates the memory for argb (W x H) buffer, 2 rows of context for
// prediction and transform data.
+// Flags influencing the memory allocated:
+// enc->transform_bits_
+// enc->use_predict_, enc->use_cross_color_
static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
int width, int height) {
WebPEncodingError err = VP8_ENC_OK;
- const int tile_size = 1 << enc->transform_bits_;
- const uint64_t image_size = width * height;
- const uint64_t argb_scratch_size = tile_size * width + width;
- const int transform_data_size =
- VP8LSubSampleSize(width, enc->transform_bits_) *
- VP8LSubSampleSize(height, enc->transform_bits_);
- const uint64_t total_size =
- image_size + argb_scratch_size + (uint64_t)transform_data_size;
- uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
- if (mem == NULL) {
- err = VP8_ENC_ERROR_OUT_OF_MEMORY;
- goto Error;
+ if (enc->argb_ == NULL) {
+ const int tile_size = 1 << enc->transform_bits_;
+ const uint64_t image_size = width * height;
+ // Ensure enough size for tiles, as well as for two scanlines and two
+ // extra pixels for CopyImageWithPrediction.
+ const uint64_t argb_scratch_size =
+ enc->use_predict_ ? tile_size * width + width + 2 : 0;
+ const int transform_data_size =
+ (enc->use_predict_ || enc->use_cross_color_)
+ ? VP8LSubSampleSize(width, enc->transform_bits_) *
+ VP8LSubSampleSize(height, enc->transform_bits_)
+ : 0;
+ const uint64_t total_size =
+ image_size + WEBP_ALIGN_CST +
+ argb_scratch_size + WEBP_ALIGN_CST +
+ (uint64_t)transform_data_size;
+ uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+ if (mem == NULL) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
+ enc->argb_ = mem;
+ mem = (uint32_t*)WEBP_ALIGN(mem + image_size);
+ enc->argb_scratch_ = mem;
+ mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size);
+ enc->transform_data_ = mem;
+ enc->current_width_ = width;
}
- enc->argb_ = mem;
- mem += image_size;
- enc->argb_scratch_ = mem;
- mem += argb_scratch_size;
- enc->transform_data_ = mem;
- enc->current_width_ = width;
-
Error:
return err;
}
-static void ApplyPalette(uint32_t* src, uint32_t* dst,
- uint32_t src_stride, uint32_t dst_stride,
- const uint32_t* palette, int palette_size,
- int width, int height, int xbits, uint8_t* row) {
+static void ClearTransformBuffer(VP8LEncoder* const enc) {
+ WebPSafeFree(enc->argb_);
+ enc->argb_ = NULL;
+}
+
+static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
+ WebPEncodingError err = VP8_ENC_OK;
+ const WebPPicture* const picture = enc->pic_;
+ const int width = picture->width;
+ const int height = picture->height;
+ int y;
+ err = AllocateTransformBuffer(enc, width, height);
+ if (err != VP8_ENC_OK) return err;
+ for (y = 0; y < height; ++y) {
+ memcpy(enc->argb_ + y * width,
+ picture->argb + y * picture->argb_stride,
+ width * sizeof(*enc->argb_));
+ }
+ assert(enc->current_width_ == width);
+ return VP8_ENC_OK;
+}
+
+// -----------------------------------------------------------------------------
+
+static void MapToPalette(const uint32_t palette[], int num_colors,
+ uint32_t* const last_pix, int* const last_idx,
+ const uint32_t* src, uint8_t* dst, int width) {
+ int x;
+ int prev_idx = *last_idx;
+ uint32_t prev_pix = *last_pix;
+ for (x = 0; x < width; ++x) {
+ const uint32_t pix = src[x];
+ if (pix != prev_pix) {
+ int i;
+ for (i = 0; i < num_colors; ++i) {
+ if (pix == palette[i]) {
+ prev_idx = i;
+ prev_pix = pix;
+ break;
+ }
+ }
+ }
+ dst[x] = prev_idx;
+ }
+ *last_idx = prev_idx;
+ *last_pix = prev_pix;
+}
+
+// Remap argb values in src[] to packed palettes entries in dst[]
+// using 'row' as a temporary buffer of size 'width'.
+// We assume that all src[] values have a corresponding entry in the palette.
+// Note: src[] can be the same as dst[]
+static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride,
+ uint32_t* dst, uint32_t dst_stride,
+ const uint32_t* palette, int palette_size,
+ int width, int height, int xbits) {
+ // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be
+ // made to work in-place.
+ uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row));
int i, x, y;
int use_LUT = 1;
+
+ if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
for (i = 0; i < palette_size; ++i) {
if ((palette[i] & 0xffff00ffu) != 0) {
use_LUT = 0;
@@ -895,9 +1233,9 @@ static void ApplyPalette(uint32_t* src, uint32_t* dst,
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const int color = (src[x] >> 8) & 0xff;
- row[x] = inv_palette[color];
+ tmp_row[x] = inv_palette[color];
}
- VP8LBundleColorMap(row, width, xbits, dst);
+ VP8LBundleColorMap(tmp_row, width, xbits, dst);
src += src_stride;
dst += dst_stride;
}
@@ -906,41 +1244,28 @@ static void ApplyPalette(uint32_t* src, uint32_t* dst,
uint32_t last_pix = palette[0];
int last_idx = 0;
for (y = 0; y < height; ++y) {
- for (x = 0; x < width; ++x) {
- const uint32_t pix = src[x];
- if (pix != last_pix) {
- for (i = 0; i < palette_size; ++i) {
- if (pix == palette[i]) {
- last_idx = i;
- last_pix = pix;
- break;
- }
- }
- }
- row[x] = last_idx;
- }
- VP8LBundleColorMap(row, width, xbits, dst);
+ MapToPalette(palette, palette_size, &last_pix, &last_idx,
+ src, tmp_row, width);
+ VP8LBundleColorMap(tmp_row, width, xbits, dst);
src += src_stride;
dst += dst_stride;
}
}
+ WebPSafeFree(tmp_row);
+ return VP8_ENC_OK;
}
// Note: Expects "enc->palette_" to be set properly.
-// Also, "enc->palette_" will be modified after this call and should not be used
-// later.
-static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
- VP8LEncoder* const enc, int quality) {
+static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc,
+ int in_place) {
WebPEncodingError err = VP8_ENC_OK;
- int i;
const WebPPicture* const pic = enc->pic_;
- uint32_t* src = pic->argb;
- uint32_t* dst;
const int width = pic->width;
const int height = pic->height;
- uint32_t* const palette = enc->palette_;
+ const uint32_t* const palette = enc->palette_;
+ const uint32_t* src = in_place ? enc->argb_ : pic->argb;
+ const int src_stride = in_place ? enc->current_width_ : pic->argb_stride;
const int palette_size = enc->palette_size_;
- uint8_t* row = NULL;
int xbits;
// Replace each input pixel by corresponding palette index.
@@ -952,67 +1277,74 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
}
err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
- if (err != VP8_ENC_OK) goto Error;
- dst = enc->argb_;
-
- row = (uint8_t*)WebPSafeMalloc(width, sizeof(*row));
- if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+ if (err != VP8_ENC_OK) return err;
- ApplyPalette(src, dst, pic->argb_stride, enc->current_width_,
- palette, palette_size, width, height, xbits, row);
+ err = ApplyPalette(src, src_stride,
+ enc->argb_, enc->current_width_,
+ palette, palette_size, width, height, xbits);
+ return err;
+}
- // Save palette to bitstream.
- VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
- VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
- assert(palette_size >= 1);
- VP8LWriteBits(bw, 8, palette_size - 1);
+// Save palette_[] to bitstream.
+static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
+ VP8LEncoder* const enc) {
+ int i;
+ uint32_t tmp_palette[MAX_PALETTE_SIZE];
+ const int palette_size = enc->palette_size_;
+ const uint32_t* const palette = enc->palette_;
+ VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+ VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2);
+ assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE);
+ VP8LPutBits(bw, palette_size - 1, 8);
for (i = palette_size - 1; i >= 1; --i) {
- palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+ tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
}
- err = EncodeImageNoHuffman(bw, palette, &enc->hash_chain_, enc->refs_,
- palette_size, 1, quality);
-
- Error:
- WebPSafeFree(row);
- return err;
+ tmp_palette[0] = palette[0];
+ return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_,
+ palette_size, 1, 20 /* quality */);
}
-// -----------------------------------------------------------------------------
+#ifdef WEBP_EXPERIMENTAL_FEATURES
-static int GetHistoBits(int method, int use_palette, int width, int height) {
- const int hist_size = VP8LGetHistogramSize(MAX_COLOR_CACHE_BITS);
- // Make tile size a function of encoding method (Range: 0 to 6).
- int histo_bits = (use_palette ? 9 : 7) - method;
- while (1) {
- const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
- VP8LSubSampleSize(height, histo_bits);
- if ((uint64_t)huff_image_size * hist_size <= MAX_HUFF_IMAGE_SIZE) break;
- ++histo_bits;
- }
- return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
- (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
-}
+static WebPEncodingError EncodeDeltaPalettePredictorImage(
+ VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) {
+ const WebPPicture* const pic = enc->pic_;
+ const int width = pic->width;
+ const int height = pic->height;
-static int GetTransformBits(int method, int histo_bits) {
- const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5;
- return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits;
-}
+ const int pred_bits = 5;
+ const int transform_width = VP8LSubSampleSize(width, pred_bits);
+ const int transform_height = VP8LSubSampleSize(height, pred_bits);
+ const int pred = 7; // default is Predictor7 (Top/Left Average)
+ const int tiles_per_row = VP8LSubSampleSize(width, pred_bits);
+ const int tiles_per_col = VP8LSubSampleSize(height, pred_bits);
+ uint32_t* predictors;
+ int tile_x, tile_y;
+ WebPEncodingError err = VP8_ENC_OK;
-static int GetCacheBits(float quality) {
- return (quality <= 25.f) ? 0 : 7;
-}
+ predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row,
+ sizeof(*predictors));
+ if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
-static void FinishEncParams(VP8LEncoder* const enc) {
- const WebPConfig* const config = enc->config_;
- const WebPPicture* const pic = enc->pic_;
- const int method = config->method;
- const float quality = config->quality;
- const int use_palette = enc->use_palette_;
- enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height);
- enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
- enc->cache_bits_ = GetCacheBits(quality);
+ for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+ for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+ predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
+ }
+ }
+
+ VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+ VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
+ VP8LPutBits(bw, pred_bits - 2, 3);
+ err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_,
+ (VP8LBackwardRefs*)enc->refs_, // cast const away
+ transform_width, transform_height,
+ quality);
+ WebPSafeFree(predictors);
+ return err;
}
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
// -----------------------------------------------------------------------------
// VP8LEncoder
@@ -1026,7 +1358,7 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
enc->config_ = config;
enc->pic_ = picture;
- VP8LDspInit();
+ VP8LEncDspInit();
return enc;
}
@@ -1036,7 +1368,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) {
VP8LHashChainClear(&enc->hash_chain_);
VP8LBackwardRefsClear(&enc->refs_[0]);
VP8LBackwardRefsClear(&enc->refs_[1]);
- WebPSafeFree(enc->argb_);
+ ClearTransformBuffer(enc);
WebPSafeFree(enc);
}
}
@@ -1049,10 +1381,15 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
VP8LBitWriter* const bw) {
WebPEncodingError err = VP8_ENC_OK;
const int quality = (int)config->quality;
+ const int low_effort = (config->method == 0);
const int width = picture->width;
const int height = picture->height;
VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
const size_t byte_position = VP8LBitWriterNumBytes(bw);
+ int use_near_lossless = 0;
+ int hdr_size = 0;
+ int data_size = 0;
+ int use_delta_palettization = 0;
if (enc == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
@@ -1062,70 +1399,83 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
// ---------------------------------------------------------------------------
// Analyze image (entropy, num_palettes etc)
- if (!AnalyzeAndInit(enc, config->image_hint)) {
+ if (!AnalyzeAndInit(enc)) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
- FinishEncParams(enc);
-
- if (enc->use_palette_) {
- err = EncodePalette(bw, enc, quality);
- if (err != VP8_ENC_OK) goto Error;
- // Color cache is disabled for palette.
- enc->cache_bits_ = 0;
+ // Apply near-lossless preprocessing.
+ use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100);
+ if (use_near_lossless) {
+ if (!VP8ApplyNearLossless(width, height, picture->argb,
+ config->near_lossless)) {
+ err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+ goto Error;
+ }
}
- // In case image is not packed.
- if (enc->argb_ == NULL) {
- int y;
- err = AllocateTransformBuffer(enc, width, height);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+ if (config->delta_palettization) {
+ enc->use_predict_ = 1;
+ enc->use_cross_color_ = 0;
+ enc->use_subtract_green_ = 0;
+ enc->use_palette_ = 1;
+ err = MakeInputImageCopy(enc);
if (err != VP8_ENC_OK) goto Error;
- assert(enc->argb_ != NULL);
- for (y = 0; y < height; ++y) {
- memcpy(enc->argb_ + y * width,
- picture->argb + y * picture->argb_stride,
- width * sizeof(*enc->argb_));
+ err = WebPSearchOptimalDeltaPalette(enc);
+ if (err != VP8_ENC_OK) goto Error;
+ if (enc->use_palette_) {
+ err = AllocateTransformBuffer(enc, width, height);
+ if (err != VP8_ENC_OK) goto Error;
+ err = EncodeDeltaPalettePredictorImage(bw, enc, quality);
+ if (err != VP8_ENC_OK) goto Error;
+ use_delta_palettization = 1;
}
- enc->current_width_ = width;
}
+#endif // WEBP_EXPERIMENTAL_FEATURES
- // ---------------------------------------------------------------------------
- // Apply transforms and write transform data.
-
- err = EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw);
- if (err != VP8_ENC_OK) goto Error;
-
- if (enc->use_predict_) {
- err = ApplyPredictFilter(enc, enc->current_width_, height, quality, bw);
+ // Encode palette
+ if (enc->use_palette_) {
+ err = EncodePalette(bw, enc);
if (err != VP8_ENC_OK) goto Error;
- }
-
- if (enc->use_cross_color_) {
- err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw);
+ err = MapImageFromPalette(enc, use_delta_palettization);
if (err != VP8_ENC_OK) goto Error;
}
+ if (!use_delta_palettization) {
+ // In case image is not packed.
+ if (enc->argb_ == NULL) {
+ err = MakeInputImageCopy(enc);
+ if (err != VP8_ENC_OK) goto Error;
+ }
- VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT); // No more transforms.
+ // -------------------------------------------------------------------------
+ // Apply transforms and write transform data.
- // ---------------------------------------------------------------------------
- // Estimate the color cache size.
+ if (enc->use_subtract_green_) {
+ ApplySubtractGreen(enc, enc->current_width_, height, bw);
+ }
- if (enc->cache_bits_ > 0) {
- if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_,
- height, quality, &enc->hash_chain_,
- &enc->refs_[0], &enc->cache_bits_)) {
- err = VP8_ENC_ERROR_OUT_OF_MEMORY;
- goto Error;
+ if (enc->use_predict_) {
+ err = ApplyPredictFilter(enc, enc->current_width_, height, quality,
+ low_effort, bw);
+ if (err != VP8_ENC_OK) goto Error;
+ }
+
+ if (enc->use_cross_color_) {
+ err = ApplyCrossColorFilter(enc, enc->current_width_,
+ height, quality, bw);
+ if (err != VP8_ENC_OK) goto Error;
}
}
+ VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms.
+
// ---------------------------------------------------------------------------
// Encode and write the transformed image.
-
err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
- enc->current_width_, height, quality,
- enc->cache_bits_, enc->histo_bits_);
+ enc->current_width_, height, quality, low_effort,
+ &enc->cache_bits_, enc->histo_bits_, byte_position,
+ &hdr_size, &data_size);
if (err != VP8_ENC_OK) goto Error;
if (picture->stats != NULL) {
@@ -1140,6 +1490,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
stats->cache_bits = enc->cache_bits_;
stats->palette_size = enc->palette_size_;
stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
+ stats->lossless_hdr_size = hdr_size;
+ stats->lossless_data_size = data_size;
}
Error:
@@ -1170,7 +1522,7 @@ int VP8LEncodeImage(const WebPConfig* const config,
// Initialize BitWriter with size corresponding to 16 bpp to photo images and
// 8 bpp for graphical images.
initial_size = (config->image_hint == WEBP_HINT_GRAPH) ?
- width * height : width * height * 2;
+ width * height : width * height * 2;
if (!VP8LBitWriterInit(&bw, initial_size)) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
@@ -1234,7 +1586,7 @@ int VP8LEncodeImage(const WebPConfig* const config,
Error:
if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;
- VP8LBitWriterDestroy(&bw);
+ VP8LBitWriterWipeOut(&bw);
if (err != VP8_ENC_OK) {
WebPEncodingSetError(picture, err);
return 0;
diff --git a/src/3rdparty/libwebp/src/enc/webpenc.c b/src/3rdparty/libwebp/src/enc/webpenc.c
index ca85e0b..fece736 100644
--- a/src/3rdparty/libwebp/src/enc/webpenc.c
+++ b/src/3rdparty/libwebp/src/enc/webpenc.c
@@ -16,9 +16,9 @@
#include <string.h>
#include <math.h>
+#include "./cost.h"
#include "./vp8enci.h"
#include "./vp8li.h"
-#include "./cost.h"
#include "../utils/utils.h"
// #define PRINT_MEMORY_INFO
@@ -38,14 +38,14 @@ int WebPGetEncoderVersion(void) {
//------------------------------------------------------------------------------
static void ResetSegmentHeader(VP8Encoder* const enc) {
- VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+ VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
hdr->num_segments_ = enc->config_->segments;
hdr->update_map_ = (hdr->num_segments_ > 1);
hdr->size_ = 0;
}
static void ResetFilterHeader(VP8Encoder* const enc) {
- VP8FilterHeader* const hdr = &enc->filter_hdr_;
+ VP8EncFilterHeader* const hdr = &enc->filter_hdr_;
hdr->simple_ = 1;
hdr->level_ = 0;
hdr->sharpness_ = 0;
@@ -79,7 +79,9 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
//-------------------+---+---+---+---+---+---+---+
// basic rd-opt | | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
-// disto-score i4/16 | | | x | | | | |
+// disto-refine i4/16| x | x | x | | | | |
+//-------------------+---+---+---+---+---+---+---+
+// disto-refine uv | | x | x | | | | |
//-------------------+---+---+---+---+---+---+---+
// rd-opt i4/16 | | | ~ | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
@@ -131,35 +133,36 @@ static void MapConfigToTools(VP8Encoder* const enc) {
// VP8EncIterator: 3360
// VP8ModeScore: 872
// VP8SegmentInfo: 732
-// VP8Proba: 18352
+// VP8EncProba: 18352
// LFStats: 2048
// Picture size (yuv): 419328
static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
WebPPicture* const picture) {
+ VP8Encoder* enc;
const int use_filter =
(config->filter_strength > 0) || (config->autofilter > 0);
const int mb_w = (picture->width + 15) >> 4;
const int mb_h = (picture->height + 15) >> 4;
const int preds_w = 4 * mb_w + 1;
const int preds_h = 4 * mb_h + 1;
- const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
+ const size_t preds_size = preds_w * preds_h * sizeof(*enc->preds_);
const int top_stride = mb_w * 16;
- const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + ALIGN_CST;
- const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
- const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v
- + ALIGN_CST; // align all
+ const size_t nz_size = (mb_w + 1) * sizeof(*enc->nz_) + WEBP_ALIGN_CST;
+ const size_t info_size = mb_w * mb_h * sizeof(*enc->mb_info_);
+ const size_t samples_size =
+ 2 * top_stride * sizeof(*enc->y_top_) // top-luma/u/v
+ + WEBP_ALIGN_CST; // align all
const size_t lf_stats_size =
- config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
- VP8Encoder* enc;
+ config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0;
uint8_t* mem;
- const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct
- + ALIGN_CST // cache alignment
- + info_size // modes info
- + preds_size // prediction modes
- + samples_size // top/left samples
- + nz_size // coeff context bits
- + lf_stats_size; // autofilter stats
+ const uint64_t size = (uint64_t)sizeof(*enc) // main struct
+ + WEBP_ALIGN_CST // cache alignment
+ + info_size // modes info
+ + preds_size // prediction modes
+ + samples_size // top/left samples
+ + nz_size // coeff context bits
+ + lf_stats_size; // autofilter stats
#ifdef PRINT_MEMORY_INFO
printf("===================================\n");
@@ -171,16 +174,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
" non-zero: %ld\n"
" lf-stats: %ld\n"
" total: %ld\n",
- sizeof(VP8Encoder) + ALIGN_CST, info_size,
+ sizeof(*enc) + WEBP_ALIGN_CST, info_size,
preds_size, samples_size, nz_size, lf_stats_size, size);
printf("Transient object sizes:\n"
" VP8EncIterator: %ld\n"
" VP8ModeScore: %ld\n"
" VP8SegmentInfo: %ld\n"
- " VP8Proba: %ld\n"
+ " VP8EncProba: %ld\n"
" LFStats: %ld\n",
sizeof(VP8EncIterator), sizeof(VP8ModeScore),
- sizeof(VP8SegmentInfo), sizeof(VP8Proba),
+ sizeof(VP8SegmentInfo), sizeof(VP8EncProba),
sizeof(LFStats));
printf("Picture size (yuv): %ld\n",
mb_w * mb_h * 384 * sizeof(uint8_t));
@@ -192,7 +195,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
return NULL;
}
enc = (VP8Encoder*)mem;
- mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc));
+ mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc));
memset(enc, 0, sizeof(*enc));
enc->num_parts_ = 1 << config->partitions;
enc->mb_w_ = mb_w;
@@ -201,14 +204,14 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
enc->mb_info_ = (VP8MBInfo*)mem;
mem += info_size;
enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
- mem += preds_w * preds_h * sizeof(uint8_t);
- enc->nz_ = 1 + (uint32_t*)DO_ALIGN(mem);
+ mem += preds_size;
+ enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem);
mem += nz_size;
- enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL;
+ enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL;
mem += lf_stats_size;
// top samples (all 16-aligned)
- mem = (uint8_t*)DO_ALIGN(mem);
+ mem = (uint8_t*)WEBP_ALIGN(mem);
enc->y_top_ = (uint8_t*)mem;
enc->uv_top_ = enc->y_top_ + top_stride;
mem += 2 * top_stride;
@@ -225,8 +228,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
ResetSegmentHeader(enc);
ResetFilterHeader(enc);
ResetBoundaryPredictions(enc);
- VP8GetResidualCostInit();
- VP8SetResidualCoeffsInit();
+ VP8EncDspCostInit();
VP8EncInitAlpha(enc);
// lower quality means smaller output -> we modulate a little the page
@@ -326,14 +328,17 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
if (!config->lossless) {
VP8Encoder* enc = NULL;
+
+ if (!config->exact) {
+ WebPCleanupTransparentArea(pic);
+ }
+
if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) {
// Make sure we have YUVA samples.
if (config->preprocessing & 4) {
-#if WEBP_ENCODER_ABI_VERSION > 0x0204
if (!WebPPictureSmartARGBToYUVA(pic)) {
return 0;
}
-#endif
} else {
float dithering = 0.f;
if (config->preprocessing & 2) {
@@ -375,6 +380,10 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
return 0;
}
+ if (!config->exact) {
+ WebPCleanupTransparentAreaLossless(pic);
+ }
+
ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem.
}