diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/enc')
27 files changed, 3040 insertions, 2011 deletions
diff --git a/src/3rdparty/libwebp/src/enc/alpha.c b/src/3rdparty/libwebp/src/enc/alpha.c index 79cb94d..3c970b0 100644 --- a/src/3rdparty/libwebp/src/enc/alpha.c +++ b/src/3rdparty/libwebp/src/enc/alpha.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../utils/filters.h" #include "../utils/quant_levels.h" #include "../utils/utils.h" @@ -61,21 +62,16 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, if (!WebPPictureAlloc(&picture)) return 0; // Transfer the alpha values to the green channel. - { - int i, j; - uint32_t* dst = picture.argb; - const uint8_t* src = data; - for (j = 0; j < picture.height; ++j) { - for (i = 0; i < picture.width; ++i) { - dst[i] = src[i] << 8; // we leave A/R/B channels zero'd. - } - src += width; - dst += picture.argb_stride; - } - } + WebPDispatchAlphaToGreen(data, width, picture.width, picture.height, + picture.argb, picture.argb_stride); WebPConfigInit(&config); config.lossless = 1; + // Enable exact, or it would alter RGB values of transparent alpha, which is + // normally OK but not here since we are not encoding the input image but an + // internal encoding-related image containing necessary exact information in + // RGB channels. + config.exact = 1; config.method = effort_level; // impact is very small // Set a low default quality for encoding alpha. Ensure that Alpha quality at // lower methods (3 and below) is less than the threshold for triggering @@ -87,11 +83,10 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, WebPPictureFree(&picture); ok = ok && !bw->error_; if (!ok) { - VP8LBitWriterDestroy(bw); + VP8LBitWriterWipeOut(bw); return 0; } return 1; - } // ----------------------------------------------------------------------------- @@ -143,10 +138,10 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, if (output_size > data_size) { // compressed size is larger than source! Revert to uncompressed mode. method = ALPHA_NO_COMPRESSION; - VP8LBitWriterDestroy(&tmp_bw); + VP8LBitWriterWipeOut(&tmp_bw); } } else { - VP8LBitWriterDestroy(&tmp_bw); + VP8LBitWriterWipeOut(&tmp_bw); return 0; } } @@ -166,7 +161,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); if (method != ALPHA_NO_COMPRESSION) { - VP8LBitWriterDestroy(&tmp_bw); + VP8LBitWriterWipeOut(&tmp_bw); } ok = ok && !result->bw.error_; result->score = VP8BitWriterSize(&result->bw); @@ -175,16 +170,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, // ----------------------------------------------------------------------------- -// TODO(skal): move to dsp/ ? -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; - } -} - static int GetNumColors(const uint8_t* data, int width, int height, int stride) { int j; @@ -218,8 +203,9 @@ static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height, const int kMaxColorsForFilterNone = 192; const int num_colors = GetNumColors(alpha, width, height, width); // For low number of colors, NONE yields better compression. - filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE : - EstimateBestFilter(alpha, width, height, width); + filter = (num_colors <= kMinColorsForFilterNone) + ? WEBP_FILTER_NONE + : WebPEstimateBestFilter(alpha, width, height, width); bit_map |= 1 << filter; // For large number of colors, try FILTER_NONE in addition to the best // filter as well. @@ -250,6 +236,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, uint32_t try_map = GetFilterMap(alpha, width, height, filter, effort_level); InitFilterTrial(&best); + if (try_map != FILTER_TRY_NONE) { uint8_t* filtered_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (filtered_alpha == NULL) return 0; @@ -274,7 +261,16 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, reduce_levels, effort_level, NULL, &best); } if (ok) { - if (stats != NULL) *stats = best.stats; + if (stats != NULL) { + stats->lossless_features = best.stats.lossless_features; + stats->histogram_bits = best.stats.histogram_bits; + stats->transform_bits = best.stats.transform_bits; + stats->cache_bits = best.stats.cache_bits; + stats->palette_size = best.stats.palette_size; + stats->lossless_size = best.stats.lossless_size; + stats->lossless_hdr_size = best.stats.lossless_hdr_size; + stats->lossless_data_size = best.stats.lossless_data_size; + } *output_size = VP8BitWriterSize(&best.bw); *output = VP8BitWriterBuf(&best.bw); } else { @@ -324,7 +320,7 @@ static int EncodeAlpha(VP8Encoder* const enc, } // Extract alpha data (width x height) from raw_data (stride x height). - CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height); + WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height); if (reduce_levels) { // No Quantization required for 'quality = 100'. // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence @@ -336,6 +332,7 @@ static int EncodeAlpha(VP8Encoder* const enc, } if (ok) { + VP8FiltersInit(); ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, filter, reduce_levels, effort_level, output, output_size, pic->stats); @@ -376,6 +373,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { } void VP8EncInitAlpha(VP8Encoder* const enc) { + WebPInitAlphaProcessing(); enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); enc->alpha_data_ = NULL; enc->alpha_data_size_ = 0; @@ -430,4 +428,3 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) { enc->has_alpha_ = 0; return ok; } - diff --git a/src/3rdparty/libwebp/src/enc/analysis.c b/src/3rdparty/libwebp/src/enc/analysis.c index e019465..b55128f 100644 --- a/src/3rdparty/libwebp/src/enc/analysis.c +++ b/src/3rdparty/libwebp/src/enc/analysis.c @@ -111,28 +111,28 @@ static int FinalAlphaValue(int alpha) { } static int GetAlpha(const VP8Histogram* const histo) { - int max_value = 0, last_non_zero = 1; - int k; - int alpha; - for (k = 0; k <= MAX_COEFF_THRESH; ++k) { - const int value = histo->distribution[k]; - if (value > 0) { - if (value > max_value) max_value = value; - last_non_zero = k; - } - } // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer // values which happen to be mostly noise. This leaves the maximum precision // for handling the useful small values which contribute most. - alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; + const int max_value = histo->max_value; + const int last_non_zero = histo->last_non_zero; + const int alpha = + (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; return alpha; } +static void InitHistogram(VP8Histogram* const histo) { + histo->max_value = 0; + histo->last_non_zero = 1; +} + static void MergeHistograms(const VP8Histogram* const in, VP8Histogram* const out) { - int i; - for (i = 0; i <= MAX_COEFF_THRESH; ++i) { - out->distribution[i] += in->distribution[i]; + if (in->max_value > out->max_value) { + out->max_value = in->max_value; + } + if (in->last_non_zero > out->last_non_zero) { + out->last_non_zero = in->last_non_zero; } } @@ -245,10 +245,11 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { VP8MakeLuma16Preds(it); for (mode = 0; mode < max_mode; ++mode) { - VP8Histogram histo = { { 0 } }; + VP8Histogram histo; int alpha; - VP8CollectHistogram(it->yuv_in_ + Y_OFF, + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC, it->yuv_p_ + VP8I16ModeOffsets[mode], 0, 16, &histo); alpha = GetAlpha(&histo); @@ -266,21 +267,22 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, uint8_t modes[16]; const int max_mode = MAX_INTRA4_MODE; int i4_alpha; - VP8Histogram total_histo = { { 0 } }; + VP8Histogram total_histo; int cur_histo = 0; + InitHistogram(&total_histo); VP8IteratorStartI4(it); do { int mode; int best_mode_alpha = DEFAULT_ALPHA; VP8Histogram histos[2]; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); for (mode = 0; mode < max_mode; ++mode) { int alpha; - memset(&histos[cur_histo], 0, sizeof(histos[cur_histo])); + InitHistogram(&histos[cur_histo]); VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], 0, 1, &histos[cur_histo]); alpha = GetAlpha(&histos[cur_histo]); @@ -293,7 +295,7 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, // accumulate best histogram MergeHistograms(&histos[cur_histo ^ 1], &total_histo); // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); + } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); i4_alpha = GetAlpha(&total_histo); if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { @@ -311,9 +313,10 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { VP8MakeChroma8Preds(it); for (mode = 0; mode < max_mode; ++mode) { - VP8Histogram histo = { { 0 } }; + VP8Histogram histo; int alpha; - VP8CollectHistogram(it->yuv_in_ + U_OFF, + InitHistogram(&histo); + VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC, it->yuv_p_ + VP8UVModeOffsets[mode], 16, 16 + 4 + 4, &histo); alpha = GetAlpha(&histo); @@ -402,8 +405,8 @@ typedef struct { static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) { int ok = 1; if (!VP8IteratorIsDone(it)) { - uint8_t tmp[32 + ALIGN_CST]; - uint8_t* const scratch = (uint8_t*)DO_ALIGN(tmp); + uint8_t tmp[32 + WEBP_ALIGN_CST]; + uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp); do { // Let's pretend we have perfect lossless reconstruction. VP8IteratorImport(it, scratch); diff --git a/src/3rdparty/libwebp/src/enc/backward_references.c b/src/3rdparty/libwebp/src/enc/backward_references.c index a3c30aa..c39437d 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.c +++ b/src/3rdparty/libwebp/src/enc/backward_references.c @@ -16,13 +16,12 @@ #include "./backward_references.h" #include "./histogram.h" #include "../dsp/lossless.h" +#include "../dsp/dsp.h" #include "../utils/color_cache.h" #include "../utils/utils.h" #define VALUES_IN_BYTE 256 -#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL) - #define MIN_BLOCK_SIZE 256 // minimum block size for backward references #define MAX_ENTROPY (1e30f) @@ -58,10 +57,28 @@ static int DistanceToPlaneCode(int xsize, int dist) { return dist + 120; } +// Returns the exact index where array1 and array2 are different if this +// index is strictly superior to best_len_match. Otherwise, it returns 0. +// If no two elements are the same, it returns max_limit. static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, const uint32_t* const array2, - const int max_limit) { - int match_len = 0; + int best_len_match, + int max_limit) { + int match_len; + + // Before 'expensive' linear match, check if the two arrays match at the + // current best length index. + if (array1[best_len_match] != array2[best_len_match]) return 0; + +#if defined(WEBP_USE_SSE2) + // Check if anything is different up to best_len_match excluded. + // memcmp seems to be slower on ARM so it is disabled for now. + if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0; + match_len = best_len_match + 1; +#else + match_len = 0; +#endif + while (match_len < max_limit && array1[match_len] == array2[match_len]) { ++match_len; } @@ -178,15 +195,12 @@ int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, // Hash chains // initialize as empty -static void HashChainInit(VP8LHashChain* const p) { - int i; +static void HashChainReset(VP8LHashChain* const p) { assert(p != NULL); - for (i = 0; i < p->size_; ++i) { - p->chain_[i] = -1; - } - for (i = 0; i < HASH_SIZE; ++i) { - p->hash_to_first_index_[i] = -1; - } + // Set the int32_t arrays to -1. + memset(p->chain_, 0xff, p->size_ * sizeof(*p->chain_)); + memset(p->hash_to_first_index_, 0xff, + HASH_SIZE * sizeof(*p->hash_to_first_index_)); } int VP8LHashChainInit(VP8LHashChain* const p, int size) { @@ -196,7 +210,7 @@ int VP8LHashChainInit(VP8LHashChain* const p, int size) { p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_)); if (p->chain_ == NULL) return 0; p->size_ = size; - HashChainInit(p); + HashChainReset(p); return 1; } @@ -209,209 +223,212 @@ void VP8LHashChainClear(VP8LHashChain* const p) { // ----------------------------------------------------------------------------- -static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) { - uint64_t key = ((uint64_t)argb[1] << 32) | argb[0]; - key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS); +#define HASH_MULTIPLIER_HI (0xc6a4a793U) +#define HASH_MULTIPLIER_LO (0x5bd1e996U) + +static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { + uint32_t key; + key = argb[1] * HASH_MULTIPLIER_HI; + key += argb[0] * HASH_MULTIPLIER_LO; + key = key >> (32 - HASH_BITS); return key; } // Insertion of two pixels at a time. static void HashChainInsert(VP8LHashChain* const p, const uint32_t* const argb, int pos) { - const uint64_t hash_code = GetPixPairHash64(argb); + const uint32_t hash_code = GetPixPairHash64(argb); p->chain_[pos] = p->hash_to_first_index_[hash_code]; p->hash_to_first_index_[hash_code] = pos; } -static void GetParamsForHashChainFindCopy(int quality, int xsize, - int cache_bits, int* window_size, - int* iter_pos, int* iter_limit) { - const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4); - const int iter_neg = -iter_mult * (quality >> 1); - // Limit the backward-ref window size for lower qualities. - const int max_window_size = (quality > 50) ? WINDOW_SIZE - : (quality > 25) ? (xsize << 8) +// Returns the maximum number of hash chain lookups to do for a +// given compression quality. Return value in range [6, 86]. +static int GetMaxItersForQuality(int quality, int low_effort) { + return (low_effort ? 6 : 8) + (quality * quality) / 128; +} + +static int GetWindowSizeForHashChain(int quality, int xsize) { + const int max_window_size = (quality > 75) ? WINDOW_SIZE + : (quality > 50) ? (xsize << 8) + : (quality > 25) ? (xsize << 6) : (xsize << 4); assert(xsize > 0); - *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE - : max_window_size; - *iter_pos = 8 + (quality >> 3); - // For lower entropy images, the rigorous search loop in HashChainFindCopy - // can be relaxed. - *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2; + return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size; +} + +static WEBP_INLINE int MaxFindCopyLength(int len) { + return (len < MAX_LENGTH) ? len : MAX_LENGTH; +} + +static void HashChainFindOffset(const VP8LHashChain* const p, int base_position, + const uint32_t* const argb, int len, + int window_size, int* const distance_ptr) { + const uint32_t* const argb_start = argb + base_position; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; + int pos; + assert(len <= MAX_LENGTH); + for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; + pos >= min_pos; + pos = p->chain_[pos]) { + const int curr_length = + FindMatchLength(argb + pos, argb_start, len - 1, len); + if (curr_length == len) break; + } + *distance_ptr = base_position - pos; } static int HashChainFindCopy(const VP8LHashChain* const p, - int base_position, int xsize_signed, + int base_position, const uint32_t* const argb, int max_len, - int window_size, int iter_pos, int iter_limit, + int window_size, int iter_max, int* const distance_ptr, int* const length_ptr) { const uint32_t* const argb_start = argb + base_position; - uint64_t best_val = 0; - uint32_t best_length = 1; - uint32_t best_distance = 0; - const uint32_t xsize = (uint32_t)xsize_signed; + int iter = iter_max; + int best_length = 0; + int best_distance = 0; const int min_pos = (base_position > window_size) ? base_position - window_size : 0; int pos; - assert(xsize > 0); - if (max_len > MAX_LENGTH) { - max_len = MAX_LENGTH; + int length_max = 256; + if (max_len < length_max) { + length_max = max_len; } for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; pos >= min_pos; pos = p->chain_[pos]) { - uint64_t val; - uint32_t curr_length; - uint32_t distance; - const uint32_t* const ptr1 = (argb + pos + best_length - 1); - const uint32_t* const ptr2 = (argb_start + best_length - 1); - - if (iter_pos < 0) { - if (iter_pos < iter_limit || best_val >= 0xff0000) { - break; - } + int curr_length; + int distance; + if (--iter < 0) { + break; } - --iter_pos; - - // Before 'expensive' linear match, check if the two arrays match at the - // current best length index and also for the succeeding elements. - if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue; - - curr_length = FindMatchLength(argb + pos, argb_start, max_len); - if (curr_length < best_length) continue; - - distance = (uint32_t)(base_position - pos); - val = curr_length << 16; - // Favoring 2d locality here gives savings for certain images. - if (distance < 9 * xsize) { - const uint32_t y = distance / xsize; - uint32_t x = distance % xsize; - if (x > (xsize >> 1)) { - x = xsize - x; - } - if (x <= 7) { - val += 9 * 9 + 9 * 9; - val -= y * y + x * x; - } - } - if (best_val < val) { - best_val = val; + + curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (best_length < curr_length) { + distance = base_position - pos; best_length = curr_length; best_distance = distance; - if (curr_length >= (uint32_t)max_len) { - break; - } - if ((best_distance == 1 || distance == xsize) && - best_length >= 128) { + if (curr_length >= length_max) { break; } } } - *distance_ptr = (int)best_distance; + *distance_ptr = best_distance; *length_ptr = best_length; return (best_length >= MIN_LENGTH); } -static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) { - while (length >= MAX_LENGTH) { - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH)); - length -= MAX_LENGTH; - } - if (length > 0) { - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length)); +static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, + VP8LColorCache* const hashers, + VP8LBackwardRefs* const refs) { + PixOrCopy v; + if (use_color_cache) { + const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel); + if (VP8LColorCacheLookup(hashers, key) == pixel) { + v = PixOrCopyCreateCacheIdx(key); + } else { + v = PixOrCopyCreateLiteral(pixel); + VP8LColorCacheSet(hashers, key, pixel); + } + } else { + v = PixOrCopyCreateLiteral(pixel); } + BackwardRefsCursorAdd(refs, v); } static int BackwardReferencesRle(int xsize, int ysize, const uint32_t* const argb, - VP8LBackwardRefs* const refs) { + int cache_bits, VP8LBackwardRefs* const refs) { const int pix_count = xsize * ysize; - int match_len = 0; - int i; + int i, k; + const int use_color_cache = (cache_bits > 0); + VP8LColorCache hashers; + + if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) { + return 0; + } ClearBackwardRefs(refs); - PushBackCopy(refs, match_len); // i=0 case - BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0])); - for (i = 1; i < pix_count; ++i) { - if (argb[i] == argb[i - 1]) { - ++match_len; + // Add first pixel as literal. + AddSingleLiteral(argb[0], use_color_cache, &hashers, refs); + i = 1; + while (i < pix_count) { + const int max_len = MaxFindCopyLength(pix_count - i); + const int kMinLength = 4; + const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len); + const int prev_row_len = (i < xsize) ? 0 : + FindMatchLength(argb + i, argb + i - xsize, 0, max_len); + if (rle_len >= prev_row_len && rle_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); + // We don't need to update the color cache here since it is always the + // same pixel being copied, and that does not change the color cache + // state. + i += rle_len; + } else if (prev_row_len >= kMinLength) { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); + if (use_color_cache) { + for (k = 0; k < prev_row_len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + i += prev_row_len; } else { - PushBackCopy(refs, match_len); - match_len = 0; - BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i])); + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + i++; } } - PushBackCopy(refs, match_len); + if (use_color_cache) VP8LColorCacheClear(&hashers); return !refs->error_; } -static int BackwardReferencesHashChain(int xsize, int ysize, - const uint32_t* const argb, - int cache_bits, int quality, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesLz77(int xsize, int ysize, + const uint32_t* const argb, int cache_bits, + int quality, int low_effort, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { int i; int ok = 0; int cc_init = 0; const int use_color_cache = (cache_bits > 0); const int pix_count = xsize * ysize; VP8LColorCache hashers; - int window_size = WINDOW_SIZE; - int iter_pos = 1; - int iter_limit = -1; + int iter_max = GetMaxItersForQuality(quality, low_effort); + const int window_size = GetWindowSizeForHashChain(quality, xsize); + int min_matches = 32; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - ClearBackwardRefs(refs); - GetParamsForHashChainFindCopy(quality, xsize, cache_bits, - &window_size, &iter_pos, &iter_limit); - HashChainInit(hash_chain); - for (i = 0; i < pix_count; ) { + HashChainReset(hash_chain); + for (i = 0; i < pix_count - 2; ) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; int len = 0; - if (i < pix_count - 1) { // FindCopy(i,..) reads pixels at [i] and [i + 1]. - int max_len = pix_count - i; - HashChainFindCopy(hash_chain, i, xsize, argb, max_len, - window_size, iter_pos, iter_limit, - &offset, &len); - } - if (len >= MIN_LENGTH) { - // Alternative#2: Insert the pixel at 'i' as literal, and code the - // pixels starting at 'i + 1' using backward reference. + const int max_len = MaxFindCopyLength(pix_count - i); + HashChainFindCopy(hash_chain, i, argb, max_len, window_size, + iter_max, &offset, &len); + if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) { int offset2 = 0; int len2 = 0; int k; + min_matches = 8; HashChainInsert(hash_chain, &argb[i], i); - if (i < pix_count - 2) { // FindCopy(i+1,..) reads [i + 1] and [i + 2]. - int max_len = pix_count - (i + 1); - HashChainFindCopy(hash_chain, i + 1, xsize, argb, max_len, - window_size, iter_pos, iter_limit, - &offset2, &len2); + if ((len < (max_len >> 2)) && !low_effort) { + // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code + // the pixels starting at 'i + 1' using backward reference. + HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1, + window_size, iter_max, &offset2, + &len2); if (len2 > len + 1) { - const uint32_t pixel = argb[i]; - // Alternative#2 is a better match. So push pixel at 'i' as literal. - PixOrCopy v; - if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { - const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - v = PixOrCopyCreateCacheIdx(ix); - } else { - if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); - v = PixOrCopyCreateLiteral(pixel); - } - BackwardRefsCursorAdd(refs, v); + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); i++; // Backward reference to be done for next pixel. len = len2; offset = offset2; } } - if (len >= MAX_LENGTH) { - len = MAX_LENGTH - 1; - } BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { @@ -419,33 +436,36 @@ static int BackwardReferencesHashChain(int xsize, int ysize, } } // Add to the hash_chain (but cannot add the last pixel). - { + if (offset >= 3 && offset != xsize) { const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 1; k < last; ++k) { + for (k = 2; k < last - 8; k += 2) { + HashChainInsert(hash_chain, &argb[i + k], i + k); + } + for (; k < last; ++k) { HashChainInsert(hash_chain, &argb[i + k], i + k); } } i += len; } else { - const uint32_t pixel = argb[i]; - PixOrCopy v; - if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { - // push pixel as a PixOrCopyCreateCacheIdx pixel - const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - v = PixOrCopyCreateCacheIdx(ix); - } else { - if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); - v = PixOrCopyCreateLiteral(pixel); - } - BackwardRefsCursorAdd(refs, v); - if (i + 1 < pix_count) { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + HashChainInsert(hash_chain, &argb[i], i); + ++i; + --min_matches; + if (min_matches <= 0) { + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); HashChainInsert(hash_chain, &argb[i], i); + ++i; } - ++i; } } + while (i < pix_count) { + // Handle the last pixel(s). + AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); + ++i; + } + ok = !refs->error_; -Error: + Error: if (cc_init) VP8LColorCacheClear(&hashers); return ok; } @@ -455,15 +475,14 @@ Error: typedef struct { double alpha_[VALUES_IN_BYTE]; double red_[VALUES_IN_BYTE]; - double literal_[PIX_OR_COPY_CODES_MAX]; double blue_[VALUES_IN_BYTE]; double distance_[NUM_DISTANCE_CODES]; + double* literal_; } CostModel; static int BackwardReferencesTraceBackwards( - int xsize, int ysize, int recursive_cost_model, - const uint32_t* const argb, int quality, int cache_bits, - VP8LHashChain* const hash_chain, + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( @@ -487,28 +506,10 @@ static void ConvertPopulationCountTableToBitEstimates( } } -static int CostModelBuild(CostModel* const m, int xsize, int ysize, - int recursion_level, const uint32_t* const argb, - int quality, int cache_bits, - VP8LHashChain* const hash_chain, +static int CostModelBuild(CostModel* const m, int cache_bits, VP8LBackwardRefs* const refs) { int ok = 0; - VP8LHistogram* histo = NULL; - - ClearBackwardRefs(refs); - if (recursion_level > 0) { - if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1, - argb, quality, cache_bits, hash_chain, - refs)) { - goto Error; - } - } else { - if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality, - hash_chain, refs)) { - goto Error; - } - } - histo = VP8LAllocateHistogram(cache_bits); + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); if (histo == NULL) goto Error; VP8LHistogramCreate(histo, refs, cache_bits); @@ -557,10 +558,35 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, return m->distance_[code] + extra_bits; } +static void AddSingleLiteralWithCostModel( + const uint32_t* const argb, VP8LHashChain* const hash_chain, + VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, + int is_last, int use_color_cache, double prev_cost, float* const cost, + uint16_t* const dist_array) { + double cost_val = prev_cost; + const uint32_t color = argb[0]; + if (!is_last) { + HashChainInsert(hash_chain, argb, idx); + } + if (use_color_cache && VP8LColorCacheContains(hashers, color)) { + const double mul0 = 0.68; + const int ix = VP8LColorCacheGetIndex(hashers, color); + cost_val += GetCacheCost(cost_model, ix) * mul0; + } else { + const double mul1 = 0.82; + if (use_color_cache) VP8LColorCacheInsert(hashers, color); + cost_val += GetLiteralCost(cost_model, color) * mul1; + } + if (cost[idx] > cost_val) { + cost[idx] = (float)cost_val; + dist_array[idx] = 1; // only one is inserted. + } +} + static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, + int xsize, int ysize, const uint32_t* const argb, int quality, int cache_bits, VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs, uint32_t* const dist_array) { + VP8LBackwardRefs* const refs, uint16_t* const dist_array) { int i; int ok = 0; int cc_init = 0; @@ -568,24 +594,27 @@ static int BackwardReferencesHashChainDistanceOnly( const int use_color_cache = (cache_bits > 0); float* const cost = (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); - CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model)); + const size_t literal_array_size = sizeof(double) * + (NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((cache_bits > 0) ? (1 << cache_bits) : 0)); + const size_t cost_model_size = sizeof(CostModel) + literal_array_size; + CostModel* const cost_model = + (CostModel*)WebPSafeMalloc(1ULL, cost_model_size); VP8LColorCache hashers; - const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68; - const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82; - const int min_distance_code = 2; // TODO(vikasa): tune as function of quality - int window_size = WINDOW_SIZE; - int iter_pos = 1; - int iter_limit = -1; + const int skip_length = 32 + quality; + const int skip_min_distance_code = 2; + int iter_max = GetMaxItersForQuality(quality, 0); + const int window_size = GetWindowSizeForHashChain(quality, xsize); if (cost == NULL || cost_model == NULL) goto Error; + cost_model->literal_ = (double*)(cost_model + 1); if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb, - quality, cache_bits, hash_chain, refs)) { + if (!CostModelBuild(cost_model, cache_bits, refs)) { goto Error; } @@ -594,85 +623,80 @@ static int BackwardReferencesHashChainDistanceOnly( // We loop one pixel at a time, but store all currently best points to // non-processed locations from this point. dist_array[0] = 0; - GetParamsForHashChainFindCopy(quality, xsize, cache_bits, - &window_size, &iter_pos, &iter_limit); - HashChainInit(hash_chain); - for (i = 0; i < pix_count; ++i) { - double prev_cost = 0.0; - int shortmax; - if (i > 0) { - prev_cost = cost[i - 1]; - } - for (shortmax = 0; shortmax < 2; ++shortmax) { - int offset = 0; - int len = 0; - if (i < pix_count - 1) { // FindCopy reads pixels at [i] and [i + 1]. - int max_len = shortmax ? 2 : pix_count - i; - HashChainFindCopy(hash_chain, i, xsize, argb, max_len, - window_size, iter_pos, iter_limit, - &offset, &len); + HashChainReset(hash_chain); + // Add first pixel as literal. + AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0, + 0, use_color_cache, 0.0, cost, dist_array); + for (i = 1; i < pix_count - 1; ++i) { + int offset = 0; + int len = 0; + double prev_cost = cost[i - 1]; + const int max_len = MaxFindCopyLength(pix_count - i); + HashChainFindCopy(hash_chain, i, argb, max_len, window_size, + iter_max, &offset, &len); + if (len >= MIN_LENGTH) { + const int code = DistanceToPlaneCode(xsize, offset); + const double distance_cost = + prev_cost + GetDistanceCost(cost_model, code); + int k; + for (k = 1; k < len; ++k) { + const double cost_val = distance_cost + GetLengthCost(cost_model, k); + if (cost[i + k] > cost_val) { + cost[i + k] = (float)cost_val; + dist_array[i + k] = k + 1; + } } - if (len >= MIN_LENGTH) { - const int code = DistanceToPlaneCode(xsize, offset); - const double distance_cost = - prev_cost + GetDistanceCost(cost_model, code); - int k; - for (k = 1; k < len; ++k) { - const double cost_val = distance_cost + GetLengthCost(cost_model, k); - if (cost[i + k] > cost_val) { - cost[i + k] = (float)cost_val; - dist_array[i + k] = k + 1; + // This if is for speedup only. It roughly doubles the speed, and + // makes compression worse by .1 %. + if (len >= skip_length && code <= skip_min_distance_code) { + // Long copy for short distances, let's skip the middle + // lookups for better copies. + // 1) insert the hashes. + if (use_color_cache) { + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); } } - // This if is for speedup only. It roughly doubles the speed, and - // makes compression worse by .1 %. - if (len >= 128 && code <= min_distance_code) { - // Long copy for short distances, let's skip the middle - // lookups for better copies. - // 1) insert the hashes. - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } - } - // 2) Add to the hash_chain (but cannot add the last pixel) - { - const int last = (len + i < pix_count - 1) ? len + i - : pix_count - 1; - for (k = i; k < last; ++k) { - HashChainInsert(hash_chain, &argb[k], k); - } + // 2) Add to the hash_chain (but cannot add the last pixel) + { + const int last = (len + i < pix_count - 1) ? len + i + : pix_count - 1; + for (k = i; k < last; ++k) { + HashChainInsert(hash_chain, &argb[k], k); } - // 3) jump. - i += len - 1; // for loop does ++i, thus -1 here. - goto next_symbol; } + // 3) jump. + i += len - 1; // for loop does ++i, thus -1 here. + goto next_symbol; } - } - if (i < pix_count - 1) { - HashChainInsert(hash_chain, &argb[i], i); - } - { - // inserting a literal pixel - double cost_val = prev_cost; - if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { - const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]); - cost_val += GetCacheCost(cost_model, ix) * mul0; - } else { - if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); - cost_val += GetLiteralCost(cost_model, argb[i]) * mul1; - } - if (cost[i] > cost_val) { - cost[i] = (float)cost_val; - dist_array[i] = 1; // only one is inserted. + if (len != MIN_LENGTH) { + int code_min_length; + double cost_total; + HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, + &offset); + code_min_length = DistanceToPlaneCode(xsize, offset); + cost_total = prev_cost + + GetDistanceCost(cost_model, code_min_length) + + GetLengthCost(cost_model, 1); + if (cost[i + 1] > cost_total) { + cost[i + 1] = (float)cost_total; + dist_array[i + 1] = 2; + } } } + AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, + 0, use_color_cache, prev_cost, cost, + dist_array); next_symbol: ; } - // Last pixel still to do, it can only be a single step if not reached - // through cheaper means already. + // Handle the last pixel. + if (i == (pix_count - 1)) { + AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, + 1, use_color_cache, cost[pix_count - 2], cost, + dist_array); + } ok = !refs->error_; -Error: + Error: if (cc_init) VP8LColorCacheClear(&hashers); WebPSafeFree(cost_model); WebPSafeFree(cost); @@ -682,12 +706,12 @@ Error: // We pack the path at the end of *dist_array and return // a pointer to this part of the array. Example: // dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232] -static void TraceBackwards(uint32_t* const dist_array, +static void TraceBackwards(uint16_t* const dist_array, int dist_array_size, - uint32_t** const chosen_path, + uint16_t** const chosen_path, int* const chosen_path_size) { - uint32_t* path = dist_array + dist_array_size; - uint32_t* cur = dist_array + dist_array_size - 1; + uint16_t* path = dist_array + dist_array_size; + uint16_t* cur = dist_array + dist_array_size - 1; while (cur >= dist_array) { const int k = *cur; --path; @@ -701,20 +725,16 @@ static void TraceBackwards(uint32_t* const dist_array, static int BackwardReferencesHashChainFollowChosenPath( int xsize, int ysize, const uint32_t* const argb, int quality, int cache_bits, - const uint32_t* const chosen_path, int chosen_path_size, + const uint16_t* const chosen_path, int chosen_path_size, VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); - int size = 0; - int i = 0; - int k; int ix; + int i = 0; int ok = 0; int cc_init = 0; - int window_size = WINDOW_SIZE; - int iter_pos = 1; - int iter_limit = -1; + const int window_size = GetWindowSizeForHashChain(quality, xsize); VP8LColorCache hashers; if (use_color_cache) { @@ -723,18 +743,13 @@ static int BackwardReferencesHashChainFollowChosenPath( } ClearBackwardRefs(refs); - GetParamsForHashChainFindCopy(quality, xsize, cache_bits, - &window_size, &iter_pos, &iter_limit); - HashChainInit(hash_chain); - for (ix = 0; ix < chosen_path_size; ++ix, ++size) { + HashChainReset(hash_chain); + for (ix = 0; ix < chosen_path_size; ++ix) { int offset = 0; - int len = 0; - int max_len = chosen_path[ix]; - if (max_len != 1) { - HashChainFindCopy(hash_chain, i, xsize, argb, max_len, - window_size, iter_pos, iter_limit, - &offset, &len); - assert(len == max_len); + const int len = chosen_path[ix]; + if (len != 1) { + int k; + HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset); BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { @@ -766,29 +781,28 @@ static int BackwardReferencesHashChainFollowChosenPath( } } ok = !refs->error_; -Error: + Error: if (cc_init) VP8LColorCacheClear(&hashers); return ok; } // Returns 1 on success. static int BackwardReferencesTraceBackwards(int xsize, int ysize, - int recursive_cost_model, const uint32_t* const argb, int quality, int cache_bits, VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int ok = 0; const int dist_array_size = xsize * ysize; - uint32_t* chosen_path = NULL; + uint16_t* chosen_path = NULL; int chosen_path_size = 0; - uint32_t* dist_array = - (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); + uint16_t* dist_array = + (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); if (dist_array == NULL) goto Error; if (!BackwardReferencesHashChainDistanceOnly( - xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain, + xsize, ysize, argb, quality, cache_bits, hash_chain, refs, dist_array)) { goto Error; } @@ -817,72 +831,10 @@ static void BackwardReferences2DLocality(int xsize, } } -VP8LBackwardRefs* VP8LGetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2]) { - int lz77_is_useful; - const int num_pix = width * height; - VP8LBackwardRefs* best = NULL; - VP8LBackwardRefs* const refs_lz77 = &refs_array[0]; - VP8LBackwardRefs* const refs_rle = &refs_array[1]; - - if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality, - hash_chain, refs_lz77)) { - return NULL; - } - if (!BackwardReferencesRle(width, height, argb, refs_rle)) { - return NULL; - } - - { - double bit_cost_lz77, bit_cost_rle; - VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); - if (histo == NULL) return NULL; - // Evaluate LZ77 coding. - VP8LHistogramCreate(histo, refs_lz77, cache_bits); - bit_cost_lz77 = VP8LHistogramEstimateBits(histo); - // Evaluate RLE coding. - VP8LHistogramCreate(histo, refs_rle, cache_bits); - bit_cost_rle = VP8LHistogramEstimateBits(histo); - // Decide if LZ77 is useful. - lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); - VP8LFreeHistogram(histo); - } - - // Choose appropriate backward reference. - if (lz77_is_useful) { - // TraceBackwards is costly. Don't execute it at lower quality. - const int try_lz77_trace_backwards = (quality >= 25); - best = refs_lz77; // default guess: lz77 is better - if (try_lz77_trace_backwards) { - // Set recursion level for large images using a color cache. - const int recursion_level = - (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0; - VP8LBackwardRefs* const refs_trace = &refs_array[1]; - ClearBackwardRefs(refs_trace); - if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb, - quality, cache_bits, hash_chain, - refs_trace)) { - best = refs_trace; - } - } - } else { - best = refs_rle; - } - - if (use_2d_locality) BackwardReferences2DLocality(width, best); - - return best; -} - // Returns entropy for the given cache bits. -static double ComputeCacheEntropy(const uint32_t* const argb, - int xsize, int ysize, +static double ComputeCacheEntropy(const uint32_t* argb, const VP8LBackwardRefs* const refs, int cache_bits) { - int pixel_index = 0; - uint32_t k; const int use_color_cache = (cache_bits > 0); int cc_init = 0; double entropy = MAX_ENTROPY; @@ -896,33 +848,40 @@ static double ComputeCacheEntropy(const uint32_t* const argb, cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - - while (VP8LRefsCursorOk(&c)) { - const PixOrCopy* const v = c.cur_pos; - if (PixOrCopyIsLiteral(v)) { - if (use_color_cache && - VP8LColorCacheContains(&hashers, argb[pixel_index])) { - // push pixel as a cache index - const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]); - const PixOrCopy token = PixOrCopyCreateCacheIdx(ix); - VP8LHistogramAddSinglePixOrCopy(histo, &token); - } else { - VP8LHistogramAddSinglePixOrCopy(histo, v); - } - } else { - VP8LHistogramAddSinglePixOrCopy(histo, v); + if (!use_color_cache) { + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); } - if (use_color_cache) { - for (k = 0; k < PixOrCopyLength(v); ++k) { - VP8LColorCacheInsert(&hashers, argb[pixel_index + k]); + } else { + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t pix = *argb++; + const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix); + if (VP8LColorCacheLookup(&hashers, key) == pix) { + ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; + } else { + VP8LColorCacheSet(&hashers, key, pix); + ++histo->blue_[pix & 0xff]; + ++histo->literal_[(pix >> 8) & 0xff]; + ++histo->red_[(pix >> 16) & 0xff]; + ++histo->alpha_[pix >> 24]; + } + } else { + int len = PixOrCopyLength(v); + int code, extra_bits; + VP8LPrefixEncodeBits(len, &code, &extra_bits); + ++histo->literal_[NUM_LITERAL_CODES + code]; + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + ++histo->distance_[code]; + do { + VP8LColorCacheInsert(&hashers, *argb++); + } while(--len != 0); } + VP8LRefsCursorNext(&c); } - pixel_index += PixOrCopyLength(v); - VP8LRefsCursorNext(&c); } - assert(pixel_index == xsize * ysize); - (void)xsize; // xsize is not used in non-debug compilations otherwise. - (void)ysize; // ysize is not used in non-debug compilations otherwise. entropy = VP8LHistogramEstimateBits(histo) + kSmallPenaltyForLargeCache * cache_bits; Error: @@ -931,45 +890,204 @@ static double ComputeCacheEntropy(const uint32_t* const argb, return entropy; } -// *best_cache_bits will contain how many bits are to be used for a color cache. +// Evaluate optimal cache bits for the local color cache. +// The input *best_cache_bits sets the maximum cache bits to use (passing 0 +// implies disabling the local color cache). The local color cache is also +// disabled for the lower (<= 25) quality. // Returns 0 in case of memory error. -int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, int quality, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs, - int* const best_cache_bits) { +static int CalculateBestCacheSize(const uint32_t* const argb, + int xsize, int ysize, int quality, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const lz77_computed, + int* const best_cache_bits) { int eval_low = 1; int eval_high = 1; double entropy_low = MAX_ENTROPY; double entropy_high = MAX_ENTROPY; + const double cost_mul = 5e-4; int cache_bits_low = 0; - int cache_bits_high = MAX_COLOR_CACHE_BITS; + int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits; - if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain, - refs)) { + assert(cache_bits_high <= MAX_COLOR_CACHE_BITS); + + *lz77_computed = 0; + if (cache_bits_high == 0) { + *best_cache_bits = 0; + // Local color cache is disabled. + return 1; + } + if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0, + hash_chain, refs)) { return 0; } // Do a binary search to find the optimal entropy for cache_bits. - while (cache_bits_high - cache_bits_low > 1) { + while (eval_low || eval_high) { if (eval_low) { - entropy_low = - ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low); + entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low); + entropy_low += entropy_low * cache_bits_low * cost_mul; eval_low = 0; } if (eval_high) { - entropy_high = - ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high); + entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high); + entropy_high += entropy_high * cache_bits_high * cost_mul; eval_high = 0; } if (entropy_high < entropy_low) { + const int prev_cache_bits_low = cache_bits_low; *best_cache_bits = cache_bits_high; cache_bits_low = (cache_bits_low + cache_bits_high) / 2; - eval_low = 1; + if (cache_bits_low != prev_cache_bits_low) eval_low = 1; } else { *best_cache_bits = cache_bits_low; cache_bits_high = (cache_bits_low + cache_bits_high) / 2; - eval_high = 1; + if (cache_bits_high != cache_bits_low) eval_high = 1; } } + *lz77_computed = 1; return 1; } + +// Update (in-place) backward references for specified cache_bits. +static int BackwardRefsWithLocalCache(const uint32_t* const argb, + int cache_bits, + VP8LBackwardRefs* const refs) { + int pixel_index = 0; + VP8LColorCache hashers; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0; + + while (VP8LRefsCursorOk(&c)) { + PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t argb_literal = v->argb_or_distance; + if (VP8LColorCacheContains(&hashers, argb_literal)) { + const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal); + *v = PixOrCopyCreateCacheIdx(ix); + } else { + VP8LColorCacheInsert(&hashers, argb_literal); + } + ++pixel_index; + } else { + // refs was created without local cache, so it can not have cache indexes. + int k; + assert(PixOrCopyIsCopy(v)); + for (k = 0; k < v->len; ++k) { + VP8LColorCacheInsert(&hashers, argb[pixel_index++]); + } + } + VP8LRefsCursorNext(&c); + } + VP8LColorCacheClear(&hashers); + return 1; +} + +static VP8LBackwardRefs* GetBackwardReferencesLowEffort( + int width, int height, const uint32_t* const argb, int quality, + int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + *cache_bits = 0; + if (!BackwardReferencesLz77(width, height, argb, 0, quality, + 1 /* Low effort. */, hash_chain, refs_lz77)) { + return NULL; + } + BackwardReferences2DLocality(width, refs_lz77); + return refs_lz77; +} + +static VP8LBackwardRefs* GetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + int lz77_is_useful; + int lz77_computed; + double bit_cost_lz77, bit_cost_rle; + VP8LBackwardRefs* best = NULL; + VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + VP8LBackwardRefs* refs_rle = &refs_array[1]; + VP8LHistogram* histo = NULL; + + if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain, + refs_lz77, &lz77_computed, cache_bits)) { + goto Error; + } + + if (lz77_computed) { + // Transform refs_lz77 for the optimized cache_bits. + if (*cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) { + goto Error; + } + } + } else { + if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality, + 0 /* Low effort. */, hash_chain, refs_lz77)) { + goto Error; + } + } + + if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) { + goto Error; + } + + histo = VP8LAllocateHistogram(*cache_bits); + if (histo == NULL) goto Error; + + { + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_lz77, *cache_bits); + bit_cost_lz77 = VP8LHistogramEstimateBits(histo); + // Evaluate RLE coding. + VP8LHistogramCreate(histo, refs_rle, *cache_bits); + bit_cost_rle = VP8LHistogramEstimateBits(histo); + // Decide if LZ77 is useful. + lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); + } + + // Choose appropriate backward reference. + if (lz77_is_useful) { + // TraceBackwards is costly. Don't execute it at lower quality. + const int try_lz77_trace_backwards = (quality >= 25); + best = refs_lz77; // default guess: lz77 is better + if (try_lz77_trace_backwards) { + VP8LBackwardRefs* const refs_trace = refs_rle; + if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) { + best = NULL; + goto Error; + } + if (BackwardReferencesTraceBackwards(width, height, argb, quality, + *cache_bits, hash_chain, + refs_trace)) { + double bit_cost_trace; + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_trace, *cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_cost_lz77) { + best = refs_trace; + } + } + } + } else { + best = refs_rle; + } + + BackwardReferences2DLocality(width, best); + + Error: + VP8LFreeHistogram(histo); + return best; +} + +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { + if (low_effort) { + return GetBackwardReferencesLowEffort(width, height, argb, quality, + cache_bits, hash_chain, refs_array); + } else { + return GetBackwardReferences(width, height, argb, quality, cache_bits, + hash_chain, refs_array); + } +} diff --git a/src/3rdparty/libwebp/src/enc/backward_references.h b/src/3rdparty/libwebp/src/enc/backward_references.h index c2c81c5..daa084d 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.h +++ b/src/3rdparty/libwebp/src/enc/backward_references.h @@ -22,13 +22,8 @@ extern "C" { #endif -// The spec allows 11, we use 9 bits to reduce memory consumption in encoding. -// Having 9 instead of 11 only removes about 0.25 % of compression density. -#define MAX_COLOR_CACHE_BITS 9 - -// Max ever number of codes we'll use: -#define PIX_OR_COPY_CODES_MAX \ - (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS)) +// The maximum allowed limit is 11. +#define MAX_COLOR_CACHE_BITS 10 // ----------------------------------------------------------------------------- // PixOrCopy @@ -190,21 +185,16 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { // Main entry points // Evaluates best possible backward references for specified quality. -// Further optimize for 2D locality if use_2d_locality flag is set. +// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache +// bits to use (passing 0 implies disabling the local color cache). +// The optimal cache bits is evaluated and set for the *cache_bits parameter. // The return value is the pointer to the best of the two backward refs viz, // refs[0] or refs[1]. VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain, + int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]); -// Produce an estimate for a good color cache size for the image. -int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, int quality, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const ref, - int* const best_cache_bits); - #ifdef __cplusplus } #endif diff --git a/src/3rdparty/libwebp/src/enc/config.c b/src/3rdparty/libwebp/src/enc/config.c index 53a3bb2..f9f7961 100644 --- a/src/3rdparty/libwebp/src/enc/config.c +++ b/src/3rdparty/libwebp/src/enc/config.c @@ -43,10 +43,15 @@ int WebPConfigInitInternal(WebPConfig* config, config->alpha_filtering = 1; config->alpha_quality = 100; config->lossless = 0; + config->exact = 0; config->image_hint = WEBP_HINT_DEFAULT; config->emulate_jpeg_size = 0; config->thread_level = 0; config->low_memory = 0; + config->near_lossless = 100; +#ifdef WEBP_EXPERIMENTAL_FEATURES + config->delta_palettization = 0; +#endif // WEBP_EXPERIMENTAL_FEATURES // TODO(skal): tune. switch (preset) { @@ -111,11 +116,7 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->show_compressed < 0 || config->show_compressed > 1) return 0; -#if WEBP_ENCODER_ABI_VERSION > 0x0204 if (config->preprocessing < 0 || config->preprocessing > 7) -#else - if (config->preprocessing < 0 || config->preprocessing > 3) -#endif return 0; if (config->partitions < 0 || config->partitions > 3) return 0; @@ -129,6 +130,8 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->lossless < 0 || config->lossless > 1) return 0; + if (config->near_lossless < 0 || config->near_lossless > 100) + return 0; if (config->image_hint >= WEBP_HINT_LAST) return 0; if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1) @@ -137,12 +140,17 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->low_memory < 0 || config->low_memory > 1) return 0; + if (config->exact < 0 || config->exact > 1) + return 0; +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization < 0 || config->delta_palettization > 1) + return 0; +#endif // WEBP_EXPERIMENTAL_FEATURES return 1; } //------------------------------------------------------------------------------ -#if WEBP_ENCODER_ABI_VERSION > 0x0202 #define MAX_LEVEL 9 // Mapping between -z level and -m / -q parameter settings. @@ -161,6 +169,5 @@ int WebPConfigLosslessPreset(WebPConfig* config, int level) { config->quality = kLosslessPresets[level].quality_; return 1; } -#endif //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/cost.c b/src/3rdparty/libwebp/src/enc/cost.c index 9d2cc01..ae7fe01 100644 --- a/src/3rdparty/libwebp/src/enc/cost.c +++ b/src/3rdparty/libwebp/src/enc/cost.c @@ -14,38 +14,6 @@ #include "./cost.h" //------------------------------------------------------------------------------ -// Boolean-cost cost table - -const uint16_t VP8EntropyCost[256] = { - 1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216, - 1178, 1152, 1110, 1076, 1061, 1024, 1024, 992, 968, 951, - 939, 911, 896, 878, 871, 854, 838, 820, 811, 794, - 786, 768, 768, 752, 740, 732, 720, 709, 704, 690, - 683, 672, 666, 655, 647, 640, 631, 622, 615, 607, - 598, 592, 586, 576, 572, 564, 559, 555, 547, 541, - 534, 528, 522, 512, 512, 504, 500, 494, 488, 483, - 477, 473, 467, 461, 458, 452, 448, 443, 438, 434, - 427, 424, 419, 415, 410, 406, 403, 399, 394, 390, - 384, 384, 377, 374, 370, 366, 362, 359, 355, 351, - 347, 342, 342, 336, 333, 330, 326, 323, 320, 316, - 312, 308, 305, 302, 299, 296, 293, 288, 287, 283, - 280, 277, 274, 272, 268, 266, 262, 256, 256, 256, - 251, 248, 245, 242, 240, 237, 234, 232, 228, 226, - 223, 221, 218, 216, 214, 211, 208, 205, 203, 201, - 198, 196, 192, 191, 188, 187, 183, 181, 179, 176, - 175, 171, 171, 168, 165, 163, 160, 159, 156, 154, - 152, 150, 148, 146, 144, 142, 139, 138, 135, 133, - 131, 128, 128, 125, 123, 121, 119, 117, 115, 113, - 111, 110, 107, 105, 103, 102, 100, 98, 96, 94, - 92, 91, 89, 86, 86, 83, 82, 80, 77, 76, - 74, 73, 71, 69, 67, 66, 64, 63, 61, 59, - 57, 55, 54, 52, 51, 49, 47, 46, 44, 43, - 41, 40, 38, 36, 35, 33, 32, 30, 29, 27, - 25, 24, 22, 21, 19, 18, 16, 15, 13, 12, - 10, 9, 7, 6, 4, 3 -}; - -//------------------------------------------------------------------------------ // Level cost tables // For each given level, the following table gives the pattern of contexts to @@ -71,267 +39,6 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = { {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153} }; -// fixed costs for coding levels, deduce from the coding tree. -// This is only the part that doesn't depend on the probability state. -const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = { - 0, 256, 256, 256, 256, 432, 618, 630, - 731, 640, 640, 828, 901, 948, 1021, 1101, - 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202, - 1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497, - 1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358, - 1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532, - 1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679, - 1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853, - 1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759, - 1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832, - 1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910, - 1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983, - 1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059, - 2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132, - 2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210, - 2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283, - 2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200, - 2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273, - 2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351, - 2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424, - 2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500, - 2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573, - 2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651, - 2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724, - 2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572, - 2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645, - 2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723, - 2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796, - 2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872, - 2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945, - 2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023, - 3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096, - 3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013, - 3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086, - 3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164, - 3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237, - 3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313, - 3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386, - 3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464, - 3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537, - 3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848, - 2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921, - 2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999, - 3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072, - 3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148, - 3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221, - 3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299, - 3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372, - 3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289, - 3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362, - 3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440, - 3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513, - 3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589, - 3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662, - 3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740, - 3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813, - 3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661, - 3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734, - 3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812, - 3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885, - 3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961, - 3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034, - 4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112, - 4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185, - 4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102, - 4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175, - 4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253, - 4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326, - 4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402, - 4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475, - 4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553, - 4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626, - 4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547, - 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620, - 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, - 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, - 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847, - 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, - 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, - 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, - 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, - 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, - 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, - 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, - 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, - 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361, - 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, - 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, - 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, - 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, - 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, - 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, - 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, - 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, - 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811, - 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, - 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, - 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874, - 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, - 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, - 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, - 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, - 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, - 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, - 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, - 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, - 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787, - 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, - 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, - 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009, - 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, - 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, - 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, - 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, - 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, - 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, - 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, - 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, - 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528, - 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, - 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, - 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522, - 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, - 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, - 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, - 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, - 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, - 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, - 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, - 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, - 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041, - 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, - 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, - 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263, - 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, - 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, - 6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547, - 3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620, - 3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698, - 3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771, - 3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847, - 3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920, - 3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998, - 4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071, - 4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988, - 3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061, - 4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139, - 4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212, - 4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288, - 4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361, - 4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439, - 4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512, - 4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360, - 4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433, - 4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511, - 4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584, - 4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660, - 4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733, - 4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811, - 4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884, - 4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801, - 4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874, - 4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952, - 4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025, - 5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101, - 5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174, - 5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252, - 5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325, - 5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636, - 4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709, - 4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787, - 4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860, - 4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936, - 4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009, - 5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087, - 5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160, - 5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077, - 5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150, - 5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228, - 5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301, - 5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377, - 5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450, - 5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528, - 5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601, - 5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449, - 5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522, - 5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600, - 5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673, - 5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749, - 5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822, - 5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900, - 5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973, - 5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890, - 5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963, - 5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041, - 6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114, - 6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190, - 6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263, - 6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341, - 6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414, - 6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335, - 5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408, - 5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486, - 5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559, - 5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635, - 5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708, - 5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786, - 5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859, - 5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776, - 5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849, - 5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927, - 5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000, - 6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076, - 6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149, - 6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227, - 6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300, - 6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148, - 6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221, - 6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299, - 6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372, - 6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448, - 6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521, - 6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599, - 6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672, - 6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589, - 6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662, - 6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740, - 6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813, - 6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889, - 6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962, - 6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040, - 7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113, - 7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424, - 6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497, - 6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575, - 6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648, - 6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724, - 6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797, - 6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875, - 6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948, - 6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865, - 6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938, - 6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016, - 7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089, - 7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165, - 7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238, - 7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316, - 7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389, - 7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237, - 7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310, - 7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388, - 7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461, - 7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537, - 7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610, - 7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688, - 7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761 -}; - static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) { int pattern = VP8LevelCodes[level - 1][0]; int bits = VP8LevelCodes[level - 1][1]; @@ -350,12 +57,13 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) { //------------------------------------------------------------------------------ // Pre-calc level costs once for all -void VP8CalculateLevelCosts(VP8Proba* const proba) { +void VP8CalculateLevelCosts(VP8EncProba* const proba) { int ctype, band, ctx; if (!proba->dirty_) return; // nothing to do. for (ctype = 0; ctype < NUM_TYPES; ++ctype) { + int n; for (band = 0; band < NUM_BANDS; ++band) { for (ctx = 0; ctx < NUM_CTX; ++ctx) { const uint8_t* const p = proba->coeffs_[ctype][band][ctx]; @@ -371,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) { // actually constant. } } + for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel. + for (ctx = 0; ctx < NUM_CTX; ++ctx) { + proba->remapped_costs_[ctype][n][ctx] = + proba->level_cost_[ctype][VP8EncBands[n]][ctx]; + } + } } proba->dirty_ = 0; } @@ -487,66 +201,6 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { }; //------------------------------------------------------------------------------ -// Mode costs - -static int GetResidualCost(int ctx0, const VP8Residual* const res) { - int n = res->first; - // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 - const int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; - // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 - // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll - // be missing during the loop. - int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; - - if (res->last < 0) { - return VP8BitCost(0, p0); - } - for (; n < res->last; ++n) { - const int v = abs(res->coeffs[n]); - const int b = VP8EncBands[n + 1]; - const int ctx = (v >= 2) ? 2 : v; - cost += VP8LevelCost(t, v); - t = res->cost[b][ctx]; - } - // Last coefficient is always non-zero - { - const int v = abs(res->coeffs[n]); - assert(v != 0); - cost += VP8LevelCost(t, v); - if (n < 15) { - const int b = VP8EncBands[n + 1]; - const int ctx = (v == 1) ? 1 : 2; - const int last_p0 = res->prob[b][ctx][0]; - cost += VP8BitCost(0, last_p0); - } - } - return cost; -} - -//------------------------------------------------------------------------------ -// init function - -#if defined(WEBP_USE_MIPS32) -extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res); -#endif // WEBP_USE_MIPS32 - -// TODO(skal): this, and GetResidualCost(), should probably go somewhere -// under src/dsp/ at some point. -VP8GetResidualCostFunc VP8GetResidualCost; - -void VP8GetResidualCostInit(void) { - VP8GetResidualCost = GetResidualCost; - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_MIPS32) - if (VP8GetCPUInfo(kMIPS32)) { - VP8GetResidualCost = VP8GetResidualCostMIPS32; - } -#endif - } -} - -//------------------------------------------------------------------------------ // helper functions for residuals struct VP8Residual. void VP8InitResidual(int first, int coeff_type, @@ -554,45 +208,10 @@ void VP8InitResidual(int first, int coeff_type, res->coeff_type = coeff_type; res->prob = enc->proba_.coeffs_[coeff_type]; res->stats = enc->proba_.stats_[coeff_type]; - res->cost = enc->proba_.level_cost_[coeff_type]; + res->costs = enc->proba_.remapped_costs_[coeff_type]; res->first = first; } -static void SetResidualCoeffs(const int16_t* const coeffs, - VP8Residual* const res) { - int n; - res->last = -1; - assert(res->first == 0 || coeffs[0] == 0); - for (n = 15; n >= 0; --n) { - if (coeffs[n]) { - res->last = n; - break; - } - } - res->coeffs = coeffs; -} - -//------------------------------------------------------------------------------ -// init function - -#if defined(WEBP_USE_SSE2) -extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, - VP8Residual* const res); -#endif // WEBP_USE_SSE2 - -VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; - -void VP8SetResidualCoeffsInit(void) { - VP8SetResidualCoeffs = SetResidualCoeffs; - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2; - } -#endif - } -} - //------------------------------------------------------------------------------ // Mode costs diff --git a/src/3rdparty/libwebp/src/enc/cost.h b/src/3rdparty/libwebp/src/enc/cost.h index 4e55895..20960d6 100644 --- a/src/3rdparty/libwebp/src/enc/cost.h +++ b/src/3rdparty/libwebp/src/enc/cost.h @@ -24,46 +24,31 @@ extern "C" { // On-the-fly info about the current set of residuals. Handy to avoid // passing zillions of params. -typedef struct { +typedef struct VP8Residual VP8Residual; +struct VP8Residual { int first; int last; const int16_t* coeffs; int coeff_type; - ProbaArray* prob; - StatsArray* stats; - CostArray* cost; -} VP8Residual; + ProbaArray* prob; + StatsArray* stats; + CostArrayPtr costs; +}; void VP8InitResidual(int first, int coeff_type, VP8Encoder* const enc, VP8Residual* const res); -typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, - VP8Residual* const res); -extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; - -void VP8SetResidualCoeffsInit(void); // must be called first - int VP8RecordCoeffs(int ctx, const VP8Residual* const res); -// approximate cost per level: -extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1]; -extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p) - // Cost of coding one event with probability 'proba'. static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; } -// Cost calculation function. -typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res); -extern VP8GetResidualCostFunc VP8GetResidualCost; - -void VP8GetResidualCostInit(void); // must be called first - // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; -void VP8CalculateLevelCosts(VP8Proba* const proba); +void VP8CalculateLevelCosts(VP8EncProba* const proba); static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) { return VP8LevelFixedCosts[level] + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level]; diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization.c b/src/3rdparty/libwebp/src/enc/delta_palettization.c new file mode 100644 index 0000000..062e588 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/delta_palettization.c @@ -0,0 +1,455 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Mislav Bradac (mislavm@google.com) +// + +#include "./delta_palettization.h" + +#ifdef WEBP_EXPERIMENTAL_FEATURES +#include "../webp/types.h" +#include "../dsp/lossless.h" + +#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b)) + +// Format allows palette up to 256 entries, but more palette entries produce +// bigger entropy. In the future it will probably be useful to add more entries +// that are far from the origin of the palette or choose remaining entries +// dynamically. +#define DELTA_PALETTE_SIZE 226 + +// Palette used for delta_palettization. Entries are roughly sorted by distance +// of their signed equivalents from the origin. +static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = { + MK_COL(0u, 0u, 0u), + MK_COL(255u, 255u, 255u), + MK_COL(1u, 1u, 1u), + MK_COL(254u, 254u, 254u), + MK_COL(2u, 2u, 2u), + MK_COL(4u, 4u, 4u), + MK_COL(252u, 252u, 252u), + MK_COL(250u, 0u, 0u), + MK_COL(0u, 250u, 0u), + MK_COL(0u, 0u, 250u), + MK_COL(6u, 0u, 0u), + MK_COL(0u, 6u, 0u), + MK_COL(0u, 0u, 6u), + MK_COL(0u, 0u, 248u), + MK_COL(0u, 0u, 8u), + MK_COL(0u, 248u, 0u), + MK_COL(0u, 248u, 248u), + MK_COL(0u, 248u, 8u), + MK_COL(0u, 8u, 0u), + MK_COL(0u, 8u, 248u), + MK_COL(0u, 8u, 8u), + MK_COL(8u, 8u, 8u), + MK_COL(248u, 0u, 0u), + MK_COL(248u, 0u, 248u), + MK_COL(248u, 0u, 8u), + MK_COL(248u, 248u, 0u), + MK_COL(248u, 8u, 0u), + MK_COL(8u, 0u, 0u), + MK_COL(8u, 0u, 248u), + MK_COL(8u, 0u, 8u), + MK_COL(8u, 248u, 0u), + MK_COL(8u, 8u, 0u), + MK_COL(23u, 23u, 23u), + MK_COL(13u, 13u, 13u), + MK_COL(232u, 232u, 232u), + MK_COL(244u, 244u, 244u), + MK_COL(245u, 245u, 250u), + MK_COL(50u, 50u, 50u), + MK_COL(204u, 204u, 204u), + MK_COL(236u, 236u, 236u), + MK_COL(16u, 16u, 16u), + MK_COL(240u, 16u, 16u), + MK_COL(16u, 240u, 16u), + MK_COL(240u, 240u, 16u), + MK_COL(16u, 16u, 240u), + MK_COL(240u, 16u, 240u), + MK_COL(16u, 240u, 240u), + MK_COL(240u, 240u, 240u), + MK_COL(0u, 0u, 232u), + MK_COL(0u, 232u, 0u), + MK_COL(232u, 0u, 0u), + MK_COL(0u, 0u, 24u), + MK_COL(0u, 24u, 0u), + MK_COL(24u, 0u, 0u), + MK_COL(32u, 32u, 32u), + MK_COL(224u, 32u, 32u), + MK_COL(32u, 224u, 32u), + MK_COL(224u, 224u, 32u), + MK_COL(32u, 32u, 224u), + MK_COL(224u, 32u, 224u), + MK_COL(32u, 224u, 224u), + MK_COL(224u, 224u, 224u), + MK_COL(0u, 0u, 176u), + MK_COL(0u, 0u, 80u), + MK_COL(0u, 176u, 0u), + MK_COL(0u, 176u, 176u), + MK_COL(0u, 176u, 80u), + MK_COL(0u, 80u, 0u), + MK_COL(0u, 80u, 176u), + MK_COL(0u, 80u, 80u), + MK_COL(176u, 0u, 0u), + MK_COL(176u, 0u, 176u), + MK_COL(176u, 0u, 80u), + MK_COL(176u, 176u, 0u), + MK_COL(176u, 80u, 0u), + MK_COL(80u, 0u, 0u), + MK_COL(80u, 0u, 176u), + MK_COL(80u, 0u, 80u), + MK_COL(80u, 176u, 0u), + MK_COL(80u, 80u, 0u), + MK_COL(0u, 0u, 152u), + MK_COL(0u, 0u, 104u), + MK_COL(0u, 152u, 0u), + MK_COL(0u, 152u, 152u), + MK_COL(0u, 152u, 104u), + MK_COL(0u, 104u, 0u), + MK_COL(0u, 104u, 152u), + MK_COL(0u, 104u, 104u), + MK_COL(152u, 0u, 0u), + MK_COL(152u, 0u, 152u), + MK_COL(152u, 0u, 104u), + MK_COL(152u, 152u, 0u), + MK_COL(152u, 104u, 0u), + MK_COL(104u, 0u, 0u), + MK_COL(104u, 0u, 152u), + MK_COL(104u, 0u, 104u), + MK_COL(104u, 152u, 0u), + MK_COL(104u, 104u, 0u), + MK_COL(216u, 216u, 216u), + MK_COL(216u, 216u, 40u), + MK_COL(216u, 216u, 176u), + MK_COL(216u, 216u, 80u), + MK_COL(216u, 40u, 216u), + MK_COL(216u, 40u, 40u), + MK_COL(216u, 40u, 176u), + MK_COL(216u, 40u, 80u), + MK_COL(216u, 176u, 216u), + MK_COL(216u, 176u, 40u), + MK_COL(216u, 176u, 176u), + MK_COL(216u, 176u, 80u), + MK_COL(216u, 80u, 216u), + MK_COL(216u, 80u, 40u), + MK_COL(216u, 80u, 176u), + MK_COL(216u, 80u, 80u), + MK_COL(40u, 216u, 216u), + MK_COL(40u, 216u, 40u), + MK_COL(40u, 216u, 176u), + MK_COL(40u, 216u, 80u), + MK_COL(40u, 40u, 216u), + MK_COL(40u, 40u, 40u), + MK_COL(40u, 40u, 176u), + MK_COL(40u, 40u, 80u), + MK_COL(40u, 176u, 216u), + MK_COL(40u, 176u, 40u), + MK_COL(40u, 176u, 176u), + MK_COL(40u, 176u, 80u), + MK_COL(40u, 80u, 216u), + MK_COL(40u, 80u, 40u), + MK_COL(40u, 80u, 176u), + MK_COL(40u, 80u, 80u), + MK_COL(80u, 216u, 216u), + MK_COL(80u, 216u, 40u), + MK_COL(80u, 216u, 176u), + MK_COL(80u, 216u, 80u), + MK_COL(80u, 40u, 216u), + MK_COL(80u, 40u, 40u), + MK_COL(80u, 40u, 176u), + MK_COL(80u, 40u, 80u), + MK_COL(80u, 176u, 216u), + MK_COL(80u, 176u, 40u), + MK_COL(80u, 176u, 176u), + MK_COL(80u, 176u, 80u), + MK_COL(80u, 80u, 216u), + MK_COL(80u, 80u, 40u), + MK_COL(80u, 80u, 176u), + MK_COL(80u, 80u, 80u), + MK_COL(0u, 0u, 192u), + MK_COL(0u, 0u, 64u), + MK_COL(0u, 0u, 128u), + MK_COL(0u, 192u, 0u), + MK_COL(0u, 192u, 192u), + MK_COL(0u, 192u, 64u), + MK_COL(0u, 192u, 128u), + MK_COL(0u, 64u, 0u), + MK_COL(0u, 64u, 192u), + MK_COL(0u, 64u, 64u), + MK_COL(0u, 64u, 128u), + MK_COL(0u, 128u, 0u), + MK_COL(0u, 128u, 192u), + MK_COL(0u, 128u, 64u), + MK_COL(0u, 128u, 128u), + MK_COL(176u, 216u, 216u), + MK_COL(176u, 216u, 40u), + MK_COL(176u, 216u, 176u), + MK_COL(176u, 216u, 80u), + MK_COL(176u, 40u, 216u), + MK_COL(176u, 40u, 40u), + MK_COL(176u, 40u, 176u), + MK_COL(176u, 40u, 80u), + MK_COL(176u, 176u, 216u), + MK_COL(176u, 176u, 40u), + MK_COL(176u, 176u, 176u), + MK_COL(176u, 176u, 80u), + MK_COL(176u, 80u, 216u), + MK_COL(176u, 80u, 40u), + MK_COL(176u, 80u, 176u), + MK_COL(176u, 80u, 80u), + MK_COL(192u, 0u, 0u), + MK_COL(192u, 0u, 192u), + MK_COL(192u, 0u, 64u), + MK_COL(192u, 0u, 128u), + MK_COL(192u, 192u, 0u), + MK_COL(192u, 192u, 192u), + MK_COL(192u, 192u, 64u), + MK_COL(192u, 192u, 128u), + MK_COL(192u, 64u, 0u), + MK_COL(192u, 64u, 192u), + MK_COL(192u, 64u, 64u), + MK_COL(192u, 64u, 128u), + MK_COL(192u, 128u, 0u), + MK_COL(192u, 128u, 192u), + MK_COL(192u, 128u, 64u), + MK_COL(192u, 128u, 128u), + MK_COL(64u, 0u, 0u), + MK_COL(64u, 0u, 192u), + MK_COL(64u, 0u, 64u), + MK_COL(64u, 0u, 128u), + MK_COL(64u, 192u, 0u), + MK_COL(64u, 192u, 192u), + MK_COL(64u, 192u, 64u), + MK_COL(64u, 192u, 128u), + MK_COL(64u, 64u, 0u), + MK_COL(64u, 64u, 192u), + MK_COL(64u, 64u, 64u), + MK_COL(64u, 64u, 128u), + MK_COL(64u, 128u, 0u), + MK_COL(64u, 128u, 192u), + MK_COL(64u, 128u, 64u), + MK_COL(64u, 128u, 128u), + MK_COL(128u, 0u, 0u), + MK_COL(128u, 0u, 192u), + MK_COL(128u, 0u, 64u), + MK_COL(128u, 0u, 128u), + MK_COL(128u, 192u, 0u), + MK_COL(128u, 192u, 192u), + MK_COL(128u, 192u, 64u), + MK_COL(128u, 192u, 128u), + MK_COL(128u, 64u, 0u), + MK_COL(128u, 64u, 192u), + MK_COL(128u, 64u, 64u), + MK_COL(128u, 64u, 128u), + MK_COL(128u, 128u, 0u), + MK_COL(128u, 128u, 192u), + MK_COL(128u, 128u, 64u), + MK_COL(128u, 128u, 128u), +}; + +#undef MK_COL + +//------------------------------------------------------------------------------ +// TODO(skal): move the functions to dsp/lossless.c when the correct +// granularity is found. For now, we'll just copy-paste some useful bits +// here instead. + +// In-place sum of each component with mod 256. +static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { + const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); + const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); + *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); +} + +static WEBP_INLINE uint32_t Clip255(uint32_t a) { + if (a < 256) { + return a; + } + // return 0, when a is a negative integer. + // return 255, when a is positive. + return ~a >> 24; +} + +// Delta palettization functions. +static WEBP_INLINE int Square(int x) { + return x * x; +} + +static WEBP_INLINE uint32_t Intensity(uint32_t a) { + return + 30 * ((a >> 16) & 0xff) + + 59 * ((a >> 8) & 0xff) + + 11 * ((a >> 0) & 0xff); +} + +static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value, + uint32_t palette_entry) { + int i; + uint32_t distance = 0; + AddPixelsEq(&predicted_value, palette_entry); + for (i = 0; i < 32; i += 8) { + const int32_t av = (actual_value >> i) & 0xff; + const int32_t pv = (predicted_value >> i) & 0xff; + distance += Square(pv - av); + } + // We sum square of intensity difference with factor 10, but because Intensity + // returns 100 times real intensity we need to multiply differences of colors + // by 1000. + distance *= 1000u; + distance += Square(Intensity(predicted_value) + - Intensity(actual_value)); + return distance; +} + +static uint32_t Predict(int x, int y, uint32_t* image) { + const uint32_t t = (y == 0) ? ARGB_BLACK : image[x]; + const uint32_t l = (x == 0) ? ARGB_BLACK : image[x - 1]; + const uint32_t p = + (((((t >> 24) & 0xff) + ((l >> 24) & 0xff)) / 2) << 24) + + (((((t >> 16) & 0xff) + ((l >> 16) & 0xff)) / 2) << 16) + + (((((t >> 8) & 0xff) + ((l >> 8) & 0xff)) / 2) << 8) + + (((((t >> 0) & 0xff) + ((l >> 0) & 0xff)) / 2) << 0); + if (x == 0 && y == 0) return ARGB_BLACK; + if (x == 0) return t; + if (y == 0) return l; + return p; +} + +static WEBP_INLINE int AddSubtractComponentFullWithCoefficient( + int a, int b, int c) { + return Clip255(a + ((b - c) >> 2)); +} + +static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient( + uint32_t c0, uint32_t c1, uint32_t c2) { + const int a = AddSubtractComponentFullWithCoefficient( + c0 >> 24, c1 >> 24, c2 >> 24); + const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff, + (c1 >> 16) & 0xff, + (c2 >> 16) & 0xff); + const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff, + (c1 >> 8) & 0xff, + (c2 >> 8) & 0xff); + const int b = AddSubtractComponentFullWithCoefficient( + c0 & 0xff, c1 & 0xff, c2 & 0xff); + return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; +} + +//------------------------------------------------------------------------------ + +// Find palette entry with minimum error from difference of actual pixel value +// and predicted pixel value. Propagate error of pixel to its top and left pixel +// in src array. Write predicted_value + palette_entry to new_image. Return +// index of best palette entry. +static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value, + const uint32_t palette[], int palette_size) { + int i; + int idx = 0; + uint32_t best_distance = CalcDist(predicted_value, src, palette[0]); + for (i = 1; i < palette_size; ++i) { + const uint32_t distance = CalcDist(predicted_value, src, palette[i]); + if (distance < best_distance) { + best_distance = distance; + idx = i; + } + } + return idx; +} + +static void ApplyBestPaletteEntry(int x, int y, + uint32_t new_value, uint32_t palette_value, + uint32_t* src, int src_stride, + uint32_t* new_image) { + AddPixelsEq(&new_value, palette_value); + if (x > 0) { + src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1], + new_value, src[x]); + } + if (y > 0) { + src[x - src_stride] = + ClampedAddSubtractFullWithCoefficient(src[x - src_stride], + new_value, src[x]); + } + new_image[x] = new_value; +} + +//------------------------------------------------------------------------------ +// Main entry point + +static WebPEncodingError ApplyDeltaPalette(uint32_t* src, uint32_t* dst, + uint32_t src_stride, + uint32_t dst_stride, + const uint32_t* palette, + int palette_size, + int width, int height, + int num_passes) { + int x, y; + WebPEncodingError err = VP8_ENC_OK; + uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image)); + uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); + if (new_image == NULL || tmp_row == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + while (num_passes--) { + uint32_t* cur_src = src; + uint32_t* cur_dst = dst; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const uint32_t predicted_value = Predict(x, y, new_image); + tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value, + palette, palette_size); + ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]], + cur_src, src_stride, new_image); + } + for (x = 0; x < width; ++x) { + cur_dst[x] = palette[tmp_row[x]]; + } + cur_src += src_stride; + cur_dst += dst_stride; + } + } + Error: + WebPSafeFree(new_image); + WebPSafeFree(tmp_row); + return err; +} + +// replaces enc->argb_ by a palettizable approximation of it, +// and generates optimal enc->palette_[] +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { + const WebPPicture* const pic = enc->pic_; + uint32_t* src = pic->argb; + uint32_t* dst = enc->argb_; + const int width = pic->width; + const int height = pic->height; + + WebPEncodingError err = VP8_ENC_OK; + memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette)); + enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u; + enc->palette_size_ = DELTA_PALETTE_SIZE; + err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_, + enc->palette_, enc->palette_size_, + width, height, 2); + if (err != VP8_ENC_OK) goto Error; + + Error: + return err; +} + +#else // !WEBP_EXPERIMENTAL_FEATURES + +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { + (void)enc; + return VP8_ENC_ERROR_INVALID_CONFIGURATION; +} + +#endif // WEBP_EXPERIMENTAL_FEATURES diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization.h b/src/3rdparty/libwebp/src/enc/delta_palettization.h new file mode 100644 index 0000000..e41c0c5 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/delta_palettization.h @@ -0,0 +1,25 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Author: Mislav Bradac (mislavm@google.com) +// + +#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_ +#define WEBP_ENC_DELTA_PALETTIZATION_H_ + +#include "../webp/encode.h" +#include "../enc/vp8li.h" + +// Replaces enc->argb_[] input by a palettizable approximation of it, +// and generates optimal enc->palette_[]. +// This function can revert enc->use_palette_ / enc->use_predict_ flag +// if delta-palettization is not producing expected saving. +WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc); + +#endif // WEBP_ENC_DELTA_PALETTIZATION_H_ diff --git a/src/3rdparty/libwebp/src/enc/filter.c b/src/3rdparty/libwebp/src/enc/filter.c index 11db4bd..41813cf 100644 --- a/src/3rdparty/libwebp/src/enc/filter.c +++ b/src/3rdparty/libwebp/src/enc/filter.c @@ -85,12 +85,12 @@ static void DoFilter(const VP8EncIterator* const it, int level) { const int ilevel = GetILevel(enc->config_->filter_sharpness, level); const int limit = 2 * level + ilevel; - uint8_t* const y_dst = it->yuv_out2_ + Y_OFF; - uint8_t* const u_dst = it->yuv_out2_ + U_OFF; - uint8_t* const v_dst = it->yuv_out2_ + V_OFF; + uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC; + uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC; + uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC; // copy current block to yuv_out2_ - memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t)); + memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t)); if (enc->filter_hdr_.simple_ == 1) { // simple VP8SimpleHFilter16i(y_dst, BPS, limit); @@ -195,13 +195,16 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { // compute SSIM in a 10 x 10 window for (x = 3; x < 13; x++) { for (y = 3; y < 13; y++) { - VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s); + VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16, &s); } } for (x = 1; x < 7; x++) { for (y = 1; y < 7; y++) { - VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s); - VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s); + VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8, &s); + VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8, &s); } } return VP8SSIMGet(&s); @@ -226,7 +229,7 @@ void VP8StoreFilterStats(VP8EncIterator* const it) { int d; VP8Encoder* const enc = it->enc_; const int s = it->mb_->segment_; - const int level0 = enc->dqm_[s].fstrength_; // TODO: ref_lf_delta[] + const int level0 = enc->dqm_[s].fstrength_; // explore +/-quant range of values around level0 const int delta_min = -enc->dqm_[s].quant_; diff --git a/src/3rdparty/libwebp/src/enc/frame.c b/src/3rdparty/libwebp/src/enc/frame.c index cdf1dab..5b7a40b 100644 --- a/src/3rdparty/libwebp/src/enc/frame.c +++ b/src/3rdparty/libwebp/src/enc/frame.c @@ -14,8 +14,9 @@ #include <string.h> #include <math.h> -#include "./vp8enci.h" #include "./cost.h" +#include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../webp/format_constants.h" // RIFF constants #define SEGMENT_VISU 0 @@ -81,11 +82,6 @@ static float ComputeNextQ(PassStats* const s) { //------------------------------------------------------------------------------ // Tables for level coding -const uint8_t VP8EncBands[16 + 1] = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, - 0 // sentinel -}; - const uint8_t VP8Cat3[] = { 173, 148, 140 }; const uint8_t VP8Cat4[] = { 176, 155, 140, 135 }; const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 }; @@ -96,7 +92,7 @@ const uint8_t VP8Cat6[] = // Reset the statistics about: number of skips, token proba, level cost,... static void ResetStats(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; VP8CalculateLevelCosts(proba); proba->nb_skip_ = 0; } @@ -112,7 +108,7 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) { // Returns the bit-cost for coding the skip probability. static int FinalizeSkipProba(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; const int nb_mbs = enc->mb_w_ * enc->mb_h_; const int nb_events = proba->nb_skip_; int size; @@ -140,11 +136,11 @@ static int BranchCost(int nb, int total, int proba) { } static void ResetTokenStats(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; memset(proba->stats_, 0, sizeof(proba->stats_)); } -static int FinalizeTokenProbas(VP8Proba* const proba) { +static int FinalizeTokenProbas(VP8EncProba* const proba) { int has_changed = 0; int size = 0; int t, b, c, p; @@ -476,9 +472,9 @@ static void StoreSSE(const VP8EncIterator* const it) { const uint8_t* const in = it->yuv_in_; const uint8_t* const out = it->yuv_out_; // Note: not totally accurate at boundary. And doesn't include in-loop filter. - enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF); - enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF); - enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF); + enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC); + enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC); + enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC); enc->sse_count_ += 16 * 16; } @@ -511,9 +507,9 @@ static void StoreSideInfo(const VP8EncIterator* const it) { } } #if SEGMENT_VISU // visualize segments and prediction modes - SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16); - SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8); - SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8); + SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16); + SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8); + SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8); #endif } @@ -743,7 +739,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { int num_pass_left = enc->config_->pass; const int do_search = enc->do_search_; VP8EncIterator it; - VP8Proba* const proba = &enc->proba_; + VP8EncProba* const proba = &enc->proba_; const VP8RDLevel rd_opt = enc->rd_opt_level_; const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384; PassStats stats; diff --git a/src/3rdparty/libwebp/src/enc/histogram.c b/src/3rdparty/libwebp/src/enc/histogram.c index a2266b4..869882d 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.c +++ b/src/3rdparty/libwebp/src/enc/histogram.c @@ -20,9 +20,6 @@ #include "../dsp/lossless.h" #include "../utils/utils.h" -#define ALIGN_CST 15 -#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) - #define MAX_COST 1.e38 // Number of partitions for the three dominant (literal, red and blue) symbol @@ -30,6 +27,8 @@ #define NUM_PARTITIONS 4 // The size of the bin-hash corresponding to the three dominant costs. #define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS) +// Maximum number of histograms allowed in greedy combining algorithm. +#define MAX_HISTO_GREEDY 100 static void HistogramClear(VP8LHistogram* const p) { uint32_t* const literal = p->literal_; @@ -40,6 +39,13 @@ static void HistogramClear(VP8LHistogram* const p) { p->literal_ = literal; } +// Swap two histogram pointers. +static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) { + VP8LHistogram* const tmp = *A; + *A = *B; + *B = tmp; +} + static void HistogramCopy(const VP8LHistogram* const src, VP8LHistogram* const dst) { uint32_t* const dst_literal = dst->literal_; @@ -106,7 +112,8 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { VP8LHistogramSet* set; const int histo_size = VP8LGetHistogramSize(cache_bits); const size_t total_size = - sizeof(*set) + size * (sizeof(*set->histograms) + histo_size + ALIGN_CST); + sizeof(*set) + size * (sizeof(*set->histograms) + + histo_size + WEBP_ALIGN_CST); uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); if (memory == NULL) return NULL; @@ -117,7 +124,7 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { set->max_size = size; set->size = size; for (i = 0; i < size; ++i) { - memory = (uint8_t*)DO_ALIGN(memory); + memory = (uint8_t*)WEBP_ALIGN(memory); set->histograms[i] = (VP8LHistogram*)memory; // literal_ won't necessary be aligned. set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); @@ -149,24 +156,26 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, } } -static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val, - double retval) { +// ----------------------------------------------------------------------------- +// Entropy-related functions. + +static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { double mix; - if (nonzeros < 5) { - if (nonzeros <= 1) { + if (entropy->nonzeros < 5) { + if (entropy->nonzeros <= 1) { return 0; } // Two symbols, they will be 0 and 1 in a Huffman code. // Let's mix in a bit of entropy to favor good clustering when // distributions of these are combined. - if (nonzeros == 2) { - return 0.99 * sum + 0.01 * retval; + if (entropy->nonzeros == 2) { + return 0.99 * entropy->sum + 0.01 * entropy->entropy; } // No matter what the entropy says, we cannot be better than min_limit // with Huffman coding. I am mixing a bit of entropy into the // min_limit since it produces much better (~0.5 %) compression results // perhaps because of better entropy clustering. - if (nonzeros == 3) { + if (entropy->nonzeros == 3) { mix = 0.95; } else { mix = 0.7; // nonzeros == 4. @@ -176,52 +185,22 @@ static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val, } { - double min_limit = 2 * sum - max_val; - min_limit = mix * min_limit + (1.0 - mix) * retval; - return (retval < min_limit) ? min_limit : retval; + double min_limit = 2 * entropy->sum - entropy->max_val; + min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; + return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; } } -static double BitsEntropy(const uint32_t* const array, int n) { - double retval = 0.; - uint32_t sum = 0; - int nonzeros = 0; - uint32_t max_val = 0; - int i; - for (i = 0; i < n; ++i) { - if (array[i] != 0) { - sum += array[i]; - ++nonzeros; - retval -= VP8LFastSLog2(array[i]); - if (max_val < array[i]) { - max_val = array[i]; - } - } +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol) { + VP8LBitEntropy entropy; + VP8LBitsEntropyUnrefined(array, n, &entropy); + if (trivial_symbol != NULL) { + *trivial_symbol = + (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM; } - retval += VP8LFastSLog2(sum); - return BitsEntropyRefine(nonzeros, sum, max_val, retval); -} -static double BitsEntropyCombined(const uint32_t* const X, - const uint32_t* const Y, int n) { - double retval = 0.; - int sum = 0; - int nonzeros = 0; - int max_val = 0; - int i; - for (i = 0; i < n; ++i) { - const int xy = X[i] + Y[i]; - if (xy != 0) { - sum += xy; - ++nonzeros; - retval -= VP8LFastSLog2(xy); - if (max_val < xy) { - max_val = xy; - } - } - } - retval += VP8LFastSLog2(sum); - return BitsEntropyRefine(nonzeros, sum, max_val, retval); + return BitsEntropyRefine(&entropy); } static double InitialHuffmanCost(void) { @@ -242,47 +221,40 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) { return retval; } -// Trampolines -static double HuffmanCost(const uint32_t* const population, int length) { - const VP8LStreaks stats = VP8LHuffmanCostCount(population, length); - return FinalHuffmanCost(&stats); -} +// Get the symbol entropy for the distribution 'population'. +// Set 'trivial_sym', if there's only one symbol present in the distribution. +static double PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); + if (trivial_sym != NULL) { + *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code + : VP8L_NON_TRIVIAL_SYM; + } -static double HuffmanCostCombined(const uint32_t* const X, - const uint32_t* const Y, int length) { - const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length); - return FinalHuffmanCost(&stats); + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); } -// Aggregated costs -static double PopulationCost(const uint32_t* const population, int length) { - return BitsEntropy(population, length) + HuffmanCost(population, length); -} +static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, + int length) { + VP8LBitEntropy bit_entropy; + VP8LStreaks stats; + VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); -static double GetCombinedEntropy(const uint32_t* const X, - const uint32_t* const Y, int length) { - return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); + return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); } // Estimates the Entropy + Huffman + other block overhead size cost. double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { return - PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_)) - + PopulationCost(p->red_, NUM_LITERAL_CODES) - + PopulationCost(p->blue_, NUM_LITERAL_CODES) - + PopulationCost(p->alpha_, NUM_LITERAL_CODES) - + PopulationCost(p->distance_, NUM_DISTANCE_CODES) - + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) - + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); -} - -double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { - return - BitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_)) - + BitsEntropy(p->red_, NUM_LITERAL_CODES) - + BitsEntropy(p->blue_, NUM_LITERAL_CODES) - + BitsEntropy(p->alpha_, NUM_LITERAL_CODES) - + BitsEntropy(p->distance_, NUM_DISTANCE_CODES) + PopulationCost( + p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL) + + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL) + + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL) + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); } @@ -313,8 +285,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, if (*cost > cost_threshold) return 0; *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); - *cost += VP8LExtraCostCombined(a->distance_, b->distance_, - NUM_DISTANCE_CODES); + *cost += + VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); if (*cost > cost_threshold) return 0; return 1; @@ -338,6 +310,8 @@ static double HistogramAddEval(const VP8LHistogram* const a, VP8LHistogramAdd(a, b, out); out->bit_cost_ = cost; out->palette_code_bits_ = a->palette_code_bits_; + out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ? + a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM; } return cost - sum_cost; @@ -389,18 +363,26 @@ static void UpdateDominantCostRange( } static void UpdateHistogramCost(VP8LHistogram* const h) { - const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES); + uint32_t alpha_sym, red_sym, blue_sym; + const double alpha_cost = + PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym); const double distance_cost = - PopulationCost(h->distance_, NUM_DISTANCE_CODES) + + PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); - h->literal_cost_ = PopulationCost(h->literal_, num_codes) + + h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); - h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES); - h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES); + h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); + h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + alpha_cost + distance_cost; + if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { + h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM; + } else { + h->trivial_symbol_ = + ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0); + } } static int GetBinIdForEntropy(double min, double max, double val) { @@ -409,7 +391,14 @@ static int GetBinIdForEntropy(double min, double max, double val) { return (int)(NUM_PARTITIONS * delta / range); } -// TODO(vikasa): Evaluate, if there's any correlation between red & blue. +static int GetHistoBinIndexLowEffort( + const VP8LHistogram* const h, const DominantCostRange* const c) { + const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + h->literal_cost_); + assert(bin_id < NUM_PARTITIONS); + return bin_id; +} + static int GetHistoBinIndex( const VP8LHistogram* const h, const DominantCostRange* const c) { const int bin_id = @@ -432,7 +421,6 @@ static void HistogramBuild( VP8LHistogram** const histograms = image_histo->histograms; VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs); assert(histo_bits > 0); - // Construct the Histo from a given backward references. while (VP8LRefsCursorOk(&c)) { const PixOrCopy* const v = c.cur_pos; const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); @@ -463,8 +451,8 @@ static void HistogramCopyAndAnalyze( // Partition histograms to different entropy bins for three dominant (literal, // red and blue) symbol costs and compute the histogram aggregate bit_cost. -static void HistogramAnalyzeEntropyBin( - VP8LHistogramSet* const image_histo, int16_t* const bin_map) { +static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, + int16_t* const bin_map, int low_effort) { int i; VP8LHistogram** const histograms = image_histo->histograms; const int histo_size = image_histo->size; @@ -483,7 +471,9 @@ static void HistogramAnalyzeEntropyBin( for (i = 0; i < histo_size; ++i) { int num_histos; VP8LHistogram* const histo = histograms[i]; - const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range); + const int16_t bin_id = low_effort ? + (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) : + (int16_t)GetHistoBinIndex(histo, &cost_range); const int bin_offset = bin_id * bin_depth; // bin_map[n][0] for every bin 'n' maintains the counter for the number of // histograms in that bin. @@ -495,64 +485,79 @@ static void HistogramAnalyzeEntropyBin( } } -// Compact the histogram set by moving the valid one left in the set to the -// head and moving the ones that have been merged to other histograms towards -// the end. -// TODO(vikasa): Evaluate if this method can be avoided by altering the code -// logic of HistogramCombineEntropyBin main loop. +// Compact the histogram set by removing unused entries. static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { - int start = 0; - int end = image_histo->size - 1; VP8LHistogram** const histograms = image_histo->histograms; - while (start < end) { - while (start <= end && histograms[start] != NULL && - histograms[start]->bit_cost_ != 0.) { - ++start; - } - while (start <= end && histograms[end]->bit_cost_ == 0.) { - histograms[end] = NULL; - --end; - } - if (start < end) { - assert(histograms[start] != NULL); - assert(histograms[end] != NULL); - HistogramCopy(histograms[end], histograms[start]); - histograms[end] = NULL; - --end; + int i, j; + + for (i = 0, j = 0; i < image_histo->size; ++i) { + if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) { + if (j < i) { + histograms[j] = histograms[i]; + histograms[i] = NULL; + } + ++j; } } - image_histo->size = end + 1; + image_histo->size = j; } -static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, - VP8LHistogram* const histos, - int16_t* const bin_map, int bin_depth, - double combine_cost_factor) { +static VP8LHistogram* HistogramCombineEntropyBin( + VP8LHistogramSet* const image_histo, + VP8LHistogram* cur_combo, + int16_t* const bin_map, int bin_depth, int num_bins, + double combine_cost_factor, int low_effort) { int bin_id; - VP8LHistogram* cur_combo = histos; VP8LHistogram** const histograms = image_histo->histograms; - for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) { + for (bin_id = 0; bin_id < num_bins; ++bin_id) { const int bin_offset = bin_id * bin_depth; const int num_histos = bin_map[bin_offset]; const int idx1 = bin_map[bin_offset + 1]; + int num_combine_failures = 0; int n; for (n = 2; n <= num_histos; ++n) { const int idx2 = bin_map[bin_offset + n]; - const double bit_cost_idx2 = histograms[idx2]->bit_cost_; - if (bit_cost_idx2 > 0.) { - const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; - const double curr_cost_diff = - HistogramAddEval(histograms[idx1], histograms[idx2], - cur_combo, bit_cost_thresh); - if (curr_cost_diff < bit_cost_thresh) { - HistogramCopy(cur_combo, histograms[idx1]); - histograms[idx2]->bit_cost_ = 0.; + if (low_effort) { + // Merge all histograms with the same bin index, irrespective of cost of + // the merged histograms. + VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + const double bit_cost_idx2 = histograms[idx2]->bit_cost_; + if (bit_cost_idx2 > 0.) { + const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + // Try to merge two histograms only if the combo is a trivial one or + // the two candidate histograms are already non-trivial. + // For some images, 'try_combine' turns out to be false for a lot of + // histogram pairs. In that case, we fallback to combining + // histograms as usual to avoid increasing the header size. + const int try_combine = + (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || + ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && + (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); + const int max_combine_failures = 32; + if (try_combine || (num_combine_failures >= max_combine_failures)) { + HistogramSwap(&cur_combo, &histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + ++num_combine_failures; + } + } } } } + if (low_effort) { + // Update the bit_cost for the merged histograms (per bin index). + UpdateHistogramCost(histograms[idx1]); + } } HistogramCompactBins(image_histo); + return cur_combo; } static uint32_t MyRand(uint32_t *seed) { @@ -563,8 +568,179 @@ static uint32_t MyRand(uint32_t *seed) { return *seed; } -static void HistogramCombine(VP8LHistogramSet* const image_histo, - VP8LHistogramSet* const histos, int quality) { +// ----------------------------------------------------------------------------- +// Histogram pairs priority queue + +// Pair of histograms. Negative idx1 value means that pair is out-of-date. +typedef struct { + int idx1; + int idx2; + double cost_diff; + double cost_combo; +} HistogramPair; + +typedef struct { + HistogramPair* queue; + int size; + int max_size; +} HistoQueue; + +static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) { + histo_queue->size = 0; + // max_index^2 for the queue size is safe. If you look at + // HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes + // data to the queue, you insert at most: + // - max_index*(max_index-1)/2 (the first two for loops) + // - max_index - 1 in the last for loop at the first iteration of the while + // loop, max_index - 2 at the second iteration ... therefore + // max_index*(max_index-1)/2 overall too + histo_queue->max_size = max_index * max_index; + // We allocate max_size + 1 because the last element at index "size" is + // used as temporary data (and it could be up to max_size). + histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1, + sizeof(*histo_queue->queue)); + return histo_queue->queue != NULL; +} + +static void HistoQueueClear(HistoQueue* const histo_queue) { + assert(histo_queue != NULL); + WebPSafeFree(histo_queue->queue); +} + +static void SwapHistogramPairs(HistogramPair *p1, + HistogramPair *p2) { + const HistogramPair tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +// Given a valid priority queue in range [0, queue_size) this function checks +// whether histo_queue[queue_size] should be accepted and swaps it with the +// front if it is smaller. Otherwise, it leaves it as is. +static void UpdateQueueFront(HistoQueue* const histo_queue) { + if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return; + + if (histo_queue->queue[histo_queue->size].cost_diff < + histo_queue->queue[0].cost_diff) { + SwapHistogramPairs(histo_queue->queue, + histo_queue->queue + histo_queue->size); + } + ++histo_queue->size; + + // We cannot add more elements than the capacity. + // The allocation adds an extra element to the official capacity so that + // histo_queue->queue[histo_queue->max_size] is read/written within bound. + assert(histo_queue->size <= histo_queue->max_size); +} + +// ----------------------------------------------------------------------------- + +static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, + HistogramPair* const pair, + VP8LHistogram* const histos) { + if (idx1 > idx2) { + const int tmp = idx2; + idx2 = idx1; + idx1 = tmp; + } + pair->idx1 = idx1; + pair->idx2 = idx2; + pair->cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0); + pair->cost_combo = histos->bit_cost_; +} + +// Combines histograms by continuously choosing the one with the highest cost +// reduction. +static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, + VP8LHistogram* const histos) { + int ok = 0; + int image_histo_size = image_histo->size; + int i, j; + VP8LHistogram** const histograms = image_histo->histograms; + // Indexes of remaining histograms. + int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters)); + // Priority queue of histogram pairs. + HistoQueue histo_queue; + + if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) { + goto End; + } + + for (i = 0; i < image_histo_size; ++i) { + // Initialize clusters indexes. + clusters[i] = i; + for (j = i + 1; j < image_histo_size; ++j) { + // Initialize positions array. + PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size], + histos); + UpdateQueueFront(&histo_queue); + } + } + + while (image_histo_size > 1 && histo_queue.size > 0) { + HistogramPair* copy_to; + const int idx1 = histo_queue.queue[0].idx1; + const int idx2 = histo_queue.queue[0].idx2; + VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); + histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo; + // Remove merged histogram. + for (i = 0; i + 1 < image_histo_size; ++i) { + if (clusters[i] >= idx2) { + clusters[i] = clusters[i + 1]; + } + } + --image_histo_size; + + // Remove pairs intersecting the just combined best pair. This will + // therefore pop the head of the queue. + copy_to = histo_queue.queue; + for (i = 0; i < histo_queue.size; ++i) { + HistogramPair* const p = histo_queue.queue + i; + if (p->idx1 == idx1 || p->idx2 == idx1 || + p->idx1 == idx2 || p->idx2 == idx2) { + // Do not copy the invalid pair. + continue; + } + if (p->cost_diff < histo_queue.queue[0].cost_diff) { + // Replace the top of the queue if we found better. + SwapHistogramPairs(histo_queue.queue, p); + } + SwapHistogramPairs(copy_to, p); + ++copy_to; + } + histo_queue.size = (int)(copy_to - histo_queue.queue); + + // Push new pairs formed with combined histogram to the queue. + for (i = 0; i < image_histo_size; ++i) { + if (clusters[i] != idx1) { + PreparePair(histograms, idx1, clusters[i], + &histo_queue.queue[histo_queue.size], histos); + UpdateQueueFront(&histo_queue); + } + } + } + // Move remaining histograms to the beginning of the array. + for (i = 0; i < image_histo_size; ++i) { + if (i != clusters[i]) { // swap the two histograms + HistogramSwap(&histograms[i], &histograms[clusters[i]]); + } + } + + image_histo->size = image_histo_size; + ok = 1; + + End: + WebPSafeFree(clusters); + HistoQueueClear(&histo_queue); + return ok; +} + +static VP8LHistogram* HistogramCombineStochastic( + VP8LHistogramSet* const image_histo, + VP8LHistogram* tmp_histo, + VP8LHistogram* best_combo, + int quality, int min_cluster_size) { int iter; uint32_t seed = 0; int tries_with_no_success = 0; @@ -573,12 +749,10 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo, const int outer_iters = image_histo_size * iter_mult; const int num_pairs = image_histo_size / 2; const int num_tries_no_success = outer_iters / 2; - const int min_cluster_size = 2; VP8LHistogram** const histograms = image_histo->histograms; - VP8LHistogram* cur_combo = histos->histograms[0]; // trial histogram - VP8LHistogram* best_combo = histos->histograms[1]; // best histogram so far // Collapse similar histograms in 'image_histo'. + ++min_cluster_size; for (iter = 0; iter < outer_iters && image_histo_size >= min_cluster_size; ++iter) { @@ -602,13 +776,9 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo, // Calculate cost reduction on combining. curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], - cur_combo, best_cost_diff); + tmp_histo, best_cost_diff); if (curr_cost_diff < best_cost_diff) { // found a better pair? - { // swap cur/best combo histograms - VP8LHistogram* const tmp_histo = cur_combo; - cur_combo = best_combo; - best_combo = tmp_histo; - } + HistogramSwap(&best_combo, &tmp_histo); best_cost_diff = curr_cost_diff; best_idx1 = idx1; best_idx2 = idx2; @@ -616,11 +786,11 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo, } if (best_idx1 >= 0) { - HistogramCopy(best_combo, histograms[best_idx1]); + HistogramSwap(&best_combo, &histograms[best_idx1]); // swap best_idx2 slot with last one (which is now unused) --image_histo_size; if (best_idx2 != image_histo_size) { - HistogramCopy(histograms[image_histo_size], histograms[best_idx2]); + HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); histograms[image_histo_size] = NULL; } tries_with_no_success = 0; @@ -630,6 +800,7 @@ static void HistogramCombine(VP8LHistogramSet* const image_histo, } } image_histo->size = image_histo_size; + return best_combo; } // ----------------------------------------------------------------------------- @@ -643,28 +814,37 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo, int i; VP8LHistogram** const orig_histograms = orig_histo->histograms; VP8LHistogram** const histograms = image_histo->histograms; - for (i = 0; i < orig_histo->size; ++i) { - int best_out = 0; - double best_bits = - HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); - int k; - for (k = 1; k < image_histo->size; ++k) { - const double cur_bits = - HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); - if (cur_bits < best_bits) { - best_bits = cur_bits; - best_out = k; + const int orig_histo_size = orig_histo->size; + const int image_histo_size = image_histo->size; + if (image_histo_size > 1) { + for (i = 0; i < orig_histo_size; ++i) { + int best_out = 0; + double best_bits = + HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); + int k; + for (k = 1; k < image_histo_size; ++k) { + const double cur_bits = + HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); + if (cur_bits < best_bits) { + best_bits = cur_bits; + best_out = k; + } } + symbols[i] = best_out; + } + } else { + assert(image_histo_size == 1); + for (i = 0; i < orig_histo_size; ++i) { + symbols[i] = 0; } - symbols[i] = best_out; } // Recompute each out based on raw and symbols. - for (i = 0; i < image_histo->size; ++i) { + for (i = 0; i < image_histo_size; ++i) { HistogramClear(histograms[i]); } - for (i = 0; i < orig_histo->size; ++i) { + for (i = 0; i < orig_histo_size; ++i) { const int idx = symbols[i]; VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]); } @@ -672,44 +852,48 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo, static double GetCombineCostFactor(int histo_size, int quality) { double combine_cost_factor = 0.16; - if (histo_size > 256) combine_cost_factor /= 2.; - if (histo_size > 512) combine_cost_factor /= 2.; - if (histo_size > 1024) combine_cost_factor /= 2.; - if (quality <= 50) combine_cost_factor /= 2.; + if (quality < 90) { + if (histo_size > 256) combine_cost_factor /= 2.; + if (histo_size > 512) combine_cost_factor /= 2.; + if (histo_size > 1024) combine_cost_factor /= 2.; + if (quality <= 50) combine_cost_factor /= 2.; + } return combine_cost_factor; } int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histo_bits, int cache_bits, + int quality, int low_effort, + int histo_bits, int cache_bits, VP8LHistogramSet* const image_histo, + VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols) { int ok = 0; const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; const int image_histo_raw_size = histo_xsize * histo_ysize; + const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; // The bin_map for every bin follows following semantics: // bin_map[n][0] = num_histo; // The number of histograms in that bin. // bin_map[n][1] = index of first histogram in that bin; // bin_map[n][num_histo] = index of last histogram in that bin; - // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices. + // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices. const int bin_depth = image_histo_raw_size + 1; int16_t* bin_map = NULL; - VP8LHistogramSet* const histos = VP8LAllocateHistogramSet(2, cache_bits); VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); + VP8LHistogram* cur_combo; + const int entropy_combine = + (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100); - if (orig_histo == NULL || histos == NULL) { - goto Error; - } + if (orig_histo == NULL) goto Error; // Don't attempt linear bin-partition heuristic for: // histograms of small sizes, as bin_map will be very sparse and; - // Higher qualities (> 90), to preserve the compression gains at those - // quality settings. - if (orig_histo->size > 2 * BIN_SIZE && quality < 90) { - const int bin_map_size = bin_depth * BIN_SIZE; + // Maximum quality (q==100), to preserve the compression gains at that level. + if (entropy_combine) { + const int bin_map_size = bin_depth * entropy_combine_num_bins; bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); if (bin_map == NULL) goto Error; } @@ -719,18 +903,33 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, // Copies the histograms and computes its bit_cost. HistogramCopyAndAnalyze(orig_histo, image_histo); - if (bin_map != NULL) { + cur_combo = tmp_histos->histograms[1]; // pick up working slot + if (entropy_combine) { const double combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); - HistogramAnalyzeEntropyBin(orig_histo, bin_map); + HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); // Collapse histograms with similar entropy. - HistogramCombineEntropyBin(image_histo, histos->histograms[0], - bin_map, bin_depth, combine_cost_factor); + cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, + bin_depth, entropy_combine_num_bins, + combine_cost_factor, low_effort); } - // Collapse similar histograms by random histogram-pair compares. - HistogramCombine(image_histo, histos, quality); + // Don't combine the histograms using stochastic and greedy heuristics for + // low-effort compression mode. + if (!low_effort || !entropy_combine) { + const float x = quality / 100.f; + // cubic ramp between 1 and MAX_HISTO_GREEDY: + const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); + cur_combo = HistogramCombineStochastic(image_histo, + tmp_histos->histograms[0], + cur_combo, quality, threshold_size); + if ((image_histo->size <= threshold_size) && + !HistogramCombineGreedy(image_histo, cur_combo)) { + goto Error; + } + } + // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also. // Find the optimal map from original histograms to the final ones. HistogramRemap(orig_histo, image_histo, histogram_symbols); @@ -739,6 +938,5 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, Error: WebPSafeFree(bin_map); VP8LFreeHistogramSet(orig_histo); - VP8LFreeHistogramSet(histos); return ok; } diff --git a/src/3rdparty/libwebp/src/enc/histogram.h b/src/3rdparty/libwebp/src/enc/histogram.h index 1cf4c54..d303d1d 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.h +++ b/src/3rdparty/libwebp/src/enc/histogram.h @@ -14,10 +14,6 @@ #ifndef WEBP_ENC_HISTOGRAM_H_ #define WEBP_ENC_HISTOGRAM_H_ -#include <assert.h> -#include <stddef.h> -#include <stdlib.h> -#include <stdio.h> #include <string.h> #include "./backward_references.h" @@ -28,6 +24,9 @@ extern "C" { #endif +// Not a trivial literal symbol. +#define VP8L_NON_TRIVIAL_SYM (0xffffffff) + // A simple container for histograms of data. typedef struct { // literal_ contains green literal, palette-code and @@ -39,9 +38,11 @@ typedef struct { // Backward reference prefix-code histogram. uint32_t distance_[NUM_DISTANCE_CODES]; int palette_code_bits_; - double bit_cost_; // cached value of VP8LHistogramEstimateBits(this) - double literal_cost_; // Cached values of dominant entropy costs: - double red_cost_; // literal, red & blue. + uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha + // literal symbols are single valued. + double bit_cost_; // cached value of bit cost. + double literal_cost_; // Cached values of dominant entropy costs: + double red_cost_; // literal, red & blue. double blue_cost_; } VP8LHistogram; @@ -91,14 +92,6 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits); void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, const PixOrCopy* const v); -// Estimate how many bits the combined entropy of literals and distance -// approximately maps to. -double VP8LHistogramEstimateBits(const VP8LHistogram* const p); - -// This function estimates the cost in bits excluding the bits needed to -// represent the entropy code itself. -double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p); - static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { return NUM_LITERAL_CODES + NUM_LENGTH_CODES + ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); @@ -107,10 +100,22 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { // Builds the histogram image. int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histogram_bits, int cache_bits, + int quality, int low_effort, + int histogram_bits, int cache_bits, VP8LHistogramSet* const image_in, + VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols); +// Returns the entropy for the symbols in the input array. +// Also sets trivial_symbol to the code value, if the array has only one code +// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM. +double VP8LBitsEntropy(const uint32_t* const array, int n, + uint32_t* const trivial_symbol); + +// Estimate how many bits the combined entropy of literals and distance +// approximately maps to. +double VP8LHistogramEstimateBits(const VP8LHistogram* const p); + #ifdef __cplusplus } #endif diff --git a/src/3rdparty/libwebp/src/enc/iterator.c b/src/3rdparty/libwebp/src/enc/iterator.c index e42ad00..99d960a 100644 --- a/src/3rdparty/libwebp/src/enc/iterator.c +++ b/src/3rdparty/libwebp/src/enc/iterator.c @@ -70,13 +70,13 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { it->enc_ = enc; it->y_stride_ = enc->pic_->y_stride; it->uv_stride_ = enc->pic_->uv_stride; - it->yuv_in_ = (uint8_t*)DO_ALIGN(it->yuv_mem_); - it->yuv_out_ = it->yuv_in_ + YUV_SIZE; - it->yuv_out2_ = it->yuv_out_ + YUV_SIZE; - it->yuv_p_ = it->yuv_out2_ + YUV_SIZE; + it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); + it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC; + it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC; + it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC; it->lf_stats_ = enc->lf_stats_; it->percent0_ = enc->percent_; - it->y_left_ = (uint8_t*)DO_ALIGN(it->yuv_left_mem_ + 1); + it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); it->u_left_ = it->y_left_ + 16 + 16; it->v_left_ = it->u_left_ + 16; VP8IteratorReset(it); @@ -136,9 +136,9 @@ void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) { const int uv_w = (w + 1) >> 1; const int uv_h = (h + 1) >> 1; - ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF, w, h, 16); - ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF, uv_w, uv_h, 8); - ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF, uv_w, uv_h, 8); + ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16); + ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8); + ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8); if (tmp_32 == NULL) return; @@ -185,9 +185,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) { const VP8Encoder* const enc = it->enc_; if (enc->config_->show_compressed) { const int x = it->x_, y = it->y_; - const uint8_t* const ysrc = it->yuv_out_ + Y_OFF; - const uint8_t* const usrc = it->yuv_out_ + U_OFF; - const uint8_t* const vsrc = it->yuv_out_ + V_OFF; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC; + const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC; const WebPPicture* const pic = enc->pic_; uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16; uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8; @@ -286,8 +286,8 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) { void VP8IteratorSaveBoundary(VP8EncIterator* const it) { VP8Encoder* const enc = it->enc_; const int x = it->x_, y = it->y_; - const uint8_t* const ysrc = it->yuv_out_ + Y_OFF; - const uint8_t* const uvsrc = it->yuv_out_ + U_OFF; + const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC; if (x < enc->mb_w_ - 1) { // left int i; for (i = 0; i < 16; ++i) { diff --git a/src/3rdparty/libwebp/src/enc/near_lossless.c b/src/3rdparty/libwebp/src/enc/near_lossless.c new file mode 100644 index 0000000..9bc0f0e --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/near_lossless.c @@ -0,0 +1,160 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Near-lossless image preprocessing adjusts pixel values to help +// compressibility with a guarantee of maximum deviation between original and +// resulting pixel values. +// +// Author: Jyrki Alakuijala (jyrki@google.com) +// Converted to C by Aleksander Kramarz (akramarz@google.com) + +#include <stdlib.h> + +#include "../dsp/lossless.h" +#include "../utils/utils.h" +#include "./vp8enci.h" + +#define MIN_DIM_FOR_NEAR_LOSSLESS 64 +#define MAX_LIMIT_BITS 5 + +// Computes quantized pixel value and distance from original value. +static void GetValAndDistance(int a, int initial, int bits, + int* const val, int* const distance) { + const int mask = ~((1 << bits) - 1); + *val = (initial & mask) | (initial >> (8 - bits)); + *distance = 2 * abs(a - *val); +} + +// Clamps the value to range [0, 255]. +static int Clamp8b(int val) { + const int min_val = 0; + const int max_val = 0xff; + return (val < min_val) ? min_val : (val > max_val) ? max_val : val; +} + +// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one. +static int FindClosestDiscretized(int a, int bits) { + int best_val = a, i; + int min_distance = 256; + + for (i = -1; i <= 1; ++i) { + int candidate, distance; + const int val = Clamp8b(a + i * (1 << bits)); + GetValAndDistance(a, val, bits, &candidate, &distance); + if (i != 0) { + ++distance; + } + // Smallest distance but favor i == 0 over i == -1 and i == 1 + // since that keeps the overall intensity more constant in the + // images. + if (distance < min_distance) { + min_distance = distance; + best_val = candidate; + } + } + return best_val; +} + +// Applies FindClosestDiscretized to all channels of pixel. +static uint32_t ClosestDiscretizedArgb(uint32_t a, int bits) { + return + (FindClosestDiscretized(a >> 24, bits) << 24) | + (FindClosestDiscretized((a >> 16) & 0xff, bits) << 16) | + (FindClosestDiscretized((a >> 8) & 0xff, bits) << 8) | + (FindClosestDiscretized(a & 0xff, bits)); +} + +// Checks if distance between corresponding channel values of pixels a and b +// is within the given limit. +static int IsNear(uint32_t a, uint32_t b, int limit) { + int k; + for (k = 0; k < 4; ++k) { + const int delta = + (int)((a >> (k * 8)) & 0xff) - (int)((b >> (k * 8)) & 0xff); + if (delta >= limit || delta <= -limit) { + return 0; + } + } + return 1; +} + +static int IsSmooth(const uint32_t* const prev_row, + const uint32_t* const curr_row, + const uint32_t* const next_row, + int ix, int limit) { + // Check that all pixels in 4-connected neighborhood are smooth. + return (IsNear(curr_row[ix], curr_row[ix - 1], limit) && + IsNear(curr_row[ix], curr_row[ix + 1], limit) && + IsNear(curr_row[ix], prev_row[ix], limit) && + IsNear(curr_row[ix], next_row[ix], limit)); +} + +// Adjusts pixel values of image with given maximum error. +static void NearLossless(int xsize, int ysize, uint32_t* argb, + int limit_bits, uint32_t* copy_buffer) { + int x, y; + const int limit = 1 << limit_bits; + uint32_t* prev_row = copy_buffer; + uint32_t* curr_row = prev_row + xsize; + uint32_t* next_row = curr_row + xsize; + memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0])); + + for (y = 1; y < ysize - 1; ++y) { + uint32_t* const curr_argb_row = argb + y * xsize; + uint32_t* const next_argb_row = curr_argb_row + xsize; + memcpy(next_row, next_argb_row, xsize * sizeof(argb[0])); + for (x = 1; x < xsize - 1; ++x) { + if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) { + curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits); + } + } + { + // Three-way swap. + uint32_t* const temp = prev_row; + prev_row = curr_row; + curr_row = next_row; + next_row = temp; + } + } +} + +static int QualityToLimitBits(int quality) { + // quality mapping: + // 0..19 -> 5 + // 0..39 -> 4 + // 0..59 -> 3 + // 0..79 -> 2 + // 0..99 -> 1 + // 100 -> 0 + return MAX_LIMIT_BITS - quality / 20; +} + +int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) { + int i; + uint32_t* const copy_buffer = + (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer)); + const int limit_bits = QualityToLimitBits(quality); + assert(argb != NULL); + assert(limit_bits >= 0); + assert(limit_bits <= MAX_LIMIT_BITS); + if (copy_buffer == NULL) { + return 0; + } + // For small icon images, don't attempt to apply near-lossless compression. + if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) { + WebPSafeFree(copy_buffer); + return 1; + } + + for (i = limit_bits; i != 0; --i) { + NearLossless(xsize, ysize, argb, i, copy_buffer); + } + WebPSafeFree(copy_buffer); + return 1; +} diff --git a/src/3rdparty/libwebp/src/enc/picture.c b/src/3rdparty/libwebp/src/enc/picture.c index 9a66fbe..26679a7 100644 --- a/src/3rdparty/libwebp/src/enc/picture.c +++ b/src/3rdparty/libwebp/src/enc/picture.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../utils/utils.h" //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/picture_csp.c b/src/3rdparty/libwebp/src/enc/picture_csp.c index 7875f62..0ef5f9e 100644 --- a/src/3rdparty/libwebp/src/enc/picture_csp.c +++ b/src/3rdparty/libwebp/src/enc/picture_csp.c @@ -32,10 +32,6 @@ static const union { } test_endian = { 0xff000000u }; #define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) -static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { - return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); -} - //------------------------------------------------------------------------------ // Detection of non-trivial transparency @@ -89,9 +85,9 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { static int kLinearToGammaTab[kGammaTabSize + 1]; static uint16_t kGammaToLinearTab[256]; -static int kGammaTablesOk = 0; +static volatile int kGammaTablesOk = 0; -static void InitGammaTables(void) { +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { if (!kGammaTablesOk) { int v; const double scale = (double)(1 << kGammaTabFix) / kGammaScale; @@ -130,7 +126,7 @@ static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { #else -static void InitGammaTables(void) {} +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {} static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { return (int)(base_value << shift); @@ -162,19 +158,15 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { static const int kNumIterations = 6; static const int kMinDimensionIterativeConversion = 4; -// We use a-priori a different precision for storing RGB and Y/W components -// We could use YFIX=0 and only uint8_t for fixed_y_t, but it produces some +// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some // banding sometimes. Better use extra precision. -// TODO(skal): cleanup once TFIX/YFIX values are fixed. +#define SFIX 2 // fixed-point precision of RGB and Y/W +typedef int16_t fixed_t; // signed type with extra SFIX precision for UV +typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W -typedef int16_t fixed_t; // signed type with extra TFIX precision for UV -typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W -#define TFIX 6 // fixed-point precision of RGB -#define YFIX 2 // fixed point precision for Y/W - -#define THALF ((1 << TFIX) >> 1) -#define MAX_Y_T ((256 << YFIX) - 1) -#define TROUNDER (1 << (YUV_FIX + TFIX - 1)) +#define SHALF (1 << SFIX >> 1) +#define MAX_Y_T ((256 << SFIX) - 1) +#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) #if defined(USE_GAMMA_COMPRESSION) @@ -184,9 +176,9 @@ typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W #define kGammaF 2.2 static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX static float kLinearToGammaTabF[kGammaTabSize + 2]; -static int kGammaTablesFOk = 0; +static volatile int kGammaTablesFOk = 0; -static void InitGammaTablesF(void) { +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { if (!kGammaTablesFOk) { int v; const double norm = 1. / MAX_Y_T; @@ -207,52 +199,31 @@ static WEBP_INLINE float GammaToLinearF(int v) { return kGammaToLinearTabF[v]; } -static WEBP_INLINE float LinearToGammaF(float value) { +static WEBP_INLINE int LinearToGammaF(float value) { const float v = value * kGammaTabSize; const int tab_pos = (int)v; const float x = v - (float)tab_pos; // fractional part const float v0 = kLinearToGammaTabF[tab_pos + 0]; const float v1 = kLinearToGammaTabF[tab_pos + 1]; const float y = v1 * x + v0 * (1.f - x); // interpolate - return y; + return (int)(y + .5); } #else -static void InitGammaTablesF(void) {} +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {} static WEBP_INLINE float GammaToLinearF(int v) { const float norm = 1.f / MAX_Y_T; return norm * v; } -static WEBP_INLINE float LinearToGammaF(float value) { - return MAX_Y_T * value; +static WEBP_INLINE int LinearToGammaF(float value) { + return (int)(MAX_Y_T * value + .5); } #endif // USE_GAMMA_COMPRESSION //------------------------------------------------------------------------------ -// precision: YFIX -> TFIX -static WEBP_INLINE int FixedYToW(int v) { -#if TFIX == YFIX - return v; -#elif TFIX >= YFIX - return v << (TFIX - YFIX); -#else - return v >> (YFIX - TFIX); -#endif -} - -static WEBP_INLINE int FixedWToY(int v) { -#if TFIX == YFIX - return v; -#elif YFIX >= TFIX - return v << (YFIX - TFIX); -#else - return v >> (TFIX - YFIX); -#endif -} - static uint8_t clip_8b(fixed_t v) { return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; } @@ -261,13 +232,6 @@ static fixed_y_t clip_y(int y) { return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; } -// precision: TFIX -> YFIX -static fixed_y_t clip_fixed_t(fixed_t v) { - const int y = FixedWToY(v); - const fixed_y_t w = clip_y(y); - return w; -} - //------------------------------------------------------------------------------ static int RGBToGray(int r, int g, int b) { @@ -279,7 +243,7 @@ static float RGBToGrayF(float r, float g, float b) { return 0.299f * r + 0.587f * g + 0.114f * b; } -static float ScaleDown(int a, int b, int c, int d) { +static int ScaleDown(int a, int b, int c, int d) { const float A = GammaToLinearF(a); const float B = GammaToLinearF(b); const float C = GammaToLinearF(c); @@ -293,30 +257,36 @@ static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) { const float G = GammaToLinearF(src[1]); const float B = GammaToLinearF(src[2]); const float Y = RGBToGrayF(R, G, B); - *dst++ = (fixed_y_t)(LinearToGammaF(Y) + .5); + *dst++ = (fixed_y_t)LinearToGammaF(Y); src += 3; } } -static WEBP_INLINE void UpdateChroma(const fixed_y_t* src1, - const fixed_y_t* src2, - fixed_t* dst, fixed_y_t* tmp, int len) { +static int UpdateChroma(const fixed_y_t* src1, + const fixed_y_t* src2, + fixed_t* dst, fixed_y_t* tmp, int len) { + int diff = 0; while (len--> 0) { - const float r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); - const float g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); - const float b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); - const float W = RGBToGrayF(r, g, b); - dst[0] = (fixed_t)FixedYToW((int)(r - W)); - dst[1] = (fixed_t)FixedYToW((int)(g - W)); - dst[2] = (fixed_t)FixedYToW((int)(b - W)); + const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); + const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); + const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); + const int W = RGBToGray(r, g, b); + const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2; + const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2; + const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2; + dst[0] = (fixed_t)(r - W); + dst[1] = (fixed_t)(g - W); + dst[2] = (fixed_t)(b - W); dst += 3; src1 += 6; src2 += 6; if (tmp != NULL) { - tmp[0] = tmp[1] = clip_y((int)(W + .5)); + tmp[0] = tmp[1] = clip_y(W); tmp += 2; } + diff += abs(RGBToGray(r_avg, g_avg, b_avg) - W); } + return diff; } //------------------------------------------------------------------------------ @@ -336,9 +306,8 @@ static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; } //------------------------------------------------------------------------------ -// 8bit -> YFIX -static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { - return ((fixed_y_t)a << YFIX) | (1 << (YFIX - 1)); +static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX + return ((fixed_y_t)a << SFIX) | SHALF; } static void ImportOneRow(const uint8_t* const r_ptr, @@ -368,50 +337,48 @@ static void InterpolateTwoRows(const fixed_y_t* const best_y, fixed_y_t* const out2) { int i, k; { // special boundary case for i==0 - const int W0 = FixedYToW(best_y[0]); - const int W1 = FixedYToW(best_y[w]); + const int W0 = best_y[0]; + const int W1 = best_y[w]; for (k = 0; k <= 2; ++k) { - out1[k] = clip_fixed_t(Filter2(cur_uv[k], prev_uv[k]) + W0); - out2[k] = clip_fixed_t(Filter2(cur_uv[k], next_uv[k]) + W1); + out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0); + out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1); } } for (i = 1; i < w - 1; ++i) { - const int W0 = FixedYToW(best_y[i + 0]); - const int W1 = FixedYToW(best_y[i + w]); + const int W0 = best_y[i + 0]; + const int W1 = best_y[i + w]; const int off = 3 * (i >> 1); for (k = 0; k <= 2; ++k) { const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1); const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1); - out1[3 * i + k] = clip_fixed_t(tmp0 + W0); - out2[3 * i + k] = clip_fixed_t(tmp1 + W1); + out1[3 * i + k] = clip_y(tmp0 + W0); + out2[3 * i + k] = clip_y(tmp1 + W1); } } { // special boundary case for i == w - 1 - const int W0 = FixedYToW(best_y[i + 0]); - const int W1 = FixedYToW(best_y[i + w]); + const int W0 = best_y[i + 0]; + const int W1 = best_y[i + w]; const int off = 3 * (i >> 1); for (k = 0; k <= 2; ++k) { - out1[3 * i + k] = - clip_fixed_t(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); - out2[3 * i + k] = - clip_fixed_t(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); + out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); + out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); } } } static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { - const int luma = 16839 * r + 33059 * g + 6420 * b + TROUNDER; - return clip_8b(16 + (luma >> (YUV_FIX + TFIX))); + const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; + return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); } static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { - const int u = -9719 * r - 19081 * g + 28800 * b + TROUNDER; - return clip_8b(128 + (u >> (YUV_FIX + TFIX))); + const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; + return clip_8b(128 + (u >> (YUV_FIX + SFIX))); } static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { - const int v = +28800 * r - 24116 * g - 4684 * b + TROUNDER; - return clip_8b(128 + (v >> (YUV_FIX + TFIX))); + const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; + return clip_8b(128 + (v >> (YUV_FIX + SFIX))); } static int ConvertWRGBToYUV(const fixed_y_t* const best_y, @@ -426,7 +393,7 @@ static int ConvertWRGBToYUV(const fixed_y_t* const best_y, for (i = 0; i < picture->width; ++i) { const int off = 3 * ((i >> 1) + (j >> 1) * uv_w); const int off2 = i + j * picture->y_stride; - const int W = FixedYToW(best_y[i + j * w]); + const int W = best_y[i + j * w]; const int r = best_uv[off + 0] + W; const int g = best_uv[off + 1] + W; const int b = best_uv[off + 2] + W; @@ -475,6 +442,10 @@ static int PreprocessARGB(const uint8_t* const r_ptr, fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); int ok; + int diff_sum = 0; + const int first_diff_threshold = (int)(2.5 * w * h); + const int min_improvement = 5; // stop if improvement is below this % + const int min_first_improvement = 80; if (best_y == NULL || best_uv == NULL || target_y == NULL || target_uv == NULL || @@ -507,7 +478,7 @@ static int PreprocessARGB(const uint8_t* const r_ptr, } UpdateW(src1, target_y + (j + 0) * w, w); UpdateW(src2, target_y + (j + 1) * w, w); - UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w); + diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w); memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv)); memcpy(dst_y + w, dst_y, w * sizeof(*dst_y)); } @@ -517,10 +488,11 @@ static int PreprocessARGB(const uint8_t* const r_ptr, int k; const fixed_t* cur_uv = best_uv; const fixed_t* prev_uv = best_uv; + const int old_diff_sum = diff_sum; + diff_sum = 0; for (j = 0; j < h; j += 2) { fixed_y_t* const src1 = tmp_buffer; fixed_y_t* const src2 = tmp_buffer + 3 * w; - { const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv, @@ -531,7 +503,7 @@ static int PreprocessARGB(const uint8_t* const r_ptr, UpdateW(src1, best_rgb_y + 0 * w, w); UpdateW(src2, best_rgb_y + 1 * w, w); - UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); + diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); // update two rows of Y and one row of RGB for (i = 0; i < 2 * w; ++i) { @@ -553,7 +525,23 @@ static int PreprocessARGB(const uint8_t* const r_ptr, } } } - // TODO(skal): add early-termination criterion + // test exit condition + if (diff_sum > 0) { + const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum; + // Check if first iteration gave good result already, without a large + // jump of improvement (otherwise it means we need to try few extra + // iterations, just to be sure). + if (iter == 0 && diff_sum < first_diff_threshold && + improvement < min_first_improvement) { + break; + } + // then, check if improvement is stalling. + if (improvement < min_improvement) { + break; + } + } else { + break; + } } // final reconstruction @@ -762,23 +750,20 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, int width, VP8Random* const rg) { int i, j; - for (i = 0, j = 0; i < width; ++i, j += step) { + for (i = 0, j = 0; i < width; i += 1, j += step) { dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg); } } -static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - const uint8_t* const a_ptr, - int rgb_stride, - uint8_t* const dst_u, - uint8_t* const dst_v, - int width, - VP8Random* const rg) { +static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int rgb_stride, + uint16_t* dst, int width) { int i, j; - // we loop over 2x2 blocks and produce one U/V value for each. - for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) { + // we loop over 2x2 blocks and produce one R/G/B/A value for each. + for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) { const uint32_t a = SUM4ALPHA(a_ptr + j); int r, g, b; if (a == 4 * 0xff || a == 0) { @@ -790,8 +775,10 @@ static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); } - dst_u[i] = RGBToU(r, g, b, rg); - dst_v[i] = RGBToV(r, g, b, rg); + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; } if (width & 1) { const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); @@ -805,31 +792,39 @@ static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); } - dst_u[i] = RGBToU(r, g, b, rg); - dst_v[i] = RGBToV(r, g, b, rg); + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + } +} + +static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + uint16_t* dst, int width) { + int i, j; + for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) { + dst[0] = SUM4(r_ptr + j, step); + dst[1] = SUM4(g_ptr + j, step); + dst[2] = SUM4(b_ptr + j, step); + } + if (width & 1) { + dst[0] = SUM2(r_ptr + j); + dst[1] = SUM2(g_ptr + j); + dst[2] = SUM2(b_ptr + j); } } -static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - int step, int rgb_stride, +static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb, uint8_t* const dst_u, uint8_t* const dst_v, int width, VP8Random* const rg) { - int i, j; - for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) { - const int r = SUM4(r_ptr + j, step); - const int g = SUM4(g_ptr + j, step); - const int b = SUM4(b_ptr + j, step); - dst_u[i] = RGBToU(r, g, b, rg); - dst_v[i] = RGBToV(r, g, b, rg); - } - if (width & 1) { - const int r = SUM2(r_ptr + j); - const int g = SUM2(g_ptr + j); - const int b = SUM2(b_ptr + j); + int i; + for (i = 0; i < width; i += 1, rgb += 4) { + const int r = rgb[0], g = rgb[1], b = rgb[2]; dst_u[i] = RGBToU(r, g, b, rg); dst_v[i] = RGBToV(r, g, b, rg); } @@ -848,6 +843,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, const int width = picture->width; const int height = picture->height; const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); + const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420; picture->use_argb = 0; @@ -864,7 +860,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, if (has_alpha) { WebPInitAlphaProcessing(); assert(step == 4); -#if defined(USE_INVERSE_ALPHA_TABLE) +#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE) assert(kAlphaFix + kGammaFix <= 31); #endif } @@ -879,6 +875,11 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, picture->a, picture->a_stride); } } else { + const int uv_width = (width + 1) >> 1; + int use_dsp = (step == 3); // use special function in this case + // temporary storage for accumulated R/G/B values during conversion to U/V + uint16_t* const tmp_rgb = + (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb)); uint8_t* dst_y = picture->y; uint8_t* dst_u = picture->u; uint8_t* dst_v = picture->v; @@ -889,19 +890,32 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, if (dithering > 0.) { VP8InitRandom(&base_rg, dithering); rg = &base_rg; + use_dsp = 0; // can't use dsp in this case } - + WebPInitConvertARGBToYUV(); InitGammaTables(); + if (tmp_rgb == NULL) return 0; // malloc error + // Downsample Y/U/V planes, two rows at a time for (y = 0; y < (height >> 1); ++y) { int rows_have_alpha = has_alpha; const int off1 = (2 * y + 0) * rgb_stride; const int off2 = (2 * y + 1) * rgb_stride; - ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, - dst_y, width, rg); - ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, - dst_y + picture->y_stride, width, rg); + if (use_dsp) { + if (is_rgb) { + WebPConvertRGB24ToY(r_ptr + off1, dst_y, width); + WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width); + } else { + WebPConvertBGR24ToY(b_ptr + off1, dst_y, width); + WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width); + } + } else { + ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, + dst_y, width, rg); + ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, + dst_y + picture->y_stride, width, rg); + } dst_y += 2 * picture->y_stride; if (has_alpha) { rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride, @@ -909,13 +923,19 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, dst_a, picture->a_stride); dst_a += 2 * picture->a_stride; } + // Collect averaged R/G/B(/A) if (!rows_have_alpha) { - ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1, - step, rgb_stride, dst_u, dst_v, width, rg); + AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, rgb_stride, tmp_rgb, width); + } else { + AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1, + rgb_stride, tmp_rgb, width); + } + // Convert to U/V + if (rg == NULL) { + WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); } else { - ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1, - a_ptr + off1, rgb_stride, - dst_u, dst_v, width, rg); + ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); } dst_u += picture->uv_stride; dst_v += picture->uv_stride; @@ -923,20 +943,35 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, if (height & 1) { // extra last row const int off = 2 * y * rgb_stride; int row_has_alpha = has_alpha; - ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, - dst_y, width, rg); + if (use_dsp) { + if (r_ptr < b_ptr) { + WebPConvertRGB24ToY(r_ptr + off, dst_y, width); + } else { + WebPConvertBGR24ToY(b_ptr + off, dst_y, width); + } + } else { + ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, + dst_y, width, rg); + } if (row_has_alpha) { row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0); } + // Collect averaged R/G/B(/A) if (!row_has_alpha) { - ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off, - step, 0, dst_u, dst_v, width, rg); + // Collect averaged R/G/B + AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off, + step, /* rgb_stride = */ 0, tmp_rgb, width); + } else { + AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off, + /* rgb_stride = */ 0, tmp_rgb, width); + } + if (rg == NULL) { + WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); } else { - ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off, - a_ptr + off, 0, - dst_u, dst_v, width, rg); + ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); } } + WebPSafeFree(tmp_rgb); } return 1; } @@ -978,11 +1013,9 @@ int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { return PictureARGBToYUVA(picture, colorspace, 0.f, 0); } -#if WEBP_ENCODER_ABI_VERSION > 0x0204 int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); } -#endif //------------------------------------------------------------------------------ // call for YUVA -> ARGB conversion @@ -1066,14 +1099,23 @@ static int Import(WebPPicture* const picture, } if (!WebPPictureAlloc(picture)) return 0; - assert(step >= (import_alpha ? 4 : 3)); - for (y = 0; y < height; ++y) { - uint32_t* const dst = &picture->argb[y * picture->argb_stride]; - int x; - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff, - r_ptr[offset], g_ptr[offset], b_ptr[offset]); + VP8EncDspARGBInit(); + + if (import_alpha) { + assert(step == 4); + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset, + b_ptr + offset, width, dst); + } + } else { + assert(step >= 3); + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset, + width, step, dst); } } return 1; diff --git a/src/3rdparty/libwebp/src/enc/picture_psnr.c b/src/3rdparty/libwebp/src/enc/picture_psnr.c index 2254b7e..40214ef 100644 --- a/src/3rdparty/libwebp/src/enc/picture_psnr.c +++ b/src/3rdparty/libwebp/src/enc/picture_psnr.c @@ -12,8 +12,10 @@ // Author: Skal (pascal.massimino@gmail.com) #include <math.h> +#include <stdlib.h> #include "./vp8enci.h" +#include "../utils/utils.h" //------------------------------------------------------------------------------ // local-min distortion @@ -23,9 +25,9 @@ #define RADIUS 2 // search radius. Shouldn't be too large. -static float AccumulateLSIM(const uint8_t* src, int src_stride, - const uint8_t* ref, int ref_stride, - int w, int h) { +static void AccumulateLSIM(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h, DistoStats* stats) { int x, y; double total_sse = 0.; for (y = 0; y < h; ++y) { @@ -38,16 +40,22 @@ static float AccumulateLSIM(const uint8_t* src, int src_stride, const double value = (double)ref[y * ref_stride + x]; int i, j; for (j = y_0; j < y_1; ++j) { - const uint8_t* s = src + j * src_stride; + const uint8_t* const s = src + j * src_stride; for (i = x_0; i < x_1; ++i) { - const double sse = (double)(s[i] - value) * (s[i] - value); + const double diff = s[i] - value; + const double sse = diff * diff; if (sse < best_sse) best_sse = sse; } } total_sse += best_sse; } } - return (float)total_sse; + stats->w = w * h; + stats->xm = 0; + stats->ym = 0; + stats->xxm = total_sse; + stats->yym = 0; + stats->xxm = 0; } #undef RADIUS @@ -64,73 +72,90 @@ static float GetPSNR(const double v) { int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, int type, float result[5]) { DistoStats stats[5]; - int has_alpha; - int uv_w, uv_h; + int w, h; + + memset(stats, 0, sizeof(stats)); if (src == NULL || ref == NULL || src->width != ref->width || src->height != ref->height || - src->y == NULL || ref->y == NULL || - src->u == NULL || ref->u == NULL || - src->v == NULL || ref->v == NULL || - result == NULL) { - return 0; - } - // TODO(skal): provide distortion for ARGB too. - if (src->use_argb == 1 || src->use_argb != ref->use_argb) { - return 0; - } - - has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); - if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || - (has_alpha && (src->a == NULL || ref->a == NULL))) { + src->use_argb != ref->use_argb || result == NULL) { return 0; } + w = src->width; + h = src->height; - memset(stats, 0, sizeof(stats)); + if (src->use_argb == 1) { + if (src->argb == NULL || ref->argb == NULL) { + return 0; + } else { + int i, j, c; + uint8_t* tmp1, *tmp2; + uint8_t* const tmp_plane = + (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); + if (tmp_plane == NULL) return 0; + tmp1 = tmp_plane; + tmp2 = tmp_plane + w * h; + for (c = 0; c < 4; ++c) { + for (j = 0; j < h; ++j) { + for (i = 0; i < w; ++i) { + tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8); + tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8); + } + } + if (type >= 2) { + AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]); + } else { + VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]); + } + } + free(tmp_plane); + } + } else { + int has_alpha, uv_w, uv_h; + if (src->y == NULL || ref->y == NULL || + src->u == NULL || ref->u == NULL || + src->v == NULL || ref->v == NULL) { + return 0; + } + has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); + if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || + (has_alpha && (src->a == NULL || ref->a == NULL))) { + return 0; + } - uv_w = (src->width + 1) >> 1; - uv_h = (src->height + 1) >> 1; - if (type >= 2) { - float sse[4]; - sse[0] = AccumulateLSIM(src->y, src->y_stride, - ref->y, ref->y_stride, src->width, src->height); - sse[1] = AccumulateLSIM(src->u, src->uv_stride, - ref->u, ref->uv_stride, uv_w, uv_h); - sse[2] = AccumulateLSIM(src->v, src->uv_stride, - ref->v, ref->uv_stride, uv_w, uv_h); - sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride, - ref->a, ref->a_stride, - src->width, src->height) - : 0.f; - result[0] = GetPSNR(sse[0] / (src->width * src->height)); - result[1] = GetPSNR(sse[1] / (uv_w * uv_h)); - result[2] = GetPSNR(sse[2] / (uv_w * uv_h)); - result[3] = GetPSNR(sse[3] / (src->width * src->height)); - { - double total_sse = sse[0] + sse[1] + sse[2]; - int total_pixels = src->width * src->height + 2 * uv_w * uv_h; + uv_w = (src->width + 1) >> 1; + uv_h = (src->height + 1) >> 1; + if (type >= 2) { + AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, + w, h, &stats[0]); + AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, + uv_w, uv_h, &stats[1]); + AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, + uv_w, uv_h, &stats[2]); if (has_alpha) { - total_pixels += src->width * src->height; - total_sse += sse[3]; + AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, + w, h, &stats[3]); + } + } else { + VP8SSIMAccumulatePlane(src->y, src->y_stride, + ref->y, ref->y_stride, + w, h, &stats[0]); + VP8SSIMAccumulatePlane(src->u, src->uv_stride, + ref->u, ref->uv_stride, + uv_w, uv_h, &stats[1]); + VP8SSIMAccumulatePlane(src->v, src->uv_stride, + ref->v, ref->uv_stride, + uv_w, uv_h, &stats[2]); + if (has_alpha) { + VP8SSIMAccumulatePlane(src->a, src->a_stride, + ref->a, ref->a_stride, + w, h, &stats[3]); } - result[4] = GetPSNR(total_sse / total_pixels); } - } else { + } + // Final stat calculations. + { int c; - VP8SSIMAccumulatePlane(src->y, src->y_stride, - ref->y, ref->y_stride, - src->width, src->height, &stats[0]); - VP8SSIMAccumulatePlane(src->u, src->uv_stride, - ref->u, ref->uv_stride, - uv_w, uv_h, &stats[1]); - VP8SSIMAccumulatePlane(src->v, src->uv_stride, - ref->v, ref->uv_stride, - uv_w, uv_h, &stats[2]); - if (has_alpha) { - VP8SSIMAccumulatePlane(src->a, src->a_stride, - ref->a, ref->a_stride, - src->width, src->height, &stats[3]); - } for (c = 0; c <= 4; ++c) { if (type == 1) { const double v = VP8SSIMGet(&stats[c]); diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale.c b/src/3rdparty/libwebp/src/enc/picture_rescale.c index 9e45551..9f19e8e 100644 --- a/src/3rdparty/libwebp/src/enc/picture_rescale.c +++ b/src/3rdparty/libwebp/src/enc/picture_rescale.c @@ -30,16 +30,6 @@ static void PictureGrabSpecs(const WebPPicture* const src, } //------------------------------------------------------------------------------ -// Picture copying - -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; - } -} // Adjust top-left corner to chroma sample position. static void SnapTopLeftPosition(const WebPPicture* const pic, @@ -70,20 +60,20 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { if (!WebPPictureAlloc(dst)) return 0; if (!src->use_argb) { - CopyPlane(src->y, src->y_stride, - dst->y, dst->y_stride, dst->width, dst->height); - CopyPlane(src->u, src->uv_stride, - dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); - CopyPlane(src->v, src->uv_stride, - dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); + WebPCopyPlane(src->y, src->y_stride, + dst->y, dst->y_stride, dst->width, dst->height); + WebPCopyPlane(src->u, src->uv_stride, dst->u, dst->uv_stride, + HALVE(dst->width), HALVE(dst->height)); + WebPCopyPlane(src->v, src->uv_stride, dst->v, dst->uv_stride, + HALVE(dst->width), HALVE(dst->height)); if (dst->a != NULL) { - CopyPlane(src->a, src->a_stride, - dst->a, dst->a_stride, dst->width, dst->height); + WebPCopyPlane(src->a, src->a_stride, + dst->a, dst->a_stride, dst->width, dst->height); } } else { - CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, - (uint8_t*)dst->argb, 4 * dst->argb_stride, - 4 * dst->width, dst->height); + WebPCopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, + (uint8_t*)dst->argb, 4 * dst->argb_stride, + 4 * dst->width, dst->height); } return 1; } @@ -144,24 +134,23 @@ int WebPPictureCrop(WebPPicture* pic, if (!pic->use_argb) { const int y_offset = top * pic->y_stride + left; const int uv_offset = (top / 2) * pic->uv_stride + left / 2; - CopyPlane(pic->y + y_offset, pic->y_stride, - tmp.y, tmp.y_stride, width, height); - CopyPlane(pic->u + uv_offset, pic->uv_stride, - tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); - CopyPlane(pic->v + uv_offset, pic->uv_stride, - tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); + WebPCopyPlane(pic->y + y_offset, pic->y_stride, + tmp.y, tmp.y_stride, width, height); + WebPCopyPlane(pic->u + uv_offset, pic->uv_stride, + tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); + WebPCopyPlane(pic->v + uv_offset, pic->uv_stride, + tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); if (tmp.a != NULL) { const int a_offset = top * pic->a_stride + left; - CopyPlane(pic->a + a_offset, pic->a_stride, - tmp.a, tmp.a_stride, width, height); + WebPCopyPlane(pic->a + a_offset, pic->a_stride, + tmp.a, tmp.a_stride, width, height); } } else { const uint8_t* const src = (const uint8_t*)(pic->argb + top * pic->argb_stride + left); - CopyPlane(src, pic->argb_stride * 4, - (uint8_t*)tmp.argb, tmp.argb_stride * 4, - width * 4, height); + WebPCopyPlane(src, pic->argb_stride * 4, (uint8_t*)tmp.argb, + tmp.argb_stride * 4, width * 4, height); } WebPPictureFree(pic); *pic = tmp; @@ -210,16 +199,10 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { if (pic == NULL) return 0; prev_width = pic->width; prev_height = pic->height; - // if width is unspecified, scale original proportionally to height ratio. - if (width == 0) { - width = (prev_width * height + prev_height / 2) / prev_height; + if (!WebPRescalerGetScaledDimensions( + prev_width, prev_height, &width, &height)) { + return 0; } - // if height is unspecified, scale original proportionally to width ratio. - if (height == 0) { - height = (prev_height * width + prev_width / 2) / prev_width; - } - // Check if the overall dimensions still make sense. - if (width <= 0 || height <= 0) return 0; PictureGrabSpecs(pic, &tmp); tmp.width = width; diff --git a/src/3rdparty/libwebp/src/enc/picture_tools.c b/src/3rdparty/libwebp/src/enc/picture_tools.c index 7c73646..bf97af8 100644 --- a/src/3rdparty/libwebp/src/enc/picture_tools.c +++ b/src/3rdparty/libwebp/src/enc/picture_tools.c @@ -11,6 +11,8 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include <assert.h> + #include "./vp8enci.h" #include "../dsp/yuv.h" @@ -120,6 +122,24 @@ void WebPCleanupTransparentArea(WebPPicture* pic) { #undef SIZE #undef SIZE2 +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { + int x, y, w, h; + uint32_t* argb; + assert(pic != NULL && pic->use_argb); + w = pic->width; + h = pic->height; + argb = pic->argb; + + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) { + if ((argb[x] & 0xff000000) == 0) { + argb[x] = 0x00000000; + } + } + argb += pic->argb_stride; + } +} + //------------------------------------------------------------------------------ // Blend color and remove transparency info diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c index 9130a41..dd6885a 100644 --- a/src/3rdparty/libwebp/src/enc/quant.c +++ b/src/3rdparty/libwebp/src/enc/quant.c @@ -30,7 +30,7 @@ #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP // power-law modulation. Must be strictly less than 1. -#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision +#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision // number of non-zero coeffs below which we consider the block very flat // (and apply a penalty to complex predictions) @@ -41,6 +41,8 @@ #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) +#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) + // #define DEBUG_BLOCK //------------------------------------------------------------------------------ @@ -54,15 +56,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it, const VP8ModeScore* const rd) { int i, j; const int is_i16 = (it->mb_->type_ == 1); + const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; + const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; + const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; + const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; printf("SOURCE / OUTPUT / ABS DELTA\n"); - for (j = 0; j < 24; ++j) { - if (j == 16) printf("\n"); // newline before the U/V block - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); + for (j = 0; j < 16; ++j) { + for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); printf(" "); - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); + for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); printf(" "); for (i = 0; i < 16; ++i) { - printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); + printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); + } + printf("\n"); + } + printf("\n"); // newline before the U/V block + for (j = 0; j < 8; ++j) { + for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); + printf(" "); + for (i = 0; i < 8; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); + } + printf(" "); + for (i = 8; i < 16; ++i) { + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); } printf("\n"); } @@ -444,15 +468,12 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // Quantize // Layout: -// +----+ -// |YYYY| 0 -// |YYYY| 4 -// |YYYY| 8 -// |YYYY| 12 -// +----+ -// |UUVV| 16 -// |UUVV| 20 -// +----+ +// +----+----+ +// |YYYY|UUVV| 0 +// |YYYY|UUVV| 4 +// |YYYY|....| 8 +// |YYYY|....| 12 +// +----+----+ const int VP8Scan[16] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, @@ -538,13 +559,12 @@ typedef struct { #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { - // TODO: incorporate the "* 256" in the tables? - rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); + rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); } static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, score_t distortion) { - return rate * lambda + 256 * distortion; + return rate * lambda + RD_DISTO_MULT * distortion; } static int TrellisQuantizeBlock(const VP8Encoder* const enc, @@ -553,7 +573,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, const VP8Matrix* const mtx, int lambda) { const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; - const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; + CostArrayPtr const costs = + (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; const int first = (coeff_type == 0) ? 1 : 0; Node nodes[16][NUM_NODES]; ScoreState score_states[2][NUM_NODES]; @@ -590,7 +611,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); - ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; + ss_cur[m].costs = costs[first][ctx0]; } } @@ -624,7 +645,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, int best_prev = 0; // default, in case ss_cur[m].score = MAX_COST; - ss_cur[m].costs = costs[band][ctx]; + ss_cur[m].costs = costs[n + 1][ctx]; if (level > MAX_LEVEL || level < 0) { // node is dead? continue; } @@ -719,14 +740,14 @@ static int ReconstructIntra16(VP8EncIterator* const it, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + Y_OFF; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; int nz = 0; int n; int16_t tmp[16][16], dc_tmp[16]; - for (n = 0; n < 16; ++n) { - VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); + for (n = 0; n < 16; n += 2) { + VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); } VP8FTransformWHT(tmp[0], dc_tmp); nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; @@ -746,12 +767,13 @@ static int ReconstructIntra16(VP8EncIterator* const it, } } } else { - for (n = 0; n < 16; ++n) { + for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. - tmp[n][0] = 0; - nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; - assert(rd->y_ac_levels[n][0] == 0); + tmp[n][0] = tmp[n + 1][0] = 0; + nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n + 0][0] == 0); + assert(rd->y_ac_levels[n + 1][0] == 0); } } @@ -792,14 +814,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + U_OFF; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; int nz = 0; int n; int16_t tmp[8][16]; - for (n = 0; n < 8; ++n) { - VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); + for (n = 0; n < 8; n += 2) { + VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; @@ -816,8 +838,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, } } } else { - for (n = 0; n < 8; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; + for (n = 0; n < 8; n += 2) { + nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; } } @@ -842,6 +864,12 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; } +static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { + VP8ModeScore* const tmp = *a; + *a = *b; + *b = tmp; +} + static void SwapPtr(uint8_t** a, uint8_t** b) { uint8_t* const tmp = *a; *a = *b; @@ -865,46 +893,47 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { return 1; } -static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { +static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { const int kNumBlocks = 16; VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i16_; const int tlambda = dqm->tlambda_; - const uint8_t* const src = it->yuv_in_ + Y_OFF; - VP8ModeScore rd16; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; + VP8ModeScore rd_tmp; + VP8ModeScore* rd_cur = &rd_tmp; + VP8ModeScore* rd_best = rd; int mode; rd->mode_i16 = -1; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { - uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer - int nz; + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer + rd_cur->mode_i16 = mode; // Reconstruct - nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); + rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); // Measure RD-score - rd16.D = VP8SSE16x16(src, tmp_dst); - rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) - : 0; - rd16.H = VP8FixedCostsI16[mode]; - rd16.R = VP8GetCostLuma16(it, &rd16); + rd_cur->D = VP8SSE16x16(src, tmp_dst); + rd_cur->SD = + tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; + rd_cur->H = VP8FixedCostsI16[mode]; + rd_cur->R = VP8GetCostLuma16(it, rd_cur); if (mode > 0 && - IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { + IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { // penalty to avoid flat area to be mispredicted by complex mode - rd16.R += FLATNESS_PENALTY * kNumBlocks; + rd_cur->R += FLATNESS_PENALTY * kNumBlocks; } // Since we always examine Intra16 first, we can overwrite *rd directly. - SetRDScore(lambda, &rd16); - if (mode == 0 || rd16.score < rd->score) { - CopyScore(rd, &rd16); - rd->mode_i16 = mode; - rd->nz = nz; - memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); - memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); + SetRDScore(lambda, rd_cur); + if (mode == 0 || rd_cur->score < rd_best->score) { + SwapModeScore(&rd_cur, &rd_best); SwapOut(it); } } + if (rd_best != rd) { + memcpy(rd, rd_best, sizeof(*rd)); + } SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. VP8SetIntra16Mode(it, rd->mode_i16); @@ -933,8 +962,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i4_; const int tlambda = dqm->tlambda_; - const uint8_t* const src0 = it->yuv_in_ + Y_OFF; - uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; + const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; + uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; int total_header_bits = 0; VP8ModeScore rd_best; @@ -972,17 +1001,28 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) : 0; rd_tmp.H = mode_costs[mode]; - rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); + + // Add flatness penalty if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { - rd_tmp.R += FLATNESS_PENALTY * kNumBlocks; + rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; + } else { + rd_tmp.R = 0; } + // early-out check SetRDScore(lambda, &rd_tmp); + if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; + + // finish computing score + rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); + SetRDScore(lambda, &rd_tmp); + if (best_mode < 0 || rd_tmp.score < rd_i4.score) { CopyScore(&rd_i4, &rd_tmp); best_mode = mode; SwapPtr(&tmp_dst, &best_block); - memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); + memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, + sizeof(rd_best.y_ac_levels[it->i4_])); } } SetRDScore(dqm->lambda_mode_, &rd_i4); @@ -1016,9 +1056,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { const int kNumBlocks = 8; const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_uv_; - const uint8_t* const src = it->yuv_in_ + U_OFF; - uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer - uint8_t* const dst0 = it->yuv_out_ + U_OFF; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer + uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; + uint8_t* dst = dst0; VP8ModeScore rd_best; int mode; @@ -1032,7 +1073,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { // Compute RD-score rd_uv.D = VP8SSE16x8(src, tmp_dst); - rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. + rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. rd_uv.H = VP8FixedCostsUV[mode]; rd_uv.R = VP8GetCostUV(it, &rd_uv); if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { @@ -1044,11 +1085,14 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { CopyScore(&rd_best, &rd_uv); rd->mode_uv = mode; memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); - memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? + SwapPtr(&dst, &tmp_dst); } } VP8SetIntraUVMode(it, rd->mode_uv); AddScore(rd, &rd_best); + if (dst != dst0) { // copy 16x8 block if needed + VP8Copy16x8(dst, dst0); + } } //------------------------------------------------------------------------------ @@ -1060,35 +1104,41 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { int nz = 0; if (is_i16) { - nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); } else { VP8IteratorStartI4(it); do { const int mode = it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; - uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], src, dst, mode) << it->i4_; - } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); + } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); } - nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); rd->nz = nz; } // Refine intra16/intra4 sub-modes based on distortion only (not rate). -static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { - const int is_i16 = (it->mb_->type_ == 1); +static void RefineUsingDistortion(VP8EncIterator* const it, + int try_both_modes, int refine_uv_mode, + VP8ModeScore* const rd) { score_t best_score = MAX_COST; + score_t score_i4 = (score_t)I4_PENALTY; + int16_t tmp_levels[16][16]; + uint8_t modes_i4[16]; + int nz = 0; + int mode; + int is_i16 = try_both_modes || (it->mb_->type_ == 1); - if (try_both_i4_i16 || is_i16) { - int mode; + if (is_i16) { // First, evaluate Intra16 distortion int best_mode = -1; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + Y_OFF; const score_t score = VP8SSE16x16(src, ref); if (score < best_score) { best_mode = mode; @@ -1096,39 +1146,72 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { } } VP8SetIntra16Mode(it, best_mode); + // we'll reconstruct later, if i16 mode actually gets selected } - if (try_both_i4_i16 || !is_i16) { - uint8_t modes_i4[16]; + + // Next, evaluate Intra4 + if (try_both_modes || !is_i16) { // We don't evaluate the rate here, but just account for it through a // constant penalty (i4 mode usually needs more bits compared to i16). - score_t score_i4 = (score_t)I4_PENALTY; - + is_i16 = 0; VP8IteratorStartI4(it); do { - int mode; - int best_sub_mode = -1; - score_t best_sub_score = MAX_COST; - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; + int best_i4_mode = -1; + score_t best_i4_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; - // TODO(skal): we don't really need the prediction pixels here, - // but just the distortion against 'src'. VP8MakeIntra4Preds(it); for (mode = 0; mode < NUM_BMODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; const score_t score = VP8SSE4x4(src, ref); - if (score < best_sub_score) { - best_sub_mode = mode; - best_sub_score = score; + if (score < best_i4_score) { + best_i4_mode = mode; + best_i4_score = score; } } - modes_i4[it->i4_] = best_sub_mode; - score_i4 += best_sub_score; - if (score_i4 >= best_score) break; - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); - if (score_i4 < best_score) { - VP8SetIntra4Mode(it, modes_i4); + modes_i4[it->i4_] = best_i4_mode; + score_i4 += best_i4_score; + if (score_i4 >= best_score) { + // Intra4 won't be better than Intra16. Bail out and pick Intra16. + is_i16 = 1; + break; + } else { // reconstruct partial block inside yuv_out2_ buffer + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; + nz |= ReconstructIntra4(it, tmp_levels[it->i4_], + src, tmp_dst, best_i4_mode) << it->i4_; + } + } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); + } + + // Final reconstruction, depending on which mode is selected. + if (!is_i16) { + VP8SetIntra4Mode(it, modes_i4); + memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); + SwapOut(it); + best_score = score_i4; + } else { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); + } + + // ... and UV! + if (refine_uv_mode) { + int best_mode = -1; + score_t best_uv_score = MAX_COST; + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; + for (mode = 0; mode < NUM_PRED_MODES; ++mode) { + const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; + const score_t score = VP8SSE16x8(src, ref); + if (score < best_uv_score) { + best_mode = mode; + best_uv_score = score; + } } + VP8SetIntraUVMode(it, best_mode); } + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); + + rd->nz = nz; + rd->score = best_score; } //------------------------------------------------------------------------------ @@ -1158,13 +1241,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, SimpleQuantize(it, rd); } } else { - // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). - // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). - DistoRefine(it, (method >= 2)); - SimpleQuantize(it, rd); + // At this point we have heuristically decided intra16 / intra4. + // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower). + // For method <= 1, we don't re-examine the decision but just go ahead with + // quantization/reconstruction. + RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); } is_skipped = (rd->nz == 0); VP8SetSkip(it, is_skipped); return is_skipped; } - diff --git a/src/3rdparty/libwebp/src/enc/syntax.c b/src/3rdparty/libwebp/src/enc/syntax.c index d1ff0a5..a0e79ef 100644 --- a/src/3rdparty/libwebp/src/enc/syntax.c +++ b/src/3rdparty/libwebp/src/enc/syntax.c @@ -186,8 +186,8 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0, // Segmentation header static void PutSegmentHeader(VP8BitWriter* const bw, const VP8Encoder* const enc) { - const VP8SegmentHeader* const hdr = &enc->segment_hdr_; - const VP8Proba* const proba = &enc->proba_; + const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; + const VP8EncProba* const proba = &enc->proba_; if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) { // We always 'update' the quant and filter strength values const int update_data = 1; @@ -197,16 +197,16 @@ static void PutSegmentHeader(VP8BitWriter* const bw, // we always use absolute values, not relative ones VP8PutBitUniform(bw, 1); // (segment_feature_mode = 1. Paragraph 9.3.) for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7); + VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7); } for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6); + VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6); } } if (hdr->update_map_) { for (s = 0; s < 3; ++s) { if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) { - VP8PutValue(bw, proba->segments_[s], 8); + VP8PutBits(bw, proba->segments_[s], 8); } } } @@ -215,20 +215,20 @@ static void PutSegmentHeader(VP8BitWriter* const bw, // Filtering parameters header static void PutFilterHeader(VP8BitWriter* const bw, - const VP8FilterHeader* const hdr) { + const VP8EncFilterHeader* const hdr) { const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0); VP8PutBitUniform(bw, hdr->simple_); - VP8PutValue(bw, hdr->level_, 6); - VP8PutValue(bw, hdr->sharpness_, 3); + VP8PutBits(bw, hdr->level_, 6); + VP8PutBits(bw, hdr->sharpness_, 3); if (VP8PutBitUniform(bw, use_lf_delta)) { // '0' is the default value for i4x4_lf_delta_ at frame #0. const int need_update = (hdr->i4x4_lf_delta_ != 0); if (VP8PutBitUniform(bw, need_update)) { // we don't use ref_lf_delta => emit four 0 bits - VP8PutValue(bw, 0, 4); + VP8PutBits(bw, 0, 4); // we use mode_lf_delta for i4x4 - VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6); - VP8PutValue(bw, 0, 3); // all others unused + VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6); + VP8PutBits(bw, 0, 3); // all others unused } } } @@ -236,12 +236,12 @@ static void PutFilterHeader(VP8BitWriter* const bw, // Nominal quantization parameters static void PutQuant(VP8BitWriter* const bw, const VP8Encoder* const enc) { - VP8PutValue(bw, enc->base_quant_, 7); - VP8PutSignedValue(bw, enc->dq_y1_dc_, 4); - VP8PutSignedValue(bw, enc->dq_y2_dc_, 4); - VP8PutSignedValue(bw, enc->dq_y2_ac_, 4); - VP8PutSignedValue(bw, enc->dq_uv_dc_, 4); - VP8PutSignedValue(bw, enc->dq_uv_ac_, 4); + VP8PutBits(bw, enc->base_quant_, 7); + VP8PutSignedBits(bw, enc->dq_y1_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_dc_, 4); + VP8PutSignedBits(bw, enc->dq_y2_ac_, 4); + VP8PutSignedBits(bw, enc->dq_uv_dc_, 4); + VP8PutSignedBits(bw, enc->dq_uv_ac_, 4); } // Partition sizes @@ -277,9 +277,9 @@ static int GeneratePartition0(VP8Encoder* const enc) { PutSegmentHeader(bw, enc); PutFilterHeader(bw, &enc->filter_hdr_); - VP8PutValue(bw, enc->num_parts_ == 8 ? 3 : - enc->num_parts_ == 4 ? 2 : - enc->num_parts_ == 2 ? 1 : 0, 2); + VP8PutBits(bw, enc->num_parts_ == 8 ? 3 : + enc->num_parts_ == 4 ? 2 : + enc->num_parts_ == 2 ? 1 : 0, 2); PutQuant(bw, enc); VP8PutBitUniform(bw, 0); // no proba update VP8WriteProbas(bw, &enc->proba_); diff --git a/src/3rdparty/libwebp/src/enc/token.c b/src/3rdparty/libwebp/src/enc/token.c index 8af13a0..e73256b 100644 --- a/src/3rdparty/libwebp/src/enc/token.c +++ b/src/3rdparty/libwebp/src/enc/token.c @@ -30,15 +30,15 @@ #define MIN_PAGE_SIZE 8192 // minimum number of token per page #define FIXED_PROBA_BIT (1u << 14) -typedef uint16_t token_t; // bit#15: bit - // bit #14: constant proba or idx - // bits 0..13: slot or constant proba +typedef uint16_t token_t; // bit #15: bit value + // bit #14: flags for constant proba or idx + // bits #0..13: slot or constant proba struct VP8Tokens { VP8Tokens* next_; // pointer to next page }; // Token data is located in memory just after the next_ field. // This macro is used to return their address and hide the trick. -#define TOKEN_DATA(p) ((token_t*)&(p)[1]) +#define TOKEN_DATA(p) ((const token_t*)&(p)[1]) //------------------------------------------------------------------------------ @@ -53,10 +53,10 @@ void VP8TBufferInit(VP8TBuffer* const b, int page_size) { void VP8TBufferClear(VP8TBuffer* const b) { if (b != NULL) { - const VP8Tokens* p = b->pages_; + VP8Tokens* p = b->pages_; while (p != NULL) { - const VP8Tokens* const next = p->next_; - WebPSafeFree((void*)p); + VP8Tokens* const next = p->next_; + WebPSafeFree(p); p = next; } VP8TBufferInit(b, b->page_size_); @@ -65,8 +65,8 @@ void VP8TBufferClear(VP8TBuffer* const b) { static int TBufferNewPage(VP8TBuffer* const b) { VP8Tokens* page = NULL; - const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t); if (!b->error_) { + const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t); page = (VP8Tokens*)WebPSafeMalloc(1ULL, size); } if (page == NULL) { @@ -78,19 +78,19 @@ static int TBufferNewPage(VP8TBuffer* const b) { *b->last_page_ = page; b->last_page_ = &page->next_; b->left_ = b->page_size_; - b->tokens_ = TOKEN_DATA(page); + b->tokens_ = (token_t*)TOKEN_DATA(page); return 1; } //------------------------------------------------------------------------------ -#define TOKEN_ID(t, b, ctx, p) \ - ((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t)))) +#define TOKEN_ID(t, b, ctx) \ + (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t)))) -static WEBP_INLINE int AddToken(VP8TBuffer* const b, - int bit, uint32_t proba_idx) { +static WEBP_INLINE uint32_t AddToken(VP8TBuffer* const b, + uint32_t bit, uint32_t proba_idx) { assert(proba_idx < FIXED_PROBA_BIT); - assert(bit == 0 || bit == 1); + assert(bit <= 1); if (b->left_ > 0 || TBufferNewPage(b)) { const int slot = --b->left_; b->tokens_[slot] = (bit << 15) | proba_idx; @@ -99,20 +99,21 @@ static WEBP_INLINE int AddToken(VP8TBuffer* const b, } static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b, - int bit, int proba) { + uint32_t bit, uint32_t proba) { assert(proba < 256); - assert(bit == 0 || bit == 1); + assert(bit <= 1); if (b->left_ > 0 || TBufferNewPage(b)) { const int slot = --b->left_; b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba; } } -int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last, +int VP8RecordCoeffTokens(const int ctx, const int coeff_type, + int first, int last, const int16_t* const coeffs, VP8TBuffer* const tokens) { int n = first; - uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0); + uint32_t base_id = TOKEN_ID(coeff_type, n, ctx); if (!AddToken(tokens, last >= 0, base_id + 0)) { return 0; } @@ -120,14 +121,13 @@ int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last, while (n < 16) { const int c = coeffs[n++]; const int sign = c < 0; - int v = sign ? -c : c; + const uint32_t v = sign ? -c : c; if (!AddToken(tokens, v != 0, base_id + 1)) { - ctx = 0; - base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0); + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0); // ctx=0 continue; } if (!AddToken(tokens, v > 1, base_id + 2)) { - ctx = 1; + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1); // ctx=1 } else { if (!AddToken(tokens, v > 4, base_id + 3)) { if (AddToken(tokens, v != 2, base_id + 4)) @@ -142,40 +142,40 @@ int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last, } else { int mask; const uint8_t* tab; - if (v < 3 + (8 << 1)) { // VP8Cat3 (3b) + uint32_t residue = v - 3; + if (residue < (8 << 1)) { // VP8Cat3 (3b) AddToken(tokens, 0, base_id + 8); AddToken(tokens, 0, base_id + 9); - v -= 3 + (8 << 0); + residue -= (8 << 0); mask = 1 << 2; tab = VP8Cat3; - } else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b) + } else if (residue < (8 << 2)) { // VP8Cat4 (4b) AddToken(tokens, 0, base_id + 8); AddToken(tokens, 1, base_id + 9); - v -= 3 + (8 << 1); + residue -= (8 << 1); mask = 1 << 3; tab = VP8Cat4; - } else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b) + } else if (residue < (8 << 3)) { // VP8Cat5 (5b) AddToken(tokens, 1, base_id + 8); AddToken(tokens, 0, base_id + 10); - v -= 3 + (8 << 2); + residue -= (8 << 2); mask = 1 << 4; tab = VP8Cat5; } else { // VP8Cat6 (11b) AddToken(tokens, 1, base_id + 8); AddToken(tokens, 1, base_id + 10); - v -= 3 + (8 << 3); + residue -= (8 << 3); mask = 1 << 10; tab = VP8Cat6; } while (mask) { - AddConstantToken(tokens, !!(v & mask), *tab++); + AddConstantToken(tokens, !!(residue & mask), *tab++); mask >>= 1; } } - ctx = 2; + base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2); // ctx=2 } AddConstantToken(tokens, sign, 128); - base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0); if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) { return 1; // EOB } @@ -224,7 +224,6 @@ void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) { int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass) { const VP8Tokens* p = b->pages_; - (void)final_pass; assert(!b->error_); while (p != NULL) { const VP8Tokens* const next = p->next_; diff --git a/src/3rdparty/libwebp/src/enc/tree.c b/src/3rdparty/libwebp/src/enc/tree.c index e5d05e5..f141006 100644 --- a/src/3rdparty/libwebp/src/enc/tree.c +++ b/src/3rdparty/libwebp/src/enc/tree.c @@ -154,7 +154,7 @@ const uint8_t }; void VP8DefaultProbas(VP8Encoder* const enc) { - VP8Proba* const probas = &enc->proba_; + VP8EncProba* const probas = &enc->proba_; probas->use_skip_proba_ = 0; memset(probas->segments_, 255u, sizeof(probas->segments_)); memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0)); @@ -482,7 +482,7 @@ const uint8_t } }; -void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) { +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) { int t, b, c, p; for (t = 0; t < NUM_TYPES; ++t) { for (b = 0; b < NUM_BANDS; ++b) { @@ -491,14 +491,14 @@ void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) { const uint8_t p0 = probas->coeffs_[t][b][c][p]; const int update = (p0 != VP8CoeffsProba0[t][b][c][p]); if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) { - VP8PutValue(bw, p0, 8); + VP8PutBits(bw, p0, 8); } } } } } if (VP8PutBitUniform(bw, probas->use_skip_proba_)) { - VP8PutValue(bw, probas->skip_proba_, 8); + VP8PutBits(bw, probas->skip_proba_, 8); } } diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h index 20f58c6..b2cc8d1 100644 --- a/src/3rdparty/libwebp/src/enc/vp8enci.h +++ b/src/3rdparty/libwebp/src/enc/vp8enci.h @@ -15,10 +15,16 @@ #define WEBP_ENC_VP8ENCI_H_ #include <string.h> // for memcpy() -#include "../webp/encode.h" +#include "../dec/common.h" #include "../dsp/dsp.h" #include "../utils/bit_writer.h" #include "../utils/thread.h" +#include "../utils/utils.h" +#include "../webp/encode.h" + +#ifdef WEBP_EXPERIMENTAL_FEATURES +#include "./vp8li.h" +#endif // WEBP_EXPERIMENTAL_FEATURES #ifdef __cplusplus extern "C" { @@ -29,35 +35,10 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 -#define ENC_MIN_VERSION 4 -#define ENC_REV_VERSION 4 - -// intra prediction modes -enum { B_DC_PRED = 0, // 4x4 modes - B_TM_PRED = 1, - B_VE_PRED = 2, - B_HE_PRED = 3, - B_RD_PRED = 4, - B_VR_PRED = 5, - B_LD_PRED = 6, - B_VL_PRED = 7, - B_HD_PRED = 8, - B_HU_PRED = 9, - NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10 - - // Luma16 or UV modes - DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED, - H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED, - NUM_PRED_MODES = 4 - }; +#define ENC_MIN_VERSION 5 +#define ENC_REV_VERSION 0 -enum { NUM_MB_SEGMENTS = 4, - MAX_NUM_PARTITIONS = 8, - NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC - NUM_BANDS = 8, - NUM_CTX = 3, - NUM_PROBAS = 11, - MAX_LF_LEVELS = 64, // Maximum loop filter level +enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67) }; @@ -69,66 +50,34 @@ typedef enum { // Rate-distortion optimization levels RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower) } VP8RDLevel; -// YUV-cache parameters. Cache is 16-pixels wide. -// The original or reconstructed samples can be accessed using VP8Scan[] +// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). +// The original or reconstructed samples can be accessed using VP8Scan[]. // The predicted blocks can be accessed using offsets to yuv_p_ and -// the arrays VP8*ModeOffsets[]; -// +----+ YUV Samples area. See VP8Scan[] for accessing the blocks. -// Y_OFF |YYYY| <- original samples ('yuv_in_') -// |YYYY| -// |YYYY| -// |YYYY| -// U_OFF |UUVV| V_OFF (=U_OFF + 8) -// |UUVV| -// +----+ -// Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_') -// |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_') -// |YYYY| -// |YYYY| -// U_OFF |UUVV| V_OFF -// |UUVV| -// x2 (for yuv_out2_) -// +----+ Prediction area ('yuv_p_', size = PRED_SIZE) -// I16DC16 |YYYY| Intra16 predictions (16x16 block each) -// |YYYY| -// |YYYY| -// |YYYY| -// I16TM16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// I16VE16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// I16HE16 |YYYY| -// |YYYY| -// |YYYY| -// |YYYY| -// +----+ Chroma U/V predictions (16x8 block each) -// C8DC8 |UUVV| -// |UUVV| -// C8TM8 |UUVV| -// |UUVV| -// C8VE8 |UUVV| -// |UUVV| -// C8HE8 |UUVV| -// |UUVV| -// +----+ Intra 4x4 predictions (4x4 block each) -// |YYYY| I4DC4 I4TM4 I4VE4 I4HE4 -// |YYYY| I4RD4 I4VR4 I4LD4 I4VL4 -// |YY..| I4HD4 I4HU4 I4TMP -// +----+ -#define BPS 16 // this is the common stride -#define Y_SIZE (BPS * 16) -#define UV_SIZE (BPS * 8) -#define YUV_SIZE (Y_SIZE + UV_SIZE) -#define PRED_SIZE (6 * 16 * BPS + 12 * BPS) -#define Y_OFF (0) -#define U_OFF (Y_SIZE) -#define V_OFF (U_OFF + 8) -#define ALIGN_CST 15 -#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) +// the arrays VP8*ModeOffsets[]. +// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_) +// (see VP8Scan[] for accessing the blocks, along with +// Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC): +// +----+----+ +// Y_OFF_ENC |YYYY|UUVV| +// U_OFF_ENC |YYYY|UUVV| +// V_OFF_ENC |YYYY|....| <- 25% wasted U/V area +// |YYYY|....| +// +----+----+ +// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC) +// Intra16 predictions (16x16 block each, two per row): +// |I16DC16|I16TM16| +// |I16VE16|I16HE16| +// Chroma U/V predictions (16x8 block each, two per row): +// |C8DC8|C8TM8| +// |C8VE8|C8HE8| +// Intra 4x4 predictions (4x4 block each) +// |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4| +// |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted +#define YUV_SIZE_ENC (BPS * 16) +#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS) // I16+Chroma+I4 preds +#define Y_OFF_ENC (0) +#define U_OFF_ENC (16) +#define V_OFF_ENC (16 + 8) extern const int VP8Scan[16]; // in quant.c extern const int VP8UVModeOffsets[4]; // in analyze.c @@ -138,26 +87,26 @@ extern const int VP8I4ModeOffsets[NUM_BMODES]; // Layout of prediction blocks // intra 16x16 #define I16DC16 (0 * 16 * BPS) -#define I16TM16 (1 * 16 * BPS) -#define I16VE16 (2 * 16 * BPS) -#define I16HE16 (3 * 16 * BPS) +#define I16TM16 (I16DC16 + 16) +#define I16VE16 (1 * 16 * BPS) +#define I16HE16 (I16VE16 + 16) // chroma 8x8, two U/V blocks side by side (hence: 16x8 each) -#define C8DC8 (4 * 16 * BPS) -#define C8TM8 (4 * 16 * BPS + 8 * BPS) -#define C8VE8 (5 * 16 * BPS) -#define C8HE8 (5 * 16 * BPS + 8 * BPS) +#define C8DC8 (2 * 16 * BPS) +#define C8TM8 (C8DC8 + 1 * 16) +#define C8VE8 (2 * 16 * BPS + 8 * BPS) +#define C8HE8 (C8VE8 + 1 * 16) // intra 4x4 -#define I4DC4 (6 * 16 * BPS + 0) -#define I4TM4 (6 * 16 * BPS + 4) -#define I4VE4 (6 * 16 * BPS + 8) -#define I4HE4 (6 * 16 * BPS + 12) -#define I4RD4 (6 * 16 * BPS + 4 * BPS + 0) -#define I4VR4 (6 * 16 * BPS + 4 * BPS + 4) -#define I4LD4 (6 * 16 * BPS + 4 * BPS + 8) -#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12) -#define I4HD4 (6 * 16 * BPS + 8 * BPS + 0) -#define I4HU4 (6 * 16 * BPS + 8 * BPS + 4) -#define I4TMP (6 * 16 * BPS + 8 * BPS + 8) +#define I4DC4 (3 * 16 * BPS + 0) +#define I4TM4 (I4DC4 + 4) +#define I4VE4 (I4DC4 + 8) +#define I4HE4 (I4DC4 + 12) +#define I4RD4 (I4DC4 + 16) +#define I4VR4 (I4DC4 + 20) +#define I4LD4 (I4DC4 + 24) +#define I4VL4 (I4DC4 + 28) +#define I4HD4 (3 * 16 * BPS + 4 * BPS) +#define I4HU4 (I4HD4 + 4) +#define I4TMP (I4HD4 + 8) typedef int64_t score_t; // type used for scores, rate, distortion // Note that MAX_COST is not the maximum allowed by sizeof(score_t), @@ -172,14 +121,6 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { return (int)((n * iQ + B) >> QFIX); } -// size of histogram used by CollectHistogram. -#define MAX_COEFF_THRESH 31 -typedef struct VP8Histogram VP8Histogram; -struct VP8Histogram { - // TODO(skal): we only need to store the max_value and last_non_zero actually. - int distribution[MAX_COEFF_THRESH + 1]; -}; - // Uncomment the following to remove token-buffer code: // #define DISABLE_TOKEN_BUFFER @@ -190,6 +131,8 @@ typedef uint32_t proba_t; // 16b + 16b typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; +typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting +typedef const uint16_t* CostArrayMap[16][NUM_CTX]; typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats typedef struct VP8Encoder VP8Encoder; @@ -200,7 +143,7 @@ typedef struct { int update_map_; // whether to update the segment map or not. // must be 0 if there's only 1 segment. int size_; // bit-cost for transmitting the segment map -} VP8SegmentHeader; +} VP8EncSegmentHeader; // Struct collecting all frame-persistent probabilities. typedef struct { @@ -209,10 +152,11 @@ typedef struct { ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes + CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes int dirty_; // if true, need to call VP8CalculateLevelCosts() int use_skip_proba_; // Note: we always use skip_proba for now. int nb_skip_; // number of skipped blocks -} VP8Proba; +} VP8EncProba; // Filter parameters. Not actually used in the code (we don't perform // the in-loop filtering), but filled from user's config @@ -221,7 +165,7 @@ typedef struct { int level_; // base filter level [0..63] int sharpness_; // [0..7] int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 -} VP8FilterHeader; +} VP8EncFilterHeader; //------------------------------------------------------------------------------ // Informations about the macroblocks. @@ -307,9 +251,10 @@ typedef struct { uint8_t* y_top_; // top luma samples at position 'x_' uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes - // memory for storing y/u/v_left_ and yuv_in_/out_* - uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST]; // memory for *_left_ - uint8_t yuv_mem_[3 * YUV_SIZE + PRED_SIZE + ALIGN_CST]; // memory for yuv_* + // memory for storing y/u/v_left_ + uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST]; + // memory for yuv_* + uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST]; } VP8EncIterator; // in iterator.c @@ -381,7 +326,8 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass); // record the coding of coefficients without knowing the probabilities yet -int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last, +int VP8RecordCoeffTokens(const int ctx, const int coeff_type, + int first, int last, const int16_t* const coeffs, VP8TBuffer* const tokens); @@ -401,8 +347,8 @@ struct VP8Encoder { WebPPicture* pic_; // input / output picture // headers - VP8FilterHeader filter_hdr_; // filtering information - VP8SegmentHeader segment_hdr_; // segment information + VP8EncFilterHeader filter_hdr_; // filtering information + VP8EncSegmentHeader segment_hdr_; // segment information int profile_; // VP8's profile, deduced from Config. @@ -438,12 +384,12 @@ struct VP8Encoder { int dq_uv_dc_, dq_uv_ac_; // probabilities and statistics - VP8Proba proba_; - uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks - uint64_t sse_count_; // pixel count for the sse_[] stats - int coded_size_; - int residual_bytes_[3][4]; - int block_count_[3]; + VP8EncProba proba_; + uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks + uint64_t sse_count_; // pixel count for the sse_[] stats + int coded_size_; + int residual_bytes_[3][4]; + int block_count_[3]; // quality/speed settings int method_; // 0=fastest, 6=best/slowest. @@ -473,7 +419,7 @@ extern const uint8_t // Reset the token probabilities to their initial (default) values void VP8DefaultProbas(VP8Encoder* const enc); // Write the token probabilities -void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas); +void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas); // Writes the partition #0 modes (that is: all intra modes) void VP8CodeIntraModes(VP8Encoder* const enc); @@ -486,7 +432,6 @@ int VP8EncWrite(VP8Encoder* const enc); void VP8EncFreeBitWriters(VP8Encoder* const enc); // in frame.c -extern const uint8_t VP8EncBands[16 + 1]; extern const uint8_t VP8Cat3[]; extern const uint8_t VP8Cat4[]; extern const uint8_t VP8Cat5[]; @@ -569,12 +514,21 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); // Returns false in case of error (invalid param, out-of-memory). int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); +// Clean-up the RGB samples under fully transparent area, to help lossless +// compressibility (no guarantee, though). Assumes that pic->use_argb is true. +void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); + + // in near_lossless.c +// Near lossless preprocessing in RGB color-space. +int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality); +// Near lossless adjustment for predictors. +void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits, + const uint32_t* argb_orig, + uint32_t* argb, uint32_t* argb_scratch, + const uint32_t* const transform_data, + int quality, int subtract_green); //------------------------------------------------------------------------------ -#if WEBP_ENCODER_ABI_VERSION <= 0x0203 -void WebPMemoryWriterClear(WebPMemoryWriter* writer); -#endif - #ifdef __cplusplus } // extern "C" #endif diff --git a/src/3rdparty/libwebp/src/enc/vp8l.c b/src/3rdparty/libwebp/src/enc/vp8l.c index c2bb13d..db94e78 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l.c +++ b/src/3rdparty/libwebp/src/enc/vp8l.c @@ -13,10 +13,10 @@ // #include <assert.h> -#include <stdio.h> #include <stdlib.h> #include "./backward_references.h" +#include "./histogram.h" #include "./vp8enci.h" #include "./vp8li.h" #include "../dsp/lossless.h" @@ -25,23 +25,105 @@ #include "../utils/utils.h" #include "../webp/format_constants.h" +#include "./delta_palettization.h" + #define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer. -#define MAX_HUFF_IMAGE_SIZE (16 * 1024 * 1024) -#define MAX_COLORS_FOR_GRAPH 64 +// Maximum number of histogram images (sub-blocks). +#define MAX_HUFF_IMAGE_SIZE 2600 -// ----------------------------------------------------------------------------- -// Palette +// Palette reordering for smaller sum of deltas (and for smaller storage). -static int CompareColors(const void* p1, const void* p2) { - const uint32_t a = *(const uint32_t*)p1; - const uint32_t b = *(const uint32_t*)p2; +static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { + const uint32_t a = WebPMemToUint32(p1); + const uint32_t b = WebPMemToUint32(p2); assert(a != b); return (a < b) ? -1 : 1; } +static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) { + return (v <= 128) ? v : (256 - v); +} + +// Computes a value that is related to the entropy created by the +// palette entry diff. +// +// Note that the last & 0xff is a no-operation in the next statement, but +// removed by most compilers and is here only for regularity of the code. +static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) { + const uint32_t diff = VP8LSubPixels(col1, col2); + const int kMoreWeightForRGBThanForAlpha = 9; + uint32_t score; + score = PaletteComponentDistance((diff >> 0) & 0xff); + score += PaletteComponentDistance((diff >> 8) & 0xff); + score += PaletteComponentDistance((diff >> 16) & 0xff); + score *= kMoreWeightForRGBThanForAlpha; + score += PaletteComponentDistance((diff >> 24) & 0xff); + return score; +} + +static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { + const uint32_t tmp = *col1; + *col1 = *col2; + *col2 = tmp; +} + +static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { + // Find greedily always the closest color of the predicted color to minimize + // deltas in the palette. This reduces storage needs since the + // palette is stored with delta encoding. + uint32_t predict = 0x00000000; + int i, k; + for (i = 0; i < num_colors; ++i) { + int best_ix = i; + uint32_t best_score = ~0U; + for (k = i; k < num_colors; ++k) { + const uint32_t cur_score = PaletteColorDistance(palette[k], predict); + if (best_score > cur_score) { + best_score = cur_score; + best_ix = k; + } + } + SwapColor(&palette[best_ix], &palette[i]); + predict = palette[i]; + } +} + +// The palette has been sorted by alpha. This function checks if the other +// components of the palette have a monotonic development with regards to +// position in the palette. If all have monotonic development, there is +// no benefit to re-organize them greedily. A monotonic development +// would be spotted in green-only situations (like lossy alpha) or gray-scale +// images. +static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { + uint32_t predict = 0x000000; + int i; + uint8_t sign_found = 0x00; + for (i = 0; i < num_colors; ++i) { + const uint32_t diff = VP8LSubPixels(palette[i], predict); + const uint8_t rd = (diff >> 16) & 0xff; + const uint8_t gd = (diff >> 8) & 0xff; + const uint8_t bd = (diff >> 0) & 0xff; + if (rd != 0x00) { + sign_found |= (rd < 0x80) ? 1 : 2; + } + if (gd != 0x00) { + sign_found |= (gd < 0x80) ? 8 : 16; + } + if (bd != 0x00) { + sign_found |= (bd < 0x80) ? 64 : 128; + } + predict = palette[i]; + } + return (sign_found & (sign_found << 1)) != 0; // two consequent signs. +} + +// ----------------------------------------------------------------------------- +// Palette + // If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, // creates a palette and returns true, else returns false. static int AnalyzeAndCreatePalette(const WebPPicture* const pic, + int low_effort, uint32_t palette[MAX_PALETTE_SIZE], int* const palette_size) { int i, x, y, key; @@ -92,84 +174,240 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, ++num_colors; } } - - qsort(palette, num_colors, sizeof(*palette), CompareColors); *palette_size = num_colors; + qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); + if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { + GreedyMinimizeDeltas(palette, num_colors); + } return 1; } -static int AnalyzeEntropy(const uint32_t* argb, - int width, int height, int argb_stride, - double* const nonpredicted_bits, - double* const predicted_bits) { - int x, y; - const uint32_t* last_line = NULL; - uint32_t last_pix = argb[0]; // so we're sure that pix_diff == 0 +// These five modes are evaluated and their respective entropy is computed. +typedef enum { + kDirect = 0, + kSpatial = 1, + kSubGreen = 2, + kSpatialSubGreen = 3, + kPalette = 4, + kNumEntropyIx = 5 +} EntropyIx; + +typedef enum { + kHistoAlpha = 0, + kHistoAlphaPred, + kHistoGreen, + kHistoGreenPred, + kHistoRed, + kHistoRedPred, + kHistoBlue, + kHistoBluePred, + kHistoRedSubGreen, + kHistoRedPredSubGreen, + kHistoBlueSubGreen, + kHistoBluePredSubGreen, + kHistoPalette, + kHistoTotal // Must be last. +} HistoIx; + +static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) { + const uint32_t green = p >> 8; // The upper bits are masked away later. + ++r[((p >> 16) - green) & 0xff]; + ++b[(p - green) & 0xff]; +} - VP8LHistogramSet* const histo_set = VP8LAllocateHistogramSet(2, 0); - if (histo_set == NULL) return 0; +static void AddSingle(uint32_t p, + uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) { + ++a[p >> 24]; + ++r[(p >> 16) & 0xff]; + ++g[(p >> 8) & 0xff]; + ++b[(p & 0xff)]; +} - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t pix = argb[x]; - const uint32_t pix_diff = VP8LSubPixels(pix, last_pix); - if (pix_diff == 0) continue; - if (last_line != NULL && pix == last_line[x]) { - continue; +static int AnalyzeEntropy(const uint32_t* argb, + int width, int height, int argb_stride, + int use_palette, + EntropyIx* const min_entropy_ix, + int* const red_and_blue_always_zero) { + // Allocate histogram set with cache_bits = 0. + uint32_t* const histo = + (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256); + if (histo != NULL) { + int i, x, y; + const uint32_t* prev_row = argb; + const uint32_t* curr_row = argb + argb_stride; + for (y = 1; y < height; ++y) { + uint32_t prev_pix = curr_row[0]; + for (x = 1; x < width; ++x) { + const uint32_t pix = curr_row[x]; + const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix); + if ((pix_diff == 0) || (pix == prev_row[x])) continue; + prev_pix = pix; + AddSingle(pix, + &histo[kHistoAlpha * 256], + &histo[kHistoRed * 256], + &histo[kHistoGreen * 256], + &histo[kHistoBlue * 256]); + AddSingle(pix_diff, + &histo[kHistoAlphaPred * 256], + &histo[kHistoRedPred * 256], + &histo[kHistoGreenPred * 256], + &histo[kHistoBluePred * 256]); + AddSingleSubGreen(pix, + &histo[kHistoRedSubGreen * 256], + &histo[kHistoBlueSubGreen * 256]); + AddSingleSubGreen(pix_diff, + &histo[kHistoRedPredSubGreen * 256], + &histo[kHistoBluePredSubGreen * 256]); + { + // Approximate the palette by the entropy of the multiplicative hash. + const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24; + ++histo[kHistoPalette * 256 + (hash & 0xff)]; + } + } + prev_row = curr_row; + curr_row += argb_stride; + } + { + double entropy_comp[kHistoTotal]; + double entropy[kNumEntropyIx]; + EntropyIx k; + EntropyIx last_mode_to_analyze = + use_palette ? kPalette : kSpatialSubGreen; + int j; + // Let's add one zero to the predicted histograms. The zeros are removed + // too efficiently by the pix_diff == 0 comparison, at least one of the + // zeros is likely to exist. + ++histo[kHistoRedPredSubGreen * 256]; + ++histo[kHistoBluePredSubGreen * 256]; + ++histo[kHistoRedPred * 256]; + ++histo[kHistoGreenPred * 256]; + ++histo[kHistoBluePred * 256]; + ++histo[kHistoAlphaPred * 256]; + + for (j = 0; j < kHistoTotal; ++j) { + entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL); } - last_pix = pix; + entropy[kDirect] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRed] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlue]; + entropy[kSpatial] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPred] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePred]; + entropy[kSubGreen] = entropy_comp[kHistoAlpha] + + entropy_comp[kHistoRedSubGreen] + + entropy_comp[kHistoGreen] + + entropy_comp[kHistoBlueSubGreen]; + entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] + + entropy_comp[kHistoRedPredSubGreen] + + entropy_comp[kHistoGreenPred] + + entropy_comp[kHistoBluePredSubGreen]; + // Palette mode seems more efficient in a breakeven case. Bias with 1.0. + entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0; + + *min_entropy_ix = kDirect; + for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) { + if (entropy[*min_entropy_ix] > entropy[k]) { + *min_entropy_ix = k; + } + } + *red_and_blue_always_zero = 1; + // Let's check if the histogram of the chosen entropy mode has + // non-zero red and blue values. If all are zero, we can later skip + // the cross color optimization. { - const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix); - const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff); - VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[0], &pix_token); - VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[1], - &pix_diff_token); + static const uint8_t kHistoPairs[5][2] = { + { kHistoRed, kHistoBlue }, + { kHistoRedPred, kHistoBluePred }, + { kHistoRedSubGreen, kHistoBlueSubGreen }, + { kHistoRedPredSubGreen, kHistoBluePredSubGreen }, + { kHistoRed, kHistoBlue } + }; + const uint32_t* const red_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][0]]; + const uint32_t* const blue_histo = + &histo[256 * kHistoPairs[*min_entropy_ix][1]]; + for (i = 1; i < 256; ++i) { + if ((red_histo[i] | blue_histo[i]) != 0) { + *red_and_blue_always_zero = 0; + break; + } + } } } - last_line = argb; - argb += argb_stride; + free(histo); + return 1; + } else { + return 0; } - *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[0]); - *predicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[1]); - VP8LFreeHistogramSet(histo_set); - return 1; } -static int AnalyzeAndInit(VP8LEncoder* const enc, WebPImageHint image_hint) { +static int GetHistoBits(int method, int use_palette, int width, int height) { + // Make tile size a function of encoding method (Range: 0 to 6). + int histo_bits = (use_palette ? 9 : 7) - method; + while (1) { + const int huff_image_size = VP8LSubSampleSize(width, histo_bits) * + VP8LSubSampleSize(height, histo_bits); + if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break; + ++histo_bits; + } + return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : + (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; +} + +static int GetTransformBits(int method, int histo_bits) { + const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; + return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; +} + +static int AnalyzeAndInit(VP8LEncoder* const enc) { const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; const int pix_cnt = width * height; + const WebPConfig* const config = enc->config_; + const int method = config->method; + const int low_effort = (config->method == 0); // we round the block size up, so we're guaranteed to have // at max MAX_REFS_BLOCK_PER_IMAGE blocks used: int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; assert(pic != NULL && pic->argb != NULL); + enc->use_cross_color_ = 0; + enc->use_predict_ = 0; + enc->use_subtract_green_ = 0; enc->use_palette_ = - AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_); + AnalyzeAndCreatePalette(pic, low_effort, + enc->palette_, &enc->palette_size_); - if (image_hint == WEBP_HINT_GRAPH) { - if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) { - enc->use_palette_ = 0; - } - } + // TODO(jyrki): replace the decision to be based on an actual estimate + // of entropy, or even spatial variance of entropy. + enc->histo_bits_ = GetHistoBits(method, enc->use_palette_, + pic->width, pic->height); + enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); - if (!enc->use_palette_) { - if (image_hint == WEBP_HINT_PHOTO) { - enc->use_predict_ = 1; - enc->use_cross_color_ = 1; - } else { - double non_pred_entropy, pred_entropy; - if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, - &non_pred_entropy, &pred_entropy)) { - return 0; - } - if (pred_entropy < 0.95 * non_pred_entropy) { - enc->use_predict_ = 1; - enc->use_cross_color_ = 1; - } + if (low_effort) { + // AnalyzeEntropy is somewhat slow. + enc->use_predict_ = !enc->use_palette_; + enc->use_subtract_green_ = !enc->use_palette_; + enc->use_cross_color_ = 0; + } else { + int red_and_blue_always_zero; + EntropyIx min_entropy_ix; + if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, + enc->use_palette_, &min_entropy_ix, + &red_and_blue_always_zero)) { + return 0; } + enc->use_palette_ = (min_entropy_ix == kPalette); + enc->use_subtract_green_ = + (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen); + enc->use_predict_ = + (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen); + enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; } + if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; // palette-friendly input typically uses less literals @@ -271,9 +509,9 @@ static void StoreHuffmanTreeOfHuffmanTreeToBitMask( break; } } - VP8LWriteBits(bw, 4, codes_to_store - 4); + VP8LPutBits(bw, codes_to_store - 4, 4); for (i = 0; i < codes_to_store; ++i) { - VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]); + VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[i]], 3); } } @@ -301,16 +539,16 @@ static void StoreHuffmanTreeToBitMask( for (i = 0; i < num_tokens; ++i) { const int ix = tokens[i].code; const int extra_bits = tokens[i].extra_bits; - VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]); + VP8LPutBits(bw, huffman_code->codes[ix], huffman_code->code_lengths[ix]); switch (ix) { case 16: - VP8LWriteBits(bw, 2, extra_bits); + VP8LPutBits(bw, extra_bits, 2); break; case 17: - VP8LWriteBits(bw, 3, extra_bits); + VP8LPutBits(bw, extra_bits, 3); break; case 18: - VP8LWriteBits(bw, 7, extra_bits); + VP8LPutBits(bw, extra_bits, 7); break; } } @@ -330,7 +568,7 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw, huffman_code.code_lengths = code_length_bitdepth; huffman_code.codes = code_length_bitdepth_symbols; - VP8LWriteBits(bw, 1, 0); + VP8LPutBits(bw, 0, 1); num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens); { uint32_t histogram[CODE_LENGTH_CODES] = { 0 }; @@ -367,13 +605,13 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw, } write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12); length = write_trimmed_length ? trimmed_length : num_tokens; - VP8LWriteBits(bw, 1, write_trimmed_length); + VP8LPutBits(bw, write_trimmed_length, 1); if (write_trimmed_length) { const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1); const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2; - VP8LWriteBits(bw, 3, nbitpairs - 1); + VP8LPutBits(bw, nbitpairs - 1, 3); assert(trimmed_length >= 2); - VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2); + VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2); } StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code); } @@ -400,31 +638,42 @@ static void StoreHuffmanCode(VP8LBitWriter* const bw, if (count == 0) { // emit minimal tree for empty cases // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0 - VP8LWriteBits(bw, 4, 0x01); + VP8LPutBits(bw, 0x01, 4); } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) { - VP8LWriteBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. - VP8LWriteBits(bw, 1, count - 1); + VP8LPutBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. + VP8LPutBits(bw, count - 1, 1); if (symbols[0] <= 1) { - VP8LWriteBits(bw, 1, 0); // Code bit for small (1 bit) symbol value. - VP8LWriteBits(bw, 1, symbols[0]); + VP8LPutBits(bw, 0, 1); // Code bit for small (1 bit) symbol value. + VP8LPutBits(bw, symbols[0], 1); } else { - VP8LWriteBits(bw, 1, 1); - VP8LWriteBits(bw, 8, symbols[0]); + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, symbols[0], 8); } if (count == 2) { - VP8LWriteBits(bw, 8, symbols[1]); + VP8LPutBits(bw, symbols[1], 8); } } else { StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code); } } -static void WriteHuffmanCode(VP8LBitWriter* const bw, +static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw, const HuffmanTreeCode* const code, int code_index) { const int depth = code->code_lengths[code_index]; const int symbol = code->codes[code_index]; - VP8LWriteBits(bw, depth, symbol); + VP8LPutBits(bw, symbol, depth); +} + +static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( + VP8LBitWriter* const bw, + const HuffmanTreeCode* const code, + int code_index, + int bits, + int n_bits) { + const int depth = code->code_lengths[code_index]; + const int symbol = code->codes[code_index]; + VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); } static WebPEncodingError StoreImageToBitMask( @@ -432,40 +681,51 @@ static WebPEncodingError StoreImageToBitMask( VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, const HuffmanTreeCode* const huffman_codes) { + const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; + const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); // x and y trace the position in the image. int x = 0; int y = 0; - const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; + int tile_x = x & tile_mask; + int tile_y = y & tile_mask; + int histogram_ix = histogram_symbols[0]; + const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix; VP8LRefsCursor c = VP8LRefsCursorInit(refs); while (VP8LRefsCursorOk(&c)) { const PixOrCopy* const v = c.cur_pos; - const int histogram_ix = histogram_symbols[histo_bits ? - (y >> histo_bits) * histo_xsize + - (x >> histo_bits) : 0]; - const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix; - if (PixOrCopyIsCacheIdx(v)) { - const int code = PixOrCopyCacheIdx(v); - const int literal_ix = 256 + NUM_LENGTH_CODES + code; - WriteHuffmanCode(bw, codes, literal_ix); - } else if (PixOrCopyIsLiteral(v)) { + if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) { + tile_x = x & tile_mask; + tile_y = y & tile_mask; + histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize + + (x >> histo_bits)]; + codes = huffman_codes + 5 * histogram_ix; + } + if (PixOrCopyIsLiteral(v)) { static const int order[] = { 1, 2, 0, 3 }; int k; for (k = 0; k < 4; ++k) { const int code = PixOrCopyLiteral(v, order[k]); WriteHuffmanCode(bw, codes + k, code); } + } else if (PixOrCopyIsCacheIdx(v)) { + const int code = PixOrCopyCacheIdx(v); + const int literal_ix = 256 + NUM_LENGTH_CODES + code; + WriteHuffmanCode(bw, codes, literal_ix); } else { int bits, n_bits; - int code, distance; + int code; + const int distance = PixOrCopyDistance(v); VP8LPrefixEncode(v->len, &code, &n_bits, &bits); - WriteHuffmanCode(bw, codes, 256 + code); - VP8LWriteBits(bw, n_bits, bits); + WriteHuffmanCodeWithExtraBits(bw, codes, 256 + code, bits, n_bits); - distance = PixOrCopyDistance(v); + // Don't write the distance with the extra bits code since + // the distance can be up to 18 bits of extra bits, and the prefix + // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits. + // TODO(jyrki): optimize this further. VP8LPrefixEncode(distance, &code, &n_bits, &bits); WriteHuffmanCode(bw, codes + 4, code); - VP8LWriteBits(bw, n_bits, bits); + VP8LPutBits(bw, bits, n_bits); } x += PixOrCopyLength(v); while (x >= width) { @@ -491,21 +751,28 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, HuffmanTreeToken* tokens = NULL; HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol - VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0); + int cache_bits = 0; + VP8LHistogramSet* histogram_image = NULL; HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); - if (histogram_image == NULL || huff_tree == NULL) { + if (huff_tree == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // Calculate backward references from ARGB image. - refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, + refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, hash_chain, refs_array); if (refs == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + histogram_image = VP8LAllocateHistogramSet(1, cache_bits); + if (histogram_image == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Build histogram image and symbols from backward references. VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]); @@ -517,7 +784,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, } // No color cache, no Huffman image. - VP8LWriteBits(bw, 1, 0); + VP8LPutBits(bw, 0, 1); // Find maximum number of symbols for the huffman tree-set. for (i = 0; i < 5; ++i) { @@ -557,16 +824,17 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2], int width, int height, int quality, - int cache_bits, - int histogram_bits) { + int low_effort, int* cache_bits, + int histogram_bits, + size_t init_byte_position, + int* const hdr_size, + int* const data_size) { WebPEncodingError err = VP8_ENC_OK; - const int use_2d_locality = 1; - const int use_color_cache = (cache_bits > 0); const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); - VP8LHistogramSet* histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits); + VP8LHistogramSet* histogram_image = NULL; + VP8LHistogramSet* tmp_histos = NULL; int histogram_image_size = 0; size_t bit_array_size = 0; HuffmanTree* huff_tree = NULL; @@ -579,28 +847,39 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, sizeof(*histogram_symbols)); assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); + assert(hdr_size != NULL); + assert(data_size != NULL); VP8LBackwardRefsInit(&refs, refs_array[0].block_size_); - if (histogram_image == NULL || histogram_symbols == NULL) { - VP8LFreeHistogramSet(histogram_image); - WebPSafeFree(histogram_symbols); - return 0; + if (histogram_symbols == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; } + *cache_bits = MAX_COLOR_CACHE_BITS; // 'best_refs' is the reference to the best backward refs and points to one // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. best_refs = VP8LGetBackwardReferences(width, height, argb, quality, - cache_bits, use_2d_locality, - hash_chain, refs_array); + low_effort, cache_bits, hash_chain, + refs_array); if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); + tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits); + if (histogram_image == NULL || tmp_histos == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, &refs, - quality, histogram_bits, cache_bits, - histogram_image, - histogram_symbols)) { + if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, + histogram_bits, *cache_bits, histogram_image, + tmp_histos, histogram_symbols)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // Create Huffman bit lengths and codes for each histogram image. @@ -608,41 +887,53 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, bit_array_size = 5 * histogram_image_size; huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the latter + // need to outlive the following call to GetHuffBitLengthsAndCodes(). if (huffman_codes == NULL || !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // Free combined histograms. VP8LFreeHistogramSet(histogram_image); histogram_image = NULL; + // Free scratch histograms. + VP8LFreeHistogramSet(tmp_histos); + tmp_histos = NULL; + // Color Cache parameters. - VP8LWriteBits(bw, 1, use_color_cache); - if (use_color_cache) { - VP8LWriteBits(bw, 4, cache_bits); + if (*cache_bits > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, *cache_bits, 4); + } else { + VP8LPutBits(bw, 0, 1); } // Huffman image + meta huffman. { const int write_histogram_image = (histogram_image_size > 1); - VP8LWriteBits(bw, 1, write_histogram_image); + VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_argb)); int max_index = 0; uint32_t i; - if (histogram_argb == NULL) goto Error; + if (histogram_argb == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } for (i = 0; i < histogram_image_xysize; ++i) { const int symbol_index = histogram_symbols[i] & 0xffff; - histogram_argb[i] = 0xff000000 | (symbol_index << 8); + histogram_argb[i] = (symbol_index << 8); if (symbol_index >= max_index) { max_index = symbol_index + 1; } } histogram_image_size = max_index; - VP8LWriteBits(bw, 3, histogram_bits - 2); + VP8LPutBits(bw, histogram_bits - 2, 3); err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, VP8LSubSampleSize(width, histogram_bits), VP8LSubSampleSize(height, histogram_bits), @@ -658,7 +949,10 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, int max_tokens = 0; huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); - if (huff_tree == NULL) goto Error; + if (huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } // Find maximum number of symbols for the huffman tree-set. for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; @@ -668,7 +962,10 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, } tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) goto Error; + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; StoreHuffmanCode(bw, huff_tree, tokens, codes); @@ -676,14 +973,18 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, } } + *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); // Store actual literals. err = StoreImageToBitMask(bw, width, histogram_bits, &refs, histogram_symbols, huffman_codes); + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); + VP8LFreeHistogramSet(tmp_histos); VP8LBackwardRefsClear(&refs); if (huffman_codes != NULL) { WebPSafeFree(huffman_codes->codes); @@ -696,59 +997,28 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, // ----------------------------------------------------------------------------- // Transforms -// Check if it would be a good idea to subtract green from red and blue. We -// only impact entropy in red/blue components, don't bother to look at others. -static WebPEncodingError EvalAndApplySubtractGreen(VP8LEncoder* const enc, - int width, int height, - VP8LBitWriter* const bw) { - if (!enc->use_palette_) { - int i; - const uint32_t* const argb = enc->argb_; - double bit_cost_before, bit_cost_after; - // Allocate histogram with cache_bits = 1. - VP8LHistogram* const histo = VP8LAllocateHistogram(1); - if (histo == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; - for (i = 0; i < width * height; ++i) { - const uint32_t c = argb[i]; - ++histo->red_[(c >> 16) & 0xff]; - ++histo->blue_[(c >> 0) & 0xff]; - } - bit_cost_before = VP8LHistogramEstimateBits(histo); - - VP8LHistogramInit(histo, 1); - for (i = 0; i < width * height; ++i) { - const uint32_t c = argb[i]; - const int green = (c >> 8) & 0xff; - ++histo->red_[((c >> 16) - green) & 0xff]; - ++histo->blue_[((c >> 0) - green) & 0xff]; - } - bit_cost_after = VP8LHistogramEstimateBits(histo); - VP8LFreeHistogram(histo); - - // Check if subtracting green yields low entropy. - enc->use_subtract_green_ = (bit_cost_after < bit_cost_before); - if (enc->use_subtract_green_) { - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, SUBTRACT_GREEN); - VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); - } - } - return VP8_ENC_OK; +static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, + VP8LBitWriter* const bw) { + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, SUBTRACT_GREEN, 2); + VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, - int width, int height, int quality, + int width, int height, + int quality, int low_effort, VP8LBitWriter* const bw) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); - VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_, - enc->transform_data_); - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM); + VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, + enc->argb_scratch_, enc->transform_data_, + enc->config_->exact); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); - VP8LWriteBits(bw, 3, pred_bits - 2); + VP8LPutBits(bw, pred_bits - 2, 3); return EncodeImageNoHuffman(bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)enc->refs_, // cast const away @@ -766,10 +1036,10 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, enc->argb_, enc->transform_data_); - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM); + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); assert(ccolor_transform_bits >= 2); - VP8LWriteBits(bw, 3, ccolor_transform_bits - 2); + VP8LPutBits(bw, ccolor_transform_bits - 2, 3); return EncodeImageNoHuffman(bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)enc->refs_, // cast const away @@ -799,14 +1069,14 @@ static int WriteImageSize(const WebPPicture* const pic, const int height = pic->height - 1; assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION); - VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width); - VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height); + VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS); + VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS); return !bw->error_; } static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { - VP8LWriteBits(bw, 1, has_alpha); - VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION); + VP8LPutBits(bw, has_alpha, 1); + VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS); return !bw->error_; } @@ -846,39 +1116,107 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, // Allocates the memory for argb (W x H) buffer, 2 rows of context for // prediction and transform data. +// Flags influencing the memory allocated: +// enc->transform_bits_ +// enc->use_predict_, enc->use_cross_color_ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, int width, int height) { WebPEncodingError err = VP8_ENC_OK; - const int tile_size = 1 << enc->transform_bits_; - const uint64_t image_size = width * height; - const uint64_t argb_scratch_size = tile_size * width + width; - const int transform_data_size = - VP8LSubSampleSize(width, enc->transform_bits_) * - VP8LSubSampleSize(height, enc->transform_bits_); - const uint64_t total_size = - image_size + argb_scratch_size + (uint64_t)transform_data_size; - uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); - if (mem == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + if (enc->argb_ == NULL) { + const int tile_size = 1 << enc->transform_bits_; + const uint64_t image_size = width * height; + // Ensure enough size for tiles, as well as for two scanlines and two + // extra pixels for CopyImageWithPrediction. + const uint64_t argb_scratch_size = + enc->use_predict_ ? tile_size * width + width + 2 : 0; + const int transform_data_size = + (enc->use_predict_ || enc->use_cross_color_) + ? VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_) + : 0; + const uint64_t total_size = + image_size + WEBP_ALIGN_CST + + argb_scratch_size + WEBP_ALIGN_CST + + (uint64_t)transform_data_size; + uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + if (mem == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + enc->argb_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + image_size); + enc->argb_scratch_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); + enc->transform_data_ = mem; + enc->current_width_ = width; } - enc->argb_ = mem; - mem += image_size; - enc->argb_scratch_ = mem; - mem += argb_scratch_size; - enc->transform_data_ = mem; - enc->current_width_ = width; - Error: return err; } -static void ApplyPalette(uint32_t* src, uint32_t* dst, - uint32_t src_stride, uint32_t dst_stride, - const uint32_t* palette, int palette_size, - int width, int height, int xbits, uint8_t* row) { +static void ClearTransformBuffer(VP8LEncoder* const enc) { + WebPSafeFree(enc->argb_); + enc->argb_ = NULL; +} + +static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { + WebPEncodingError err = VP8_ENC_OK; + const WebPPicture* const picture = enc->pic_; + const int width = picture->width; + const int height = picture->height; + int y; + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) return err; + for (y = 0; y < height; ++y) { + memcpy(enc->argb_ + y * width, + picture->argb + y * picture->argb_stride, + width * sizeof(*enc->argb_)); + } + assert(enc->current_width_ == width); + return VP8_ENC_OK; +} + +// ----------------------------------------------------------------------------- + +static void MapToPalette(const uint32_t palette[], int num_colors, + uint32_t* const last_pix, int* const last_idx, + const uint32_t* src, uint8_t* dst, int width) { + int x; + int prev_idx = *last_idx; + uint32_t prev_pix = *last_pix; + for (x = 0; x < width; ++x) { + const uint32_t pix = src[x]; + if (pix != prev_pix) { + int i; + for (i = 0; i < num_colors; ++i) { + if (pix == palette[i]) { + prev_idx = i; + prev_pix = pix; + break; + } + } + } + dst[x] = prev_idx; + } + *last_idx = prev_idx; + *last_pix = prev_pix; +} + +// Remap argb values in src[] to packed palettes entries in dst[] +// using 'row' as a temporary buffer of size 'width'. +// We assume that all src[] values have a corresponding entry in the palette. +// Note: src[] can be the same as dst[] +static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, + uint32_t* dst, uint32_t dst_stride, + const uint32_t* palette, int palette_size, + int width, int height, int xbits) { + // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be + // made to work in-place. + uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); int i, x, y; int use_LUT = 1; + + if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; for (i = 0; i < palette_size; ++i) { if ((palette[i] & 0xffff00ffu) != 0) { use_LUT = 0; @@ -895,9 +1233,9 @@ static void ApplyPalette(uint32_t* src, uint32_t* dst, for (y = 0; y < height; ++y) { for (x = 0; x < width; ++x) { const int color = (src[x] >> 8) & 0xff; - row[x] = inv_palette[color]; + tmp_row[x] = inv_palette[color]; } - VP8LBundleColorMap(row, width, xbits, dst); + VP8LBundleColorMap(tmp_row, width, xbits, dst); src += src_stride; dst += dst_stride; } @@ -906,41 +1244,28 @@ static void ApplyPalette(uint32_t* src, uint32_t* dst, uint32_t last_pix = palette[0]; int last_idx = 0; for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t pix = src[x]; - if (pix != last_pix) { - for (i = 0; i < palette_size; ++i) { - if (pix == palette[i]) { - last_idx = i; - last_pix = pix; - break; - } - } - } - row[x] = last_idx; - } - VP8LBundleColorMap(row, width, xbits, dst); + MapToPalette(palette, palette_size, &last_pix, &last_idx, + src, tmp_row, width); + VP8LBundleColorMap(tmp_row, width, xbits, dst); src += src_stride; dst += dst_stride; } } + WebPSafeFree(tmp_row); + return VP8_ENC_OK; } // Note: Expects "enc->palette_" to be set properly. -// Also, "enc->palette_" will be modified after this call and should not be used -// later. -static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, - VP8LEncoder* const enc, int quality) { +static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, + int in_place) { WebPEncodingError err = VP8_ENC_OK; - int i; const WebPPicture* const pic = enc->pic_; - uint32_t* src = pic->argb; - uint32_t* dst; const int width = pic->width; const int height = pic->height; - uint32_t* const palette = enc->palette_; + const uint32_t* const palette = enc->palette_; + const uint32_t* src = in_place ? enc->argb_ : pic->argb; + const int src_stride = in_place ? enc->current_width_ : pic->argb_stride; const int palette_size = enc->palette_size_; - uint8_t* row = NULL; int xbits; // Replace each input pixel by corresponding palette index. @@ -952,67 +1277,74 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, } err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); - if (err != VP8_ENC_OK) goto Error; - dst = enc->argb_; - - row = (uint8_t*)WebPSafeMalloc(width, sizeof(*row)); - if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (err != VP8_ENC_OK) return err; - ApplyPalette(src, dst, pic->argb_stride, enc->current_width_, - palette, palette_size, width, height, xbits, row); + err = ApplyPalette(src, src_stride, + enc->argb_, enc->current_width_, + palette, palette_size, width, height, xbits); + return err; +} - // Save palette to bitstream. - VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); - VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM); - assert(palette_size >= 1); - VP8LWriteBits(bw, 8, palette_size - 1); +// Save palette_[] to bitstream. +static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, + VP8LEncoder* const enc) { + int i; + uint32_t tmp_palette[MAX_PALETTE_SIZE]; + const int palette_size = enc->palette_size_; + const uint32_t* const palette = enc->palette_; + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2); + assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE); + VP8LPutBits(bw, palette_size - 1, 8); for (i = palette_size - 1; i >= 1; --i) { - palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); + tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); } - err = EncodeImageNoHuffman(bw, palette, &enc->hash_chain_, enc->refs_, - palette_size, 1, quality); - - Error: - WebPSafeFree(row); - return err; + tmp_palette[0] = palette[0]; + return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_, + palette_size, 1, 20 /* quality */); } -// ----------------------------------------------------------------------------- +#ifdef WEBP_EXPERIMENTAL_FEATURES -static int GetHistoBits(int method, int use_palette, int width, int height) { - const int hist_size = VP8LGetHistogramSize(MAX_COLOR_CACHE_BITS); - // Make tile size a function of encoding method (Range: 0 to 6). - int histo_bits = (use_palette ? 9 : 7) - method; - while (1) { - const int huff_image_size = VP8LSubSampleSize(width, histo_bits) * - VP8LSubSampleSize(height, histo_bits); - if ((uint64_t)huff_image_size * hist_size <= MAX_HUFF_IMAGE_SIZE) break; - ++histo_bits; - } - return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : - (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; -} +static WebPEncodingError EncodeDeltaPalettePredictorImage( + VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) { + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; -static int GetTransformBits(int method, int histo_bits) { - const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; - return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; -} + const int pred_bits = 5; + const int transform_width = VP8LSubSampleSize(width, pred_bits); + const int transform_height = VP8LSubSampleSize(height, pred_bits); + const int pred = 7; // default is Predictor7 (Top/Left Average) + const int tiles_per_row = VP8LSubSampleSize(width, pred_bits); + const int tiles_per_col = VP8LSubSampleSize(height, pred_bits); + uint32_t* predictors; + int tile_x, tile_y; + WebPEncodingError err = VP8_ENC_OK; -static int GetCacheBits(float quality) { - return (quality <= 25.f) ? 0 : 7; -} + predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row, + sizeof(*predictors)); + if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; -static void FinishEncParams(VP8LEncoder* const enc) { - const WebPConfig* const config = enc->config_; - const WebPPicture* const pic = enc->pic_; - const int method = config->method; - const float quality = config->quality; - const int use_palette = enc->use_palette_; - enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height); - enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); - enc->cache_bits_ = GetCacheBits(quality); + for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { + for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { + predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); + } + } + + VP8LPutBits(bw, TRANSFORM_PRESENT, 1); + VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); + VP8LPutBits(bw, pred_bits - 2, 3); + err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); + WebPSafeFree(predictors); + return err; } +#endif // WEBP_EXPERIMENTAL_FEATURES + // ----------------------------------------------------------------------------- // VP8LEncoder @@ -1026,7 +1358,7 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, enc->config_ = config; enc->pic_ = picture; - VP8LDspInit(); + VP8LEncDspInit(); return enc; } @@ -1036,7 +1368,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { VP8LHashChainClear(&enc->hash_chain_); VP8LBackwardRefsClear(&enc->refs_[0]); VP8LBackwardRefsClear(&enc->refs_[1]); - WebPSafeFree(enc->argb_); + ClearTransformBuffer(enc); WebPSafeFree(enc); } } @@ -1049,10 +1381,15 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, VP8LBitWriter* const bw) { WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; + const int low_effort = (config->method == 0); const int width = picture->width; const int height = picture->height; VP8LEncoder* const enc = VP8LEncoderNew(config, picture); const size_t byte_position = VP8LBitWriterNumBytes(bw); + int use_near_lossless = 0; + int hdr_size = 0; + int data_size = 0; + int use_delta_palettization = 0; if (enc == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; @@ -1062,70 +1399,83 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // --------------------------------------------------------------------------- // Analyze image (entropy, num_palettes etc) - if (!AnalyzeAndInit(enc, config->image_hint)) { + if (!AnalyzeAndInit(enc)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - FinishEncParams(enc); - - if (enc->use_palette_) { - err = EncodePalette(bw, enc, quality); - if (err != VP8_ENC_OK) goto Error; - // Color cache is disabled for palette. - enc->cache_bits_ = 0; + // Apply near-lossless preprocessing. + use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100); + if (use_near_lossless) { + if (!VP8ApplyNearLossless(width, height, picture->argb, + config->near_lossless)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } } - // In case image is not packed. - if (enc->argb_ == NULL) { - int y; - err = AllocateTransformBuffer(enc, width, height); +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->delta_palettization) { + enc->use_predict_ = 1; + enc->use_cross_color_ = 0; + enc->use_subtract_green_ = 0; + enc->use_palette_ = 1; + err = MakeInputImageCopy(enc); if (err != VP8_ENC_OK) goto Error; - assert(enc->argb_ != NULL); - for (y = 0; y < height; ++y) { - memcpy(enc->argb_ + y * width, - picture->argb + y * picture->argb_stride, - width * sizeof(*enc->argb_)); + err = WebPSearchOptimalDeltaPalette(enc); + if (err != VP8_ENC_OK) goto Error; + if (enc->use_palette_) { + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) goto Error; + err = EncodeDeltaPalettePredictorImage(bw, enc, quality); + if (err != VP8_ENC_OK) goto Error; + use_delta_palettization = 1; } - enc->current_width_ = width; } +#endif // WEBP_EXPERIMENTAL_FEATURES - // --------------------------------------------------------------------------- - // Apply transforms and write transform data. - - err = EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw); - if (err != VP8_ENC_OK) goto Error; - - if (enc->use_predict_) { - err = ApplyPredictFilter(enc, enc->current_width_, height, quality, bw); + // Encode palette + if (enc->use_palette_) { + err = EncodePalette(bw, enc); if (err != VP8_ENC_OK) goto Error; - } - - if (enc->use_cross_color_) { - err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw); + err = MapImageFromPalette(enc, use_delta_palettization); if (err != VP8_ENC_OK) goto Error; } + if (!use_delta_palettization) { + // In case image is not packed. + if (enc->argb_ == NULL) { + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; + } - VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT); // No more transforms. + // ------------------------------------------------------------------------- + // Apply transforms and write transform data. - // --------------------------------------------------------------------------- - // Estimate the color cache size. + if (enc->use_subtract_green_) { + ApplySubtractGreen(enc, enc->current_width_, height, bw); + } - if (enc->cache_bits_ > 0) { - if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_, - height, quality, &enc->hash_chain_, - &enc->refs_[0], &enc->cache_bits_)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + if (enc->use_predict_) { + err = ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, bw); + if (err != VP8_ENC_OK) goto Error; + } + + if (enc->use_cross_color_) { + err = ApplyCrossColorFilter(enc, enc->current_width_, + height, quality, bw); + if (err != VP8_ENC_OK) goto Error; } } + VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms. + // --------------------------------------------------------------------------- // Encode and write the transformed image. - err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, - enc->current_width_, height, quality, - enc->cache_bits_, enc->histo_bits_); + enc->current_width_, height, quality, low_effort, + &enc->cache_bits_, enc->histo_bits_, byte_position, + &hdr_size, &data_size); if (err != VP8_ENC_OK) goto Error; if (picture->stats != NULL) { @@ -1140,6 +1490,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, stats->cache_bits = enc->cache_bits_; stats->palette_size = enc->palette_size_; stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position); + stats->lossless_hdr_size = hdr_size; + stats->lossless_data_size = data_size; } Error: @@ -1170,7 +1522,7 @@ int VP8LEncodeImage(const WebPConfig* const config, // Initialize BitWriter with size corresponding to 16 bpp to photo images and // 8 bpp for graphical images. initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? - width * height : width * height * 2; + width * height : width * height * 2; if (!VP8LBitWriterInit(&bw, initial_size)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; @@ -1234,7 +1586,7 @@ int VP8LEncodeImage(const WebPConfig* const config, Error: if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; - VP8LBitWriterDestroy(&bw); + VP8LBitWriterWipeOut(&bw); if (err != VP8_ENC_OK) { WebPEncodingSetError(picture, err); return 0; diff --git a/src/3rdparty/libwebp/src/enc/webpenc.c b/src/3rdparty/libwebp/src/enc/webpenc.c index ca85e0b..fece736 100644 --- a/src/3rdparty/libwebp/src/enc/webpenc.c +++ b/src/3rdparty/libwebp/src/enc/webpenc.c @@ -16,9 +16,9 @@ #include <string.h> #include <math.h> +#include "./cost.h" #include "./vp8enci.h" #include "./vp8li.h" -#include "./cost.h" #include "../utils/utils.h" // #define PRINT_MEMORY_INFO @@ -38,14 +38,14 @@ int WebPGetEncoderVersion(void) { //------------------------------------------------------------------------------ static void ResetSegmentHeader(VP8Encoder* const enc) { - VP8SegmentHeader* const hdr = &enc->segment_hdr_; + VP8EncSegmentHeader* const hdr = &enc->segment_hdr_; hdr->num_segments_ = enc->config_->segments; hdr->update_map_ = (hdr->num_segments_ > 1); hdr->size_ = 0; } static void ResetFilterHeader(VP8Encoder* const enc) { - VP8FilterHeader* const hdr = &enc->filter_hdr_; + VP8EncFilterHeader* const hdr = &enc->filter_hdr_; hdr->simple_ = 1; hdr->level_ = 0; hdr->sharpness_ = 0; @@ -79,7 +79,9 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) { //-------------------+---+---+---+---+---+---+---+ // basic rd-opt | | | | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ -// disto-score i4/16 | | | x | | | | | +// disto-refine i4/16| x | x | x | | | | | +//-------------------+---+---+---+---+---+---+---+ +// disto-refine uv | | x | x | | | | | //-------------------+---+---+---+---+---+---+---+ // rd-opt i4/16 | | | ~ | x | x | x | x | //-------------------+---+---+---+---+---+---+---+ @@ -131,35 +133,36 @@ static void MapConfigToTools(VP8Encoder* const enc) { // VP8EncIterator: 3360 // VP8ModeScore: 872 // VP8SegmentInfo: 732 -// VP8Proba: 18352 +// VP8EncProba: 18352 // LFStats: 2048 // Picture size (yuv): 419328 static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, WebPPicture* const picture) { + VP8Encoder* enc; const int use_filter = (config->filter_strength > 0) || (config->autofilter > 0); const int mb_w = (picture->width + 15) >> 4; const int mb_h = (picture->height + 15) >> 4; const int preds_w = 4 * mb_w + 1; const int preds_h = 4 * mb_h + 1; - const size_t preds_size = preds_w * preds_h * sizeof(uint8_t); + const size_t preds_size = preds_w * preds_h * sizeof(*enc->preds_); const int top_stride = mb_w * 16; - const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + ALIGN_CST; - const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo); - const size_t samples_size = 2 * top_stride * sizeof(uint8_t) // top-luma/u/v - + ALIGN_CST; // align all + const size_t nz_size = (mb_w + 1) * sizeof(*enc->nz_) + WEBP_ALIGN_CST; + const size_t info_size = mb_w * mb_h * sizeof(*enc->mb_info_); + const size_t samples_size = + 2 * top_stride * sizeof(*enc->y_top_) // top-luma/u/v + + WEBP_ALIGN_CST; // align all const size_t lf_stats_size = - config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0; - VP8Encoder* enc; + config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; uint8_t* mem; - const uint64_t size = (uint64_t)sizeof(VP8Encoder) // main struct - + ALIGN_CST // cache alignment - + info_size // modes info - + preds_size // prediction modes - + samples_size // top/left samples - + nz_size // coeff context bits - + lf_stats_size; // autofilter stats + const uint64_t size = (uint64_t)sizeof(*enc) // main struct + + WEBP_ALIGN_CST // cache alignment + + info_size // modes info + + preds_size // prediction modes + + samples_size // top/left samples + + nz_size // coeff context bits + + lf_stats_size; // autofilter stats #ifdef PRINT_MEMORY_INFO printf("===================================\n"); @@ -171,16 +174,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, " non-zero: %ld\n" " lf-stats: %ld\n" " total: %ld\n", - sizeof(VP8Encoder) + ALIGN_CST, info_size, + sizeof(*enc) + WEBP_ALIGN_CST, info_size, preds_size, samples_size, nz_size, lf_stats_size, size); printf("Transient object sizes:\n" " VP8EncIterator: %ld\n" " VP8ModeScore: %ld\n" " VP8SegmentInfo: %ld\n" - " VP8Proba: %ld\n" + " VP8EncProba: %ld\n" " LFStats: %ld\n", sizeof(VP8EncIterator), sizeof(VP8ModeScore), - sizeof(VP8SegmentInfo), sizeof(VP8Proba), + sizeof(VP8SegmentInfo), sizeof(VP8EncProba), sizeof(LFStats)); printf("Picture size (yuv): %ld\n", mb_w * mb_h * 384 * sizeof(uint8_t)); @@ -192,7 +195,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, return NULL; } enc = (VP8Encoder*)mem; - mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc)); + mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc)); memset(enc, 0, sizeof(*enc)); enc->num_parts_ = 1 << config->partitions; enc->mb_w_ = mb_w; @@ -201,14 +204,14 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, enc->mb_info_ = (VP8MBInfo*)mem; mem += info_size; enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; - mem += preds_w * preds_h * sizeof(uint8_t); - enc->nz_ = 1 + (uint32_t*)DO_ALIGN(mem); + mem += preds_size; + enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); mem += nz_size; - enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL; + enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL; mem += lf_stats_size; // top samples (all 16-aligned) - mem = (uint8_t*)DO_ALIGN(mem); + mem = (uint8_t*)WEBP_ALIGN(mem); enc->y_top_ = (uint8_t*)mem; enc->uv_top_ = enc->y_top_ + top_stride; mem += 2 * top_stride; @@ -225,8 +228,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, ResetSegmentHeader(enc); ResetFilterHeader(enc); ResetBoundaryPredictions(enc); - VP8GetResidualCostInit(); - VP8SetResidualCoeffsInit(); + VP8EncDspCostInit(); VP8EncInitAlpha(enc); // lower quality means smaller output -> we modulate a little the page @@ -326,14 +328,17 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (!config->lossless) { VP8Encoder* enc = NULL; + + if (!config->exact) { + WebPCleanupTransparentArea(pic); + } + if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { // Make sure we have YUVA samples. if (config->preprocessing & 4) { -#if WEBP_ENCODER_ABI_VERSION > 0x0204 if (!WebPPictureSmartARGBToYUVA(pic)) { return 0; } -#endif } else { float dithering = 0.f; if (config->preprocessing & 2) { @@ -375,6 +380,10 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { return 0; } + if (!config->exact) { + WebPCleanupTransparentAreaLossless(pic); + } + ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. } |