diff options
author | Liang Qi <liang.qi@qt.io> | 2017-03-06 14:33:16 +0100 |
---|---|---|
committer | Liang Qi <liang.qi@qt.io> | 2017-03-06 14:33:34 +0100 |
commit | f2dbc67c2b032a5f27d0224e020fb6dfcd3fd142 (patch) | |
tree | c5c195998f538fd7b9aa1382ced53caf2dec3926 /src/3rdparty/libwebp/src/enc | |
parent | d2306d74850986692c02b70df0d7a6a6e933d0dc (diff) | |
parent | 03e507c8f3816053257b3c351d79c494b6f9174d (diff) |
Merge remote-tracking branch 'origin/5.8' into 5.9
Change-Id: I9cf7f04769944935d7b836453c7982839857a909
Diffstat (limited to 'src/3rdparty/libwebp/src/enc')
-rw-r--r-- | src/3rdparty/libwebp/src/enc/alpha.c | 7 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/backward_references.c | 1116 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/backward_references.h | 18 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/filter.c | 89 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/histogram.c | 125 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/near_lossless.c | 56 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/picture.c | 2 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/picture_csp.c | 24 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/picture_psnr.c | 6 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/quant.c | 84 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/vp8enci.h | 23 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/vp8l.c | 204 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/vp8li.h | 20 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/webpenc.c | 4 |
14 files changed, 1213 insertions, 565 deletions
diff --git a/src/3rdparty/libwebp/src/enc/alpha.c b/src/3rdparty/libwebp/src/enc/alpha.c index 3c970b0..03e3ad0 100644 --- a/src/3rdparty/libwebp/src/enc/alpha.c +++ b/src/3rdparty/libwebp/src/enc/alpha.c @@ -79,7 +79,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, config.quality = 8.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); - ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK); + // TODO(urvang): Temporary fix to avoid generating images that trigger + // a decoder bug related to alpha with color cache. + // See: https://code.google.com/p/webp/issues/detail?id=239 + // Need to re-enable this later. + ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); WebPPictureFree(&picture); ok = ok && !bw->error_; if (!ok) { @@ -118,7 +122,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, assert(method >= ALPHA_NO_COMPRESSION); assert(method <= ALPHA_LOSSLESS_COMPRESSION); assert(sizeof(header) == ALPHA_HEADER_LEN); - // TODO(skal): have a common function and #define's to validate alpha params. filter_func = WebPFilters[filter]; if (filter_func != NULL) { diff --git a/src/3rdparty/libwebp/src/enc/backward_references.c b/src/3rdparty/libwebp/src/enc/backward_references.c index c39437d..136a24a 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.c +++ b/src/3rdparty/libwebp/src/enc/backward_references.c @@ -27,11 +27,19 @@ #define MAX_ENTROPY (1e30f) // 1M window (4M bytes) minus 120 special codes for short distances. -#define WINDOW_SIZE ((1 << 20) - 120) +#define WINDOW_SIZE_BITS 20 +#define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) // Bounds for the match length. #define MIN_LENGTH 2 -#define MAX_LENGTH 4096 +// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it +// is used in VP8LHashChain. +#define MAX_LENGTH_BITS 12 +// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits. +#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1) +#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32 +#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32" +#endif // ----------------------------------------------------------------------------- @@ -57,32 +65,19 @@ static int DistanceToPlaneCode(int xsize, int dist) { return dist + 120; } -// Returns the exact index where array1 and array2 are different if this -// index is strictly superior to best_len_match. Otherwise, it returns 0. +// Returns the exact index where array1 and array2 are different. For an index +// inferior or equal to best_len_match, the return value just has to be strictly +// inferior to best_len_match. The current behavior is to return 0 if this index +// is best_len_match, and the index itself otherwise. // If no two elements are the same, it returns max_limit. static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, const uint32_t* const array2, - int best_len_match, - int max_limit) { - int match_len; - + int best_len_match, int max_limit) { // Before 'expensive' linear match, check if the two arrays match at the // current best length index. if (array1[best_len_match] != array2[best_len_match]) return 0; -#if defined(WEBP_USE_SSE2) - // Check if anything is different up to best_len_match excluded. - // memcmp seems to be slower on ARM so it is disabled for now. - if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0; - match_len = best_len_match + 1; -#else - match_len = 0; -#endif - - while (match_len < max_limit && array1[match_len] == array2[match_len]) { - ++match_len; - } - return match_len; + return VP8LVectorMismatch(array1, array2, max_limit); } // ----------------------------------------------------------------------------- @@ -194,31 +189,24 @@ int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, // ----------------------------------------------------------------------------- // Hash chains -// initialize as empty -static void HashChainReset(VP8LHashChain* const p) { - assert(p != NULL); - // Set the int32_t arrays to -1. - memset(p->chain_, 0xff, p->size_ * sizeof(*p->chain_)); - memset(p->hash_to_first_index_, 0xff, - HASH_SIZE * sizeof(*p->hash_to_first_index_)); -} - int VP8LHashChainInit(VP8LHashChain* const p, int size) { assert(p->size_ == 0); - assert(p->chain_ == NULL); + assert(p->offset_length_ == NULL); assert(size > 0); - p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_)); - if (p->chain_ == NULL) return 0; + p->offset_length_ = + (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_)); + if (p->offset_length_ == NULL) return 0; p->size_ = size; - HashChainReset(p); + return 1; } void VP8LHashChainClear(VP8LHashChain* const p) { assert(p != NULL); - WebPSafeFree(p->chain_); + WebPSafeFree(p->offset_length_); + p->size_ = 0; - p->chain_ = NULL; + p->offset_length_ = NULL; } // ----------------------------------------------------------------------------- @@ -234,18 +222,10 @@ static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { return key; } -// Insertion of two pixels at a time. -static void HashChainInsert(VP8LHashChain* const p, - const uint32_t* const argb, int pos) { - const uint32_t hash_code = GetPixPairHash64(argb); - p->chain_[pos] = p->hash_to_first_index_[hash_code]; - p->hash_to_first_index_[hash_code] = pos; -} - // Returns the maximum number of hash chain lookups to do for a -// given compression quality. Return value in range [6, 86]. -static int GetMaxItersForQuality(int quality, int low_effort) { - return (low_effort ? 6 : 8) + (quality * quality) / 128; +// given compression quality. Return value in range [8, 86]. +static int GetMaxItersForQuality(int quality) { + return 8 + (quality * quality) / 128; } static int GetWindowSizeForHashChain(int quality, int xsize) { @@ -261,63 +241,120 @@ static WEBP_INLINE int MaxFindCopyLength(int len) { return (len < MAX_LENGTH) ? len : MAX_LENGTH; } -static void HashChainFindOffset(const VP8LHashChain* const p, int base_position, - const uint32_t* const argb, int len, - int window_size, int* const distance_ptr) { - const uint32_t* const argb_start = argb + base_position; - const int min_pos = - (base_position > window_size) ? base_position - window_size : 0; +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize) { + const int size = xsize * ysize; + const int iter_max = GetMaxItersForQuality(quality); + const int iter_min = iter_max - quality / 10; + const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); int pos; - assert(len <= MAX_LENGTH); - for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; - pos >= min_pos; - pos = p->chain_[pos]) { - const int curr_length = - FindMatchLength(argb + pos, argb_start, len - 1, len); - if (curr_length == len) break; + uint32_t base_position; + int32_t* hash_to_first_index; + // Temporarily use the p->offset_length_ as a hash chain. + int32_t* chain = (int32_t*)p->offset_length_; + assert(p->size_ != 0); + assert(p->offset_length_ != NULL); + + hash_to_first_index = + (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); + if (hash_to_first_index == NULL) return 0; + + // Set the int32_t array to -1. + memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); + // Fill the chain linking pixels with the same hash. + for (pos = 0; pos < size - 1; ++pos) { + const uint32_t hash_code = GetPixPairHash64(argb + pos); + chain[pos] = hash_to_first_index[hash_code]; + hash_to_first_index[hash_code] = pos; } - *distance_ptr = base_position - pos; -} - -static int HashChainFindCopy(const VP8LHashChain* const p, - int base_position, - const uint32_t* const argb, int max_len, - int window_size, int iter_max, - int* const distance_ptr, - int* const length_ptr) { - const uint32_t* const argb_start = argb + base_position; - int iter = iter_max; - int best_length = 0; - int best_distance = 0; - const int min_pos = - (base_position > window_size) ? base_position - window_size : 0; - int pos; - int length_max = 256; - if (max_len < length_max) { - length_max = max_len; - } - for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; - pos >= min_pos; - pos = p->chain_[pos]) { - int curr_length; - int distance; - if (--iter < 0) { - break; + WebPSafeFree(hash_to_first_index); + + // Find the best match interval at each pixel, defined by an offset to the + // pixel and a length. The right-most pixel cannot match anything to the right + // (hence a best length of 0) and the left-most pixel nothing to the left + // (hence an offset of 0). + p->offset_length_[0] = p->offset_length_[size - 1] = 0; + for (base_position = size - 2 < 0 ? 0 : size - 2; base_position > 0;) { + const int max_len = MaxFindCopyLength(size - 1 - base_position); + const uint32_t* const argb_start = argb + base_position; + int iter = iter_max; + int best_length = 0; + uint32_t best_distance = 0; + const int min_pos = + (base_position > window_size) ? base_position - window_size : 0; + const int length_max = (max_len < 256) ? max_len : 256; + uint32_t max_base_position; + + for (pos = chain[base_position]; pos >= min_pos; pos = chain[pos]) { + int curr_length; + if (--iter < 0) { + break; + } + assert(base_position > (uint32_t)pos); + + curr_length = + FindMatchLength(argb + pos, argb_start, best_length, max_len); + if (best_length < curr_length) { + best_length = curr_length; + best_distance = base_position - pos; + // Stop if we have reached the maximum length. Otherwise, make sure + // we have executed a minimum number of iterations depending on the + // quality. + if ((best_length == MAX_LENGTH) || + (curr_length >= length_max && iter < iter_min)) { + break; + } + } } - - curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len); - if (best_length < curr_length) { - distance = base_position - pos; - best_length = curr_length; - best_distance = distance; - if (curr_length >= length_max) { + // We have the best match but in case the two intervals continue matching + // to the left, we have the best matches for the left-extended pixels. + max_base_position = base_position; + while (1) { + assert(best_length <= MAX_LENGTH); + assert(best_distance <= WINDOW_SIZE); + p->offset_length_[base_position] = + (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length; + --base_position; + // Stop if we don't have a match or if we are out of bounds. + if (best_distance == 0 || base_position == 0) break; + // Stop if we cannot extend the matching intervals to the left. + if (base_position < best_distance || + argb[base_position - best_distance] != argb[base_position]) { break; } + // Stop if we are matching at its limit because there could be a closer + // matching interval with the same maximum length. Then again, if the + // matching interval is as close as possible (best_distance == 1), we will + // never find anything better so let's continue. + if (best_length == MAX_LENGTH && best_distance != 1 && + base_position + MAX_LENGTH < max_base_position) { + break; + } + if (best_length < MAX_LENGTH) { + ++best_length; + max_base_position = base_position; + } } } - *distance_ptr = best_distance; - *length_ptr = best_length; - return (best_length >= MIN_LENGTH); + return 1; +} + +static WEBP_INLINE int HashChainFindOffset(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] >> MAX_LENGTH_BITS; +} + +static WEBP_INLINE int HashChainFindLength(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1); +} + +static WEBP_INLINE void HashChainFindCopy(const VP8LHashChain* const p, + int base_position, + int* const offset_ptr, + int* const length_ptr) { + *offset_ptr = HashChainFindOffset(p, base_position); + *length_ptr = HashChainFindLength(p, base_position); } static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, @@ -384,84 +421,62 @@ static int BackwardReferencesRle(int xsize, int ysize, static int BackwardReferencesLz77(int xsize, int ysize, const uint32_t* const argb, int cache_bits, - int quality, int low_effort, - VP8LHashChain* const hash_chain, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int i; + int i_last_check = -1; int ok = 0; int cc_init = 0; const int use_color_cache = (cache_bits > 0); const int pix_count = xsize * ysize; VP8LColorCache hashers; - int iter_max = GetMaxItersForQuality(quality, low_effort); - const int window_size = GetWindowSizeForHashChain(quality, xsize); - int min_matches = 32; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } ClearBackwardRefs(refs); - HashChainReset(hash_chain); - for (i = 0; i < pix_count - 2; ) { + for (i = 0; i < pix_count;) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; int len = 0; - const int max_len = MaxFindCopyLength(pix_count - i); - HashChainFindCopy(hash_chain, i, argb, max_len, window_size, - iter_max, &offset, &len); - if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) { - int offset2 = 0; - int len2 = 0; - int k; - min_matches = 8; - HashChainInsert(hash_chain, &argb[i], i); - if ((len < (max_len >> 2)) && !low_effort) { - // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code - // the pixels starting at 'i + 1' using backward reference. - HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1, - window_size, iter_max, &offset2, - &len2); - if (len2 > len + 1) { - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - i++; // Backward reference to be done for next pixel. - len = len2; - offset = offset2; + int j; + HashChainFindCopy(hash_chain, i, &offset, &len); + if (len > MIN_LENGTH + 1) { + const int len_ini = len; + int max_reach = 0; + assert(i + len < pix_count); + // Only start from what we have not checked already. + i_last_check = (i > i_last_check) ? i : i_last_check; + // We know the best match for the current pixel but we try to find the + // best matches for the current pixel AND the next one combined. + // The naive method would use the intervals: + // [i,i+len) + [i+len, length of best match at i+len) + // while we check if we can use: + // [i,j) (where j<=i+len) + [j, length of best match at j) + for (j = i_last_check + 1; j <= i + len_ini; ++j) { + const int len_j = HashChainFindLength(hash_chain, j); + const int reach = + j + (len_j > MIN_LENGTH + 1 ? len_j : 1); // 1 for single literal. + if (reach > max_reach) { + len = j - i; + max_reach = reach; } } - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } - } - // Add to the hash_chain (but cannot add the last pixel). - if (offset >= 3 && offset != xsize) { - const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 2; k < last - 8; k += 2) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } - for (; k < last; ++k) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } - } - i += len; } else { + len = 1; + } + // Go with literal or backward reference. + assert(len > 0); + if (len == 1) { AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - HashChainInsert(hash_chain, &argb[i], i); - ++i; - --min_matches; - if (min_matches <= 0) { - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - HashChainInsert(hash_chain, &argb[i], i); - ++i; + } else { + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + if (use_color_cache) { + for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]); } } - } - while (i < pix_count) { - // Handle the last pixel(s). - AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); - ++i; + i += len; } ok = !refs->error_; @@ -482,7 +497,7 @@ typedef struct { static int BackwardReferencesTraceBackwards( int xsize, int ysize, const uint32_t* const argb, int quality, - int cache_bits, VP8LHashChain* const hash_chain, + int cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( @@ -558,16 +573,14 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, return m->distance_[code] + extra_bits; } -static void AddSingleLiteralWithCostModel( - const uint32_t* const argb, VP8LHashChain* const hash_chain, - VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, - int is_last, int use_color_cache, double prev_cost, float* const cost, - uint16_t* const dist_array) { +static void AddSingleLiteralWithCostModel(const uint32_t* const argb, + VP8LColorCache* const hashers, + const CostModel* const cost_model, + int idx, int use_color_cache, + double prev_cost, float* const cost, + uint16_t* const dist_array) { double cost_val = prev_cost; const uint32_t color = argb[0]; - if (!is_last) { - HashChainInsert(hash_chain, argb, idx); - } if (use_color_cache && VP8LColorCacheContains(hashers, color)) { const double mul0 = 0.68; const int ix = VP8LColorCacheGetIndex(hashers, color); @@ -583,30 +596,598 @@ static void AddSingleLiteralWithCostModel( } } +// ----------------------------------------------------------------------------- +// CostManager and interval handling + +// Empirical value to avoid high memory consumption but good for performance. +#define COST_CACHE_INTERVAL_SIZE_MAX 100 + +// To perform backward reference every pixel at index index_ is considered and +// the cost for the MAX_LENGTH following pixels computed. Those following pixels +// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of: +// distance_cost_ at index_ + GetLengthCost(cost_model, k) +// (named cost) (named cached cost) +// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an +// array of size MAX_LENGTH. +// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the +// minimal values using intervals, for which lower_ and upper_ bounds are kept. +// An interval is defined by the index_ of the pixel that generated it and +// is only useful in a range of indices from start_ to end_ (exclusive), i.e. +// it contains the minimum value for pixels between start_ and end_. +// Intervals are stored in a linked list and ordered by start_. When a new +// interval has a better minimum, old intervals are split or removed. +typedef struct CostInterval CostInterval; +struct CostInterval { + double lower_; + double upper_; + int start_; + int end_; + double distance_cost_; + int index_; + CostInterval* previous_; + CostInterval* next_; +}; + +// The GetLengthCost(cost_model, k) part of the costs is also bounded for +// efficiency in a set of intervals of a different type. +// If those intervals are small enough, they are not used for comparison and +// written into the costs right away. +typedef struct { + double lower_; // Lower bound of the interval. + double upper_; // Upper bound of the interval. + int start_; + int end_; // Exclusive. + int do_write_; // If !=0, the interval is saved to cost instead of being kept + // for comparison. +} CostCacheInterval; + +// This structure is in charge of managing intervals and costs. +// It caches the different CostCacheInterval, caches the different +// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose +// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX). +#define COST_MANAGER_MAX_FREE_LIST 10 +typedef struct { + CostInterval* head_; + int count_; // The number of stored intervals. + CostCacheInterval* cache_intervals_; + size_t cache_intervals_size_; + double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + double min_cost_cache_; // The minimum value in cost_cache_[1:]. + double max_cost_cache_; // The maximum value in cost_cache_[1:]. + float* costs_; + uint16_t* dist_array_; + // Most of the time, we only need few intervals -> use a free-list, to avoid + // fragmentation with small allocs in most common cases. + CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST]; + CostInterval* free_intervals_; + // These are regularly malloc'd remains. This list can't grow larger than than + // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note. + CostInterval* recycled_intervals_; + // Buffer used in BackwardReferencesHashChainDistanceOnly to store the ends + // of the intervals that can have impacted the cost at a pixel. + int* interval_ends_; + int interval_ends_size_; +} CostManager; + +static int IsCostCacheIntervalWritable(int start, int end) { + // 100 is the length for which we consider an interval for comparison, and not + // for writing. + // The first intervals are very small and go in increasing size. This constant + // helps merging them into one big interval (up to index 150/200 usually from + // which intervals start getting much bigger). + // This value is empirical. + return (end - start + 1 < 100); +} + +static void CostIntervalAddToFreeList(CostManager* const manager, + CostInterval* const interval) { + interval->next_ = manager->free_intervals_; + manager->free_intervals_ = interval; +} + +static int CostIntervalIsInFreeList(const CostManager* const manager, + const CostInterval* const interval) { + return (interval >= &manager->intervals_[0] && + interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]); +} + +static void CostManagerInitFreeList(CostManager* const manager) { + int i; + manager->free_intervals_ = NULL; + for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) { + CostIntervalAddToFreeList(manager, &manager->intervals_[i]); + } +} + +static void DeleteIntervalList(CostManager* const manager, + const CostInterval* interval) { + while (interval != NULL) { + const CostInterval* const next = interval->next_; + if (!CostIntervalIsInFreeList(manager, interval)) { + WebPSafeFree((void*)interval); + } // else: do nothing + interval = next; + } +} + +static void CostManagerClear(CostManager* const manager) { + if (manager == NULL) return; + + WebPSafeFree(manager->costs_); + WebPSafeFree(manager->cache_intervals_); + WebPSafeFree(manager->interval_ends_); + + // Clear the interval lists. + DeleteIntervalList(manager, manager->head_); + manager->head_ = NULL; + DeleteIntervalList(manager, manager->recycled_intervals_); + manager->recycled_intervals_ = NULL; + + // Reset pointers, count_ and cache_intervals_size_. + memset(manager, 0, sizeof(*manager)); + CostManagerInitFreeList(manager); +} + +static int CostManagerInit(CostManager* const manager, + uint16_t* const dist_array, int pix_count, + const CostModel* const cost_model) { + int i; + const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count; + // This constant is tied to the cost_model we use. + // Empirically, differences between intervals is usually of more than 1. + const double min_cost_diff = 0.1; + + manager->costs_ = NULL; + manager->cache_intervals_ = NULL; + manager->interval_ends_ = NULL; + manager->head_ = NULL; + manager->recycled_intervals_ = NULL; + manager->count_ = 0; + manager->dist_array_ = dist_array; + CostManagerInitFreeList(manager); + + // Fill in the cost_cache_. + manager->cache_intervals_size_ = 1; + manager->cost_cache_[0] = 0; + for (i = 1; i < cost_cache_size; ++i) { + manager->cost_cache_[i] = GetLengthCost(cost_model, i); + // Get an approximation of the number of bound intervals. + if (fabs(manager->cost_cache_[i] - manager->cost_cache_[i - 1]) > + min_cost_diff) { + ++manager->cache_intervals_size_; + } + // Compute the minimum of cost_cache_. + if (i == 1) { + manager->min_cost_cache_ = manager->cost_cache_[1]; + manager->max_cost_cache_ = manager->cost_cache_[1]; + } else if (manager->cost_cache_[i] < manager->min_cost_cache_) { + manager->min_cost_cache_ = manager->cost_cache_[i]; + } else if (manager->cost_cache_[i] > manager->max_cost_cache_) { + manager->max_cost_cache_ = manager->cost_cache_[i]; + } + } + + // With the current cost models, we have 15 intervals, so we are safe by + // setting a maximum of COST_CACHE_INTERVAL_SIZE_MAX. + if (manager->cache_intervals_size_ > COST_CACHE_INTERVAL_SIZE_MAX) { + manager->cache_intervals_size_ = COST_CACHE_INTERVAL_SIZE_MAX; + } + manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc( + manager->cache_intervals_size_, sizeof(*manager->cache_intervals_)); + if (manager->cache_intervals_ == NULL) { + CostManagerClear(manager); + return 0; + } + + // Fill in the cache_intervals_. + { + double cost_prev = -1e38f; // unprobably low initial value + CostCacheInterval* prev = NULL; + CostCacheInterval* cur = manager->cache_intervals_; + const CostCacheInterval* const end = + manager->cache_intervals_ + manager->cache_intervals_size_; + + // Consecutive values in cost_cache_ are compared and if a big enough + // difference is found, a new interval is created and bounded. + for (i = 0; i < cost_cache_size; ++i) { + const double cost_val = manager->cost_cache_[i]; + if (i == 0 || + (fabs(cost_val - cost_prev) > min_cost_diff && cur + 1 < end)) { + if (i > 1) { + const int is_writable = + IsCostCacheIntervalWritable(cur->start_, cur->end_); + // Merge with the previous interval if both are writable. + if (is_writable && cur != manager->cache_intervals_ && + prev->do_write_) { + // Update the previous interval. + prev->end_ = cur->end_; + if (cur->lower_ < prev->lower_) { + prev->lower_ = cur->lower_; + } else if (cur->upper_ > prev->upper_) { + prev->upper_ = cur->upper_; + } + } else { + cur->do_write_ = is_writable; + prev = cur; + ++cur; + } + } + // Initialize an interval. + cur->start_ = i; + cur->do_write_ = 0; + cur->lower_ = cost_val; + cur->upper_ = cost_val; + } else { + // Update the current interval bounds. + if (cost_val < cur->lower_) { + cur->lower_ = cost_val; + } else if (cost_val > cur->upper_) { + cur->upper_ = cost_val; + } + } + cur->end_ = i + 1; + cost_prev = cost_val; + } + manager->cache_intervals_size_ = cur + 1 - manager->cache_intervals_; + } + + manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); + if (manager->costs_ == NULL) { + CostManagerClear(manager); + return 0; + } + // Set the initial costs_ high for every pixel as we will keep the minimum. + for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + + // The cost at pixel is influenced by the cost intervals from previous pixels. + // Let us take the specific case where the offset is the same (which actually + // happens a lot in case of uniform regions). + // pixel i contributes to j>i a cost of: offset cost + cost_cache_[j-i] + // pixel i+1 contributes to j>i a cost of: 2*offset cost + cost_cache_[j-i-1] + // pixel i+2 contributes to j>i a cost of: 3*offset cost + cost_cache_[j-i-2] + // and so on. + // A pixel i influences the following length(j) < MAX_LENGTH pixels. What is + // the value of j such that pixel i + j cannot influence any of those pixels? + // This value is such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // (pixel i + j 's cost cannot beat the worst cost given by pixel i). + // This value will be used to optimize the cost computation in + // BackwardReferencesHashChainDistanceOnly. + { + // The offset cost is computed in GetDistanceCost and has a minimum value of + // the minimum in cost_model->distance_. The case where the offset cost is 0 + // will be dealt with differently later so we are only interested in the + // minimum non-zero offset cost. + double offset_cost_min = 0.; + int size; + for (i = 0; i < NUM_DISTANCE_CODES; ++i) { + if (cost_model->distance_[i] != 0) { + if (offset_cost_min == 0.) { + offset_cost_min = cost_model->distance_[i]; + } else if (cost_model->distance_[i] < offset_cost_min) { + offset_cost_min = cost_model->distance_[i]; + } + } + } + // In case all the cost_model->distance_ is 0, the next non-zero cost we + // can have is from the extra bit in GetDistanceCost, hence 1. + if (offset_cost_min < 1.) offset_cost_min = 1.; + + size = 1 + (int)ceil((manager->max_cost_cache_ - manager->min_cost_cache_) / + offset_cost_min); + // Empirically, we usually end up with a value below 100. + if (size > MAX_LENGTH) size = MAX_LENGTH; + + manager->interval_ends_ = + (int*)WebPSafeMalloc(size, sizeof(*manager->interval_ends_)); + if (manager->interval_ends_ == NULL) { + CostManagerClear(manager); + return 0; + } + manager->interval_ends_size_ = size; + } + + return 1; +} + +// Given the distance_cost for pixel 'index', update the cost at pixel 'i' if it +// is smaller than the previously computed value. +static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, int index, + double distance_cost) { + int k = i - index; + double cost_tmp; + assert(k >= 0 && k < MAX_LENGTH); + cost_tmp = distance_cost + manager->cost_cache_[k]; + + if (manager->costs_[i] > cost_tmp) { + manager->costs_[i] = (float)cost_tmp; + manager->dist_array_[i] = k + 1; + } +} + +// Given the distance_cost for pixel 'index', update the cost for all the pixels +// between 'start' and 'end' excluded. +static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, + int start, int end, int index, + double distance_cost) { + int i; + for (i = start; i < end; ++i) UpdateCost(manager, i, index, distance_cost); +} + +// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. +static WEBP_INLINE void ConnectIntervals(CostManager* const manager, + CostInterval* const prev, + CostInterval* const next) { + if (prev != NULL) { + prev->next_ = next; + } else { + manager->head_ = next; + } + + if (next != NULL) next->previous_ = prev; +} + +// Pop an interval in the manager. +static WEBP_INLINE void PopInterval(CostManager* const manager, + CostInterval* const interval) { + CostInterval* const next = interval->next_; + + if (interval == NULL) return; + + ConnectIntervals(manager, interval->previous_, next); + if (CostIntervalIsInFreeList(manager, interval)) { + CostIntervalAddToFreeList(manager, interval); + } else { // recycle regularly malloc'd intervals too + interval->next_ = manager->recycled_intervals_; + manager->recycled_intervals_ = interval; + } + --manager->count_; + assert(manager->count_ >= 0); +} + +// Update the cost at index i by going over all the stored intervals that +// overlap with i. +static WEBP_INLINE void UpdateCostPerIndex(CostManager* const manager, int i) { + CostInterval* current = manager->head_; + + while (current != NULL && current->start_ <= i) { + if (current->end_ <= i) { + // We have an outdated interval, remove it. + CostInterval* next = current->next_; + PopInterval(manager, current); + current = next; + } else { + UpdateCost(manager, i, current->index_, current->distance_cost_); + current = current->next_; + } + } +} + +// Given a current orphan interval and its previous interval, before +// it was orphaned (which can be NULL), set it at the right place in the list +// of intervals using the start_ ordering and the previous interval as a hint. +static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, + CostInterval* const current, + CostInterval* previous) { + assert(current != NULL); + + if (previous == NULL) previous = manager->head_; + while (previous != NULL && current->start_ < previous->start_) { + previous = previous->previous_; + } + while (previous != NULL && previous->next_ != NULL && + previous->next_->start_ < current->start_) { + previous = previous->next_; + } + + if (previous != NULL) { + ConnectIntervals(manager, current, previous->next_); + } else { + ConnectIntervals(manager, current, manager->head_); + } + ConnectIntervals(manager, previous, current); +} + +// Insert an interval in the list contained in the manager by starting at +// interval_in as a hint. The intervals are sorted by start_ value. +static WEBP_INLINE void InsertInterval(CostManager* const manager, + CostInterval* const interval_in, + double distance_cost, double lower, + double upper, int index, int start, + int end) { + CostInterval* interval_new; + + if (IsCostCacheIntervalWritable(start, end) || + manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) { + // Write down the interval if it is too small. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + if (manager->free_intervals_ != NULL) { + interval_new = manager->free_intervals_; + manager->free_intervals_ = interval_new->next_; + } else if (manager->recycled_intervals_ != NULL) { + interval_new = manager->recycled_intervals_; + manager->recycled_intervals_ = interval_new->next_; + } else { // malloc for good + interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new)); + if (interval_new == NULL) { + // Write down the interval if we cannot create it. + UpdateCostPerInterval(manager, start, end, index, distance_cost); + return; + } + } + + interval_new->distance_cost_ = distance_cost; + interval_new->lower_ = lower; + interval_new->upper_ = upper; + interval_new->index_ = index; + interval_new->start_ = start; + interval_new->end_ = end; + PositionOrphanInterval(manager, interval_new, interval_in); + + ++manager->count_; +} + +// When an interval has its start_ or end_ modified, it needs to be +// repositioned in the linked list. +static WEBP_INLINE void RepositionInterval(CostManager* const manager, + CostInterval* const interval) { + if (IsCostCacheIntervalWritable(interval->start_, interval->end_)) { + // Maybe interval has been resized and is small enough to be removed. + UpdateCostPerInterval(manager, interval->start_, interval->end_, + interval->index_, interval->distance_cost_); + PopInterval(manager, interval); + return; + } + + // Early exit if interval is at the right spot. + if ((interval->previous_ == NULL || + interval->previous_->start_ <= interval->start_) && + (interval->next_ == NULL || + interval->start_ <= interval->next_->start_)) { + return; + } + + ConnectIntervals(manager, interval->previous_, interval->next_); + PositionOrphanInterval(manager, interval, interval->previous_); +} + +// Given a new cost interval defined by its start at index, its last value and +// distance_cost, add its contributions to the previous intervals and costs. +// If handling the interval or one of its subintervals becomes to heavy, its +// contribution is added to the costs right away. +static WEBP_INLINE void PushInterval(CostManager* const manager, + double distance_cost, int index, + int last) { + size_t i; + CostInterval* interval = manager->head_; + CostInterval* interval_next; + const CostCacheInterval* const cost_cache_intervals = + manager->cache_intervals_; + + for (i = 0; i < manager->cache_intervals_size_ && + cost_cache_intervals[i].start_ < last; + ++i) { + // Define the intersection of the ith interval with the new one. + int start = index + cost_cache_intervals[i].start_; + const int end = index + (cost_cache_intervals[i].end_ > last + ? last + : cost_cache_intervals[i].end_); + const double lower_in = cost_cache_intervals[i].lower_; + const double upper_in = cost_cache_intervals[i].upper_; + const double lower_full_in = distance_cost + lower_in; + const double upper_full_in = distance_cost + upper_in; + + if (cost_cache_intervals[i].do_write_) { + UpdateCostPerInterval(manager, start, end, index, distance_cost); + continue; + } + + for (; interval != NULL && interval->start_ < end && start < end; + interval = interval_next) { + const double lower_full_interval = + interval->distance_cost_ + interval->lower_; + const double upper_full_interval = + interval->distance_cost_ + interval->upper_; + + interval_next = interval->next_; + + // Make sure we have some overlap + if (start >= interval->end_) continue; + + if (lower_full_in >= upper_full_interval) { + // When intervals are represented, the lower, the better. + // [**********************************************************] + // start end + // [----------------------------------] + // interval->start_ interval->end_ + // If we are worse than what we already have, add whatever we have so + // far up to interval. + const int start_new = interval->end_; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, interval->start_); + start = start_new; + continue; + } + + // We know the two intervals intersect. + if (upper_full_in >= lower_full_interval) { + // There is no clear cut on which is best, so let's keep both. + // [*********[*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*]***********] + // start interval->start_ interval->end_ end + // OR + // [*********[*-*-*-*-*-*-*-*-*-*-*-]----------------------] + // start interval->start_ end interval->end_ + const int end_new = (interval->end_ <= end) ? interval->end_ : end; + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, + index, start, end_new); + start = end_new; + } else if (start <= interval->start_ && interval->end_ <= end) { + // [----------------------------------] + // interval->start_ interval->end_ + // [**************************************************************] + // start end + // We can safely remove the old interval as it is fully included. + PopInterval(manager, interval); + } else { + if (interval->start_ <= start && end <= interval->end_) { + // [--------------------------------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + // We have to split the old interval as it fully contains the new one. + const int end_original = interval->end_; + interval->end_ = start; + InsertInterval(manager, interval, interval->distance_cost_, + interval->lower_, interval->upper_, interval->index_, + end, end_original); + } else if (interval->start_ < start) { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->end_ = start; + } else { + // [------------------------------------] + // interval->start_ interval->end_ + // [*****************************] + // start end + interval->start_ = end; + } + + // The interval has been modified, we need to reposition it or write it. + RepositionInterval(manager, interval); + } + } + // Insert the remaining interval from start to end. + InsertInterval(manager, interval, distance_cost, lower_in, upper_in, index, + start, end); + } +} + static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, const uint32_t* const argb, - int quality, int cache_bits, VP8LHashChain* const hash_chain, + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, uint16_t* const dist_array) { int i; int ok = 0; int cc_init = 0; const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); - float* const cost = - (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); const size_t literal_array_size = sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + ((cache_bits > 0) ? (1 << cache_bits) : 0)); const size_t cost_model_size = sizeof(CostModel) + literal_array_size; CostModel* const cost_model = - (CostModel*)WebPSafeMalloc(1ULL, cost_model_size); + (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); VP8LColorCache hashers; const int skip_length = 32 + quality; const int skip_min_distance_code = 2; - int iter_max = GetMaxItersForQuality(quality, 0); - const int window_size = GetWindowSizeForHashChain(quality, xsize); + CostManager* cost_manager = + (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); - if (cost == NULL || cost_model == NULL) goto Error; + if (cost_model == NULL || cost_manager == NULL) goto Error; cost_model->literal_ = (double*)(cost_model + 1); if (use_color_cache) { @@ -618,34 +1199,91 @@ static int BackwardReferencesHashChainDistanceOnly( goto Error; } - for (i = 0; i < pix_count; ++i) cost[i] = 1e38f; + if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) { + goto Error; + } // We loop one pixel at a time, but store all currently best points to // non-processed locations from this point. dist_array[0] = 0; - HashChainReset(hash_chain); // Add first pixel as literal. - AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0, - 0, use_color_cache, 0.0, cost, dist_array); + AddSingleLiteralWithCostModel(argb + 0, &hashers, cost_model, 0, + use_color_cache, 0.0, cost_manager->costs_, + dist_array); + for (i = 1; i < pix_count - 1; ++i) { - int offset = 0; - int len = 0; - double prev_cost = cost[i - 1]; - const int max_len = MaxFindCopyLength(pix_count - i); - HashChainFindCopy(hash_chain, i, argb, max_len, window_size, - iter_max, &offset, &len); + int offset = 0, len = 0; + double prev_cost = cost_manager->costs_[i - 1]; + HashChainFindCopy(hash_chain, i, &offset, &len); if (len >= MIN_LENGTH) { const int code = DistanceToPlaneCode(xsize, offset); - const double distance_cost = - prev_cost + GetDistanceCost(cost_model, code); - int k; - for (k = 1; k < len; ++k) { - const double cost_val = distance_cost + GetLengthCost(cost_model, k); - if (cost[i + k] > cost_val) { - cost[i + k] = (float)cost_val; - dist_array[i + k] = k + 1; + const double offset_cost = GetDistanceCost(cost_model, code); + const int first_i = i; + int j_max = 0, interval_ends_index = 0; + const int is_offset_zero = (offset_cost == 0.); + + if (!is_offset_zero) { + j_max = (int)ceil( + (cost_manager->max_cost_cache_ - cost_manager->min_cost_cache_) / + offset_cost); + if (j_max < 1) { + j_max = 1; + } else if (j_max > cost_manager->interval_ends_size_ - 1) { + // This could only happen in the case of MAX_LENGTH. + j_max = cost_manager->interval_ends_size_ - 1; + } + } // else j_max is unused anyway. + + // Instead of considering all contributions from a pixel i by calling: + // PushInterval(cost_manager, prev_cost + offset_cost, i, len); + // we optimize these contributions in case offset_cost stays the same for + // consecutive pixels. This describes a set of pixels similar to a + // previous set (e.g. constant color regions). + for (; i < pix_count - 1; ++i) { + int offset_next, len_next; + prev_cost = cost_manager->costs_[i - 1]; + + if (is_offset_zero) { + // No optimization can be made so we just push all of the + // contributions from i. + PushInterval(cost_manager, prev_cost, i, len); + } else { + // j_max is chosen as the smallest j such that: + // max of cost_cache_ < j*offset cost + min of cost_cache_ + // Therefore, the pixel influenced by i-j_max, cannot be influenced + // by i. Only the costs after the end of what i contributed need to be + // updated. cost_manager->interval_ends_ is a circular buffer that + // stores those ends. + const double distance_cost = prev_cost + offset_cost; + int j = cost_manager->interval_ends_[interval_ends_index]; + if (i - first_i <= j_max || + !IsCostCacheIntervalWritable(j, i + len)) { + PushInterval(cost_manager, distance_cost, i, len); + } else { + for (; j < i + len; ++j) { + UpdateCost(cost_manager, j, i, distance_cost); + } + } + // Store the new end in the circular buffer. + assert(interval_ends_index < cost_manager->interval_ends_size_); + cost_manager->interval_ends_[interval_ends_index] = i + len; + if (++interval_ends_index > j_max) interval_ends_index = 0; } + + // Check whether i is the last pixel to consider, as it is handled + // differently. + if (i + 1 >= pix_count - 1) break; + HashChainFindCopy(hash_chain, i + 1, &offset_next, &len_next); + if (offset_next != offset) break; + len = len_next; + UpdateCostPerIndex(cost_manager, i); + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); } + // Submit the last pixel. + UpdateCostPerIndex(cost_manager, i + 1); + // This if is for speedup only. It roughly doubles the speed, and // makes compression worse by .1 %. if (len >= skip_length && code <= skip_min_distance_code) { @@ -653,53 +1291,55 @@ static int BackwardReferencesHashChainDistanceOnly( // lookups for better copies. // 1) insert the hashes. if (use_color_cache) { + int k; for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); } } - // 2) Add to the hash_chain (but cannot add the last pixel) + // 2) jump. { - const int last = (len + i < pix_count - 1) ? len + i - : pix_count - 1; - for (k = i; k < last; ++k) { - HashChainInsert(hash_chain, &argb[k], k); - } + const int i_next = i + len - 1; // for loop does ++i, thus -1 here. + for (; i <= i_next; ++i) UpdateCostPerIndex(cost_manager, i + 1); + i = i_next; } - // 3) jump. - i += len - 1; // for loop does ++i, thus -1 here. goto next_symbol; } - if (len != MIN_LENGTH) { + if (len > MIN_LENGTH) { int code_min_length; double cost_total; - HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, - &offset); + offset = HashChainFindOffset(hash_chain, i); code_min_length = DistanceToPlaneCode(xsize, offset); cost_total = prev_cost + GetDistanceCost(cost_model, code_min_length) + GetLengthCost(cost_model, 1); - if (cost[i + 1] > cost_total) { - cost[i + 1] = (float)cost_total; + if (cost_manager->costs_[i + 1] > cost_total) { + cost_manager->costs_[i + 1] = (float)cost_total; dist_array[i + 1] = 2; } } + } else { // len < MIN_LENGTH + UpdateCostPerIndex(cost_manager, i + 1); } - AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, - 0, use_color_cache, prev_cost, cost, - dist_array); + + AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); + next_symbol: ; } // Handle the last pixel. if (i == (pix_count - 1)) { - AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i, - 1, use_color_cache, cost[pix_count - 2], cost, - dist_array); + AddSingleLiteralWithCostModel( + argb + i, &hashers, cost_model, i, use_color_cache, + cost_manager->costs_[pix_count - 2], cost_manager->costs_, dist_array); } + ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers); + CostManagerClear(cost_manager); WebPSafeFree(cost_model); - WebPSafeFree(cost); + WebPSafeFree(cost_manager); return ok; } @@ -723,18 +1363,14 @@ static void TraceBackwards(uint16_t* const dist_array, } static int BackwardReferencesHashChainFollowChosenPath( - int xsize, int ysize, const uint32_t* const argb, - int quality, int cache_bits, + const uint32_t* const argb, int cache_bits, const uint16_t* const chosen_path, int chosen_path_size, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { - const int pix_count = xsize * ysize; + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { const int use_color_cache = (cache_bits > 0); int ix; int i = 0; int ok = 0; int cc_init = 0; - const int window_size = GetWindowSizeForHashChain(quality, xsize); VP8LColorCache hashers; if (use_color_cache) { @@ -743,25 +1379,17 @@ static int BackwardReferencesHashChainFollowChosenPath( } ClearBackwardRefs(refs); - HashChainReset(hash_chain); for (ix = 0; ix < chosen_path_size; ++ix) { - int offset = 0; const int len = chosen_path[ix]; if (len != 1) { int k; - HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset); + const int offset = HashChainFindOffset(hash_chain, i); BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); } } - { - const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i; - for (k = 0; k < last; ++k) { - HashChainInsert(hash_chain, &argb[i + k], i + k); - } - } i += len; } else { PixOrCopy v; @@ -774,9 +1402,6 @@ static int BackwardReferencesHashChainFollowChosenPath( v = PixOrCopyCreateLiteral(argb[i]); } BackwardRefsCursorAdd(refs, v); - if (i + 1 < pix_count) { - HashChainInsert(hash_chain, &argb[i], i); - } ++i; } } @@ -787,11 +1412,10 @@ static int BackwardReferencesHashChainFollowChosenPath( } // Returns 1 on success. -static int BackwardReferencesTraceBackwards(int xsize, int ysize, - const uint32_t* const argb, - int quality, int cache_bits, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { int ok = 0; const int dist_array_size = xsize * ysize; uint16_t* chosen_path = NULL; @@ -808,8 +1432,7 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize, } TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); if (!BackwardReferencesHashChainFollowChosenPath( - xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size, - hash_chain, refs)) { + argb, cache_bits, chosen_path, chosen_path_size, hash_chain, refs)) { goto Error; } ok = 1; @@ -897,7 +1520,7 @@ static double ComputeCacheEntropy(const uint32_t* argb, // Returns 0 in case of memory error. static int CalculateBestCacheSize(const uint32_t* const argb, int xsize, int ysize, int quality, - VP8LHashChain* const hash_chain, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, int* const lz77_computed, int* const best_cache_bits) { @@ -917,8 +1540,8 @@ static int CalculateBestCacheSize(const uint32_t* const argb, // Local color cache is disabled. return 1; } - if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0, - hash_chain, refs)) { + if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, hash_chain, + refs)) { return 0; } // Do a binary search to find the optimal entropy for cache_bits. @@ -983,13 +1606,12 @@ static int BackwardRefsWithLocalCache(const uint32_t* const argb, } static VP8LBackwardRefs* GetBackwardReferencesLowEffort( - int width, int height, const uint32_t* const argb, int quality, - int* const cache_bits, VP8LHashChain* const hash_chain, + int width, int height, const uint32_t* const argb, + int* const cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { VP8LBackwardRefs* refs_lz77 = &refs_array[0]; *cache_bits = 0; - if (!BackwardReferencesLz77(width, height, argb, 0, quality, - 1 /* Low effort. */, hash_chain, refs_lz77)) { + if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) { return NULL; } BackwardReferences2DLocality(width, refs_lz77); @@ -998,7 +1620,7 @@ static VP8LBackwardRefs* GetBackwardReferencesLowEffort( static VP8LBackwardRefs* GetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int* const cache_bits, VP8LHashChain* const hash_chain, + int* const cache_bits, const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { int lz77_is_useful; int lz77_computed; @@ -1021,8 +1643,8 @@ static VP8LBackwardRefs* GetBackwardReferences( } } } else { - if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality, - 0 /* Low effort. */, hash_chain, refs_lz77)) { + if (!BackwardReferencesLz77(width, height, argb, *cache_bits, hash_chain, + refs_lz77)) { goto Error; } } @@ -1081,11 +1703,11 @@ static VP8LBackwardRefs* GetBackwardReferences( VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2]) { + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { if (low_effort) { - return GetBackwardReferencesLowEffort(width, height, argb, quality, - cache_bits, hash_chain, refs_array); + return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, + hash_chain, refs_array); } else { return GetBackwardReferences(width, height, argb, quality, cache_bits, hash_chain, refs_array); diff --git a/src/3rdparty/libwebp/src/enc/backward_references.h b/src/3rdparty/libwebp/src/enc/backward_references.h index daa084d..0cadb11 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.h +++ b/src/3rdparty/libwebp/src/enc/backward_references.h @@ -115,11 +115,12 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { typedef struct VP8LHashChain VP8LHashChain; struct VP8LHashChain { - // Stores the most recently added position with the given hash value. - int32_t hash_to_first_index_[HASH_SIZE]; - // chain_[pos] stores the previous position with the same hash value - // for every pixel in the image. - int32_t* chain_; + // The 20 most significant bits contain the offset at which the best match + // is found. These 20 bits are the limit defined by GetWindowSizeForHashChain + // (through WINDOW_SIZE = 1<<20). + // The lower 12 bits contain the length of the match. The 12 bit limit is + // defined in MaxFindCopyLength with MAX_LENGTH=4096. + uint32_t* offset_length_; // This is the maximum size of the hash_chain that can be constructed. // Typically this is the pixel count (width x height) for a given image. int size_; @@ -127,6 +128,9 @@ struct VP8LHashChain { // Must be called first, to set size. int VP8LHashChainInit(VP8LHashChain* const p, int size); +// Pre-compute the best matches for argb. +int VP8LHashChainFill(VP8LHashChain* const p, int quality, + const uint32_t* const argb, int xsize, int ysize); void VP8LHashChainClear(VP8LHashChain* const p); // release memory // ----------------------------------------------------------------------------- @@ -192,8 +196,8 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { // refs[0] or refs[1]. VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs[2]); + int low_effort, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]); #ifdef __cplusplus } diff --git a/src/3rdparty/libwebp/src/enc/filter.c b/src/3rdparty/libwebp/src/enc/filter.c index 41813cf..e8ea8b4 100644 --- a/src/3rdparty/libwebp/src/enc/filter.c +++ b/src/3rdparty/libwebp/src/enc/filter.c @@ -107,10 +107,9 @@ static void DoFilter(const VP8EncIterator* const it, int level) { //------------------------------------------------------------------------------ // SSIM metric -enum { KERNEL = 3 }; static const double kMinValue = 1.e-10; // minimal threshold -void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) { +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst) { dst->w += src->w; dst->xm += src->xm; dst->ym += src->ym; @@ -119,32 +118,7 @@ void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) { dst->yym += src->yym; } -static void VP8SSIMAccumulate(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int xo, int yo, int W, int H, - DistoStats* const stats) { - const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL; - const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL; - const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL; - const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL; - int x, y; - src1 += ymin * stride1; - src2 += ymin * stride2; - for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) { - for (x = xmin; x <= xmax; ++x) { - const int s1 = src1[x]; - const int s2 = src2[x]; - stats->w += 1; - stats->xm += s1; - stats->ym += s2; - stats->xxm += s1 * s1; - stats->xym += s1 * s2; - stats->yym += s2 * s2; - } - } -} - -double VP8SSIMGet(const DistoStats* const stats) { +double VP8SSIMGet(const VP8DistoStats* const stats) { const double xmxm = stats->xm * stats->xm; const double ymym = stats->ym * stats->ym; const double xmym = stats->xm * stats->ym; @@ -165,7 +139,7 @@ double VP8SSIMGet(const DistoStats* const stats) { return (fden != 0.) ? fnum / fden : kMinValue; } -double VP8SSIMGetSquaredError(const DistoStats* const s) { +double VP8SSIMGetSquaredError(const VP8DistoStats* const s) { if (s->w > 0.) { const double iw2 = 1. / (s->w * s->w); const double sxx = s->xxm * s->w - s->xm * s->xm; @@ -177,34 +151,66 @@ double VP8SSIMGetSquaredError(const DistoStats* const s) { return kMinValue; } +#define LIMIT(A, M) ((A) > (M) ? (M) : (A)) +static void VP8SSIMAccumulateRow(const uint8_t* src1, int stride1, + const uint8_t* src2, int stride2, + int y, int W, int H, + VP8DistoStats* const stats) { + int x = 0; + const int w0 = LIMIT(VP8_SSIM_KERNEL, W); + for (x = 0; x < w0; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + for (; x <= W - 8 + VP8_SSIM_KERNEL; ++x) { + VP8SSIMAccumulate( + src1 + (y - VP8_SSIM_KERNEL) * stride1 + (x - VP8_SSIM_KERNEL), stride1, + src2 + (y - VP8_SSIM_KERNEL) * stride2 + (x - VP8_SSIM_KERNEL), stride2, + stats); + } + for (; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } +} + void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, - int W, int H, DistoStats* const stats) { + int W, int H, VP8DistoStats* const stats) { int x, y; - for (y = 0; y < H; ++y) { + const int h0 = LIMIT(VP8_SSIM_KERNEL, H); + const int h1 = LIMIT(VP8_SSIM_KERNEL, H - VP8_SSIM_KERNEL); + for (y = 0; y < h0; ++y) { + for (x = 0; x < W; ++x) { + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); + } + } + for (; y < h1; ++y) { + VP8SSIMAccumulateRow(src1, stride1, src2, stride2, y, W, H, stats); + } + for (; y < H; ++y) { for (x = 0; x < W; ++x) { - VP8SSIMAccumulate(src1, stride1, src2, stride2, x, y, W, H, stats); + VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats); } } } +#undef LIMIT static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { int x, y; - DistoStats s = { .0, .0, .0, .0, .0, .0 }; + VP8DistoStats s = { .0, .0, .0, .0, .0, .0 }; // compute SSIM in a 10 x 10 window - for (x = 3; x < 13; x++) { - for (y = 3; y < 13; y++) { - VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, - x, y, 16, 16, &s); + for (y = VP8_SSIM_KERNEL; y < 16 - VP8_SSIM_KERNEL; y++) { + for (x = VP8_SSIM_KERNEL; x < 16 - VP8_SSIM_KERNEL; x++) { + VP8SSIMAccumulateClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS, + x, y, 16, 16, &s); } } for (x = 1; x < 7; x++) { for (y = 1; y < 7; y++) { - VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, - x, y, 8, 8, &s); - VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, - x, y, 8, 8, &s); + VP8SSIMAccumulateClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS, + x, y, 8, 8, &s); + VP8SSIMAccumulateClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS, + x, y, 8, 8, &s); } } return VP8SSIMGet(&s); @@ -222,6 +228,7 @@ void VP8InitFilter(VP8EncIterator* const it) { (*it->lf_stats_)[s][i] = 0; } } + VP8SSIMDspInit(); } } diff --git a/src/3rdparty/libwebp/src/enc/histogram.c b/src/3rdparty/libwebp/src/enc/histogram.c index 869882d..395372b 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.c +++ b/src/3rdparty/libwebp/src/enc/histogram.c @@ -386,29 +386,27 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { } static int GetBinIdForEntropy(double min, double max, double val) { - const double range = max - min + 1e-6; - const double delta = val - min; - return (int)(NUM_PARTITIONS * delta / range); + const double range = max - min; + if (range > 0.) { + const double delta = val - min; + return (int)((NUM_PARTITIONS - 1e-6) * delta / range); + } else { + return 0; + } } -static int GetHistoBinIndexLowEffort( - const VP8LHistogram* const h, const DominantCostRange* const c) { - const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, - h->literal_cost_); +static int GetHistoBinIndex(const VP8LHistogram* const h, + const DominantCostRange* const c, int low_effort) { + int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + h->literal_cost_); assert(bin_id < NUM_PARTITIONS); - return bin_id; -} - -static int GetHistoBinIndex( - const VP8LHistogram* const h, const DominantCostRange* const c) { - const int bin_id = - GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) + - NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_, - h->red_cost_) + - NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_, - c->literal_max_, - h->literal_cost_); - assert(bin_id < BIN_SIZE); + if (!low_effort) { + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_); + bin_id = bin_id * NUM_PARTITIONS + + GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_); + assert(bin_id < BIN_SIZE); + } return bin_id; } @@ -469,16 +467,13 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // bin-hash histograms on three of the dominant (literal, red and blue) // symbol costs. for (i = 0; i < histo_size; ++i) { - int num_histos; - VP8LHistogram* const histo = histograms[i]; - const int16_t bin_id = low_effort ? - (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) : - (int16_t)GetHistoBinIndex(histo, &cost_range); + const VP8LHistogram* const histo = histograms[i]; + const int bin_id = GetHistoBinIndex(histo, &cost_range, low_effort); const int bin_offset = bin_id * bin_depth; // bin_map[n][0] for every bin 'n' maintains the counter for the number of // histograms in that bin. // Get and increment the num_histos in that bin. - num_histos = ++bin_map[bin_offset]; + const int num_histos = ++bin_map[bin_offset]; assert(bin_offset + num_histos < bin_depth * BIN_SIZE); // Add histogram i'th index at num_histos (last) position in the bin_map. bin_map[bin_offset + num_histos] = i; @@ -636,8 +631,11 @@ static void UpdateQueueFront(HistoQueue* const histo_queue) { // ----------------------------------------------------------------------------- static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, - HistogramPair* const pair, - VP8LHistogram* const histos) { + HistogramPair* const pair) { + VP8LHistogram* h1; + VP8LHistogram* h2; + double sum_cost; + if (idx1 > idx2) { const int tmp = idx2; idx2 = idx1; @@ -645,15 +643,17 @@ static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, } pair->idx1 = idx1; pair->idx2 = idx2; - pair->cost_diff = - HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0); - pair->cost_combo = histos->bit_cost_; + h1 = histograms[idx1]; + h2 = histograms[idx2]; + sum_cost = h1->bit_cost_ + h2->bit_cost_; + pair->cost_combo = 0.; + GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo); + pair->cost_diff = pair->cost_combo - sum_cost; } // Combines histograms by continuously choosing the one with the highest cost // reduction. -static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, - VP8LHistogram* const histos) { +static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { int ok = 0; int image_histo_size = image_histo->size; int i, j; @@ -672,8 +672,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, clusters[i] = i; for (j = i + 1; j < image_histo_size; ++j) { // Initialize positions array. - PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size], - histos); + PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]); UpdateQueueFront(&histo_queue); } } @@ -715,7 +714,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, for (i = 0; i < image_histo_size; ++i) { if (clusters[i] != idx1) { PreparePair(histograms, idx1, clusters[i], - &histo_queue.queue[histo_queue.size], histos); + &histo_queue.queue[histo_queue.size]); UpdateQueueFront(&histo_queue); } } @@ -736,11 +735,10 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, return ok; } -static VP8LHistogram* HistogramCombineStochastic( - VP8LHistogramSet* const image_histo, - VP8LHistogram* tmp_histo, - VP8LHistogram* best_combo, - int quality, int min_cluster_size) { +static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, + VP8LHistogram* tmp_histo, + VP8LHistogram* best_combo, + int quality, int min_cluster_size) { int iter; uint32_t seed = 0; int tries_with_no_success = 0; @@ -800,7 +798,6 @@ static VP8LHistogram* HistogramCombineStochastic( } } image_histo->size = image_histo_size; - return best_combo; } // ----------------------------------------------------------------------------- @@ -808,24 +805,23 @@ static VP8LHistogram* HistogramCombineStochastic( // Find the best 'out' histogram for each of the 'in' histograms. // Note: we assume that out[]->bit_cost_ is already up-to-date. -static void HistogramRemap(const VP8LHistogramSet* const orig_histo, - const VP8LHistogramSet* const image_histo, +static void HistogramRemap(const VP8LHistogramSet* const in, + const VP8LHistogramSet* const out, uint16_t* const symbols) { int i; - VP8LHistogram** const orig_histograms = orig_histo->histograms; - VP8LHistogram** const histograms = image_histo->histograms; - const int orig_histo_size = orig_histo->size; - const int image_histo_size = image_histo->size; - if (image_histo_size > 1) { - for (i = 0; i < orig_histo_size; ++i) { + VP8LHistogram** const in_histo = in->histograms; + VP8LHistogram** const out_histo = out->histograms; + const int in_size = in->size; + const int out_size = out->size; + if (out_size > 1) { + for (i = 0; i < in_size; ++i) { int best_out = 0; - double best_bits = - HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); + double best_bits = MAX_COST; int k; - for (k = 1; k < image_histo_size; ++k) { + for (k = 0; k < out_size; ++k) { const double cur_bits = - HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); - if (cur_bits < best_bits) { + HistogramAddThresh(out_histo[k], in_histo[i], best_bits); + if (k == 0 || cur_bits < best_bits) { best_bits = cur_bits; best_out = k; } @@ -833,20 +829,20 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo, symbols[i] = best_out; } } else { - assert(image_histo_size == 1); - for (i = 0; i < orig_histo_size; ++i) { + assert(out_size == 1); + for (i = 0; i < in_size; ++i) { symbols[i] = 0; } } // Recompute each out based on raw and symbols. - for (i = 0; i < image_histo_size; ++i) { - HistogramClear(histograms[i]); + for (i = 0; i < out_size; ++i) { + HistogramClear(out_histo[i]); } - for (i = 0; i < orig_histo_size; ++i) { + for (i = 0; i < in_size; ++i) { const int idx = symbols[i]; - VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]); + VP8LHistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]); } } @@ -920,11 +916,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const float x = quality / 100.f; // cubic ramp between 1 and MAX_HISTO_GREEDY: const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); - cur_combo = HistogramCombineStochastic(image_histo, - tmp_histos->histograms[0], - cur_combo, quality, threshold_size); + HistogramCombineStochastic(image_histo, tmp_histos->histograms[0], + cur_combo, quality, threshold_size); if ((image_histo->size <= threshold_size) && - !HistogramCombineGreedy(image_histo, cur_combo)) { + !HistogramCombineGreedy(image_histo)) { goto Error; } } diff --git a/src/3rdparty/libwebp/src/enc/near_lossless.c b/src/3rdparty/libwebp/src/enc/near_lossless.c index 9bc0f0e..f4ab91f 100644 --- a/src/3rdparty/libwebp/src/enc/near_lossless.c +++ b/src/3rdparty/libwebp/src/enc/near_lossless.c @@ -14,6 +14,7 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // Converted to C by Aleksander Kramarz (akramarz@google.com) +#include <assert.h> #include <stdlib.h> #include "../dsp/lossless.h" @@ -23,42 +24,14 @@ #define MIN_DIM_FOR_NEAR_LOSSLESS 64 #define MAX_LIMIT_BITS 5 -// Computes quantized pixel value and distance from original value. -static void GetValAndDistance(int a, int initial, int bits, - int* const val, int* const distance) { - const int mask = ~((1 << bits) - 1); - *val = (initial & mask) | (initial >> (8 - bits)); - *distance = 2 * abs(a - *val); -} - -// Clamps the value to range [0, 255]. -static int Clamp8b(int val) { - const int min_val = 0; - const int max_val = 0xff; - return (val < min_val) ? min_val : (val > max_val) ? max_val : val; -} - -// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one. +// Quantizes the value up or down to a multiple of 1<<bits (or to 255), +// choosing the closer one, resolving ties using bankers' rounding. static int FindClosestDiscretized(int a, int bits) { - int best_val = a, i; - int min_distance = 256; - - for (i = -1; i <= 1; ++i) { - int candidate, distance; - const int val = Clamp8b(a + i * (1 << bits)); - GetValAndDistance(a, val, bits, &candidate, &distance); - if (i != 0) { - ++distance; - } - // Smallest distance but favor i == 0 over i == -1 and i == 1 - // since that keeps the overall intensity more constant in the - // images. - if (distance < min_distance) { - min_distance = distance; - best_val = candidate; - } - } - return best_val; + const int mask = (1 << bits) - 1; + const int biased = a + (mask >> 1) + ((a >> bits) & 1); + assert(bits > 0); + if (biased > 0xff) return 0xff; + return biased & ~mask; } // Applies FindClosestDiscretized to all channels of pixel. @@ -124,22 +97,11 @@ static void NearLossless(int xsize, int ysize, uint32_t* argb, } } -static int QualityToLimitBits(int quality) { - // quality mapping: - // 0..19 -> 5 - // 0..39 -> 4 - // 0..59 -> 3 - // 0..79 -> 2 - // 0..99 -> 1 - // 100 -> 0 - return MAX_LIMIT_BITS - quality / 20; -} - int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) { int i; uint32_t* const copy_buffer = (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer)); - const int limit_bits = QualityToLimitBits(quality); + const int limit_bits = VP8LNearLosslessBits(quality); assert(argb != NULL); assert(limit_bits >= 0); assert(limit_bits <= MAX_LIMIT_BITS); diff --git a/src/3rdparty/libwebp/src/enc/picture.c b/src/3rdparty/libwebp/src/enc/picture.c index 26679a7..d9befbc 100644 --- a/src/3rdparty/libwebp/src/enc/picture.c +++ b/src/3rdparty/libwebp/src/enc/picture.c @@ -237,6 +237,8 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride, WebPMemoryWriter wrt; int ok; + if (output == NULL) return 0; + if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) || !WebPPictureInit(&pic)) { return 0; // shouldn't happen, except if system installation is broken diff --git a/src/3rdparty/libwebp/src/enc/picture_csp.c b/src/3rdparty/libwebp/src/enc/picture_csp.c index 0ef5f9e..607a624 100644 --- a/src/3rdparty/libwebp/src/enc/picture_csp.c +++ b/src/3rdparty/libwebp/src/enc/picture_csp.c @@ -1125,32 +1125,44 @@ static int Import(WebPPicture* const picture, int WebPPictureImportRGB(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) { - return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0; + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 0, 0) + : 0; } int WebPPictureImportBGR(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) { - return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0; + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 1, 0) + : 0; } int WebPPictureImportRGBA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 1) + : 0; } int WebPPictureImportBGRA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 1) + : 0; } int WebPPictureImportRGBX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 0, 0) + : 0; } int WebPPictureImportBGRX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { - return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0; + return (picture != NULL && rgba != NULL) + ? Import(picture, rgba, rgba_stride, 4, 1, 0) + : 0; } //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/picture_psnr.c b/src/3rdparty/libwebp/src/enc/picture_psnr.c index 40214ef..81ab1b5 100644 --- a/src/3rdparty/libwebp/src/enc/picture_psnr.c +++ b/src/3rdparty/libwebp/src/enc/picture_psnr.c @@ -27,7 +27,7 @@ static void AccumulateLSIM(const uint8_t* src, int src_stride, const uint8_t* ref, int ref_stride, - int w, int h, DistoStats* stats) { + int w, int h, VP8DistoStats* stats) { int x, y; double total_sse = 0.; for (y = 0; y < h; ++y) { @@ -71,11 +71,13 @@ static float GetPSNR(const double v) { int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, int type, float result[5]) { - DistoStats stats[5]; + VP8DistoStats stats[5]; int w, h; memset(stats, 0, sizeof(stats)); + VP8SSIMDspInit(); + if (src == NULL || ref == NULL || src->width != ref->width || src->height != ref->height || src->use_argb != ref->use_argb || result == NULL) { diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c index dd6885a..549ad26 100644 --- a/src/3rdparty/libwebp/src/enc/quant.c +++ b/src/3rdparty/libwebp/src/enc/quant.c @@ -30,8 +30,6 @@ #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP // power-law modulation. Must be strictly less than 1. -#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision - // number of non-zero coeffs below which we consider the block very flat // (and apply a penalty to complex predictions) #define FLATNESS_LIMIT_I16 10 // I16 mode @@ -236,6 +234,8 @@ static int ExpandMatrix(VP8Matrix* const m, int type) { return (sum + 8) >> 4; } +static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; } + static void SetupMatrices(VP8Encoder* enc) { int i; const int tlambda_scale = @@ -245,7 +245,7 @@ static void SetupMatrices(VP8Encoder* enc) { for (i = 0; i < num_segments; ++i) { VP8SegmentInfo* const m = &enc->dqm_[i]; const int q = m->quant_; - int q4, q16, quv; + int q_i4, q_i16, q_uv; m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; @@ -255,21 +255,33 @@ static void SetupMatrices(VP8Encoder* enc) { m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; - q4 = ExpandMatrix(&m->y1_, 0); - q16 = ExpandMatrix(&m->y2_, 1); - quv = ExpandMatrix(&m->uv_, 2); - - m->lambda_i4_ = (3 * q4 * q4) >> 7; - m->lambda_i16_ = (3 * q16 * q16); - m->lambda_uv_ = (3 * quv * quv) >> 6; - m->lambda_mode_ = (1 * q4 * q4) >> 7; - m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; - m->lambda_trellis_i16_ = (q16 * q16) >> 2; - m->lambda_trellis_uv_ = (quv *quv) << 1; - m->tlambda_ = (tlambda_scale * q4) >> 5; + q_i4 = ExpandMatrix(&m->y1_, 0); + q_i16 = ExpandMatrix(&m->y2_, 1); + q_uv = ExpandMatrix(&m->uv_, 2); + + m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7; + m->lambda_i16_ = (3 * q_i16 * q_i16); + m->lambda_uv_ = (3 * q_uv * q_uv) >> 6; + m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7; + m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3; + m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2; + m->lambda_trellis_uv_ = (q_uv * q_uv) << 1; + m->tlambda_ = (tlambda_scale * q_i4) >> 5; + + // none of these constants should be < 1 + CheckLambdaValue(&m->lambda_i4_); + CheckLambdaValue(&m->lambda_i16_); + CheckLambdaValue(&m->lambda_uv_); + CheckLambdaValue(&m->lambda_mode_); + CheckLambdaValue(&m->lambda_trellis_i4_); + CheckLambdaValue(&m->lambda_trellis_i16_); + CheckLambdaValue(&m->lambda_trellis_uv_); + CheckLambdaValue(&m->tlambda_); m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto m->max_edge_ = 0; + + m->i4_penalty_ = 1000 * q_i4 * q_i4; } } @@ -348,7 +360,12 @@ static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, static void SimplifySegments(VP8Encoder* const enc) { int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; - const int num_segments = enc->segment_hdr_.num_segments_; + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid a spurious warning about 'i' exceeding + // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9). + const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) + ? enc->segment_hdr_.num_segments_ + : NUM_MB_SEGMENTS; int num_final_segments = 1; int s1, s2; for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments @@ -1127,19 +1144,29 @@ static void RefineUsingDistortion(VP8EncIterator* const it, int try_both_modes, int refine_uv_mode, VP8ModeScore* const rd) { score_t best_score = MAX_COST; - score_t score_i4 = (score_t)I4_PENALTY; - int16_t tmp_levels[16][16]; - uint8_t modes_i4[16]; int nz = 0; int mode; int is_i16 = try_both_modes || (it->mb_->type_ == 1); + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; + // Some empiric constants, of approximate order of magnitude. + const int lambda_d_i16 = 106; + const int lambda_d_i4 = 11; + const int lambda_d_uv = 120; + score_t score_i4 = dqm->i4_penalty_; + score_t i4_bit_sum = 0; + const score_t bit_limit = it->enc_->mb_header_limit_; + if (is_i16) { // First, evaluate Intra16 distortion int best_mode = -1; const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const score_t score = VP8SSE16x16(src, ref); + const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT + + VP8FixedCostsI16[mode] * lambda_d_i16; + if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { + continue; + } if (score < best_score) { best_mode = mode; best_score = score; @@ -1159,25 +1186,28 @@ static void RefineUsingDistortion(VP8EncIterator* const it, int best_i4_mode = -1; score_t best_i4_score = MAX_COST; const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; + const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); VP8MakeIntra4Preds(it); for (mode = 0; mode < NUM_BMODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; - const score_t score = VP8SSE4x4(src, ref); + const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT + + mode_costs[mode] * lambda_d_i4; if (score < best_i4_score) { best_i4_mode = mode; best_i4_score = score; } } - modes_i4[it->i4_] = best_i4_mode; + i4_bit_sum += mode_costs[best_i4_mode]; + rd->modes_i4[it->i4_] = best_i4_mode; score_i4 += best_i4_score; - if (score_i4 >= best_score) { + if (score_i4 >= best_score || i4_bit_sum > bit_limit) { // Intra4 won't be better than Intra16. Bail out and pick Intra16. is_i16 = 1; break; } else { // reconstruct partial block inside yuv_out2_ buffer uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; - nz |= ReconstructIntra4(it, tmp_levels[it->i4_], + nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], src, tmp_dst, best_i4_mode) << it->i4_; } } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); @@ -1185,8 +1215,7 @@ static void RefineUsingDistortion(VP8EncIterator* const it, // Final reconstruction, depending on which mode is selected. if (!is_i16) { - VP8SetIntra4Mode(it, modes_i4); - memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); + VP8SetIntra4Mode(it, rd->modes_i4); SwapOut(it); best_score = score_i4; } else { @@ -1200,7 +1229,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it, const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; - const score_t score = VP8SSE16x8(src, ref); + const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT + + VP8FixedCostsUV[mode] * lambda_d_uv; if (score < best_uv_score) { best_mode = mode; best_uv_score = score; diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h index b2cc8d1..c1fbd76 100644 --- a/src/3rdparty/libwebp/src/enc/vp8enci.h +++ b/src/3rdparty/libwebp/src/enc/vp8enci.h @@ -22,10 +22,6 @@ #include "../utils/utils.h" #include "../webp/encode.h" -#ifdef WEBP_EXPERIMENTAL_FEATURES -#include "./vp8li.h" -#endif // WEBP_EXPERIMENTAL_FEATURES - #ifdef __cplusplus extern "C" { #endif @@ -36,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 #define ENC_MIN_VERSION 5 -#define ENC_REV_VERSION 0 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -200,6 +196,9 @@ typedef struct { int lambda_i16_, lambda_i4_, lambda_uv_; int lambda_mode_, lambda_trellis_, tlambda_; int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; + + // lambda values for distortion-based evaluation + score_t i4_penalty_; // penalty for using Intra4 } VP8SegmentInfo; // Handy transient struct to accumulate score and info during RD-optimization @@ -395,6 +394,7 @@ struct VP8Encoder { int method_; // 0=fastest, 6=best/slowest. VP8RDLevel rd_opt_level_; // Deduced from method_. int max_i4_header_bits_; // partition #0 safeness factor + int mb_header_limit_; // rough limit for header bits per MB int thread_level_; // derived from config->thread_level int do_search_; // derived from config->target_XXX int use_tokens_; // if true, use token buffer @@ -477,17 +477,12 @@ int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data // in filter.c - -// SSIM utils -typedef struct { - double w, xm, ym, xxm, xym, yym; -} DistoStats; -void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst); +void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst); void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1, const uint8_t* src2, int stride2, - int W, int H, DistoStats* const stats); -double VP8SSIMGet(const DistoStats* const stats); -double VP8SSIMGetSquaredError(const DistoStats* const stats); + int W, int H, VP8DistoStats* const stats); +double VP8SSIMGet(const VP8DistoStats* const stats); +double VP8SSIMGetSquaredError(const VP8DistoStats* const stats); // autofilter void VP8InitFilter(VP8EncIterator* const it); diff --git a/src/3rdparty/libwebp/src/enc/vp8l.c b/src/3rdparty/libwebp/src/enc/vp8l.c index db94e78..c16e256 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l.c +++ b/src/3rdparty/libwebp/src/enc/vp8l.c @@ -126,54 +126,8 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, int low_effort, uint32_t palette[MAX_PALETTE_SIZE], int* const palette_size) { - int i, x, y, key; - int num_colors = 0; - uint8_t in_use[MAX_PALETTE_SIZE * 4] = { 0 }; - uint32_t colors[MAX_PALETTE_SIZE * 4]; - static const uint32_t kHashMul = 0x1e35a7bd; - const uint32_t* argb = pic->argb; - const int width = pic->width; - const int height = pic->height; - uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0] - - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - if (argb[x] == last_pix) { - continue; - } - last_pix = argb[x]; - key = (kHashMul * last_pix) >> PALETTE_KEY_RIGHT_SHIFT; - while (1) { - if (!in_use[key]) { - colors[key] = last_pix; - in_use[key] = 1; - ++num_colors; - if (num_colors > MAX_PALETTE_SIZE) { - return 0; - } - break; - } else if (colors[key] == last_pix) { - // The color is already there. - break; - } else { - // Some other color sits there. - // Do linear conflict resolution. - ++key; - key &= (MAX_PALETTE_SIZE * 4 - 1); // key mask for 1K buffer. - } - } - } - argb += pic->argb_stride; - } - - // TODO(skal): could we reuse in_use[] to speed up EncodePalette()? - num_colors = 0; - for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) { - if (in_use[i]) { - palette[num_colors] = colors[i]; - ++num_colors; - } - } + const int num_colors = WebPGetColorPalette(pic, palette); + if (num_colors > MAX_PALETTE_SIZE) return 0; *palette_size = num_colors; qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { @@ -336,7 +290,7 @@ static int AnalyzeEntropy(const uint32_t* argb, } } } - free(histo); + WebPSafeFree(histo); return 1; } else { return 0; @@ -761,6 +715,10 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, } // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, hash_chain, refs_array); if (refs == NULL) { @@ -824,7 +782,8 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2], int width, int height, int quality, - int low_effort, int* cache_bits, + int low_effort, + int use_cache, int* cache_bits, int histogram_bits, size_t init_byte_position, int* const hdr_size, @@ -856,10 +815,14 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, goto Error; } - *cache_bits = MAX_COLOR_CACHE_BITS; + *cache_bits = use_cache ? MAX_COLOR_CACHE_BITS : 0; // 'best_refs' is the reference to the best backward refs and points to one // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. + if (VP8LHashChainFill(hash_chain, quality, argb, width, height) == 0) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } best_refs = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, cache_bits, hash_chain, refs_array); @@ -1007,14 +970,19 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, int width, int height, int quality, int low_effort, + int used_subtract_green, VP8LBitWriter* const bw) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); + // we disable near-lossless quantization if palette is used. + const int near_lossless_strength = enc->use_palette_ ? 100 + : enc->config_->near_lossless; VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_, enc->transform_data_, - enc->config_->exact); + near_lossless_strength, enc->config_->exact, + used_subtract_green); VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); @@ -1114,6 +1082,12 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, // ----------------------------------------------------------------------------- +static void ClearTransformBuffer(VP8LEncoder* const enc) { + WebPSafeFree(enc->transform_mem_); + enc->transform_mem_ = NULL; + enc->transform_mem_size_ = 0; +} + // Allocates the memory for argb (W x H) buffer, 2 rows of context for // prediction and transform data. // Flags influencing the memory allocated: @@ -1122,43 +1096,48 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic, static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, int width, int height) { WebPEncodingError err = VP8_ENC_OK; - if (enc->argb_ == NULL) { - const int tile_size = 1 << enc->transform_bits_; - const uint64_t image_size = width * height; - // Ensure enough size for tiles, as well as for two scanlines and two - // extra pixels for CopyImageWithPrediction. - const uint64_t argb_scratch_size = - enc->use_predict_ ? tile_size * width + width + 2 : 0; - const int transform_data_size = - (enc->use_predict_ || enc->use_cross_color_) - ? VP8LSubSampleSize(width, enc->transform_bits_) * - VP8LSubSampleSize(height, enc->transform_bits_) - : 0; - const uint64_t total_size = - image_size + WEBP_ALIGN_CST + - argb_scratch_size + WEBP_ALIGN_CST + - (uint64_t)transform_data_size; - uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + const uint64_t image_size = width * height; + // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra + // pixel in each, plus 2 regular scanlines of bytes. + // TODO(skal): Clean up by using arithmetic in bytes instead of words. + const uint64_t argb_scratch_size = + enc->use_predict_ + ? (width + 1) * 2 + + (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) + : 0; + const uint64_t transform_data_size = + (enc->use_predict_ || enc->use_cross_color_) + ? VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_) + : 0; + const uint64_t max_alignment_in_words = + (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); + const uint64_t mem_size = + image_size + max_alignment_in_words + + argb_scratch_size + max_alignment_in_words + + transform_data_size; + uint32_t* mem = enc->transform_mem_; + if (mem == NULL || mem_size > enc->transform_mem_size_) { + ClearTransformBuffer(enc); + mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - enc->argb_ = mem; - mem = (uint32_t*)WEBP_ALIGN(mem + image_size); - enc->argb_scratch_ = mem; - mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); - enc->transform_data_ = mem; - enc->current_width_ = width; + enc->transform_mem_ = mem; + enc->transform_mem_size_ = (size_t)mem_size; } + enc->argb_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + image_size); + enc->argb_scratch_ = mem; + mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size); + enc->transform_data_ = mem; + + enc->current_width_ = width; Error: return err; } -static void ClearTransformBuffer(VP8LEncoder* const enc) { - WebPSafeFree(enc->argb_); - enc->argb_ = NULL; -} - static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { WebPEncodingError err = VP8_ENC_OK; const WebPPicture* const picture = enc->pic_; @@ -1178,8 +1157,35 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { // ----------------------------------------------------------------------------- -static void MapToPalette(const uint32_t palette[], int num_colors, +static int SearchColor(const uint32_t sorted[], uint32_t color, int hi) { + int low = 0; + if (sorted[low] == color) return low; // loop invariant: sorted[low] != color + while (1) { + const int mid = (low + hi) >> 1; + if (sorted[mid] == color) { + return mid; + } else if (sorted[mid] < color) { + low = mid; + } else { + hi = mid; + } + } +} + +// Sort palette in increasing order and prepare an inverse mapping array. +static void PrepareMapToPalette(const uint32_t palette[], int num_colors, + uint32_t sorted[], int idx_map[]) { + int i; + memcpy(sorted, palette, num_colors * sizeof(*sorted)); + qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); + for (i = 0; i < num_colors; ++i) { + idx_map[SearchColor(sorted, palette[i], num_colors)] = i; + } +} + +static void MapToPalette(const uint32_t sorted_palette[], int num_colors, uint32_t* const last_pix, int* const last_idx, + const int idx_map[], const uint32_t* src, uint8_t* dst, int width) { int x; int prev_idx = *last_idx; @@ -1187,14 +1193,8 @@ static void MapToPalette(const uint32_t palette[], int num_colors, for (x = 0; x < width; ++x) { const uint32_t pix = src[x]; if (pix != prev_pix) { - int i; - for (i = 0; i < num_colors; ++i) { - if (pix == palette[i]) { - prev_idx = i; - prev_pix = pix; - break; - } - } + prev_idx = idx_map[SearchColor(sorted_palette, pix, num_colors)]; + prev_pix = pix; } dst[x] = prev_idx; } @@ -1241,11 +1241,16 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, } } else { // Use 1 pixel cache for ARGB pixels. - uint32_t last_pix = palette[0]; - int last_idx = 0; + uint32_t last_pix; + int last_idx; + uint32_t sorted[MAX_PALETTE_SIZE]; + int idx_map[MAX_PALETTE_SIZE]; + PrepareMapToPalette(palette, palette_size, sorted, idx_map); + last_pix = palette[0]; + last_idx = 0; for (y = 0; y < height; ++y) { - MapToPalette(palette, palette_size, &last_pix, &last_idx, - src, tmp_row, width); + MapToPalette(sorted, palette_size, &last_pix, &last_idx, + idx_map, src, tmp_row, width); VP8LBundleColorMap(tmp_row, width, xbits, dst); src += src_stride; dst += dst_stride; @@ -1378,7 +1383,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const WebPPicture* const picture, - VP8LBitWriter* const bw) { + VP8LBitWriter* const bw, int use_cache) { WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); @@ -1405,7 +1410,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, } // Apply near-lossless preprocessing. - use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100); + use_near_lossless = + (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; if (use_near_lossless) { if (!VP8ApplyNearLossless(width, height, picture->argb, config->near_lossless)) { @@ -1457,7 +1463,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, if (enc->use_predict_) { err = ApplyPredictFilter(enc, enc->current_width_, height, quality, - low_effort, bw); + low_effort, enc->use_subtract_green_, bw); if (err != VP8_ENC_OK) goto Error; } @@ -1474,8 +1480,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // Encode and write the transformed image. err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, height, quality, low_effort, - &enc->cache_bits_, enc->histo_bits_, byte_position, - &hdr_size, &data_size); + use_cache, &enc->cache_bits_, enc->histo_bits_, + byte_position, &hdr_size, &data_size); if (err != VP8_ENC_OK) goto Error; if (picture->stats != NULL) { @@ -1560,7 +1566,7 @@ int VP8LEncodeImage(const WebPConfig* const config, if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; // Encode main image stream. - err = VP8LEncodeStream(config, picture, &bw); + err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); if (err != VP8_ENC_OK) goto Error; // TODO(skal): have a fine-grained progress report in VP8LEncodeStream(). diff --git a/src/3rdparty/libwebp/src/enc/vp8li.h b/src/3rdparty/libwebp/src/enc/vp8li.h index 6b6db12..371e276 100644 --- a/src/3rdparty/libwebp/src/enc/vp8li.h +++ b/src/3rdparty/libwebp/src/enc/vp8li.h @@ -25,14 +25,17 @@ extern "C" { #endif typedef struct { - const WebPConfig* config_; // user configuration and parameters - const WebPPicture* pic_; // input picture. + const WebPConfig* config_; // user configuration and parameters + const WebPPicture* pic_; // input picture. - uint32_t* argb_; // Transformed argb image data. - uint32_t* argb_scratch_; // Scratch memory for argb rows - // (used for prediction). - uint32_t* transform_data_; // Scratch memory for transform data. - int current_width_; // Corresponds to packed image width. + uint32_t* argb_; // Transformed argb image data. + uint32_t* argb_scratch_; // Scratch memory for argb rows + // (used for prediction). + uint32_t* transform_data_; // Scratch memory for transform data. + uint32_t* transform_mem_; // Currently allocated memory. + size_t transform_mem_size_; // Currently allocated memory size. + + int current_width_; // Corresponds to packed image width. // Encoding parameters derived from quality parameter. int histo_bits_; @@ -64,9 +67,10 @@ int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture); // Encodes the main image stream using the supplied bit writer. +// If 'use_cache' is false, disables the use of color cache. WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const WebPPicture* const picture, - VP8LBitWriter* const bw); + VP8LBitWriter* const bw, int use_cache); //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/webpenc.c b/src/3rdparty/libwebp/src/enc/webpenc.c index fece736..a7d04ea 100644 --- a/src/3rdparty/libwebp/src/enc/webpenc.c +++ b/src/3rdparty/libwebp/src/enc/webpenc.c @@ -105,6 +105,10 @@ static void MapConfigToTools(VP8Encoder* const enc) { 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block (limit * limit) / (100 * 100); // ... modulated with a quadratic curve. + // partition0 = 512k max. + enc->mb_header_limit_ = + (score_t)256 * 510 * 8 * 1024 / (enc->mb_w_ * enc->mb_h_); + enc->thread_level_ = config->thread_level; enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0); |