diff options
author | Qt Forward Merge Bot <qt_forward_merge_bot@qt-project.org> | 2018-04-14 03:00:39 +0200 |
---|---|---|
committer | Qt Forward Merge Bot <qt_forward_merge_bot@qt-project.org> | 2018-04-14 03:00:39 +0200 |
commit | 4677c8927b24da8121b737856b3259914bcf8bb6 (patch) | |
tree | 937be94efda2fdafbb5de8863a868f5eade5f8b2 /src/3rdparty/libwebp/src/enc | |
parent | 7a9b7172693e8fd90bb601d73506619b64a68993 (diff) | |
parent | b83a0a9460432abb82218da247710a1aaf321336 (diff) |
Merge remote-tracking branch 'origin/5.11' into dev
Change-Id: Ib3426c01e32d7c1ec4bf82b00d67a34bbc16ccb2
Diffstat (limited to 'src/3rdparty/libwebp/src/enc')
30 files changed, 2461 insertions, 1911 deletions
diff --git a/src/3rdparty/libwebp/src/enc/alpha_enc.c b/src/3rdparty/libwebp/src/enc/alpha_enc.c index 5a2c931..7e8d87f 100644 --- a/src/3rdparty/libwebp/src/enc/alpha_enc.c +++ b/src/3rdparty/libwebp/src/enc/alpha_enc.c @@ -14,12 +14,12 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8i_enc.h" -#include "../dsp/dsp.h" -#include "../utils/filters_utils.h" -#include "../utils/quant_levels_utils.h" -#include "../utils/utils.h" -#include "../webp/format_constants.h" +#include "src/enc/vp8i_enc.h" +#include "src/dsp/dsp.h" +#include "src/utils/filters_utils.h" +#include "src/utils/quant_levels_utils.h" +#include "src/utils/utils.h" +#include "src/webp/format_constants.h" // ----------------------------------------------------------------------------- // Encodes the given alpha data via specified compression method 'method'. @@ -44,11 +44,11 @@ // invalid quality or method, or // memory allocation for the compressed data fails. -#include "../enc/vp8li_enc.h" +#include "src/enc/vp8li_enc.h" static int EncodeLossless(const uint8_t* const data, int width, int height, int effort_level, // in [0..6] range - VP8LBitWriter* const bw, + int use_quality_100, VP8LBitWriter* const bw, WebPAuxStats* const stats) { int ok = 0; WebPConfig config; @@ -76,7 +76,10 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, // Set a low default quality for encoding alpha. Ensure that Alpha quality at // lower methods (3 and below) is less than the threshold for triggering // costly 'BackwardReferencesTraceBackwards'. - config.quality = 8.f * effort_level; + // If the alpha quality is set to 100 and the method to 6, allow for a high + // lossless quality to trigger the cruncher. + config.quality = + (use_quality_100 && effort_level == 6) ? 100 : 8.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); // TODO(urvang): Temporary fix to avoid generating images that trigger @@ -134,7 +137,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, if (method != ALPHA_NO_COMPRESSION) { ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3); ok = ok && EncodeLossless(alpha_src, width, height, effort_level, - &tmp_bw, &result->stats); + !reduce_levels, &tmp_bw, &result->stats); if (ok) { output = VP8LBitWriterFinish(&tmp_bw); output_size = VP8LBitWriterNumBytes(&tmp_bw); @@ -264,6 +267,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, reduce_levels, effort_level, NULL, &best); } if (ok) { +#if !defined(WEBP_DISABLE_STATS) if (stats != NULL) { stats->lossless_features = best.stats.lossless_features; stats->histogram_bits = best.stats.histogram_bits; @@ -274,6 +278,9 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, stats->lossless_hdr_size = best.stats.lossless_hdr_size; stats->lossless_data_size = best.stats.lossless_data_size; } +#else + (void)stats; +#endif *output_size = VP8BitWriterSize(&best.bw); *output = VP8BitWriterBuf(&best.bw); } else { @@ -339,10 +346,12 @@ static int EncodeAlpha(VP8Encoder* const enc, ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, filter, reduce_levels, effort_level, output, output_size, pic->stats); +#if !defined(WEBP_DISABLE_STATS) if (pic->stats != NULL) { // need stats? pic->stats->coded_size += (int)(*output_size); enc->sse_[3] = sse; } +#endif } WebPSafeFree(quant_alpha); diff --git a/src/3rdparty/libwebp/src/enc/analysis_enc.c b/src/3rdparty/libwebp/src/enc/analysis_enc.c index dce159b..08f471f 100644 --- a/src/3rdparty/libwebp/src/enc/analysis_enc.c +++ b/src/3rdparty/libwebp/src/enc/analysis_enc.c @@ -15,9 +15,9 @@ #include <string.h> #include <assert.h> -#include "./vp8i_enc.h" -#include "./cost_enc.h" -#include "../utils/utils.h" +#include "src/enc/vp8i_enc.h" +#include "src/enc/cost_enc.h" +#include "src/utils/utils.h" #define MAX_ITERS_K_MEANS 6 diff --git a/src/3rdparty/libwebp/src/enc/backward_references_cost_enc.c b/src/3rdparty/libwebp/src/enc/backward_references_cost_enc.c new file mode 100644 index 0000000..7175496 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/backward_references_cost_enc.c @@ -0,0 +1,790 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Improves a given set of backward references by analyzing its bit cost. +// The algorithm is similar to the Zopfli compression algorithm but tailored to +// images. +// +// Author: Vincent Rabaud (vrabaud@google.com) +// + +#include <assert.h> + +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/dsp/lossless_common.h" +#include "src/utils/color_cache_utils.h" +#include "src/utils/utils.h" + +#define VALUES_IN_BYTE 256 + +extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs); +extern int VP8LDistanceToPlaneCode(int xsize, int dist); +extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v); + +typedef struct { + double alpha_[VALUES_IN_BYTE]; + double red_[VALUES_IN_BYTE]; + double blue_[VALUES_IN_BYTE]; + double distance_[NUM_DISTANCE_CODES]; + double* literal_; +} CostModel; + +static void ConvertPopulationCountTableToBitEstimates( + int num_symbols, const uint32_t population_counts[], double output[]) { + uint32_t sum = 0; + int nonzeros = 0; + int i; + for (i = 0; i < num_symbols; ++i) { + sum += population_counts[i]; + if (population_counts[i] > 0) { + ++nonzeros; + } + } + if (nonzeros <= 1) { + memset(output, 0, num_symbols * sizeof(*output)); + } else { + const double logsum = VP8LFastLog2(sum); + for (i = 0; i < num_symbols; ++i) { + output[i] = logsum - VP8LFastLog2(population_counts[i]); + } + } +} + +static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, + const VP8LBackwardRefs* const refs) { + int ok = 0; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; + + // The following code is similar to VP8LHistogramCreate but converts the + // distance to plane code. + VP8LHistogramInit(histo, cache_bits); + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, VP8LDistanceToPlaneCode, + xsize); + VP8LRefsCursorNext(&c); + } + + ConvertPopulationCountTableToBitEstimates( + VP8LHistogramNumCodes(histo->palette_code_bits_), + histo->literal_, m->literal_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->red_, m->red_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->blue_, m->blue_); + ConvertPopulationCountTableToBitEstimates( + VALUES_IN_BYTE, histo->alpha_, m->alpha_); + ConvertPopulationCountTableToBitEstimates( + NUM_DISTANCE_CODES, histo->distance_, m->distance_); + ok = 1; + + Error: + VP8LFreeHistogram(histo); + return ok; +} + +static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { + return m->alpha_[v >> 24] + + m->red_[(v >> 16) & 0xff] + + m->literal_[(v >> 8) & 0xff] + + m->blue_[v & 0xff]; +} + +static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { + const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; + return m->literal_[literal_idx]; +} + +static WEBP_INLINE double GetLengthCost(const CostModel* const m, + uint32_t length) { + int code, extra_bits; + VP8LPrefixEncodeBits(length, &code, &extra_bits); + return m->literal_[VALUES_IN_BYTE + code] + extra_bits; +} + +static WEBP_INLINE double GetDistanceCost(const CostModel* const m, + uint32_t distance) { + int code, extra_bits; + VP8LPrefixEncodeBits(distance, &code, &extra_bits); + return m->distance_[code] + extra_bits; +} + +static WEBP_INLINE void AddSingleLiteralWithCostModel( + const uint32_t* const argb, VP8LColorCache* const hashers, + const CostModel* const cost_model, int idx, int use_color_cache, + float prev_cost, float* const cost, uint16_t* const dist_array) { + double cost_val = prev_cost; + const uint32_t color = argb[idx]; + const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; + if (ix >= 0) { + // use_color_cache is true and hashers contains color + const double mul0 = 0.68; + cost_val += GetCacheCost(cost_model, ix) * mul0; + } else { + const double mul1 = 0.82; + if (use_color_cache) VP8LColorCacheInsert(hashers, color); + cost_val += GetLiteralCost(cost_model, color) * mul1; + } + if (cost[idx] > cost_val) { + cost[idx] = (float)cost_val; + dist_array[idx] = 1; // only one is inserted. + } +} + +// ----------------------------------------------------------------------------- +// CostManager and interval handling + +// Empirical value to avoid high memory consumption but good for performance. +#define COST_CACHE_INTERVAL_SIZE_MAX 500 + +// To perform backward reference every pixel at index index_ is considered and +// the cost for the MAX_LENGTH following pixels computed. Those following pixels +// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of: +// cost_ = distance cost at index + GetLengthCost(cost_model, k) +// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an +// array of size MAX_LENGTH. +// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the +// minimal values using intervals of constant cost. +// An interval is defined by the index_ of the pixel that generated it and +// is only useful in a range of indices from start_ to end_ (exclusive), i.e. +// it contains the minimum value for pixels between start_ and end_. +// Intervals are stored in a linked list and ordered by start_. When a new +// interval has a better value, old intervals are split or removed. There are +// therefore no overlapping intervals. +typedef struct CostInterval CostInterval; +struct CostInterval { + float cost_; + int start_; + int end_; + int index_; + CostInterval* previous_; + CostInterval* next_; +}; + +// The GetLengthCost(cost_model, k) are cached in a CostCacheInterval. +typedef struct { + double cost_; + int start_; + int end_; // Exclusive. +} CostCacheInterval; + +// This structure is in charge of managing intervals and costs. +// It caches the different CostCacheInterval, caches the different +// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose +// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX). +#define COST_MANAGER_MAX_FREE_LIST 10 +typedef struct { + CostInterval* head_; + int count_; // The number of stored intervals. + CostCacheInterval* cache_intervals_; + size_t cache_intervals_size_; + double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + float* costs_; + uint16_t* dist_array_; + // Most of the time, we only need few intervals -> use a free-list, to avoid + // fragmentation with small allocs in most common cases. + CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST]; + CostInterval* free_intervals_; + // These are regularly malloc'd remains. This list can't grow larger than than + // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note. + CostInterval* recycled_intervals_; +} CostManager; + +static void CostIntervalAddToFreeList(CostManager* const manager, + CostInterval* const interval) { + interval->next_ = manager->free_intervals_; + manager->free_intervals_ = interval; +} + +static int CostIntervalIsInFreeList(const CostManager* const manager, + const CostInterval* const interval) { + return (interval >= &manager->intervals_[0] && + interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]); +} + +static void CostManagerInitFreeList(CostManager* const manager) { + int i; + manager->free_intervals_ = NULL; + for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) { + CostIntervalAddToFreeList(manager, &manager->intervals_[i]); + } +} + +static void DeleteIntervalList(CostManager* const manager, + const CostInterval* interval) { + while (interval != NULL) { + const CostInterval* const next = interval->next_; + if (!CostIntervalIsInFreeList(manager, interval)) { + WebPSafeFree((void*)interval); + } // else: do nothing + interval = next; + } +} + +static void CostManagerClear(CostManager* const manager) { + if (manager == NULL) return; + + WebPSafeFree(manager->costs_); + WebPSafeFree(manager->cache_intervals_); + + // Clear the interval lists. + DeleteIntervalList(manager, manager->head_); + manager->head_ = NULL; + DeleteIntervalList(manager, manager->recycled_intervals_); + manager->recycled_intervals_ = NULL; + + // Reset pointers, count_ and cache_intervals_size_. + memset(manager, 0, sizeof(*manager)); + CostManagerInitFreeList(manager); +} + +static int CostManagerInit(CostManager* const manager, + uint16_t* const dist_array, int pix_count, + const CostModel* const cost_model) { + int i; + const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count; + + manager->costs_ = NULL; + manager->cache_intervals_ = NULL; + manager->head_ = NULL; + manager->recycled_intervals_ = NULL; + manager->count_ = 0; + manager->dist_array_ = dist_array; + CostManagerInitFreeList(manager); + + // Fill in the cost_cache_. + manager->cache_intervals_size_ = 1; + manager->cost_cache_[0] = GetLengthCost(cost_model, 0); + for (i = 1; i < cost_cache_size; ++i) { + manager->cost_cache_[i] = GetLengthCost(cost_model, i); + // Get the number of bound intervals. + if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) { + ++manager->cache_intervals_size_; + } + } + + // With the current cost model, we usually have below 20 intervals. + // The worst case scenario with a cost model would be if every length has a + // different cost, hence MAX_LENGTH but that is impossible with the current + // implementation that spirals around a pixel. + assert(manager->cache_intervals_size_ <= MAX_LENGTH); + manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc( + manager->cache_intervals_size_, sizeof(*manager->cache_intervals_)); + if (manager->cache_intervals_ == NULL) { + CostManagerClear(manager); + return 0; + } + + // Fill in the cache_intervals_. + { + CostCacheInterval* cur = manager->cache_intervals_; + + // Consecutive values in cost_cache_ are compared and if a big enough + // difference is found, a new interval is created and bounded. + cur->start_ = 0; + cur->end_ = 1; + cur->cost_ = manager->cost_cache_[0]; + for (i = 1; i < cost_cache_size; ++i) { + const double cost_val = manager->cost_cache_[i]; + if (cost_val != cur->cost_) { + ++cur; + // Initialize an interval. + cur->start_ = i; + cur->cost_ = cost_val; + } + cur->end_ = i + 1; + } + } + + manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); + if (manager->costs_ == NULL) { + CostManagerClear(manager); + return 0; + } + // Set the initial costs_ high for every pixel as we will keep the minimum. + for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + + return 1; +} + +// Given the cost and the position that define an interval, update the cost at +// pixel 'i' if it is smaller than the previously computed value. +static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, + int position, float cost) { + const int k = i - position; + assert(k >= 0 && k < MAX_LENGTH); + + if (manager->costs_[i] > cost) { + manager->costs_[i] = cost; + manager->dist_array_[i] = k + 1; + } +} + +// Given the cost and the position that define an interval, update the cost for +// all the pixels between 'start' and 'end' excluded. +static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, + int start, int end, int position, + float cost) { + int i; + for (i = start; i < end; ++i) UpdateCost(manager, i, position, cost); +} + +// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. +static WEBP_INLINE void ConnectIntervals(CostManager* const manager, + CostInterval* const prev, + CostInterval* const next) { + if (prev != NULL) { + prev->next_ = next; + } else { + manager->head_ = next; + } + + if (next != NULL) next->previous_ = prev; +} + +// Pop an interval in the manager. +static WEBP_INLINE void PopInterval(CostManager* const manager, + CostInterval* const interval) { + if (interval == NULL) return; + + ConnectIntervals(manager, interval->previous_, interval->next_); + if (CostIntervalIsInFreeList(manager, interval)) { + CostIntervalAddToFreeList(manager, interval); + } else { // recycle regularly malloc'd intervals too + interval->next_ = manager->recycled_intervals_; + manager->recycled_intervals_ = interval; + } + --manager->count_; + assert(manager->count_ >= 0); +} + +// Update the cost at index i by going over all the stored intervals that +// overlap with i. +// If 'do_clean_intervals' is set to something different than 0, intervals that +// end before 'i' will be popped. +static WEBP_INLINE void UpdateCostAtIndex(CostManager* const manager, int i, + int do_clean_intervals) { + CostInterval* current = manager->head_; + + while (current != NULL && current->start_ <= i) { + CostInterval* const next = current->next_; + if (current->end_ <= i) { + if (do_clean_intervals) { + // We have an outdated interval, remove it. + PopInterval(manager, current); + } + } else { + UpdateCost(manager, i, current->index_, current->cost_); + } + current = next; + } +} + +// Given a current orphan interval and its previous interval, before +// it was orphaned (which can be NULL), set it at the right place in the list +// of intervals using the start_ ordering and the previous interval as a hint. +static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, + CostInterval* const current, + CostInterval* previous) { + assert(current != NULL); + + if (previous == NULL) previous = manager->head_; + while (previous != NULL && current->start_ < previous->start_) { + previous = previous->previous_; + } + while (previous != NULL && previous->next_ != NULL && + previous->next_->start_ < current->start_) { + previous = previous->next_; + } + + if (previous != NULL) { + ConnectIntervals(manager, current, previous->next_); + } else { + ConnectIntervals(manager, current, manager->head_); + } + ConnectIntervals(manager, previous, current); +} + +// Insert an interval in the list contained in the manager by starting at +// interval_in as a hint. The intervals are sorted by start_ value. +static WEBP_INLINE void InsertInterval(CostManager* const manager, + CostInterval* const interval_in, + float cost, int position, int start, + int end) { + CostInterval* interval_new; + + if (start >= end) return; + if (manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) { + // Serialize the interval if we cannot store it. + UpdateCostPerInterval(manager, start, end, position, cost); + return; + } + if (manager->free_intervals_ != NULL) { + interval_new = manager->free_intervals_; + manager->free_intervals_ = interval_new->next_; + } else if (manager->recycled_intervals_ != NULL) { + interval_new = manager->recycled_intervals_; + manager->recycled_intervals_ = interval_new->next_; + } else { // malloc for good + interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new)); + if (interval_new == NULL) { + // Write down the interval if we cannot create it. + UpdateCostPerInterval(manager, start, end, position, cost); + return; + } + } + + interval_new->cost_ = cost; + interval_new->index_ = position; + interval_new->start_ = start; + interval_new->end_ = end; + PositionOrphanInterval(manager, interval_new, interval_in); + + ++manager->count_; +} + +// Given a new cost interval defined by its start at position, its length value +// and distance_cost, add its contributions to the previous intervals and costs. +// If handling the interval or one of its subintervals becomes to heavy, its +// contribution is added to the costs right away. +static WEBP_INLINE void PushInterval(CostManager* const manager, + double distance_cost, int position, + int len) { + size_t i; + CostInterval* interval = manager->head_; + CostInterval* interval_next; + const CostCacheInterval* const cost_cache_intervals = + manager->cache_intervals_; + // If the interval is small enough, no need to deal with the heavy + // interval logic, just serialize it right away. This constant is empirical. + const int kSkipDistance = 10; + + if (len < kSkipDistance) { + int j; + for (j = position; j < position + len; ++j) { + const int k = j - position; + float cost_tmp; + assert(k >= 0 && k < MAX_LENGTH); + cost_tmp = (float)(distance_cost + manager->cost_cache_[k]); + + if (manager->costs_[j] > cost_tmp) { + manager->costs_[j] = cost_tmp; + manager->dist_array_[j] = k + 1; + } + } + return; + } + + for (i = 0; i < manager->cache_intervals_size_ && + cost_cache_intervals[i].start_ < len; + ++i) { + // Define the intersection of the ith interval with the new one. + int start = position + cost_cache_intervals[i].start_; + const int end = position + (cost_cache_intervals[i].end_ > len + ? len + : cost_cache_intervals[i].end_); + const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_); + + for (; interval != NULL && interval->start_ < end; + interval = interval_next) { + interval_next = interval->next_; + + // Make sure we have some overlap + if (start >= interval->end_) continue; + + if (cost >= interval->cost_) { + // When intervals are represented, the lower, the better. + // [**********************************************************[ + // start end + // [----------------------------------[ + // interval->start_ interval->end_ + // If we are worse than what we already have, add whatever we have so + // far up to interval. + const int start_new = interval->end_; + InsertInterval(manager, interval, cost, position, start, + interval->start_); + start = start_new; + if (start >= end) break; + continue; + } + + if (start <= interval->start_) { + if (interval->end_ <= end) { + // [----------------------------------[ + // interval->start_ interval->end_ + // [**************************************************************[ + // start end + // We can safely remove the old interval as it is fully included. + PopInterval(manager, interval); + } else { + // [------------------------------------[ + // interval->start_ interval->end_ + // [*****************************[ + // start end + interval->start_ = end; + break; + } + } else { + if (end < interval->end_) { + // [--------------------------------------------------------------[ + // interval->start_ interval->end_ + // [*****************************[ + // start end + // We have to split the old interval as it fully contains the new one. + const int end_original = interval->end_; + interval->end_ = start; + InsertInterval(manager, interval, interval->cost_, interval->index_, + end, end_original); + interval = interval->next_; + break; + } else { + // [------------------------------------[ + // interval->start_ interval->end_ + // [*****************************[ + // start end + interval->end_ = start; + } + } + } + // Insert the remaining interval from start to end. + InsertInterval(manager, interval, cost, position, start, end); + } +} + +static int BackwardReferencesHashChainDistanceOnly( + int xsize, int ysize, const uint32_t* const argb, int cache_bits, + const VP8LHashChain* const hash_chain, const VP8LBackwardRefs* const refs, + uint16_t* const dist_array) { + int i; + int ok = 0; + int cc_init = 0; + const int pix_count = xsize * ysize; + const int use_color_cache = (cache_bits > 0); + const size_t literal_array_size = + sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((cache_bits > 0) ? (1 << cache_bits) : 0)); + const size_t cost_model_size = sizeof(CostModel) + literal_array_size; + CostModel* const cost_model = + (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); + VP8LColorCache hashers; + CostManager* cost_manager = + (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); + int offset_prev = -1, len_prev = -1; + double offset_cost = -1; + int first_offset_is_constant = -1; // initialized with 'impossible' value + int reach = 0; + + if (cost_model == NULL || cost_manager == NULL) goto Error; + + cost_model->literal_ = (double*)(cost_model + 1); + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + + if (!CostModelBuild(cost_model, xsize, cache_bits, refs)) { + goto Error; + } + + if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) { + goto Error; + } + + // We loop one pixel at a time, but store all currently best points to + // non-processed locations from this point. + dist_array[0] = 0; + // Add first pixel as literal. + AddSingleLiteralWithCostModel(argb, &hashers, cost_model, 0, use_color_cache, + 0.f, cost_manager->costs_, dist_array); + + for (i = 1; i < pix_count; ++i) { + const float prev_cost = cost_manager->costs_[i - 1]; + int offset, len; + VP8LHashChainFindCopy(hash_chain, i, &offset, &len); + + // Try adding the pixel as a literal. + AddSingleLiteralWithCostModel(argb, &hashers, cost_model, i, + use_color_cache, prev_cost, + cost_manager->costs_, dist_array); + + // If we are dealing with a non-literal. + if (len >= 2) { + if (offset != offset_prev) { + const int code = VP8LDistanceToPlaneCode(xsize, offset); + offset_cost = GetDistanceCost(cost_model, code); + first_offset_is_constant = 1; + PushInterval(cost_manager, prev_cost + offset_cost, i, len); + } else { + assert(offset_cost >= 0); + assert(len_prev >= 0); + assert(first_offset_is_constant == 0 || first_offset_is_constant == 1); + // Instead of considering all contributions from a pixel i by calling: + // PushInterval(cost_manager, prev_cost + offset_cost, i, len); + // we optimize these contributions in case offset_cost stays the same + // for consecutive pixels. This describes a set of pixels similar to a + // previous set (e.g. constant color regions). + if (first_offset_is_constant) { + reach = i - 1 + len_prev - 1; + first_offset_is_constant = 0; + } + + if (i + len - 1 > reach) { + // We can only be go further with the same offset if the previous + // length was maxed, hence len_prev == len == MAX_LENGTH. + // TODO(vrabaud), bump i to the end right away (insert cache and + // update cost). + // TODO(vrabaud), check if one of the points in between does not have + // a lower cost. + // Already consider the pixel at "reach" to add intervals that are + // better than whatever we add. + int offset_j, len_j = 0; + int j; + assert(len == MAX_LENGTH || len == pix_count - i); + // Figure out the last consecutive pixel within [i, reach + 1] with + // the same offset. + for (j = i; j <= reach; ++j) { + VP8LHashChainFindCopy(hash_chain, j + 1, &offset_j, &len_j); + if (offset_j != offset) { + VP8LHashChainFindCopy(hash_chain, j, &offset_j, &len_j); + break; + } + } + // Update the cost at j - 1 and j. + UpdateCostAtIndex(cost_manager, j - 1, 0); + UpdateCostAtIndex(cost_manager, j, 0); + + PushInterval(cost_manager, cost_manager->costs_[j - 1] + offset_cost, + j, len_j); + reach = j + len_j - 1; + } + } + } + + UpdateCostAtIndex(cost_manager, i, 1); + offset_prev = offset; + len_prev = len; + } + + ok = !refs->error_; +Error: + if (cc_init) VP8LColorCacheClear(&hashers); + CostManagerClear(cost_manager); + WebPSafeFree(cost_model); + WebPSafeFree(cost_manager); + return ok; +} + +// We pack the path at the end of *dist_array and return +// a pointer to this part of the array. Example: +// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232] +static void TraceBackwards(uint16_t* const dist_array, + int dist_array_size, + uint16_t** const chosen_path, + int* const chosen_path_size) { + uint16_t* path = dist_array + dist_array_size; + uint16_t* cur = dist_array + dist_array_size - 1; + while (cur >= dist_array) { + const int k = *cur; + --path; + *path = k; + cur -= k; + } + *chosen_path = path; + *chosen_path_size = (int)(dist_array + dist_array_size - path); +} + +static int BackwardReferencesHashChainFollowChosenPath( + const uint32_t* const argb, int cache_bits, + const uint16_t* const chosen_path, int chosen_path_size, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { + const int use_color_cache = (cache_bits > 0); + int ix; + int i = 0; + int ok = 0; + int cc_init = 0; + VP8LColorCache hashers; + + if (use_color_cache) { + cc_init = VP8LColorCacheInit(&hashers, cache_bits); + if (!cc_init) goto Error; + } + + VP8LClearBackwardRefs(refs); + for (ix = 0; ix < chosen_path_size; ++ix) { + const int len = chosen_path[ix]; + if (len != 1) { + int k; + const int offset = VP8LHashChainFindOffset(hash_chain, i); + VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + if (use_color_cache) { + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); + } + } + i += len; + } else { + PixOrCopy v; + const int idx = + use_color_cache ? VP8LColorCacheContains(&hashers, argb[i]) : -1; + if (idx >= 0) { + // use_color_cache is true and hashers contains argb[i] + // push pixel as a color cache index + v = PixOrCopyCreateCacheIdx(idx); + } else { + if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); + v = PixOrCopyCreateLiteral(argb[i]); + } + VP8LBackwardRefsCursorAdd(refs, v); + ++i; + } + } + ok = !refs->error_; + Error: + if (cc_init) VP8LColorCacheClear(&hashers); + return ok; +} + +// Returns 1 on success. +extern int VP8LBackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int cache_bits, + const VP8LHashChain* const hash_chain, + const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst); +int VP8LBackwardReferencesTraceBackwards(int xsize, int ysize, + const uint32_t* const argb, + int cache_bits, + const VP8LHashChain* const hash_chain, + const VP8LBackwardRefs* const refs_src, + VP8LBackwardRefs* const refs_dst) { + int ok = 0; + const int dist_array_size = xsize * ysize; + uint16_t* chosen_path = NULL; + int chosen_path_size = 0; + uint16_t* dist_array = + (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); + + if (dist_array == NULL) goto Error; + + if (!BackwardReferencesHashChainDistanceOnly( + xsize, ysize, argb, cache_bits, hash_chain, refs_src, dist_array)) { + goto Error; + } + TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); + if (!BackwardReferencesHashChainFollowChosenPath( + argb, cache_bits, chosen_path, chosen_path_size, hash_chain, + refs_dst)) { + goto Error; + } + ok = 1; + Error: + WebPSafeFree(dist_array); + return ok; +} diff --git a/src/3rdparty/libwebp/src/enc/backward_references_enc.c b/src/3rdparty/libwebp/src/enc/backward_references_enc.c index 7c0559f..3923018 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references_enc.c +++ b/src/3rdparty/libwebp/src/enc/backward_references_enc.c @@ -13,35 +13,24 @@ #include <assert.h> #include <math.h> -#include "./backward_references_enc.h" -#include "./histogram_enc.h" -#include "../dsp/lossless.h" -#include "../dsp/lossless_common.h" -#include "../dsp/dsp.h" -#include "../utils/color_cache_utils.h" -#include "../utils/utils.h" - -#define VALUES_IN_BYTE 256 +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" +#include "src/dsp/dsp.h" +#include "src/utils/color_cache_utils.h" +#include "src/utils/utils.h" #define MIN_BLOCK_SIZE 256 // minimum block size for backward references #define MAX_ENTROPY (1e30f) // 1M window (4M bytes) minus 120 special codes for short distances. -#define WINDOW_SIZE_BITS 20 #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) // Minimum number of pixels for which it is cheaper to encode a // distance + length instead of each pixel as a literal. #define MIN_LENGTH 4 -// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it -// is used in VP8LHashChain. -#define MAX_LENGTH_BITS 12 -// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits. -#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1) -#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32 -#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32" -#endif // ----------------------------------------------------------------------------- @@ -56,7 +45,8 @@ static const uint8_t plane_to_code_lut[128] = { 119, 116, 111, 106, 97, 88, 84, 74, 72, 75, 85, 89, 98, 107, 112, 117 }; -static int DistanceToPlaneCode(int xsize, int dist) { +extern int VP8LDistanceToPlaneCode(int xsize, int dist); +int VP8LDistanceToPlaneCode(int xsize, int dist) { const int yoffset = dist / xsize; const int xoffset = dist - yoffset * xsize; if (xoffset <= 8 && yoffset < 8) { @@ -91,7 +81,8 @@ struct PixOrCopyBlock { int size_; // currently used size }; -static void ClearBackwardRefs(VP8LBackwardRefs* const refs) { +extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs); +void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) { assert(refs != NULL); if (refs->tail_ != NULL) { *refs->tail_ = refs->free_blocks_; // recycle all blocks at once @@ -104,7 +95,7 @@ static void ClearBackwardRefs(VP8LBackwardRefs* const refs) { void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { assert(refs != NULL); - ClearBackwardRefs(refs); + VP8LClearBackwardRefs(refs); while (refs->free_blocks_ != NULL) { PixOrCopyBlock* const next = refs->free_blocks_->next_; WebPSafeFree(refs->free_blocks_); @@ -163,8 +154,10 @@ static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { return b; } -static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, - const PixOrCopy v) { +extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v); +void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v) { PixOrCopyBlock* b = refs->last_block_; if (b == NULL || b->size_ == refs->block_size_) { b = BackwardRefsNewBlock(refs); @@ -173,21 +166,6 @@ static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, b->start_[b->size_++] = v; } -int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, - VP8LBackwardRefs* const dst) { - const PixOrCopyBlock* b = src->refs_; - ClearBackwardRefs(dst); - assert(src->block_size_ == dst->block_size_); - while (b != NULL) { - PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst); - if (new_b == NULL) return 0; // dst->error_ is set - memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_)); - new_b->size_ = b->size_; - b = b->next_; - } - return 1; -} - // ----------------------------------------------------------------------------- // Hash chains @@ -411,24 +389,6 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, return 1; } -static WEBP_INLINE int HashChainFindOffset(const VP8LHashChain* const p, - const int base_position) { - return p->offset_length_[base_position] >> MAX_LENGTH_BITS; -} - -static WEBP_INLINE int HashChainFindLength(const VP8LHashChain* const p, - const int base_position) { - return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1); -} - -static WEBP_INLINE void HashChainFindCopy(const VP8LHashChain* const p, - int base_position, - int* const offset_ptr, - int* const length_ptr) { - *offset_ptr = HashChainFindOffset(p, base_position); - *length_ptr = HashChainFindLength(p, base_position); -} - static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, VP8LColorCache* const hashers, VP8LBackwardRefs* const refs) { @@ -444,7 +404,7 @@ static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, } else { v = PixOrCopyCreateLiteral(pixel); } - BackwardRefsCursorAdd(refs, v); + VP8LBackwardRefsCursorAdd(refs, v); } static int BackwardReferencesRle(int xsize, int ysize, @@ -458,7 +418,7 @@ static int BackwardReferencesRle(int xsize, int ysize, if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) { return 0; } - ClearBackwardRefs(refs); + VP8LClearBackwardRefs(refs); // Add first pixel as literal. AddSingleLiteral(argb[0], use_color_cache, &hashers, refs); i = 1; @@ -468,13 +428,13 @@ static int BackwardReferencesRle(int xsize, int ysize, const int prev_row_len = (i < xsize) ? 0 : FindMatchLength(argb + i, argb + i - xsize, 0, max_len); if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) { - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); + VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len)); // We don't need to update the color cache here since it is always the // same pixel being copied, and that does not change the color cache // state. i += rle_len; } else if (prev_row_len >= MIN_LENGTH) { - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); + VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len)); if (use_color_cache) { for (k = 0; k < prev_row_len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); @@ -506,17 +466,18 @@ static int BackwardReferencesLz77(int xsize, int ysize, cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - ClearBackwardRefs(refs); + VP8LClearBackwardRefs(refs); for (i = 0; i < pix_count;) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; int len = 0; int j; - HashChainFindCopy(hash_chain, i, &offset, &len); + VP8LHashChainFindCopy(hash_chain, i, &offset, &len); if (len >= MIN_LENGTH) { const int len_ini = len; int max_reach = 0; - assert(i + len < pix_count); + const int j_max = + (i + len_ini >= pix_count) ? pix_count - 1 : i + len_ini; // Only start from what we have not checked already. i_last_check = (i > i_last_check) ? i : i_last_check; // We know the best match for the current pixel but we try to find the @@ -525,13 +486,14 @@ static int BackwardReferencesLz77(int xsize, int ysize, // [i,i+len) + [i+len, length of best match at i+len) // while we check if we can use: // [i,j) (where j<=i+len) + [j, length of best match at j) - for (j = i_last_check + 1; j <= i + len_ini; ++j) { - const int len_j = HashChainFindLength(hash_chain, j); + for (j = i_last_check + 1; j <= j_max; ++j) { + const int len_j = VP8LHashChainFindLength(hash_chain, j); const int reach = j + (len_j >= MIN_LENGTH ? len_j : 1); // 1 for single literal. if (reach > max_reach) { len = j - i; max_reach = reach; + if (max_reach >= pix_count) break; } } } else { @@ -542,7 +504,7 @@ static int BackwardReferencesLz77(int xsize, int ysize, if (len == 1) { AddSingleLiteral(argb[i], use_color_cache, &hashers, refs); } else { - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); + VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]); } @@ -556,989 +518,200 @@ static int BackwardReferencesLz77(int xsize, int ysize, return ok; } -// ----------------------------------------------------------------------------- - -typedef struct { - double alpha_[VALUES_IN_BYTE]; - double red_[VALUES_IN_BYTE]; - double blue_[VALUES_IN_BYTE]; - double distance_[NUM_DISTANCE_CODES]; - double* literal_; -} CostModel; - -static int BackwardReferencesTraceBackwards( - int xsize, int ysize, const uint32_t* const argb, int quality, - int cache_bits, const VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs); - -static void ConvertPopulationCountTableToBitEstimates( - int num_symbols, const uint32_t population_counts[], double output[]) { - uint32_t sum = 0; - int nonzeros = 0; - int i; - for (i = 0; i < num_symbols; ++i) { - sum += population_counts[i]; - if (population_counts[i] > 0) { - ++nonzeros; - } - } - if (nonzeros <= 1) { - memset(output, 0, num_symbols * sizeof(*output)); - } else { - const double logsum = VP8LFastLog2(sum); - for (i = 0; i < num_symbols; ++i) { - output[i] = logsum - VP8LFastLog2(population_counts[i]); - } - } -} - -static int CostModelBuild(CostModel* const m, int cache_bits, - VP8LBackwardRefs* const refs) { - int ok = 0; - VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); - if (histo == NULL) goto Error; - - VP8LHistogramCreate(histo, refs, cache_bits); - - ConvertPopulationCountTableToBitEstimates( - VP8LHistogramNumCodes(histo->palette_code_bits_), - histo->literal_, m->literal_); - ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo->red_, m->red_); - ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo->blue_, m->blue_); - ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo->alpha_, m->alpha_); - ConvertPopulationCountTableToBitEstimates( - NUM_DISTANCE_CODES, histo->distance_, m->distance_); - ok = 1; - - Error: - VP8LFreeHistogram(histo); - return ok; -} - -static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { - return m->alpha_[v >> 24] + - m->red_[(v >> 16) & 0xff] + - m->literal_[(v >> 8) & 0xff] + - m->blue_[v & 0xff]; -} - -static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { - const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; - return m->literal_[literal_idx]; -} - -static WEBP_INLINE double GetLengthCost(const CostModel* const m, - uint32_t length) { - int code, extra_bits; - VP8LPrefixEncodeBits(length, &code, &extra_bits); - return m->literal_[VALUES_IN_BYTE + code] + extra_bits; -} - -static WEBP_INLINE double GetDistanceCost(const CostModel* const m, - uint32_t distance) { - int code, extra_bits; - VP8LPrefixEncodeBits(distance, &code, &extra_bits); - return m->distance_[code] + extra_bits; -} - -static void AddSingleLiteralWithCostModel(const uint32_t* const argb, - VP8LColorCache* const hashers, - const CostModel* const cost_model, - int idx, int use_color_cache, - double prev_cost, float* const cost, - uint16_t* const dist_array) { - double cost_val = prev_cost; - const uint32_t color = argb[0]; - const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; - if (ix >= 0) { - // use_color_cache is true and hashers contains color - const double mul0 = 0.68; - cost_val += GetCacheCost(cost_model, ix) * mul0; - } else { - const double mul1 = 0.82; - if (use_color_cache) VP8LColorCacheInsert(hashers, color); - cost_val += GetLiteralCost(cost_model, color) * mul1; - } - if (cost[idx] > cost_val) { - cost[idx] = (float)cost_val; - dist_array[idx] = 1; // only one is inserted. - } -} - -// ----------------------------------------------------------------------------- -// CostManager and interval handling - -// Empirical value to avoid high memory consumption but good for performance. -#define COST_CACHE_INTERVAL_SIZE_MAX 100 - -// To perform backward reference every pixel at index index_ is considered and -// the cost for the MAX_LENGTH following pixels computed. Those following pixels -// at index index_ + k (k from 0 to MAX_LENGTH) have a cost of: -// distance_cost_ at index_ + GetLengthCost(cost_model, k) -// (named cost) (named cached cost) -// and the minimum value is kept. GetLengthCost(cost_model, k) is cached in an -// array of size MAX_LENGTH. -// Instead of performing MAX_LENGTH comparisons per pixel, we keep track of the -// minimal values using intervals, for which lower_ and upper_ bounds are kept. -// An interval is defined by the index_ of the pixel that generated it and -// is only useful in a range of indices from start_ to end_ (exclusive), i.e. -// it contains the minimum value for pixels between start_ and end_. -// Intervals are stored in a linked list and ordered by start_. When a new -// interval has a better minimum, old intervals are split or removed. -typedef struct CostInterval CostInterval; -struct CostInterval { - double lower_; - double upper_; - int start_; - int end_; - double distance_cost_; - int index_; - CostInterval* previous_; - CostInterval* next_; -}; - -// The GetLengthCost(cost_model, k) part of the costs is also bounded for -// efficiency in a set of intervals of a different type. -// If those intervals are small enough, they are not used for comparison and -// written into the costs right away. -typedef struct { - double lower_; // Lower bound of the interval. - double upper_; // Upper bound of the interval. - int start_; - int end_; // Exclusive. - int do_write_; // If !=0, the interval is saved to cost instead of being kept - // for comparison. -} CostCacheInterval; - -// This structure is in charge of managing intervals and costs. -// It caches the different CostCacheInterval, caches the different -// GetLengthCost(cost_model, k) in cost_cache_ and the CostInterval's (whose -// count_ is limited by COST_CACHE_INTERVAL_SIZE_MAX). -#define COST_MANAGER_MAX_FREE_LIST 10 -typedef struct { - CostInterval* head_; - int count_; // The number of stored intervals. - CostCacheInterval* cache_intervals_; - size_t cache_intervals_size_; - double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). - double min_cost_cache_; // The minimum value in cost_cache_[1:]. - double max_cost_cache_; // The maximum value in cost_cache_[1:]. - float* costs_; - uint16_t* dist_array_; - // Most of the time, we only need few intervals -> use a free-list, to avoid - // fragmentation with small allocs in most common cases. - CostInterval intervals_[COST_MANAGER_MAX_FREE_LIST]; - CostInterval* free_intervals_; - // These are regularly malloc'd remains. This list can't grow larger than than - // size COST_CACHE_INTERVAL_SIZE_MAX - COST_MANAGER_MAX_FREE_LIST, note. - CostInterval* recycled_intervals_; - // Buffer used in BackwardReferencesHashChainDistanceOnly to store the ends - // of the intervals that can have impacted the cost at a pixel. - int* interval_ends_; - int interval_ends_size_; -} CostManager; - -static int IsCostCacheIntervalWritable(int start, int end) { - // 100 is the length for which we consider an interval for comparison, and not - // for writing. - // The first intervals are very small and go in increasing size. This constant - // helps merging them into one big interval (up to index 150/200 usually from - // which intervals start getting much bigger). - // This value is empirical. - return (end - start + 1 < 100); -} - -static void CostIntervalAddToFreeList(CostManager* const manager, - CostInterval* const interval) { - interval->next_ = manager->free_intervals_; - manager->free_intervals_ = interval; -} - -static int CostIntervalIsInFreeList(const CostManager* const manager, - const CostInterval* const interval) { - return (interval >= &manager->intervals_[0] && - interval <= &manager->intervals_[COST_MANAGER_MAX_FREE_LIST - 1]); -} - -static void CostManagerInitFreeList(CostManager* const manager) { - int i; - manager->free_intervals_ = NULL; - for (i = 0; i < COST_MANAGER_MAX_FREE_LIST; ++i) { - CostIntervalAddToFreeList(manager, &manager->intervals_[i]); - } -} - -static void DeleteIntervalList(CostManager* const manager, - const CostInterval* interval) { - while (interval != NULL) { - const CostInterval* const next = interval->next_; - if (!CostIntervalIsInFreeList(manager, interval)) { - WebPSafeFree((void*)interval); - } // else: do nothing - interval = next; - } -} - -static void CostManagerClear(CostManager* const manager) { - if (manager == NULL) return; - - WebPSafeFree(manager->costs_); - WebPSafeFree(manager->cache_intervals_); - WebPSafeFree(manager->interval_ends_); - - // Clear the interval lists. - DeleteIntervalList(manager, manager->head_); - manager->head_ = NULL; - DeleteIntervalList(manager, manager->recycled_intervals_); - manager->recycled_intervals_ = NULL; - - // Reset pointers, count_ and cache_intervals_size_. - memset(manager, 0, sizeof(*manager)); - CostManagerInitFreeList(manager); -} - -static int CostManagerInit(CostManager* const manager, - uint16_t* const dist_array, int pix_count, - const CostModel* const cost_model) { +// Compute an LZ77 by forcing matches to happen within a given distance cost. +// We therefore limit the algorithm to the lowest 32 values in the PlaneCode +// definition. +#define WINDOW_OFFSETS_SIZE_MAX 32 +static int BackwardReferencesLz77Box(int xsize, int ysize, + const uint32_t* const argb, int cache_bits, + const VP8LHashChain* const hash_chain_best, + VP8LHashChain* hash_chain, + VP8LBackwardRefs* const refs) { int i; - const int cost_cache_size = (pix_count > MAX_LENGTH) ? MAX_LENGTH : pix_count; - // This constant is tied to the cost_model we use. - // Empirically, differences between intervals is usually of more than 1. - const double min_cost_diff = 0.1; - - manager->costs_ = NULL; - manager->cache_intervals_ = NULL; - manager->interval_ends_ = NULL; - manager->head_ = NULL; - manager->recycled_intervals_ = NULL; - manager->count_ = 0; - manager->dist_array_ = dist_array; - CostManagerInitFreeList(manager); - - // Fill in the cost_cache_. - manager->cache_intervals_size_ = 1; - manager->cost_cache_[0] = 0; - for (i = 1; i < cost_cache_size; ++i) { - manager->cost_cache_[i] = GetLengthCost(cost_model, i); - // Get an approximation of the number of bound intervals. - if (fabs(manager->cost_cache_[i] - manager->cost_cache_[i - 1]) > - min_cost_diff) { - ++manager->cache_intervals_size_; - } - // Compute the minimum of cost_cache_. - if (i == 1) { - manager->min_cost_cache_ = manager->cost_cache_[1]; - manager->max_cost_cache_ = manager->cost_cache_[1]; - } else if (manager->cost_cache_[i] < manager->min_cost_cache_) { - manager->min_cost_cache_ = manager->cost_cache_[i]; - } else if (manager->cost_cache_[i] > manager->max_cost_cache_) { - manager->max_cost_cache_ = manager->cost_cache_[i]; - } - } - - // With the current cost models, we have 15 intervals, so we are safe by - // setting a maximum of COST_CACHE_INTERVAL_SIZE_MAX. - if (manager->cache_intervals_size_ > COST_CACHE_INTERVAL_SIZE_MAX) { - manager->cache_intervals_size_ = COST_CACHE_INTERVAL_SIZE_MAX; - } - manager->cache_intervals_ = (CostCacheInterval*)WebPSafeMalloc( - manager->cache_intervals_size_, sizeof(*manager->cache_intervals_)); - if (manager->cache_intervals_ == NULL) { - CostManagerClear(manager); - return 0; - } - - // Fill in the cache_intervals_. - { - double cost_prev = -1e38f; // unprobably low initial value - CostCacheInterval* prev = NULL; - CostCacheInterval* cur = manager->cache_intervals_; - const CostCacheInterval* const end = - manager->cache_intervals_ + manager->cache_intervals_size_; - - // Consecutive values in cost_cache_ are compared and if a big enough - // difference is found, a new interval is created and bounded. - for (i = 0; i < cost_cache_size; ++i) { - const double cost_val = manager->cost_cache_[i]; - if (i == 0 || - (fabs(cost_val - cost_prev) > min_cost_diff && cur + 1 < end)) { - if (i > 1) { - const int is_writable = - IsCostCacheIntervalWritable(cur->start_, cur->end_); - // Merge with the previous interval if both are writable. - if (is_writable && cur != manager->cache_intervals_ && - prev->do_write_) { - // Update the previous interval. - prev->end_ = cur->end_; - if (cur->lower_ < prev->lower_) { - prev->lower_ = cur->lower_; - } else if (cur->upper_ > prev->upper_) { - prev->upper_ = cur->upper_; - } - } else { - cur->do_write_ = is_writable; - prev = cur; - ++cur; - } - } - // Initialize an interval. - cur->start_ = i; - cur->do_write_ = 0; - cur->lower_ = cost_val; - cur->upper_ = cost_val; - } else { - // Update the current interval bounds. - if (cost_val < cur->lower_) { - cur->lower_ = cost_val; - } else if (cost_val > cur->upper_) { - cur->upper_ = cost_val; - } - } - cur->end_ = i + 1; - cost_prev = cost_val; + const int pix_count = xsize * ysize; + uint16_t* counts; + int window_offsets[WINDOW_OFFSETS_SIZE_MAX] = {0}; + int window_offsets_new[WINDOW_OFFSETS_SIZE_MAX] = {0}; + int window_offsets_size = 0; + int window_offsets_new_size = 0; + uint16_t* const counts_ini = + (uint16_t*)WebPSafeMalloc(xsize * ysize, sizeof(*counts_ini)); + int best_offset_prev = -1, best_length_prev = -1; + if (counts_ini == NULL) return 0; + + // counts[i] counts how many times a pixel is repeated starting at position i. + i = pix_count - 2; + counts = counts_ini + i; + counts[1] = 1; + for (; i >= 0; --i, --counts) { + if (argb[i] == argb[i + 1]) { + // Max out the counts to MAX_LENGTH. + counts[0] = counts[1] + (counts[1] != MAX_LENGTH); + } else { + counts[0] = 1; } - manager->cache_intervals_size_ = cur + 1 - manager->cache_intervals_; } - manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); - if (manager->costs_ == NULL) { - CostManagerClear(manager); - return 0; - } - // Set the initial costs_ high for every pixel as we will keep the minimum. - for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; - - // The cost at pixel is influenced by the cost intervals from previous pixels. - // Let us take the specific case where the offset is the same (which actually - // happens a lot in case of uniform regions). - // pixel i contributes to j>i a cost of: offset cost + cost_cache_[j-i] - // pixel i+1 contributes to j>i a cost of: 2*offset cost + cost_cache_[j-i-1] - // pixel i+2 contributes to j>i a cost of: 3*offset cost + cost_cache_[j-i-2] - // and so on. - // A pixel i influences the following length(j) < MAX_LENGTH pixels. What is - // the value of j such that pixel i + j cannot influence any of those pixels? - // This value is such that: - // max of cost_cache_ < j*offset cost + min of cost_cache_ - // (pixel i + j 's cost cannot beat the worst cost given by pixel i). - // This value will be used to optimize the cost computation in - // BackwardReferencesHashChainDistanceOnly. + // Figure out the window offsets around a pixel. They are stored in a + // spiraling order around the pixel as defined by VP8LDistanceToPlaneCode. { - // The offset cost is computed in GetDistanceCost and has a minimum value of - // the minimum in cost_model->distance_. The case where the offset cost is 0 - // will be dealt with differently later so we are only interested in the - // minimum non-zero offset cost. - double offset_cost_min = 0.; - int size; - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - if (cost_model->distance_[i] != 0) { - if (offset_cost_min == 0.) { - offset_cost_min = cost_model->distance_[i]; - } else if (cost_model->distance_[i] < offset_cost_min) { - offset_cost_min = cost_model->distance_[i]; - } + int x, y; + for (y = 0; y <= 6; ++y) { + for (x = -6; x <= 6; ++x) { + const int offset = y * xsize + x; + int plane_code; + // Ignore offsets that bring us after the pixel. + if (offset <= 0) continue; + plane_code = VP8LDistanceToPlaneCode(xsize, offset) - 1; + if (plane_code >= WINDOW_OFFSETS_SIZE_MAX) continue; + window_offsets[plane_code] = offset; } } - // In case all the cost_model->distance_ is 0, the next non-zero cost we - // can have is from the extra bit in GetDistanceCost, hence 1. - if (offset_cost_min < 1.) offset_cost_min = 1.; - - size = 1 + (int)ceil((manager->max_cost_cache_ - manager->min_cost_cache_) / - offset_cost_min); - // Empirically, we usually end up with a value below 100. - if (size > MAX_LENGTH) size = MAX_LENGTH; - - manager->interval_ends_ = - (int*)WebPSafeMalloc(size, sizeof(*manager->interval_ends_)); - if (manager->interval_ends_ == NULL) { - CostManagerClear(manager); - return 0; - } - manager->interval_ends_size_ = size; - } - - return 1; -} - -// Given the distance_cost for pixel 'index', update the cost at pixel 'i' if it -// is smaller than the previously computed value. -static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, int index, - double distance_cost) { - int k = i - index; - double cost_tmp; - assert(k >= 0 && k < MAX_LENGTH); - cost_tmp = distance_cost + manager->cost_cache_[k]; - - if (manager->costs_[i] > cost_tmp) { - manager->costs_[i] = (float)cost_tmp; - manager->dist_array_[i] = k + 1; - } -} - -// Given the distance_cost for pixel 'index', update the cost for all the pixels -// between 'start' and 'end' excluded. -static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, - int start, int end, int index, - double distance_cost) { - int i; - for (i = start; i < end; ++i) UpdateCost(manager, i, index, distance_cost); -} - -// Given two intervals, make 'prev' be the previous one of 'next' in 'manager'. -static WEBP_INLINE void ConnectIntervals(CostManager* const manager, - CostInterval* const prev, - CostInterval* const next) { - if (prev != NULL) { - prev->next_ = next; - } else { - manager->head_ = next; - } - - if (next != NULL) next->previous_ = prev; -} - -// Pop an interval in the manager. -static WEBP_INLINE void PopInterval(CostManager* const manager, - CostInterval* const interval) { - CostInterval* const next = interval->next_; - - if (interval == NULL) return; - - ConnectIntervals(manager, interval->previous_, next); - if (CostIntervalIsInFreeList(manager, interval)) { - CostIntervalAddToFreeList(manager, interval); - } else { // recycle regularly malloc'd intervals too - interval->next_ = manager->recycled_intervals_; - manager->recycled_intervals_ = interval; - } - --manager->count_; - assert(manager->count_ >= 0); -} - -// Update the cost at index i by going over all the stored intervals that -// overlap with i. -static WEBP_INLINE void UpdateCostPerIndex(CostManager* const manager, int i) { - CostInterval* current = manager->head_; - - while (current != NULL && current->start_ <= i) { - if (current->end_ <= i) { - // We have an outdated interval, remove it. - CostInterval* next = current->next_; - PopInterval(manager, current); - current = next; - } else { - UpdateCost(manager, i, current->index_, current->distance_cost_); - current = current->next_; - } - } -} - -// Given a current orphan interval and its previous interval, before -// it was orphaned (which can be NULL), set it at the right place in the list -// of intervals using the start_ ordering and the previous interval as a hint. -static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, - CostInterval* const current, - CostInterval* previous) { - assert(current != NULL); - - if (previous == NULL) previous = manager->head_; - while (previous != NULL && current->start_ < previous->start_) { - previous = previous->previous_; - } - while (previous != NULL && previous->next_ != NULL && - previous->next_->start_ < current->start_) { - previous = previous->next_; - } - - if (previous != NULL) { - ConnectIntervals(manager, current, previous->next_); - } else { - ConnectIntervals(manager, current, manager->head_); - } - ConnectIntervals(manager, previous, current); -} - -// Insert an interval in the list contained in the manager by starting at -// interval_in as a hint. The intervals are sorted by start_ value. -static WEBP_INLINE void InsertInterval(CostManager* const manager, - CostInterval* const interval_in, - double distance_cost, double lower, - double upper, int index, int start, - int end) { - CostInterval* interval_new; - - if (IsCostCacheIntervalWritable(start, end) || - manager->count_ >= COST_CACHE_INTERVAL_SIZE_MAX) { - // Write down the interval if it is too small. - UpdateCostPerInterval(manager, start, end, index, distance_cost); - return; - } - if (manager->free_intervals_ != NULL) { - interval_new = manager->free_intervals_; - manager->free_intervals_ = interval_new->next_; - } else if (manager->recycled_intervals_ != NULL) { - interval_new = manager->recycled_intervals_; - manager->recycled_intervals_ = interval_new->next_; - } else { // malloc for good - interval_new = (CostInterval*)WebPSafeMalloc(1, sizeof(*interval_new)); - if (interval_new == NULL) { - // Write down the interval if we cannot create it. - UpdateCostPerInterval(manager, start, end, index, distance_cost); - return; - } - } - - interval_new->distance_cost_ = distance_cost; - interval_new->lower_ = lower; - interval_new->upper_ = upper; - interval_new->index_ = index; - interval_new->start_ = start; - interval_new->end_ = end; - PositionOrphanInterval(manager, interval_new, interval_in); - - ++manager->count_; -} - -// When an interval has its start_ or end_ modified, it needs to be -// repositioned in the linked list. -static WEBP_INLINE void RepositionInterval(CostManager* const manager, - CostInterval* const interval) { - if (IsCostCacheIntervalWritable(interval->start_, interval->end_)) { - // Maybe interval has been resized and is small enough to be removed. - UpdateCostPerInterval(manager, interval->start_, interval->end_, - interval->index_, interval->distance_cost_); - PopInterval(manager, interval); - return; - } - - // Early exit if interval is at the right spot. - if ((interval->previous_ == NULL || - interval->previous_->start_ <= interval->start_) && - (interval->next_ == NULL || - interval->start_ <= interval->next_->start_)) { - return; - } - - ConnectIntervals(manager, interval->previous_, interval->next_); - PositionOrphanInterval(manager, interval, interval->previous_); -} - -// Given a new cost interval defined by its start at index, its last value and -// distance_cost, add its contributions to the previous intervals and costs. -// If handling the interval or one of its subintervals becomes to heavy, its -// contribution is added to the costs right away. -static WEBP_INLINE void PushInterval(CostManager* const manager, - double distance_cost, int index, - int last) { - size_t i; - CostInterval* interval = manager->head_; - CostInterval* interval_next; - const CostCacheInterval* const cost_cache_intervals = - manager->cache_intervals_; - - for (i = 0; i < manager->cache_intervals_size_ && - cost_cache_intervals[i].start_ < last; - ++i) { - // Define the intersection of the ith interval with the new one. - int start = index + cost_cache_intervals[i].start_; - const int end = index + (cost_cache_intervals[i].end_ > last - ? last - : cost_cache_intervals[i].end_); - const double lower_in = cost_cache_intervals[i].lower_; - const double upper_in = cost_cache_intervals[i].upper_; - const double lower_full_in = distance_cost + lower_in; - const double upper_full_in = distance_cost + upper_in; - - if (cost_cache_intervals[i].do_write_) { - UpdateCostPerInterval(manager, start, end, index, distance_cost); - continue; + // For narrow images, not all plane codes are reached, so remove those. + for (i = 0; i < WINDOW_OFFSETS_SIZE_MAX; ++i) { + if (window_offsets[i] == 0) continue; + window_offsets[window_offsets_size++] = window_offsets[i]; } - - for (; interval != NULL && interval->start_ < end && start < end; - interval = interval_next) { - const double lower_full_interval = - interval->distance_cost_ + interval->lower_; - const double upper_full_interval = - interval->distance_cost_ + interval->upper_; - - interval_next = interval->next_; - - // Make sure we have some overlap - if (start >= interval->end_) continue; - - if (lower_full_in >= upper_full_interval) { - // When intervals are represented, the lower, the better. - // [**********************************************************] - // start end - // [----------------------------------] - // interval->start_ interval->end_ - // If we are worse than what we already have, add whatever we have so - // far up to interval. - const int start_new = interval->end_; - InsertInterval(manager, interval, distance_cost, lower_in, upper_in, - index, start, interval->start_); - start = start_new; - continue; + // Given a pixel P, find the offsets that reach pixels unreachable from P-1 + // with any of the offsets in window_offsets[]. + for (i = 0; i < window_offsets_size; ++i) { + int j; + int is_reachable = 0; + for (j = 0; j < window_offsets_size && !is_reachable; ++j) { + is_reachable |= (window_offsets[i] == window_offsets[j] + 1); } - - // We know the two intervals intersect. - if (upper_full_in >= lower_full_interval) { - // There is no clear cut on which is best, so let's keep both. - // [*********[*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*]***********] - // start interval->start_ interval->end_ end - // OR - // [*********[*-*-*-*-*-*-*-*-*-*-*-]----------------------] - // start interval->start_ end interval->end_ - const int end_new = (interval->end_ <= end) ? interval->end_ : end; - InsertInterval(manager, interval, distance_cost, lower_in, upper_in, - index, start, end_new); - start = end_new; - } else if (start <= interval->start_ && interval->end_ <= end) { - // [----------------------------------] - // interval->start_ interval->end_ - // [**************************************************************] - // start end - // We can safely remove the old interval as it is fully included. - PopInterval(manager, interval); - } else { - if (interval->start_ <= start && end <= interval->end_) { - // [--------------------------------------------------------------] - // interval->start_ interval->end_ - // [*****************************] - // start end - // We have to split the old interval as it fully contains the new one. - const int end_original = interval->end_; - interval->end_ = start; - InsertInterval(manager, interval, interval->distance_cost_, - interval->lower_, interval->upper_, interval->index_, - end, end_original); - } else if (interval->start_ < start) { - // [------------------------------------] - // interval->start_ interval->end_ - // [*****************************] - // start end - interval->end_ = start; - } else { - // [------------------------------------] - // interval->start_ interval->end_ - // [*****************************] - // start end - interval->start_ = end; - } - - // The interval has been modified, we need to reposition it or write it. - RepositionInterval(manager, interval); + if (!is_reachable) { + window_offsets_new[window_offsets_new_size] = window_offsets[i]; + ++window_offsets_new_size; } } - // Insert the remaining interval from start to end. - InsertInterval(manager, interval, distance_cost, lower_in, upper_in, index, - start, end); - } -} - -static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, const uint32_t* const argb, int quality, - int cache_bits, const VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs, uint16_t* const dist_array) { - int i; - int ok = 0; - int cc_init = 0; - const int pix_count = xsize * ysize; - const int use_color_cache = (cache_bits > 0); - const size_t literal_array_size = sizeof(double) * - (NUM_LITERAL_CODES + NUM_LENGTH_CODES + - ((cache_bits > 0) ? (1 << cache_bits) : 0)); - const size_t cost_model_size = sizeof(CostModel) + literal_array_size; - CostModel* const cost_model = - (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); - VP8LColorCache hashers; - const int skip_length = 32 + quality; - const int skip_min_distance_code = 2; - CostManager* cost_manager = - (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); - - if (cost_model == NULL || cost_manager == NULL) goto Error; - - cost_model->literal_ = (double*)(cost_model + 1); - if (use_color_cache) { - cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) goto Error; } - if (!CostModelBuild(cost_model, cache_bits, refs)) { - goto Error; - } - - if (!CostManagerInit(cost_manager, dist_array, pix_count, cost_model)) { - goto Error; - } - - // We loop one pixel at a time, but store all currently best points to - // non-processed locations from this point. - dist_array[0] = 0; - // Add first pixel as literal. - AddSingleLiteralWithCostModel(argb + 0, &hashers, cost_model, 0, - use_color_cache, 0.0, cost_manager->costs_, - dist_array); - - for (i = 1; i < pix_count - 1; ++i) { - int offset = 0, len = 0; - double prev_cost = cost_manager->costs_[i - 1]; - HashChainFindCopy(hash_chain, i, &offset, &len); - if (len >= 2) { - // If we are dealing with a non-literal. - const int code = DistanceToPlaneCode(xsize, offset); - const double offset_cost = GetDistanceCost(cost_model, code); - const int first_i = i; - int j_max = 0, interval_ends_index = 0; - const int is_offset_zero = (offset_cost == 0.); - - if (!is_offset_zero) { - j_max = (int)ceil( - (cost_manager->max_cost_cache_ - cost_manager->min_cost_cache_) / - offset_cost); - if (j_max < 1) { - j_max = 1; - } else if (j_max > cost_manager->interval_ends_size_ - 1) { - // This could only happen in the case of MAX_LENGTH. - j_max = cost_manager->interval_ends_size_ - 1; + hash_chain->offset_length_[0] = 0; + for (i = 1; i < pix_count; ++i) { + int ind; + int best_length = VP8LHashChainFindLength(hash_chain_best, i); + int best_offset; + int do_compute = 1; + + if (best_length >= MAX_LENGTH) { + // Do not recompute the best match if we already have a maximal one in the + // window. + best_offset = VP8LHashChainFindOffset(hash_chain_best, i); + for (ind = 0; ind < window_offsets_size; ++ind) { + if (best_offset == window_offsets[ind]) { + do_compute = 0; + break; } - } // else j_max is unused anyway. - - // Instead of considering all contributions from a pixel i by calling: - // PushInterval(cost_manager, prev_cost + offset_cost, i, len); - // we optimize these contributions in case offset_cost stays the same for - // consecutive pixels. This describes a set of pixels similar to a - // previous set (e.g. constant color regions). - for (; i < pix_count - 1; ++i) { - int offset_next, len_next; - prev_cost = cost_manager->costs_[i - 1]; - - if (is_offset_zero) { - // No optimization can be made so we just push all of the - // contributions from i. - PushInterval(cost_manager, prev_cost, i, len); - } else { - // j_max is chosen as the smallest j such that: - // max of cost_cache_ < j*offset cost + min of cost_cache_ - // Therefore, the pixel influenced by i-j_max, cannot be influenced - // by i. Only the costs after the end of what i contributed need to be - // updated. cost_manager->interval_ends_ is a circular buffer that - // stores those ends. - const double distance_cost = prev_cost + offset_cost; - int j = cost_manager->interval_ends_[interval_ends_index]; - if (i - first_i <= j_max || - !IsCostCacheIntervalWritable(j, i + len)) { - PushInterval(cost_manager, distance_cost, i, len); - } else { - for (; j < i + len; ++j) { - UpdateCost(cost_manager, j, i, distance_cost); - } - } - // Store the new end in the circular buffer. - assert(interval_ends_index < cost_manager->interval_ends_size_); - cost_manager->interval_ends_[interval_ends_index] = i + len; - if (++interval_ends_index > j_max) interval_ends_index = 0; - } - - // Check whether i is the last pixel to consider, as it is handled - // differently. - if (i + 1 >= pix_count - 1) break; - HashChainFindCopy(hash_chain, i + 1, &offset_next, &len_next); - if (offset_next != offset) break; - len = len_next; - UpdateCostPerIndex(cost_manager, i); - AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, - use_color_cache, prev_cost, - cost_manager->costs_, dist_array); } - // Submit the last pixel. - UpdateCostPerIndex(cost_manager, i + 1); - - // This if is for speedup only. It roughly doubles the speed, and - // makes compression worse by .1 %. - if (len >= skip_length && code <= skip_min_distance_code) { - // Long copy for short distances, let's skip the middle - // lookups for better copies. - // 1) insert the hashes. - if (use_color_cache) { - int k; - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); + } + if (do_compute) { + // Figure out if we should use the offset/length from the previous pixel + // as an initial guess and therefore only inspect the offsets in + // window_offsets_new[]. + const int use_prev = + (best_length_prev > 1) && (best_length_prev < MAX_LENGTH); + const int num_ind = + use_prev ? window_offsets_new_size : window_offsets_size; + best_length = use_prev ? best_length_prev - 1 : 0; + best_offset = use_prev ? best_offset_prev : 0; + // Find the longest match in a window around the pixel. + for (ind = 0; ind < num_ind; ++ind) { + int curr_length = 0; + int j = i; + int j_offset = + use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind]; + if (j_offset < 0 || argb[j_offset] != argb[i]) continue; + // The longest match is the sum of how many times each pixel is + // repeated. + do { + const int counts_j_offset = counts_ini[j_offset]; + const int counts_j = counts_ini[j]; + if (counts_j_offset != counts_j) { + curr_length += + (counts_j_offset < counts_j) ? counts_j_offset : counts_j; + break; + } + // The same color is repeated counts_pos times at j_offset and j. + curr_length += counts_j_offset; + j_offset += counts_j_offset; + j += counts_j_offset; + } while (curr_length <= MAX_LENGTH && j < pix_count && + argb[j_offset] == argb[j]); + if (best_length < curr_length) { + best_offset = + use_prev ? window_offsets_new[ind] : window_offsets[ind]; + if (curr_length >= MAX_LENGTH) { + best_length = MAX_LENGTH; + break; + } else { + best_length = curr_length; } - } - // 2) jump. - { - const int i_next = i + len - 1; // for loop does ++i, thus -1 here. - for (; i <= i_next; ++i) UpdateCostPerIndex(cost_manager, i + 1); - i = i_next; - } - goto next_symbol; - } - if (len > 2) { - // Also try the smallest interval possible (size 2). - double cost_total = - prev_cost + offset_cost + GetLengthCost(cost_model, 1); - if (cost_manager->costs_[i + 1] > cost_total) { - cost_manager->costs_[i + 1] = (float)cost_total; - dist_array[i + 1] = 2; } } - } else { - // The pixel is added as a single literal so just update the costs. - UpdateCostPerIndex(cost_manager, i + 1); } - AddSingleLiteralWithCostModel(argb + i, &hashers, cost_model, i, - use_color_cache, prev_cost, - cost_manager->costs_, dist_array); - - next_symbol: ; - } - // Handle the last pixel. - if (i == (pix_count - 1)) { - AddSingleLiteralWithCostModel( - argb + i, &hashers, cost_model, i, use_color_cache, - cost_manager->costs_[pix_count - 2], cost_manager->costs_, dist_array); - } - - ok = !refs->error_; - Error: - if (cc_init) VP8LColorCacheClear(&hashers); - CostManagerClear(cost_manager); - WebPSafeFree(cost_model); - WebPSafeFree(cost_manager); - return ok; -} - -// We pack the path at the end of *dist_array and return -// a pointer to this part of the array. Example: -// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232] -static void TraceBackwards(uint16_t* const dist_array, - int dist_array_size, - uint16_t** const chosen_path, - int* const chosen_path_size) { - uint16_t* path = dist_array + dist_array_size; - uint16_t* cur = dist_array + dist_array_size - 1; - while (cur >= dist_array) { - const int k = *cur; - --path; - *path = k; - cur -= k; - } - *chosen_path = path; - *chosen_path_size = (int)(dist_array + dist_array_size - path); -} - -static int BackwardReferencesHashChainFollowChosenPath( - const uint32_t* const argb, int cache_bits, - const uint16_t* const chosen_path, int chosen_path_size, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { - const int use_color_cache = (cache_bits > 0); - int ix; - int i = 0; - int ok = 0; - int cc_init = 0; - VP8LColorCache hashers; - - if (use_color_cache) { - cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) goto Error; - } - - ClearBackwardRefs(refs); - for (ix = 0; ix < chosen_path_size; ++ix) { - const int len = chosen_path[ix]; - if (len != 1) { - int k; - const int offset = HashChainFindOffset(hash_chain, i); - BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } - } - i += len; + assert(i + best_length <= pix_count); + assert(best_length <= MAX_LENGTH); + if (best_length <= MIN_LENGTH) { + hash_chain->offset_length_[i] = 0; + best_offset_prev = 0; + best_length_prev = 0; } else { - PixOrCopy v; - const int idx = - use_color_cache ? VP8LColorCacheContains(&hashers, argb[i]) : -1; - if (idx >= 0) { - // use_color_cache is true and hashers contains argb[i] - // push pixel as a color cache index - v = PixOrCopyCreateCacheIdx(idx); - } else { - if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); - v = PixOrCopyCreateLiteral(argb[i]); - } - BackwardRefsCursorAdd(refs, v); - ++i; + hash_chain->offset_length_[i] = + (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length; + best_offset_prev = best_offset; + best_length_prev = best_length; } } - ok = !refs->error_; - Error: - if (cc_init) VP8LColorCacheClear(&hashers); - return ok; -} + hash_chain->offset_length_[0] = 0; + WebPSafeFree(counts_ini); -// Returns 1 on success. -static int BackwardReferencesTraceBackwards( - int xsize, int ysize, const uint32_t* const argb, int quality, - int cache_bits, const VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs) { - int ok = 0; - const int dist_array_size = xsize * ysize; - uint16_t* chosen_path = NULL; - int chosen_path_size = 0; - uint16_t* dist_array = - (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); - - if (dist_array == NULL) goto Error; - - if (!BackwardReferencesHashChainDistanceOnly( - xsize, ysize, argb, quality, cache_bits, hash_chain, - refs, dist_array)) { - goto Error; - } - TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); - if (!BackwardReferencesHashChainFollowChosenPath( - argb, cache_bits, chosen_path, chosen_path_size, hash_chain, refs)) { - goto Error; - } - ok = 1; - Error: - WebPSafeFree(dist_array); - return ok; + return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain, + refs); } +// ----------------------------------------------------------------------------- + static void BackwardReferences2DLocality(int xsize, const VP8LBackwardRefs* const refs) { VP8LRefsCursor c = VP8LRefsCursorInit(refs); while (VP8LRefsCursorOk(&c)) { if (PixOrCopyIsCopy(c.cur_pos)) { const int dist = c.cur_pos->argb_or_distance; - const int transformed_dist = DistanceToPlaneCode(xsize, dist); + const int transformed_dist = VP8LDistanceToPlaneCode(xsize, dist); c.cur_pos->argb_or_distance = transformed_dist; } VP8LRefsCursorNext(&c); } } -// Computes the entropies for a color cache size (in bits) between 0 (unused) -// and cache_bits_max (inclusive). -// Returns 1 on success, 0 in case of allocation error. -static int ComputeCacheEntropies(const uint32_t* argb, - const VP8LBackwardRefs* const refs, - int cache_bits_max, double entropies[]) { +// Evaluate optimal cache bits for the local color cache. +// The input *best_cache_bits sets the maximum cache bits to use (passing 0 +// implies disabling the local color cache). The local color cache is also +// disabled for the lower (<= 25) quality. +// Returns 0 in case of memory error. +static int CalculateBestCacheSize(const uint32_t* argb, int quality, + const VP8LBackwardRefs* const refs, + int* const best_cache_bits) { + int i; + const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits; + double entropy_min = MAX_ENTROPY; int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; VP8LRefsCursor c = VP8LRefsCursorInit(refs); VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL }; int ok = 0; - int i; + assert(cache_bits_max >= 0 && cache_bits_max <= MAX_COLOR_CACHE_BITS); + + if (cache_bits_max == 0) { + *best_cache_bits = 0; + // Local color cache is disabled. + return 1; + } + + // Allocate data. for (i = 0; i <= cache_bits_max; ++i) { histos[i] = VP8LAllocateHistogram(i); if (histos[i] == NULL) goto Error; @@ -1547,60 +720,65 @@ static int ComputeCacheEntropies(const uint32_t* argb, if (!cc_init[i]) goto Error; } - assert(cache_bits_max >= 0); - // Do not use the color cache for cache_bits=0. + // Find the cache_bits giving the lowest entropy. The search is done in a + // brute-force way as the function (entropy w.r.t cache_bits) can be + // anything in practice. while (VP8LRefsCursorOk(&c)) { - VP8LHistogramAddSinglePixOrCopy(histos[0], c.cur_pos); - VP8LRefsCursorNext(&c); - } - if (cache_bits_max > 0) { - c = VP8LRefsCursorInit(refs); - while (VP8LRefsCursorOk(&c)) { - const PixOrCopy* const v = c.cur_pos; - if (PixOrCopyIsLiteral(v)) { - const uint32_t pix = *argb++; - // The keys of the caches can be derived from the longest one. - int key = HashPix(pix, 32 - cache_bits_max); - for (i = cache_bits_max; i >= 1; --i, key >>= 1) { - if (VP8LColorCacheLookup(&hashers[i], key) == pix) { - ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; - } else { - VP8LColorCacheSet(&hashers[i], key, pix); - ++histos[i]->blue_[pix & 0xff]; - ++histos[i]->literal_[(pix >> 8) & 0xff]; - ++histos[i]->red_[(pix >> 16) & 0xff]; - ++histos[i]->alpha_[pix >> 24]; - } - } - } else { - // Update the histograms for distance/length. - int len = PixOrCopyLength(v); - int code_dist, code_len, extra_bits; - uint32_t argb_prev = *argb ^ 0xffffffffu; - VP8LPrefixEncodeBits(len, &code_len, &extra_bits); - VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code_dist, &extra_bits); - for (i = 1; i <= cache_bits_max; ++i) { - ++histos[i]->literal_[NUM_LITERAL_CODES + code_len]; - ++histos[i]->distance_[code_dist]; + const PixOrCopy* const v = c.cur_pos; + if (PixOrCopyIsLiteral(v)) { + const uint32_t pix = *argb++; + const uint32_t a = (pix >> 24) & 0xff; + const uint32_t r = (pix >> 16) & 0xff; + const uint32_t g = (pix >> 8) & 0xff; + const uint32_t b = (pix >> 0) & 0xff; + // The keys of the caches can be derived from the longest one. + int key = VP8LHashPix(pix, 32 - cache_bits_max); + // Do not use the color cache for cache_bits = 0. + ++histos[0]->blue_[b]; + ++histos[0]->literal_[g]; + ++histos[0]->red_[r]; + ++histos[0]->alpha_[a]; + // Deal with cache_bits > 0. + for (i = cache_bits_max; i >= 1; --i, key >>= 1) { + if (VP8LColorCacheLookup(&hashers[i], key) == pix) { + ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key]; + } else { + VP8LColorCacheSet(&hashers[i], key, pix); + ++histos[i]->blue_[b]; + ++histos[i]->literal_[g]; + ++histos[i]->red_[r]; + ++histos[i]->alpha_[a]; } - // Update the colors caches. - do { - if (*argb != argb_prev) { - // Efficiency: insert only if the color changes. - int key = HashPix(*argb, 32 - cache_bits_max); - for (i = cache_bits_max; i >= 1; --i, key >>= 1) { - hashers[i].colors_[key] = *argb; - } - argb_prev = *argb; - } - argb++; - } while (--len != 0); } - VP8LRefsCursorNext(&c); + } else { + // We should compute the contribution of the (distance,length) + // histograms but those are the same independently from the cache size. + // As those constant contributions are in the end added to the other + // histogram contributions, we can safely ignore them. + int len = PixOrCopyLength(v); + uint32_t argb_prev = *argb ^ 0xffffffffu; + // Update the color caches. + do { + if (*argb != argb_prev) { + // Efficiency: insert only if the color changes. + int key = VP8LHashPix(*argb, 32 - cache_bits_max); + for (i = cache_bits_max; i >= 1; --i, key >>= 1) { + hashers[i].colors_[key] = *argb; + } + argb_prev = *argb; + } + argb++; + } while (--len != 0); } + VP8LRefsCursorNext(&c); } + for (i = 0; i <= cache_bits_max; ++i) { - entropies[i] = VP8LHistogramEstimateBits(histos[i]); + const double entropy = VP8LHistogramEstimateBits(histos[i]); + if (i == 0 || entropy < entropy_min) { + entropy_min = entropy; + *best_cache_bits = i; + } } ok = 1; Error: @@ -1611,50 +789,6 @@ Error: return ok; } -// Evaluate optimal cache bits for the local color cache. -// The input *best_cache_bits sets the maximum cache bits to use (passing 0 -// implies disabling the local color cache). The local color cache is also -// disabled for the lower (<= 25) quality. -// Returns 0 in case of memory error. -static int CalculateBestCacheSize(const uint32_t* const argb, - int xsize, int ysize, int quality, - const VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs, - int* const lz77_computed, - int* const best_cache_bits) { - int i; - int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits; - double entropy_min = MAX_ENTROPY; - double entropies[MAX_COLOR_CACHE_BITS + 1]; - - assert(cache_bits_high <= MAX_COLOR_CACHE_BITS); - - *lz77_computed = 0; - if (cache_bits_high == 0) { - *best_cache_bits = 0; - // Local color cache is disabled. - return 1; - } - // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color cache - // is not that different in practice. - if (!BackwardReferencesLz77(xsize, ysize, argb, 0, hash_chain, refs)) { - return 0; - } - // Find the cache_bits giving the lowest entropy. The search is done in a - // brute-force way as the function (entropy w.r.t cache_bits) can be - // anything in practice. - if (!ComputeCacheEntropies(argb, refs, cache_bits_high, entropies)) { - return 0; - } - for (i = 0; i <= cache_bits_high; ++i) { - if (i == 0 || entropies[i] < entropy_min) { - entropy_min = entropies[i]; - *best_cache_bits = i; - } - } - return 1; -} - // Update (in-place) backward references for specified cache_bits. static int BackwardRefsWithLocalCache(const uint32_t* const argb, int cache_bits, @@ -1693,8 +827,7 @@ static int BackwardRefsWithLocalCache(const uint32_t* const argb, static VP8LBackwardRefs* GetBackwardReferencesLowEffort( int width, int height, const uint32_t* const argb, int* const cache_bits, const VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2]) { - VP8LBackwardRefs* refs_lz77 = &refs_array[0]; + VP8LBackwardRefs* const refs_lz77) { *cache_bits = 0; if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) { return NULL; @@ -1703,98 +836,108 @@ static VP8LBackwardRefs* GetBackwardReferencesLowEffort( return refs_lz77; } +extern int VP8LBackwardReferencesTraceBackwards( + int xsize, int ysize, const uint32_t* const argb, int cache_bits, + const VP8LHashChain* const hash_chain, + const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst); static VP8LBackwardRefs* GetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int* const cache_bits, const VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2]) { - int lz77_is_useful; - int lz77_computed; - double bit_cost_lz77, bit_cost_rle; - VP8LBackwardRefs* best = NULL; - VP8LBackwardRefs* refs_lz77 = &refs_array[0]; - VP8LBackwardRefs* refs_rle = &refs_array[1]; + int lz77_types_to_try, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best, + VP8LBackwardRefs* worst) { + const int cache_bits_initial = *cache_bits; + double bit_cost_best = -1; VP8LHistogram* histo = NULL; + int lz77_type, lz77_type_best = 0; + VP8LHashChain hash_chain_box; + memset(&hash_chain_box, 0, sizeof(hash_chain_box)); - if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain, - refs_lz77, &lz77_computed, cache_bits)) { - goto Error; - } + histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS); + if (histo == NULL) goto Error; - if (lz77_computed) { - // Transform refs_lz77 for the optimized cache_bits. - if (*cache_bits > 0) { - if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) { - goto Error; - } + for (lz77_type = 1; lz77_types_to_try; + lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { + int res = 0; + double bit_cost; + int cache_bits_tmp = cache_bits_initial; + if ((lz77_types_to_try & lz77_type) == 0) continue; + switch (lz77_type) { + case kLZ77RLE: + res = BackwardReferencesRle(width, height, argb, 0, worst); + break; + case kLZ77Standard: + // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color + // cache is not that different in practice. + res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst); + break; + case kLZ77Box: + if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error; + res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain, + &hash_chain_box, worst); + break; + default: + assert(0); } - } else { - if (!BackwardReferencesLz77(width, height, argb, *cache_bits, hash_chain, - refs_lz77)) { + if (!res) goto Error; + + // Next, try with a color cache and update the references. + if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) { goto Error; } - } - - if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) { - goto Error; - } - - histo = VP8LAllocateHistogram(*cache_bits); - if (histo == NULL) goto Error; - - { - // Evaluate LZ77 coding. - VP8LHistogramCreate(histo, refs_lz77, *cache_bits); - bit_cost_lz77 = VP8LHistogramEstimateBits(histo); - // Evaluate RLE coding. - VP8LHistogramCreate(histo, refs_rle, *cache_bits); - bit_cost_rle = VP8LHistogramEstimateBits(histo); - // Decide if LZ77 is useful. - lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); - } - - // Choose appropriate backward reference. - if (lz77_is_useful) { - // TraceBackwards is costly. Don't execute it at lower quality. - const int try_lz77_trace_backwards = (quality >= 25); - best = refs_lz77; // default guess: lz77 is better - if (try_lz77_trace_backwards) { - VP8LBackwardRefs* const refs_trace = refs_rle; - if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) { - best = NULL; + if (cache_bits_tmp > 0) { + if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) { goto Error; } - if (BackwardReferencesTraceBackwards(width, height, argb, quality, - *cache_bits, hash_chain, - refs_trace)) { - double bit_cost_trace; - // Evaluate LZ77 coding. - VP8LHistogramCreate(histo, refs_trace, *cache_bits); - bit_cost_trace = VP8LHistogramEstimateBits(histo); - if (bit_cost_trace < bit_cost_lz77) { - best = refs_trace; - } - } } - } else { - best = refs_rle; + + // Keep the best backward references. + VP8LHistogramCreate(histo, worst, cache_bits_tmp); + bit_cost = VP8LHistogramEstimateBits(histo); + if (lz77_type_best == 0 || bit_cost < bit_cost_best) { + VP8LBackwardRefs* const tmp = worst; + worst = best; + best = tmp; + bit_cost_best = bit_cost; + *cache_bits = cache_bits_tmp; + lz77_type_best = lz77_type; + } + } + assert(lz77_type_best > 0); + + // Improve on simple LZ77 but only for high quality (TraceBackwards is + // costly). + if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) && + quality >= 25) { + const VP8LHashChain* const hash_chain_tmp = + (lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box; + if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits, + hash_chain_tmp, best, worst)) { + double bit_cost_trace; + VP8LHistogramCreate(histo, worst, *cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_cost_best) best = worst; + } } BackwardReferences2DLocality(width, best); - Error: +Error: + VP8LHashChainClear(&hash_chain_box); VP8LFreeHistogram(histo); return best; } VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { + int low_effort, int lz77_types_to_try, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, + VP8LBackwardRefs* const refs_tmp2) { if (low_effort) { return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, - hash_chain, refs_array); + hash_chain, refs_tmp1); } else { - return GetBackwardReferences(width, height, argb, quality, cache_bits, - hash_chain, refs_array); + return GetBackwardReferences(width, height, argb, quality, + lz77_types_to_try, cache_bits, hash_chain, + refs_tmp1, refs_tmp2); } } diff --git a/src/3rdparty/libwebp/src/enc/backward_references_enc.h b/src/3rdparty/libwebp/src/enc/backward_references_enc.h index 3a19aa7..103ddfd 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references_enc.h +++ b/src/3rdparty/libwebp/src/enc/backward_references_enc.h @@ -10,13 +10,13 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // -#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_ -#define WEBP_ENC_BACKWARD_REFERENCES_H_ +#ifndef WEBP_ENC_BACKWARD_REFERENCES_ENC_H_ +#define WEBP_ENC_BACKWARD_REFERENCES_ENC_H_ #include <assert.h> #include <stdlib.h> -#include "../webp/types.h" -#include "../webp/format_constants.h" +#include "src/webp/types.h" +#include "src/webp/format_constants.h" #ifdef __cplusplus extern "C" { @@ -91,11 +91,6 @@ static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) { return p->len; } -static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) { - assert(p->mode == kLiteral); - return p->argb_or_distance; -} - static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) { assert(p->mode == kCacheIdx); assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS)); @@ -113,6 +108,16 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { #define HASH_BITS 18 #define HASH_SIZE (1 << HASH_BITS) +// If you change this, you need MAX_LENGTH_BITS + WINDOW_SIZE_BITS <= 32 as it +// is used in VP8LHashChain. +#define MAX_LENGTH_BITS 12 +#define WINDOW_SIZE_BITS 20 +// We want the max value to be attainable and stored in MAX_LENGTH_BITS bits. +#define MAX_LENGTH ((1 << MAX_LENGTH_BITS) - 1) +#if MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32 +#error "MAX_LENGTH_BITS + WINDOW_SIZE_BITS > 32" +#endif + typedef struct VP8LHashChain VP8LHashChain; struct VP8LHashChain { // The 20 most significant bits contain the offset at which the best match @@ -134,6 +139,24 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, int low_effort); void VP8LHashChainClear(VP8LHashChain* const p); // release memory +static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] >> MAX_LENGTH_BITS; +} + +static WEBP_INLINE int VP8LHashChainFindLength(const VP8LHashChain* const p, + const int base_position) { + return p->offset_length_[base_position] & ((1U << MAX_LENGTH_BITS) - 1); +} + +static WEBP_INLINE void VP8LHashChainFindCopy(const VP8LHashChain* const p, + int base_position, + int* const offset_ptr, + int* const length_ptr) { + *offset_ptr = VP8LHashChainFindOffset(p, base_position); + *length_ptr = VP8LHashChainFindLength(p, base_position); +} + // ----------------------------------------------------------------------------- // VP8LBackwardRefs (block-based backward-references storage) @@ -158,9 +181,6 @@ struct VP8LBackwardRefs { void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size); // Release memory for backward references. void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs); -// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error. -int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, - VP8LBackwardRefs* const dst); // Cursor for iterating on references content typedef struct { @@ -189,6 +209,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { // ----------------------------------------------------------------------------- // Main entry points +enum VP8LLZ77Type { + kLZ77Standard = 1, + kLZ77RLE = 2, + kLZ77Box = 4 +}; + // Evaluates best possible backward references for specified quality. // The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache // bits to use (passing 0 implies disabling the local color cache). @@ -197,11 +223,12 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { // refs[0] or refs[1]. VP8LBackwardRefs* VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]); + int low_effort, int lz77_types_to_try, int* const cache_bits, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, + VP8LBackwardRefs* const refs_tmp2); #ifdef __cplusplus } #endif -#endif // WEBP_ENC_BACKWARD_REFERENCES_H_ +#endif // WEBP_ENC_BACKWARD_REFERENCES_ENC_H_ diff --git a/src/3rdparty/libwebp/src/enc/config_enc.c b/src/3rdparty/libwebp/src/enc/config_enc.c index 4589dc0..9d48289 100644 --- a/src/3rdparty/libwebp/src/enc/config_enc.c +++ b/src/3rdparty/libwebp/src/enc/config_enc.c @@ -12,10 +12,10 @@ // Author: Skal (pascal.massimino@gmail.com) #ifdef HAVE_CONFIG_H -#include "../webp/config.h" +#include "src/webp/config.h" #endif -#include "../webp/encode.h" +#include "src/webp/encode.h" //------------------------------------------------------------------------------ // WebPConfig diff --git a/src/3rdparty/libwebp/src/enc/cost_enc.c b/src/3rdparty/libwebp/src/enc/cost_enc.c index c823f5a..48fd9bc 100644 --- a/src/3rdparty/libwebp/src/enc/cost_enc.c +++ b/src/3rdparty/libwebp/src/enc/cost_enc.c @@ -11,7 +11,7 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include "./cost_enc.h" +#include "src/enc/cost_enc.h" //------------------------------------------------------------------------------ // Level cost tables diff --git a/src/3rdparty/libwebp/src/enc/cost_enc.h b/src/3rdparty/libwebp/src/enc/cost_enc.h index 99e4b37..bdce1e6 100644 --- a/src/3rdparty/libwebp/src/enc/cost_enc.h +++ b/src/3rdparty/libwebp/src/enc/cost_enc.h @@ -11,12 +11,12 @@ // // Author: Skal (pascal.massimino@gmail.com) -#ifndef WEBP_ENC_COST_H_ -#define WEBP_ENC_COST_H_ +#ifndef WEBP_ENC_COST_ENC_H_ +#define WEBP_ENC_COST_ENC_H_ #include <assert.h> #include <stdlib.h> -#include "./vp8i_enc.h" +#include "src/enc/vp8i_enc.h" #ifdef __cplusplus extern "C" { @@ -79,4 +79,4 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES]; } // extern "C" #endif -#endif /* WEBP_ENC_COST_H_ */ +#endif /* WEBP_ENC_COST_ENC_H_ */ diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization_enc.c b/src/3rdparty/libwebp/src/enc/delta_palettization_enc.c index eaf0f05..a61c8e6 100644 --- a/src/3rdparty/libwebp/src/enc/delta_palettization_enc.c +++ b/src/3rdparty/libwebp/src/enc/delta_palettization_enc.c @@ -10,11 +10,11 @@ // Author: Mislav Bradac (mislavm@google.com) // -#include "./delta_palettization_enc.h" +#include "src/enc/delta_palettization_enc.h" #ifdef WEBP_EXPERIMENTAL_FEATURES -#include "../webp/types.h" -#include "../dsp/lossless.h" +#include "src/webp/types.h" +#include "src/dsp/lossless.h" #define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b)) diff --git a/src/3rdparty/libwebp/src/enc/delta_palettization_enc.h b/src/3rdparty/libwebp/src/enc/delta_palettization_enc.h index 63048ec..b15e2cd 100644 --- a/src/3rdparty/libwebp/src/enc/delta_palettization_enc.h +++ b/src/3rdparty/libwebp/src/enc/delta_palettization_enc.h @@ -10,11 +10,11 @@ // Author: Mislav Bradac (mislavm@google.com) // -#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_ -#define WEBP_ENC_DELTA_PALETTIZATION_H_ +#ifndef WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ +#define WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ -#include "../webp/encode.h" -#include "../enc/vp8li_enc.h" +#include "src/webp/encode.h" +#include "src/enc/vp8li_enc.h" // Replaces enc->argb_[] input by a palettizable approximation of it, // and generates optimal enc->palette_[]. @@ -22,4 +22,4 @@ // if delta-palettization is not producing expected saving. WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc); -#endif // WEBP_ENC_DELTA_PALETTIZATION_H_ +#endif // WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ diff --git a/src/3rdparty/libwebp/src/enc/filter_enc.c b/src/3rdparty/libwebp/src/enc/filter_enc.c index 4bc3672..580800b 100644 --- a/src/3rdparty/libwebp/src/enc/filter_enc.c +++ b/src/3rdparty/libwebp/src/enc/filter_enc.c @@ -12,8 +12,8 @@ // Author: somnath@google.com (Somnath Banerjee) #include <assert.h> -#include "./vp8i_enc.h" -#include "../dsp/dsp.h" +#include "src/enc/vp8i_enc.h" +#include "src/dsp/dsp.h" // This table gives, for a given sharpness, the filtering strength to be // used (at least) in order to filter a given edge step delta. @@ -65,6 +65,8 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta) { //------------------------------------------------------------------------------ // Paragraph 15.4: compute the inner-edge filtering strength +#if !defined(WEBP_REDUCE_SIZE) + static int GetILevel(int sharpness, int level) { if (sharpness > 0) { if (sharpness > 4) { @@ -129,11 +131,14 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { return sum; } +#endif // !defined(WEBP_REDUCE_SIZE) + //------------------------------------------------------------------------------ // Exposed APIs: Encoder should call the following 3 functions to adjust // loop filter strength void VP8InitFilter(VP8EncIterator* const it) { +#if !defined(WEBP_REDUCE_SIZE) if (it->lf_stats_ != NULL) { int s, i; for (s = 0; s < NUM_MB_SEGMENTS; s++) { @@ -143,9 +148,13 @@ void VP8InitFilter(VP8EncIterator* const it) { } VP8SSIMDspInit(); } +#else + (void)it; +#endif } void VP8StoreFilterStats(VP8EncIterator* const it) { +#if !defined(WEBP_REDUCE_SIZE) int d; VP8Encoder* const enc = it->enc_; const int s = it->mb_->segment_; @@ -177,10 +186,14 @@ void VP8StoreFilterStats(VP8EncIterator* const it) { DoFilter(it, level); (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_); } +#else // defined(WEBP_REDUCE_SIZE) + (void)it; +#endif // !defined(WEBP_REDUCE_SIZE) } void VP8AdjustFilterStrength(VP8EncIterator* const it) { VP8Encoder* const enc = it->enc_; +#if !defined(WEBP_REDUCE_SIZE) if (it->lf_stats_ != NULL) { int s; for (s = 0; s < NUM_MB_SEGMENTS; s++) { @@ -196,7 +209,10 @@ void VP8AdjustFilterStrength(VP8EncIterator* const it) { } enc->dqm_[s].fstrength_ = best_level; } - } else if (enc->config_->filter_strength > 0) { + return; + } +#endif // !defined(WEBP_REDUCE_SIZE) + if (enc->config_->filter_strength > 0) { int max_level = 0; int s; for (s = 0; s < NUM_MB_SEGMENTS; s++) { diff --git a/src/3rdparty/libwebp/src/enc/frame_enc.c b/src/3rdparty/libwebp/src/enc/frame_enc.c index abef523..2b0dc66 100644 --- a/src/3rdparty/libwebp/src/enc/frame_enc.c +++ b/src/3rdparty/libwebp/src/enc/frame_enc.c @@ -14,10 +14,10 @@ #include <string.h> #include <math.h> -#include "./cost_enc.h" -#include "./vp8i_enc.h" -#include "../dsp/dsp.h" -#include "../webp/format_constants.h" // RIFF constants +#include "src/enc/cost_enc.h" +#include "src/enc/vp8i_enc.h" +#include "src/dsp/dsp.h" +#include "src/webp/format_constants.h" // RIFF constants #define SEGMENT_VISU 0 #define DEBUG_SEARCH 0 // useful to track search convergence @@ -200,11 +200,13 @@ static void SetSegmentProbas(VP8Encoder* const enc) { const VP8MBInfo* const mb = &enc->mb_info_[n]; p[mb->segment_]++; } +#if !defined(WEBP_DISABLE_STATS) if (enc->pic_->stats != NULL) { for (n = 0; n < NUM_MB_SEGMENTS; ++n) { enc->pic_->stats->segment_size[n] = p[n]; } } +#endif if (enc->segment_hdr_.num_segments_ > 1) { uint8_t* const probas = enc->proba_.segments_; probas[0] = GetProba(p[0] + p[1], p[2] + p[3]); @@ -452,6 +454,8 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, //------------------------------------------------------------------------------ // ExtraInfo map / Debug function +#if !defined(WEBP_DISABLE_STATS) + #if SEGMENT_VISU static void SetBlock(uint8_t* p, int value, int size) { int y; @@ -516,6 +520,20 @@ static void StoreSideInfo(const VP8EncIterator* const it) { #endif } +#else // defined(WEBP_DISABLE_STATS) +static void ResetSSE(VP8Encoder* const enc) { + (void)enc; +} +static void StoreSideInfo(const VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + WebPPicture* const pic = enc->pic_; + if (pic->extra_info != NULL) { + memset(pic->extra_info, 0, + enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info)); + } +} +#endif // !defined(WEBP_DISABLE_STATS) + static double GetPSNR(uint64_t mse, uint64_t size) { return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99; } @@ -640,7 +658,7 @@ static int StatLoop(VP8Encoder* const enc) { // Main loops // -static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 }; +static const uint8_t kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 }; static int PreLoopInitialize(VP8Encoder* const enc) { int p; @@ -670,6 +688,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) { } if (ok) { // All good. Finish up. +#if !defined(WEBP_DISABLE_STATS) if (enc->pic_->stats != NULL) { // finalize byte counters... int i, s; for (i = 0; i <= 2; ++i) { @@ -678,6 +697,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) { } } } +#endif VP8AdjustFilterStrength(it); // ...and store filter stats. } else { // Something bad happened -> need to do some memory cleanup. diff --git a/src/3rdparty/libwebp/src/enc/histogram_enc.c b/src/3rdparty/libwebp/src/enc/histogram_enc.c index 808b6f7..056a972 100644 --- a/src/3rdparty/libwebp/src/enc/histogram_enc.c +++ b/src/3rdparty/libwebp/src/enc/histogram_enc.c @@ -10,16 +10,16 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // #ifdef HAVE_CONFIG_H -#include "../webp/config.h" +#include "src/webp/config.h" #endif #include <math.h> -#include "./backward_references_enc.h" -#include "./histogram_enc.h" -#include "../dsp/lossless.h" -#include "../dsp/lossless_common.h" -#include "../utils/utils.h" +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" +#include "src/utils/utils.h" #define MAX_COST 1.e38 @@ -76,7 +76,7 @@ void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo) { VP8LRefsCursor c = VP8LRefsCursorInit(refs); while (VP8LRefsCursorOk(&c)) { - VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, NULL, 0); VP8LRefsCursorNext(&c); } } @@ -138,7 +138,9 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { // ----------------------------------------------------------------------------- void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, - const PixOrCopy* const v) { + const PixOrCopy* const v, + int (*const distance_modifier)(int, int), + int distance_modifier_arg0) { if (PixOrCopyIsLiteral(v)) { ++histo->alpha_[PixOrCopyLiteral(v, 3)]; ++histo->red_[PixOrCopyLiteral(v, 2)]; @@ -152,7 +154,13 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, int code, extra_bits; VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits); ++histo->literal_[NUM_LITERAL_CODES + code]; - VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + if (distance_modifier == NULL) { + VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); + } else { + VP8LPrefixEncodeBits( + distance_modifier(distance_modifier_arg0, PixOrCopyDistance(v)), + &code, &extra_bits); + } ++histo->distance_[code]; } } @@ -473,7 +481,7 @@ static void HistogramBuild( while (VP8LRefsCursorOk(&c)) { const PixOrCopy* const v = c.cur_pos; const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); - VP8LHistogramAddSinglePixOrCopy(histograms[ix], v); + VP8LHistogramAddSinglePixOrCopy(histograms[ix], v, NULL, 0); x += PixOrCopyLength(v); while (x >= xsize) { x -= xsize; @@ -523,11 +531,12 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // Compact image_histo[] by merging some histograms with same bin_id together if // it's advantageous. -static VP8LHistogram* HistogramCombineEntropyBin( - VP8LHistogramSet* const image_histo, - VP8LHistogram* cur_combo, - const uint16_t* const bin_map, int bin_map_size, int num_bins, - double combine_cost_factor, int low_effort) { +static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, + VP8LHistogram* cur_combo, + const uint16_t* const bin_map, + int bin_map_size, int num_bins, + double combine_cost_factor, + int low_effort) { VP8LHistogram** const histograms = image_histo->histograms; int idx; // Work in-place: processed histograms are put at the beginning of @@ -593,14 +602,13 @@ static VP8LHistogram* HistogramCombineEntropyBin( UpdateHistogramCost(histograms[idx]); } } - return cur_combo; } +// Implement a Lehmer random number generator with a multiplicative constant of +// 48271 and a modulo constant of 2^31 − 1. static uint32_t MyRand(uint32_t* const seed) { - *seed = (*seed * 16807ull) & 0xffffffffu; - if (*seed == 0) { - *seed = 1; - } + *seed = (uint32_t)(((uint64_t)(*seed) * 48271u) % 2147483647u); + assert(*seed > 0); return *seed; } @@ -641,57 +649,75 @@ static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) { static void HistoQueueClear(HistoQueue* const histo_queue) { assert(histo_queue != NULL); WebPSafeFree(histo_queue->queue); + histo_queue->size = 0; + histo_queue->max_size = 0; } -static void SwapHistogramPairs(HistogramPair *p1, - HistogramPair *p2) { - const HistogramPair tmp = *p1; - *p1 = *p2; - *p2 = tmp; +// Pop a specific pair in the queue by replacing it with the last one +// and shrinking the queue. +static void HistoQueuePopPair(HistoQueue* const histo_queue, + HistogramPair* const pair) { + assert(pair >= histo_queue->queue && + pair < (histo_queue->queue + histo_queue->size)); + assert(histo_queue->size > 0); + *pair = histo_queue->queue[histo_queue->size - 1]; + --histo_queue->size; } -// Given a valid priority queue in range [0, queue_size) this function checks -// whether histo_queue[queue_size] should be accepted and swaps it with the -// front if it is smaller. Otherwise, it leaves it as is. -static void UpdateQueueFront(HistoQueue* const histo_queue) { - if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return; - - if (histo_queue->queue[histo_queue->size].cost_diff < - histo_queue->queue[0].cost_diff) { - SwapHistogramPairs(histo_queue->queue, - histo_queue->queue + histo_queue->size); +// Check whether a pair in the queue should be updated as head or not. +static void HistoQueueUpdateHead(HistoQueue* const histo_queue, + HistogramPair* const pair) { + assert(pair->cost_diff < 0.); + assert(pair >= histo_queue->queue && + pair < (histo_queue->queue + histo_queue->size)); + assert(histo_queue->size > 0); + if (pair->cost_diff < histo_queue->queue[0].cost_diff) { + // Replace the best pair. + const HistogramPair tmp = histo_queue->queue[0]; + histo_queue->queue[0] = *pair; + *pair = tmp; } - ++histo_queue->size; - - // We cannot add more elements than the capacity. - // The allocation adds an extra element to the official capacity so that - // histo_queue->queue[histo_queue->max_size] is read/written within bound. - assert(histo_queue->size <= histo_queue->max_size); } -// ----------------------------------------------------------------------------- - -static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2, - HistogramPair* const pair) { - VP8LHistogram* h1; - VP8LHistogram* h2; +// Create a pair from indices "idx1" and "idx2" provided its cost +// is inferior to "threshold", a negative entropy. +// It returns the cost of the pair, or 0. if it superior to threshold. +static double HistoQueuePush(HistoQueue* const histo_queue, + VP8LHistogram** const histograms, int idx1, + int idx2, double threshold) { + const VP8LHistogram* h1; + const VP8LHistogram* h2; + HistogramPair pair; double sum_cost; + assert(threshold <= 0.); if (idx1 > idx2) { const int tmp = idx2; idx2 = idx1; idx1 = tmp; } - pair->idx1 = idx1; - pair->idx2 = idx2; + pair.idx1 = idx1; + pair.idx2 = idx2; h1 = histograms[idx1]; h2 = histograms[idx2]; sum_cost = h1->bit_cost_ + h2->bit_cost_; - pair->cost_combo = 0.; - GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo); - pair->cost_diff = pair->cost_combo - sum_cost; + pair.cost_combo = 0.; + GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair.cost_combo); + pair.cost_diff = pair.cost_combo - sum_cost; + + // Do not even consider the pair if it does not improve the entropy. + if (pair.cost_diff >= threshold) return 0.; + + // We cannot add more elements than the capacity. + assert(histo_queue->size < histo_queue->max_size); + histo_queue->queue[histo_queue->size++] = pair; + HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]); + + return pair.cost_diff; } +// ----------------------------------------------------------------------------- + // Combines histograms by continuously choosing the one with the highest cost // reduction. static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { @@ -714,13 +740,11 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { clusters[i] = i; for (j = i + 1; j < image_histo_size; ++j) { // Initialize positions array. - PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]); - UpdateQueueFront(&histo_queue); + HistoQueuePush(&histo_queue, histograms, i, j, 0.); } } while (image_histo_size > 1 && histo_queue.size > 0) { - HistogramPair* copy_to; const int idx1 = histo_queue.queue[0].idx1; const int idx2 = histo_queue.queue[0].idx2; HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); @@ -733,31 +757,22 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { } --image_histo_size; - // Remove pairs intersecting the just combined best pair. This will - // therefore pop the head of the queue. - copy_to = histo_queue.queue; - for (i = 0; i < histo_queue.size; ++i) { + // Remove pairs intersecting the just combined best pair. + for (i = 0; i < histo_queue.size;) { HistogramPair* const p = histo_queue.queue + i; if (p->idx1 == idx1 || p->idx2 == idx1 || p->idx1 == idx2 || p->idx2 == idx2) { - // Do not copy the invalid pair. - continue; - } - if (p->cost_diff < histo_queue.queue[0].cost_diff) { - // Replace the top of the queue if we found better. - SwapHistogramPairs(histo_queue.queue, p); + HistoQueuePopPair(&histo_queue, p); + } else { + HistoQueueUpdateHead(&histo_queue, p); + ++i; } - SwapHistogramPairs(copy_to, p); - ++copy_to; } - histo_queue.size = (int)(copy_to - histo_queue.queue); // Push new pairs formed with combined histogram to the queue. for (i = 0; i < image_histo_size; ++i) { if (clusters[i] != idx1) { - PreparePair(histograms, idx1, clusters[i], - &histo_queue.queue[histo_queue.size]); - UpdateQueueFront(&histo_queue); + HistoQueuePush(&histo_queue, histograms, idx1, clusters[i], 0.); } } } @@ -777,90 +792,130 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) { return ok; } -static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo, - VP8LHistogram* tmp_histo, - VP8LHistogram* best_combo, - int quality, int min_cluster_size) { +// Perform histogram aggregation using a stochastic approach. +// 'do_greedy' is set to 1 if a greedy approach needs to be performed +// afterwards, 0 otherwise. +static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, + int min_cluster_size, + int* const do_greedy) { int iter; - uint32_t seed = 0; + uint32_t seed = 1; int tries_with_no_success = 0; int image_histo_size = image_histo->size; - const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8; - const int outer_iters = image_histo_size * iter_mult; - const int num_pairs = image_histo_size / 2; + const int outer_iters = image_histo_size; const int num_tries_no_success = outer_iters / 2; - int idx2_max = image_histo_size - 1; - int do_brute_dorce = 0; VP8LHistogram** const histograms = image_histo->histograms; + // Priority queue of histogram pairs. Its size of "kCostHeapSizeSqrt"^2 + // impacts the quality of the compression and the speed: the smaller the + // faster but the worse for the compression. + HistoQueue histo_queue; + const int kHistoQueueSizeSqrt = 3; + int ok = 0; + if (!HistoQueueInit(&histo_queue, kHistoQueueSizeSqrt)) { + goto End; + } // Collapse similar histograms in 'image_histo'. ++min_cluster_size; - for (iter = 0; - iter < outer_iters && image_histo_size >= min_cluster_size; + for (iter = 0; iter < outer_iters && image_histo_size >= min_cluster_size && + ++tries_with_no_success < num_tries_no_success; ++iter) { - double best_cost_diff = 0.; + double best_cost = + (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff; int best_idx1 = -1, best_idx2 = 1; int j; - int num_tries = - (num_pairs < image_histo_size) ? num_pairs : image_histo_size; - // Use a brute force approach if: - // - stochastic has not worked for a while and - // - if the number of iterations for brute force is less than the number of - // iterations if we never find a match ever again stochastically (hence - // num_tries times the number of remaining outer iterations). - do_brute_dorce = - (tries_with_no_success > 10) && - (idx2_max * (idx2_max + 1) < 2 * num_tries * (outer_iters - iter)); - if (do_brute_dorce) num_tries = idx2_max; - - seed += iter; - for (j = 0; j < num_tries; ++j) { - double curr_cost_diff; - // Choose two histograms at random and try to combine them. - uint32_t idx1, idx2; - if (do_brute_dorce) { - // Use a brute force approach. - idx1 = (uint32_t)j; - idx2 = (uint32_t)idx2_max; - } else { - const uint32_t tmp = (j & 7) + 1; - const uint32_t diff = - (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); - idx1 = MyRand(&seed) % image_histo_size; - idx2 = (idx1 + diff + 1) % image_histo_size; - if (idx1 == idx2) { - continue; - } - } + const uint32_t rand_range = (image_histo_size - 1) * image_histo_size; + // image_histo_size / 2 was chosen empirically. Less means faster but worse + // compression. + const int num_tries = image_histo_size / 2; - // Calculate cost reduction on combining. - curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], - tmp_histo, best_cost_diff); - if (curr_cost_diff < best_cost_diff) { // found a better pair? - HistogramSwap(&best_combo, &tmp_histo); - best_cost_diff = curr_cost_diff; - best_idx1 = idx1; - best_idx2 = idx2; + for (j = 0; j < num_tries; ++j) { + double curr_cost; + // Choose two different histograms at random and try to combine them. + const uint32_t tmp = MyRand(&seed) % rand_range; + const uint32_t idx1 = tmp / (image_histo_size - 1); + uint32_t idx2 = tmp % (image_histo_size - 1); + if (idx2 >= idx1) ++idx2; + + // Calculate cost reduction on combination. + curr_cost = + HistoQueuePush(&histo_queue, histograms, idx1, idx2, best_cost); + if (curr_cost < 0) { // found a better pair? + best_cost = curr_cost; + // Empty the queue if we reached full capacity. + if (histo_queue.size == histo_queue.max_size) break; } } - if (do_brute_dorce) --idx2_max; - - if (best_idx1 >= 0) { - HistogramSwap(&best_combo, &histograms[best_idx1]); - // swap best_idx2 slot with last one (which is now unused) - --image_histo_size; - if (idx2_max >= image_histo_size) idx2_max = image_histo_size - 1; - if (best_idx2 != image_histo_size) { - HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); - histograms[image_histo_size] = NULL; - } - tries_with_no_success = 0; + if (histo_queue.size == 0) continue; + + // Merge the two best histograms. + best_idx1 = histo_queue.queue[0].idx1; + best_idx2 = histo_queue.queue[0].idx2; + assert(best_idx1 < best_idx2); + HistogramAddEval(histograms[best_idx1], histograms[best_idx2], + histograms[best_idx1], 0); + // Swap the best_idx2 histogram with the last one (which is now unused). + --image_histo_size; + if (best_idx2 != image_histo_size) { + HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]); } - if (++tries_with_no_success >= num_tries_no_success || idx2_max == 0) { - break; + histograms[image_histo_size] = NULL; + // Parse the queue and update each pair that deals with best_idx1, + // best_idx2 or image_histo_size. + for (j = 0; j < histo_queue.size;) { + HistogramPair* const p = histo_queue.queue + j; + const int is_idx1_best = p->idx1 == best_idx1 || p->idx1 == best_idx2; + const int is_idx2_best = p->idx2 == best_idx1 || p->idx2 == best_idx2; + int do_eval = 0; + // The front pair could have been duplicated by a random pick so + // check for it all the time nevertheless. + if (is_idx1_best && is_idx2_best) { + HistoQueuePopPair(&histo_queue, p); + continue; + } + // Any pair containing one of the two best indices should only refer to + // best_idx1. Its cost should also be updated. + if (is_idx1_best) { + p->idx1 = best_idx1; + do_eval = 1; + } else if (is_idx2_best) { + p->idx2 = best_idx1; + do_eval = 1; + } + if (p->idx2 == image_histo_size) { + // No need to re-evaluate here as it does not involve a pair + // containing best_idx1 or best_idx2. + p->idx2 = best_idx2; + } + assert(p->idx2 < image_histo_size); + // Make sure the index order is respected. + if (p->idx1 > p->idx2) { + const int tmp = p->idx2; + p->idx2 = p->idx1; + p->idx1 = tmp; + } + if (do_eval) { + // Re-evaluate the cost of an updated pair. + GetCombinedHistogramEntropy(histograms[p->idx1], histograms[p->idx2], 0, + &p->cost_diff); + if (p->cost_diff >= 0.) { + HistoQueuePopPair(&histo_queue, p); + continue; + } + } + HistoQueueUpdateHead(&histo_queue, p); + ++j; } + + tries_with_no_success = 0; } image_histo->size = image_histo_size; + *do_greedy = (image_histo->size <= min_cluster_size); + ok = 1; + +End: + HistoQueueClear(&histo_queue); + return ok; } // ----------------------------------------------------------------------------- @@ -925,7 +980,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, int quality, int low_effort, int histo_bits, int cache_bits, VP8LHistogramSet* const image_histo, - VP8LHistogramSet* const tmp_histos, + VP8LHistogram* const tmp_histo, uint16_t* const histogram_symbols) { int ok = 0; const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; @@ -933,7 +988,6 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const int image_histo_raw_size = histo_xsize * histo_ysize; VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); - VP8LHistogram* cur_combo; // Don't attempt linear bin-partition heuristic for // histograms of small sizes (as bin_map will be very sparse) and // maximum quality q==100 (to preserve the compression gains at that level). @@ -948,7 +1002,6 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, // Copies the histograms and computes its bit_cost. HistogramCopyAndAnalyze(orig_histo, image_histo); - cur_combo = tmp_histos->histograms[1]; // pick up working slot if (entropy_combine) { const int bin_map_size = orig_histo->size; // Reuse histogram_symbols storage. By definition, it's guaranteed to be ok. @@ -958,10 +1011,9 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); // Collapse histograms with similar entropy. - cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, - bin_map, bin_map_size, - entropy_combine_num_bins, - combine_cost_factor, low_effort); + HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size, + entropy_combine_num_bins, combine_cost_factor, + low_effort); } // Don't combine the histograms using stochastic and greedy heuristics for @@ -970,10 +1022,11 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const float x = quality / 100.f; // cubic ramp between 1 and MAX_HISTO_GREEDY: const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); - HistogramCombineStochastic(image_histo, tmp_histos->histograms[0], - cur_combo, quality, threshold_size); - if ((image_histo->size <= threshold_size) && - !HistogramCombineGreedy(image_histo)) { + int do_greedy; + if (!HistogramCombineStochastic(image_histo, threshold_size, &do_greedy)) { + goto Error; + } + if (do_greedy && !HistogramCombineGreedy(image_histo)) { goto Error; } } diff --git a/src/3rdparty/libwebp/src/enc/histogram_enc.h b/src/3rdparty/libwebp/src/enc/histogram_enc.h index a9d258a..15b1fbd 100644 --- a/src/3rdparty/libwebp/src/enc/histogram_enc.h +++ b/src/3rdparty/libwebp/src/enc/histogram_enc.h @@ -11,14 +11,14 @@ // // Models the histograms of literal and distance codes. -#ifndef WEBP_ENC_HISTOGRAM_H_ -#define WEBP_ENC_HISTOGRAM_H_ +#ifndef WEBP_ENC_HISTOGRAM_ENC_H_ +#define WEBP_ENC_HISTOGRAM_ENC_H_ #include <string.h> -#include "./backward_references_enc.h" -#include "../webp/format_constants.h" -#include "../webp/types.h" +#include "src/enc/backward_references_enc.h" +#include "src/webp/format_constants.h" +#include "src/webp/types.h" #ifdef __cplusplus extern "C" { @@ -90,7 +90,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits); // Accumulate a token 'v' into a histogram. void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, - const PixOrCopy* const v); + const PixOrCopy* const v, + int (*const distance_modifier)(int, int), + int distance_modifier_arg0); static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { return NUM_LITERAL_CODES + NUM_LENGTH_CODES + @@ -103,7 +105,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, int quality, int low_effort, int histogram_bits, int cache_bits, VP8LHistogramSet* const image_in, - VP8LHistogramSet* const tmp_histos, + VP8LHistogram* const tmp_histo, uint16_t* const histogram_symbols); // Returns the entropy for the symbols in the input array. @@ -120,4 +122,4 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p); } #endif -#endif // WEBP_ENC_HISTOGRAM_H_ +#endif // WEBP_ENC_HISTOGRAM_ENC_H_ diff --git a/src/3rdparty/libwebp/src/enc/iterator_enc.c b/src/3rdparty/libwebp/src/enc/iterator_enc.c index e48d30b..cfacfd2 100644 --- a/src/3rdparty/libwebp/src/enc/iterator_enc.c +++ b/src/3rdparty/libwebp/src/enc/iterator_enc.c @@ -13,7 +13,7 @@ #include <string.h> -#include "./vp8i_enc.h" +#include "src/enc/vp8i_enc.h" //------------------------------------------------------------------------------ // VP8Iterator diff --git a/src/3rdparty/libwebp/src/enc/near_lossless_enc.c b/src/3rdparty/libwebp/src/enc/near_lossless_enc.c index 2bd03ab..cadd14c 100644 --- a/src/3rdparty/libwebp/src/enc/near_lossless_enc.c +++ b/src/3rdparty/libwebp/src/enc/near_lossless_enc.c @@ -17,18 +17,20 @@ #include <assert.h> #include <stdlib.h> -#include "../dsp/lossless_common.h" -#include "../utils/utils.h" -#include "./vp8i_enc.h" +#include "src/dsp/lossless_common.h" +#include "src/utils/utils.h" +#include "src/enc/vp8li_enc.h" + +#if (WEBP_NEAR_LOSSLESS == 1) #define MIN_DIM_FOR_NEAR_LOSSLESS 64 #define MAX_LIMIT_BITS 5 // Quantizes the value up or down to a multiple of 1<<bits (or to 255), // choosing the closer one, resolving ties using bankers' rounding. -static int FindClosestDiscretized(int a, int bits) { - const int mask = (1 << bits) - 1; - const int biased = a + (mask >> 1) + ((a >> bits) & 1); +static uint32_t FindClosestDiscretized(uint32_t a, int bits) { + const uint32_t mask = (1u << bits) - 1; + const uint32_t biased = a + (mask >> 1) + ((a >> bits) & 1); assert(bits > 0); if (biased > 0xff) return 0xff; return biased & ~mask; @@ -69,22 +71,30 @@ static int IsSmooth(const uint32_t* const prev_row, } // Adjusts pixel values of image with given maximum error. -static void NearLossless(int xsize, int ysize, uint32_t* argb, - int limit_bits, uint32_t* copy_buffer) { +static void NearLossless(int xsize, int ysize, const uint32_t* argb_src, + int stride, int limit_bits, uint32_t* copy_buffer, + uint32_t* argb_dst) { int x, y; const int limit = 1 << limit_bits; uint32_t* prev_row = copy_buffer; uint32_t* curr_row = prev_row + xsize; uint32_t* next_row = curr_row + xsize; - memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0])); + memcpy(curr_row, argb_src, xsize * sizeof(argb_src[0])); + memcpy(next_row, argb_src + stride, xsize * sizeof(argb_src[0])); - for (y = 1; y < ysize - 1; ++y) { - uint32_t* const curr_argb_row = argb + y * xsize; - uint32_t* const next_argb_row = curr_argb_row + xsize; - memcpy(next_row, next_argb_row, xsize * sizeof(argb[0])); - for (x = 1; x < xsize - 1; ++x) { - if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) { - curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits); + for (y = 0; y < ysize; ++y, argb_src += stride, argb_dst += xsize) { + if (y == 0 || y == ysize - 1) { + memcpy(argb_dst, argb_src, xsize * sizeof(argb_src[0])); + } else { + memcpy(next_row, argb_src + stride, xsize * sizeof(argb_src[0])); + argb_dst[0] = argb_src[0]; + argb_dst[xsize - 1] = argb_src[xsize - 1]; + for (x = 1; x < xsize - 1; ++x) { + if (IsSmooth(prev_row, curr_row, next_row, x, limit)) { + argb_dst[x] = curr_row[x]; + } else { + argb_dst[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits); + } } } { @@ -97,26 +107,45 @@ static void NearLossless(int xsize, int ysize, uint32_t* argb, } } -int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) { +int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, + uint32_t* const argb_dst) { int i; + const int xsize = picture->width; + const int ysize = picture->height; + const int stride = picture->argb_stride; uint32_t* const copy_buffer = (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer)); const int limit_bits = VP8LNearLosslessBits(quality); - assert(argb != NULL); - assert(limit_bits >= 0); + assert(argb_dst != NULL); + assert(limit_bits > 0); assert(limit_bits <= MAX_LIMIT_BITS); if (copy_buffer == NULL) { return 0; } // For small icon images, don't attempt to apply near-lossless compression. - if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) { + if ((xsize < MIN_DIM_FOR_NEAR_LOSSLESS && + ysize < MIN_DIM_FOR_NEAR_LOSSLESS) || + ysize < 3) { + for (i = 0; i < ysize; ++i) { + memcpy(argb_dst + i * xsize, picture->argb + i * picture->argb_stride, + xsize * sizeof(*argb_dst)); + } WebPSafeFree(copy_buffer); return 1; } - for (i = limit_bits; i != 0; --i) { - NearLossless(xsize, ysize, argb, i, copy_buffer); + NearLossless(xsize, ysize, picture->argb, stride, limit_bits, copy_buffer, + argb_dst); + for (i = limit_bits - 1; i != 0; --i) { + NearLossless(xsize, ysize, argb_dst, xsize, i, copy_buffer, argb_dst); } WebPSafeFree(copy_buffer); return 1; } +#else // (WEBP_NEAR_LOSSLESS == 1) + +// Define a stub to suppress compiler warnings. +extern void VP8LNearLosslessStub(void); +WEBP_TSAN_IGNORE_FUNCTION void VP8LNearLosslessStub(void) {} + +#endif // (WEBP_NEAR_LOSSLESS == 1) diff --git a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c index e5d1c75..d531dd0 100644 --- a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c @@ -15,10 +15,12 @@ #include <stdlib.h> #include <math.h> -#include "./vp8i_enc.h" -#include "../utils/random_utils.h" -#include "../utils/utils.h" -#include "../dsp/yuv.h" +#include "src/enc/vp8i_enc.h" +#include "src/utils/random_utils.h" +#include "src/utils/utils.h" +#include "src/dsp/dsp.h" +#include "src/dsp/lossless.h" +#include "src/dsp/yuv.h" // Uncomment to disable gamma-compression during RGB->U/V averaging #define USE_GAMMA_COMPRESSION @@ -39,12 +41,15 @@ static const union { static int CheckNonOpaque(const uint8_t* alpha, int width, int height, int x_step, int y_step) { if (alpha == NULL) return 0; - while (height-- > 0) { - int x; - for (x = 0; x < width * x_step; x += x_step) { - if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time. + WebPInitAlphaProcessing(); + if (x_step == 1) { + for (; height-- > 0; alpha += y_step) { + if (WebPHasAlpha8b(alpha, width)) return 1; + } + } else { + for (; height-- > 0; alpha += y_step) { + if (WebPHasAlpha32b(alpha, width)) return 1; } - alpha += y_step; } return 0; } @@ -56,15 +61,10 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { return CheckNonOpaque(picture->a, picture->width, picture->height, 1, picture->a_stride); } else { - int x, y; - const uint32_t* argb = picture->argb; - if (argb == NULL) return 0; - for (y = 0; y < picture->height; ++y) { - for (x = 0; x < picture->width; ++x) { - if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff - } - argb += picture->argb_stride; - } + const int alpha_offset = ALPHA_IS_LAST ? 3 : 0; + return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, + picture->width, picture->height, + 4, picture->argb_stride * sizeof(*picture->argb)); } return 0; } @@ -171,7 +171,7 @@ typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W #if defined(USE_GAMMA_COMPRESSION) // float variant of gamma-correction -// We use tables of different size and precision for the Rec709 +// We use tables of different size and precision for the Rec709 / BT2020 // transfer function. #define kGammaF (1./0.45) static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX @@ -183,8 +183,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { int v; const double norm = 1. / MAX_Y_T; const double scale = 1. / kGammaTabSize; - const double a = 0.099; - const double thresh = 0.018; + const double a = 0.09929682680944; + const double thresh = 0.018053968510807; for (v = 0; v <= MAX_Y_T; ++v) { const double g = norm * v; if (g <= thresh * 4.5) { @@ -856,7 +856,6 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, return 0; } if (has_alpha) { - WebPInitAlphaProcessing(); assert(step == 4); #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE) assert(kAlphaFix + kGammaFix <= 31); @@ -1085,40 +1084,45 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) { // automatic import / conversion static int Import(WebPPicture* const picture, - const uint8_t* const rgb, int rgb_stride, + const uint8_t* rgb, int rgb_stride, int step, int swap_rb, int import_alpha) { int y; const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0); const uint8_t* g_ptr = rgb + 1; const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2); - const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL; const int width = picture->width; const int height = picture->height; if (!picture->use_argb) { + const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL; return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, 0.f /* no dithering */, 0, picture); } if (!WebPPictureAlloc(picture)) return 0; - VP8EncDspARGBInit(); + VP8LDspInit(); + WebPInitAlphaProcessing(); if (import_alpha) { uint32_t* dst = picture->argb; + const int do_copy = + (!swap_rb && !ALPHA_IS_LAST) || (swap_rb && ALPHA_IS_LAST); assert(step == 4); for (y = 0; y < height; ++y) { - VP8PackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst); - a_ptr += rgb_stride; - r_ptr += rgb_stride; - g_ptr += rgb_stride; - b_ptr += rgb_stride; + if (do_copy) { + memcpy(dst, rgb, width * 4); + } else { + // RGBA input order. Need to swap R and B. + VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst); + } + rgb += rgb_stride; dst += picture->argb_stride; } } else { uint32_t* dst = picture->argb; assert(step >= 3); for (y = 0; y < height; ++y) { - VP8PackRGB(r_ptr, g_ptr, b_ptr, width, step, dst); + WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst); r_ptr += rgb_stride; g_ptr += rgb_stride; b_ptr += rgb_stride; @@ -1130,12 +1134,7 @@ static int Import(WebPPicture* const picture, // Public API -int WebPPictureImportRGB(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return (picture != NULL && rgb != NULL) - ? Import(picture, rgb, rgb_stride, 3, 0, 0) - : 0; -} +#if !defined(WEBP_REDUCE_CSP) int WebPPictureImportBGR(WebPPicture* picture, const uint8_t* rgb, int rgb_stride) { @@ -1144,31 +1143,41 @@ int WebPPictureImportBGR(WebPPicture* picture, : 0; } -int WebPPictureImportRGBA(WebPPicture* picture, +int WebPPictureImportBGRA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 0, 1) + ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0; } -int WebPPictureImportBGRA(WebPPicture* picture, + +int WebPPictureImportBGRX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 1) + ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0; } -int WebPPictureImportRGBX(WebPPicture* picture, +#endif // WEBP_REDUCE_CSP + +int WebPPictureImportRGB(WebPPicture* picture, + const uint8_t* rgb, int rgb_stride) { + return (picture != NULL && rgb != NULL) + ? Import(picture, rgb, rgb_stride, 3, 0, 0) + : 0; +} + +int WebPPictureImportRGBA(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 0, 0) + ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0; } -int WebPPictureImportBGRX(WebPPicture* picture, +int WebPPictureImportRGBX(WebPPicture* picture, const uint8_t* rgba, int rgba_stride) { return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 0) + ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0; } diff --git a/src/3rdparty/libwebp/src/enc/picture_enc.c b/src/3rdparty/libwebp/src/enc/picture_enc.c index dfa6651..c691622 100644 --- a/src/3rdparty/libwebp/src/enc/picture_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_enc.c @@ -14,9 +14,9 @@ #include <assert.h> #include <stdlib.h> -#include "./vp8i_enc.h" -#include "../dsp/dsp.h" -#include "../utils/utils.h" +#include "src/enc/vp8i_enc.h" +#include "src/dsp/dsp.h" +#include "src/utils/utils.h" //------------------------------------------------------------------------------ // WebPPicture @@ -76,13 +76,12 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } // allocate a new buffer. - memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); + memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb)); if (memory == NULL) { return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - // TODO(skal): align plane to cache line? picture->memory_argb_ = memory; - picture->argb = (uint32_t*)memory; + picture->argb = (uint32_t*)WEBP_ALIGN(memory); picture->argb_stride = width; return 1; } @@ -92,8 +91,8 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK); const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT; const int y_stride = width; - const int uv_width = (width + 1) >> 1; - const int uv_height = (height + 1) >> 1; + const int uv_width = (int)(((int64_t)width + 1) >> 1); + const int uv_height = (int)(((int64_t)height + 1) >> 1); const int uv_stride = uv_width; int a_width, a_stride; uint64_t y_size, uv_size, a_size, total_size; @@ -118,8 +117,8 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { total_size = y_size + a_size + 2 * uv_size; // Security and validation checks - if (width <= 0 || height <= 0 || // luma/alpha param error - uv_width < 0 || uv_height < 0) { // u/v param error + if (width <= 0 || height <= 0 || // luma/alpha param error + uv_width <= 0 || uv_height <= 0) { // u/v param error return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } // allocate a new buffer. @@ -271,9 +270,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, float q, \ } ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB) -ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR) ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA) +#if !defined(WEBP_REDUCE_CSP) +ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR) ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA) +#endif // WEBP_REDUCE_CSP #undef ENCODE_FUNC @@ -284,9 +285,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) { \ } LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB) -LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR) LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA) +#if !defined(WEBP_REDUCE_CSP) +LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR) LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA) +#endif // WEBP_REDUCE_CSP #undef LOSSLESS_ENCODE_FUNC diff --git a/src/3rdparty/libwebp/src/enc/picture_psnr_enc.c b/src/3rdparty/libwebp/src/enc/picture_psnr_enc.c index 9c0b229..362a7c7 100644 --- a/src/3rdparty/libwebp/src/enc/picture_psnr_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_psnr_enc.c @@ -11,11 +11,15 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include "src/webp/encode.h" + +#if !(defined(WEBP_DISABLE_STATS) || defined(WEBP_REDUCE_SIZE)) + #include <math.h> #include <stdlib.h> -#include "./vp8i_enc.h" -#include "../utils/utils.h" +#include "src/enc/vp8i_enc.h" +#include "src/utils/utils.h" typedef double (*AccumulateFunc)(const uint8_t* src, int src_stride, const uint8_t* ref, int ref_stride, @@ -210,4 +214,34 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, return ok; } -//------------------------------------------------------------------------------ +#else // defined(WEBP_DISABLE_STATS) +int WebPPlaneDistortion(const uint8_t* src, size_t src_stride, + const uint8_t* ref, size_t ref_stride, + int width, int height, size_t x_step, + int type, float* distortion, float* result) { + (void)src; + (void)src_stride; + (void)ref; + (void)ref_stride; + (void)width; + (void)height; + (void)x_step; + (void)type; + if (distortion == NULL || result == NULL) return 0; + *distortion = 0.f; + *result = 0.f; + return 1; +} + +int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, + int type, float results[5]) { + int i; + (void)src; + (void)ref; + (void)type; + if (results == NULL) return 0; + for (i = 0; i < 5; ++i) results[i] = 0.f; + return 1; +} + +#endif // !defined(WEBP_DISABLE_STATS) diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c b/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c index 0b7181c..58a6ae7 100644 --- a/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c @@ -11,12 +11,16 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include "src/webp/encode.h" + +#if !defined(WEBP_REDUCE_SIZE) + #include <assert.h> #include <stdlib.h> -#include "./vp8i_enc.h" -#include "../utils/rescaler_utils.h" -#include "../utils/utils.h" +#include "src/enc/vp8i_enc.h" +#include "src/utils/rescaler_utils.h" +#include "src/utils/utils.h" #define HALVE(x) (((x) + 1) >> 1) @@ -261,4 +265,45 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { return 1; } -//------------------------------------------------------------------------------ +#else // defined(WEBP_REDUCE_SIZE) + +int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { + (void)src; + (void)dst; + return 0; +} + +int WebPPictureIsView(const WebPPicture* picture) { + (void)picture; + return 0; +} + +int WebPPictureView(const WebPPicture* src, + int left, int top, int width, int height, + WebPPicture* dst) { + (void)src; + (void)left; + (void)top; + (void)width; + (void)height; + (void)dst; + return 0; +} + +int WebPPictureCrop(WebPPicture* pic, + int left, int top, int width, int height) { + (void)pic; + (void)left; + (void)top; + (void)width; + (void)height; + return 0; +} + +int WebPPictureRescale(WebPPicture* pic, int width, int height) { + (void)pic; + (void)width; + (void)height; + return 0; +} +#endif // !defined(WEBP_REDUCE_SIZE) diff --git a/src/3rdparty/libwebp/src/enc/picture_tools_enc.c b/src/3rdparty/libwebp/src/enc/picture_tools_enc.c index 895df51..be292d4 100644 --- a/src/3rdparty/libwebp/src/enc/picture_tools_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_tools_enc.c @@ -13,8 +13,8 @@ #include <assert.h> -#include "./vp8i_enc.h" -#include "../dsp/yuv.h" +#include "src/enc/vp8i_enc.h" +#include "src/dsp/yuv.h" static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { return (0xff000000u | (r << 16) | (g << 8) | b); @@ -25,20 +25,7 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { #define SIZE 8 #define SIZE2 (SIZE / 2) -static int is_transparent_area(const uint8_t* ptr, int stride, int size) { - int y, x; - for (y = 0; y < size; ++y) { - for (x = 0; x < size; ++x) { - if (ptr[x]) { - return 0; - } - } - ptr += stride; - } - return 1; -} - -static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) { +static int IsTransparentARGBArea(const uint32_t* ptr, int stride, int size) { int y, x; for (y = 0; y < size; ++y) { for (x = 0; x < size; ++x) { @@ -51,7 +38,7 @@ static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) { return 1; } -static void flatten(uint8_t* ptr, int v, int stride, int size) { +static void Flatten(uint8_t* ptr, int v, int stride, int size) { int y; for (y = 0; y < size; ++y) { memset(ptr, v, size); @@ -59,7 +46,7 @@ static void flatten(uint8_t* ptr, int v, int stride, int size) { } } -static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) { +static void FlattenARGB(uint32_t* ptr, uint32_t v, int stride, int size) { int x, y; for (y = 0; y < size; ++y) { for (x = 0; x < size; ++x) ptr[x] = v; @@ -67,54 +54,114 @@ static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) { } } +// Smoothen the luma components of transparent pixels. Return true if the whole +// block is transparent. +static int SmoothenBlock(const uint8_t* a_ptr, int a_stride, uint8_t* y_ptr, + int y_stride, int width, int height) { + int sum = 0, count = 0; + int x, y; + const uint8_t* alpha_ptr = a_ptr; + uint8_t* luma_ptr = y_ptr; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + if (alpha_ptr[x] != 0) { + ++count; + sum += luma_ptr[x]; + } + } + alpha_ptr += a_stride; + luma_ptr += y_stride; + } + if (count > 0 && count < width * height) { + const uint8_t avg_u8 = (uint8_t)(sum / count); + alpha_ptr = a_ptr; + luma_ptr = y_ptr; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + if (alpha_ptr[x] == 0) luma_ptr[x] = avg_u8; + } + alpha_ptr += a_stride; + luma_ptr += y_stride; + } + } + return (count == 0); +} + void WebPCleanupTransparentArea(WebPPicture* pic) { int x, y, w, h; if (pic == NULL) return; w = pic->width / SIZE; h = pic->height / SIZE; - // note: we ignore the left-overs on right/bottom + // note: we ignore the left-overs on right/bottom, except for SmoothenBlock(). if (pic->use_argb) { uint32_t argb_value = 0; for (y = 0; y < h; ++y) { int need_reset = 1; for (x = 0; x < w; ++x) { const int off = (y * pic->argb_stride + x) * SIZE; - if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) { + if (IsTransparentARGBArea(pic->argb + off, pic->argb_stride, SIZE)) { if (need_reset) { argb_value = pic->argb[off]; need_reset = 0; } - flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE); + FlattenARGB(pic->argb + off, argb_value, pic->argb_stride, SIZE); } else { need_reset = 1; } } } } else { - const uint8_t* const a_ptr = pic->a; + const int width = pic->width; + const int height = pic->height; + const int y_stride = pic->y_stride; + const int uv_stride = pic->uv_stride; + const int a_stride = pic->a_stride; + uint8_t* y_ptr = pic->y; + uint8_t* u_ptr = pic->u; + uint8_t* v_ptr = pic->v; + const uint8_t* a_ptr = pic->a; int values[3] = { 0 }; - if (a_ptr == NULL) return; // nothing to do - for (y = 0; y < h; ++y) { + if (a_ptr == NULL || y_ptr == NULL || u_ptr == NULL || v_ptr == NULL) { + return; + } + for (y = 0; y + SIZE <= height; y += SIZE) { int need_reset = 1; - for (x = 0; x < w; ++x) { - const int off_a = (y * pic->a_stride + x) * SIZE; - const int off_y = (y * pic->y_stride + x) * SIZE; - const int off_uv = (y * pic->uv_stride + x) * SIZE2; - if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) { + for (x = 0; x + SIZE <= width; x += SIZE) { + if (SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride, + SIZE, SIZE)) { if (need_reset) { - values[0] = pic->y[off_y]; - values[1] = pic->u[off_uv]; - values[2] = pic->v[off_uv]; + values[0] = y_ptr[x]; + values[1] = u_ptr[x >> 1]; + values[2] = v_ptr[x >> 1]; need_reset = 0; } - flatten(pic->y + off_y, values[0], pic->y_stride, SIZE); - flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2); - flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2); + Flatten(y_ptr + x, values[0], y_stride, SIZE); + Flatten(u_ptr + (x >> 1), values[1], uv_stride, SIZE2); + Flatten(v_ptr + (x >> 1), values[2], uv_stride, SIZE2); } else { need_reset = 1; } } + if (x < width) { + SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride, + width - x, SIZE); + } + a_ptr += SIZE * a_stride; + y_ptr += SIZE * y_stride; + u_ptr += SIZE2 * uv_stride; + v_ptr += SIZE2 * uv_stride; + } + if (y < height) { + const int sub_height = height - y; + for (x = 0; x + SIZE <= width; x += SIZE) { + SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride, + SIZE, sub_height); + } + if (x < width) { + SmoothenBlock(a_ptr + x, a_stride, y_ptr + x, y_stride, + width - x, sub_height); + } } } } @@ -144,9 +191,9 @@ void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { // Blend color and remove transparency info #define BLEND(V0, V1, ALPHA) \ - ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16) + ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 256) >> 16) #define BLEND_10BIT(V0, V1, ALPHA) \ - ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18) + ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 1024) >> 18) void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { const int red = (background_rgb >> 16) & 0xff; diff --git a/src/3rdparty/libwebp/src/enc/predictor_enc.c b/src/3rdparty/libwebp/src/enc/predictor_enc.c index 0639b74..f3715f5 100644 --- a/src/3rdparty/libwebp/src/enc/predictor_enc.c +++ b/src/3rdparty/libwebp/src/enc/predictor_enc.c @@ -14,9 +14,9 @@ // Urvang Joshi (urvang@google.com) // Vincent Rabaud (vrabaud@google.com) -#include "../dsp/lossless.h" -#include "../dsp/lossless_common.h" -#include "./vp8li_enc.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" +#include "src/enc/vp8li_enc.h" #define MAX_DIFF_COST (1e30f) @@ -26,7 +26,6 @@ static const uint32_t kMaskAlpha = 0xff000000; // Mostly used to reduce code size + readability static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } -static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; } //------------------------------------------------------------------------------ // Methods to calculate Entropy (Shannon). @@ -90,6 +89,9 @@ static WEBP_INLINE void PredictBatch(int mode, int x_start, int y, } } +#if (WEBP_NEAR_LOSSLESS == 1) +static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; } + static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) { const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24)); const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff)); @@ -180,6 +182,7 @@ static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict, // max_quantization which is a power of 2, smaller than max_diff). Take care if // value and predict have undergone subtract green, which means that red and // blue are represented as offsets from green. +#define NEAR_LOSSLESS_DIFF(a, b) (uint8_t)((((int)(a) - (int)(b))) & 0xff) static uint32_t NearLossless(uint32_t value, uint32_t predict, int max_quantization, int max_diff, int used_subtract_green) { @@ -196,7 +199,7 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict, } if ((value >> 24) == 0 || (value >> 24) == 0xff) { // Preserve transparency of fully transparent or fully opaque pixels. - a = ((value >> 24) - (predict >> 24)) & 0xff; + a = NEAR_LOSSLESS_DIFF(value >> 24, predict >> 24); } else { a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization); } @@ -209,15 +212,17 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict, // The amount by which green has been adjusted during quantization. It is // subtracted from red and blue for compensation, to avoid accumulating two // quantization errors in them. - green_diff = (new_green - (value >> 8)) & 0xff; + green_diff = NEAR_LOSSLESS_DIFF(new_green, value >> 8); } - r = NearLosslessComponent(((value >> 16) - green_diff) & 0xff, + r = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value >> 16, green_diff), (predict >> 16) & 0xff, 0xff - new_green, quantization); - b = NearLosslessComponent((value - green_diff) & 0xff, predict & 0xff, - 0xff - new_green, quantization); + b = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value, green_diff), + predict & 0xff, 0xff - new_green, quantization); return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b; } +#undef NEAR_LOSSLESS_DIFF +#endif // (WEBP_NEAR_LOSSLESS == 1) // Stores the difference between the pixel and its prediction in "out". // In case of a lossy encoding, updates the source image to avoid propagating @@ -244,6 +249,7 @@ static WEBP_INLINE void GetResidual( } else { predict = pred_func(current_row[x - 1], upper_row + x); } +#if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || x == 0 || x == width - 1) { residual = VP8LSubPixels(current_row[x], predict); @@ -254,6 +260,13 @@ static WEBP_INLINE void GetResidual( current_row[x] = VP8LAddPixels(predict, residual); // x is never 0 here so we do not need to update upper_row like below. } +#else + (void)max_diffs; + (void)height; + (void)max_quantization; + (void)used_subtract_green; + residual = VP8LSubPixels(current_row[x], predict); +#endif if ((current_row[x] & kMaskAlpha) == 0) { // If alpha is 0, cleanup RGB. We can choose the RGB values of the // residual for best compression. The prediction of alpha itself can be @@ -296,11 +309,12 @@ static int GetBestPredictorForTile(int width, int height, const int max_x = GetMin(tile_size, width - start_x); // Whether there exist columns just outside the tile. const int have_left = (start_x > 0); - const int have_right = (max_x < width - start_x); // Position and size of the strip covering the tile and adjacent columns if // they exist. const int context_start_x = start_x - have_left; - const int context_width = max_x + have_left + have_right; +#if (WEBP_NEAR_LOSSLESS == 1) + const int context_width = max_x + have_left + (max_x < width - start_x); +#endif const int tiles_per_row = VP8LSubSampleSize(width, bits); // Prediction modes of the left and above neighbor tiles. const int left_mode = (tile_x > 0) ? @@ -352,10 +366,12 @@ static int GetBestPredictorForTile(int width, int height, memcpy(current_row + context_start_x, argb + y * width + context_start_x, sizeof(*argb) * (max_x + have_left + (y + 1 < height))); +#if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization > 1 && y >= 1 && y + 1 < height) { MaxDiffsForRow(context_width, width, argb + y * width + context_start_x, max_diffs + context_start_x, used_subtract_green); } +#endif GetResidual(width, height, upper_row, current_row, max_diffs, mode, start_x, start_x + max_x, y, max_quantization, exact, @@ -405,7 +421,9 @@ static void CopyImageWithPrediction(int width, int height, uint32_t* upper_row = argb_scratch; uint32_t* current_row = upper_row + width + 1; uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1); +#if (WEBP_NEAR_LOSSLESS == 1) uint8_t* lower_max_diffs = current_max_diffs + width; +#endif int y; for (y = 0; y < height; ++y) { @@ -420,6 +438,7 @@ static void CopyImageWithPrediction(int width, int height, PredictBatch(kPredLowEffort, 0, y, width, current_row, upper_row, argb + y * width); } else { +#if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization > 1) { // Compute max_diffs for the lower row now, because that needs the // contents of argb for the current row, which we will overwrite with @@ -432,6 +451,7 @@ static void CopyImageWithPrediction(int width, int height, used_subtract_green); } } +#endif for (x = 0; x < width;) { const int mode = (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff; diff --git a/src/3rdparty/libwebp/src/enc/quant_enc.c b/src/3rdparty/libwebp/src/enc/quant_enc.c index b118fb2..3b1a312 100644 --- a/src/3rdparty/libwebp/src/enc/quant_enc.c +++ b/src/3rdparty/libwebp/src/enc/quant_enc.c @@ -15,8 +15,8 @@ #include <math.h> #include <stdlib.h> // for abs() -#include "./vp8i_enc.h" -#include "./cost_enc.h" +#include "src/enc/vp8i_enc.h" +#include "src/enc/cost_enc.h" #define DO_TRELLIS_I4 1 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate. @@ -457,11 +457,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { // Form the predictions in cache // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index -const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; -const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; +const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; +const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index -const int VP8I4ModeOffsets[NUM_BMODES] = { +const uint16_t VP8I4ModeOffsets[NUM_BMODES] = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 }; @@ -492,14 +492,14 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // |YYYY|....| 12 // +----+----+ -const int VP8Scan[16] = { // Luma +const uint16_t VP8Scan[16] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, }; -static const int VP8ScanUV[4 + 4] = { +static const uint16_t VP8ScanUV[4 + 4] = { 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; @@ -1162,7 +1162,7 @@ static void RefineUsingDistortion(VP8EncIterator* const it, const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT + const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT + VP8FixedCostsI16[mode] * lambda_d_i16; if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { continue; diff --git a/src/3rdparty/libwebp/src/enc/syntax_enc.c b/src/3rdparty/libwebp/src/enc/syntax_enc.c index 90665bd..a9e5a6c 100644 --- a/src/3rdparty/libwebp/src/enc/syntax_enc.c +++ b/src/3rdparty/libwebp/src/enc/syntax_enc.c @@ -13,10 +13,10 @@ #include <assert.h> -#include "../utils/utils.h" -#include "../webp/format_constants.h" // RIFF constants -#include "../webp/mux_types.h" // ALPHA_FLAG -#include "./vp8i_enc.h" +#include "src/utils/utils.h" +#include "src/webp/format_constants.h" // RIFF constants +#include "src/webp/mux_types.h" // ALPHA_FLAG +#include "src/enc/vp8i_enc.h" //------------------------------------------------------------------------------ // Helper functions @@ -289,11 +289,17 @@ static int GeneratePartition0(VP8Encoder* const enc) { pos3 = VP8BitWriterPos(bw); +#if !defined(WEBP_DISABLE_STATS) if (enc->pic_->stats) { enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3); enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3); enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_; } +#else + (void)pos1; + (void)pos2; + (void)pos3; +#endif if (bw->error_) { return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } diff --git a/src/3rdparty/libwebp/src/enc/token_enc.c b/src/3rdparty/libwebp/src/enc/token_enc.c index 02a0d72..3a2192a 100644 --- a/src/3rdparty/libwebp/src/enc/token_enc.c +++ b/src/3rdparty/libwebp/src/enc/token_enc.c @@ -20,9 +20,9 @@ #include <stdlib.h> #include <string.h> -#include "./cost_enc.h" -#include "./vp8i_enc.h" -#include "../utils/utils.h" +#include "src/enc/cost_enc.h" +#include "src/enc/vp8i_enc.h" +#include "src/utils/utils.h" #if !defined(DISABLE_TOKEN_BUFFER) @@ -195,39 +195,6 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res, #undef TOKEN_ID //------------------------------------------------------------------------------ -// This function works, but isn't currently used. Saved for later. - -#if 0 - -static void Record(int bit, proba_t* const stats) { - proba_t p = *stats; - if (p >= 0xffff0000u) { // an overflow is inbound. - p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. - } - // record bit count (lower 16 bits) and increment total count (upper 16 bits). - p += 0x00010000u + bit; - *stats = p; -} - -void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) { - const VP8Tokens* p = b->pages_; - while (p != NULL) { - const int N = (p->next_ == NULL) ? b->left_ : 0; - int n = MAX_NUM_TOKEN; - const token_t* const tokens = TOKEN_DATA(p); - while (n-- > N) { - const token_t token = tokens[n]; - if (!(token & FIXED_PROBA_BIT)) { - Record((token >> 15) & 1, stats + (token & 0x3fffu)); - } - } - p = p->next_; - } -} - -#endif // 0 - -//------------------------------------------------------------------------------ // Final coding pass, with known probabilities int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, @@ -283,8 +250,9 @@ size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) { #else // DISABLE_TOKEN_BUFFER -void VP8TBufferInit(VP8TBuffer* const b) { +void VP8TBufferInit(VP8TBuffer* const b, int page_size) { (void)b; + (void)page_size; } void VP8TBufferClear(VP8TBuffer* const b) { (void)b; diff --git a/src/3rdparty/libwebp/src/enc/tree_enc.c b/src/3rdparty/libwebp/src/enc/tree_enc.c index 2c40fe7..64ed283 100644 --- a/src/3rdparty/libwebp/src/enc/tree_enc.c +++ b/src/3rdparty/libwebp/src/enc/tree_enc.c @@ -11,7 +11,7 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include "./vp8i_enc.h" +#include "src/enc/vp8i_enc.h" //------------------------------------------------------------------------------ // Default probabilities diff --git a/src/3rdparty/libwebp/src/enc/vp8i_enc.h b/src/3rdparty/libwebp/src/enc/vp8i_enc.h index 93c95ec..3463491 100644 --- a/src/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/src/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -11,16 +11,16 @@ // // Author: Skal (pascal.massimino@gmail.com) -#ifndef WEBP_ENC_VP8ENCI_H_ -#define WEBP_ENC_VP8ENCI_H_ +#ifndef WEBP_ENC_VP8I_ENC_H_ +#define WEBP_ENC_VP8I_ENC_H_ #include <string.h> // for memcpy() -#include "../dec/common_dec.h" -#include "../dsp/dsp.h" -#include "../utils/bit_writer_utils.h" -#include "../utils/thread_utils.h" -#include "../utils/utils.h" -#include "../webp/encode.h" +#include "src/dec/common_dec.h" +#include "src/dsp/dsp.h" +#include "src/utils/bit_writer_utils.h" +#include "src/utils/thread_utils.h" +#include "src/utils/utils.h" +#include "src/webp/encode.h" #ifdef __cplusplus extern "C" { @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 #define ENC_MIN_VERSION 6 -#define ENC_REV_VERSION 0 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -75,10 +75,10 @@ typedef enum { // Rate-distortion optimization levels #define U_OFF_ENC (16) #define V_OFF_ENC (16 + 8) -extern const int VP8Scan[16]; // in quant.c -extern const int VP8UVModeOffsets[4]; // in analyze.c -extern const int VP8I16ModeOffsets[4]; -extern const int VP8I4ModeOffsets[NUM_BMODES]; +extern const uint16_t VP8Scan[16]; +extern const uint16_t VP8UVModeOffsets[4]; +extern const uint16_t VP8I16ModeOffsets[4]; +extern const uint16_t VP8I4ModeOffsets[NUM_BMODES]; // Layout of prediction blocks // intra 16x16 @@ -330,9 +330,6 @@ int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res, // Estimate the final coded size given a set of 'probas'. size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas); -// unused for now -void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats); - #endif // !DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ @@ -502,19 +499,10 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); // compressibility (no guarantee, though). Assumes that pic->use_argb is true. void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); - // in near_lossless.c -// Near lossless preprocessing in RGB color-space. -int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality); -// Near lossless adjustment for predictors. -void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits, - const uint32_t* argb_orig, - uint32_t* argb, uint32_t* argb_scratch, - const uint32_t* const transform_data, - int quality, int subtract_green); //------------------------------------------------------------------------------ #ifdef __cplusplus } // extern "C" #endif -#endif /* WEBP_ENC_VP8ENCI_H_ */ +#endif /* WEBP_ENC_VP8I_ENC_H_ */ diff --git a/src/3rdparty/libwebp/src/enc/vp8l_enc.c b/src/3rdparty/libwebp/src/enc/vp8l_enc.c index b1a793d..312e521 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l_enc.c +++ b/src/3rdparty/libwebp/src/enc/vp8l_enc.c @@ -15,20 +15,19 @@ #include <assert.h> #include <stdlib.h> -#include "./backward_references_enc.h" -#include "./histogram_enc.h" -#include "./vp8i_enc.h" -#include "./vp8li_enc.h" -#include "../dsp/lossless.h" -#include "../dsp/lossless_common.h" -#include "../utils/bit_writer_utils.h" -#include "../utils/huffman_encode_utils.h" -#include "../utils/utils.h" -#include "../webp/format_constants.h" - -#include "./delta_palettization_enc.h" - -#define PALETTE_KEY_RIGHT_SHIFT 22 // Key for 1K buffer. +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/enc/vp8i_enc.h" +#include "src/enc/vp8li_enc.h" +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" +#include "src/utils/bit_writer_utils.h" +#include "src/utils/huffman_encode_utils.h" +#include "src/utils/utils.h" +#include "src/webp/format_constants.h" + +#include "src/enc/delta_palettization_enc.h" + // Maximum number of histogram images (sub-blocks). #define MAX_HUFF_IMAGE_SIZE 2600 @@ -128,7 +127,10 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic, uint32_t palette[MAX_PALETTE_SIZE], int* const palette_size) { const int num_colors = WebPGetColorPalette(pic, palette); - if (num_colors > MAX_PALETTE_SIZE) return 0; + if (num_colors > MAX_PALETTE_SIZE) { + *palette_size = 0; + return 0; + } *palette_size = num_colors; qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { @@ -188,22 +190,33 @@ static WEBP_INLINE uint32_t HashPix(uint32_t pix) { static int AnalyzeEntropy(const uint32_t* argb, int width, int height, int argb_stride, int use_palette, + int palette_size, int transform_bits, EntropyIx* const min_entropy_ix, int* const red_and_blue_always_zero) { // Allocate histogram set with cache_bits = 0. - uint32_t* const histo = - (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256); + uint32_t* histo; + + if (use_palette && palette_size <= 16) { + // In the case of small palettes, we pack 2, 4 or 8 pixels together. In + // practice, small palettes are better than any other transform. + *min_entropy_ix = kPalette; + *red_and_blue_always_zero = 1; + return 1; + } + histo = (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256); if (histo != NULL) { int i, x, y; - const uint32_t* prev_row = argb; - const uint32_t* curr_row = argb + argb_stride; - for (y = 1; y < height; ++y) { - uint32_t prev_pix = curr_row[0]; - for (x = 1; x < width; ++x) { + const uint32_t* prev_row = NULL; + const uint32_t* curr_row = argb; + uint32_t pix_prev = argb[0]; // Skip the first pixel. + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { const uint32_t pix = curr_row[x]; - const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix); - if ((pix_diff == 0) || (pix == prev_row[x])) continue; - prev_pix = pix; + const uint32_t pix_diff = VP8LSubPixels(pix, pix_prev); + pix_prev = pix; + if ((pix_diff == 0) || (prev_row != NULL && pix == prev_row[x])) { + continue; + } AddSingle(pix, &histo[kHistoAlpha * 256], &histo[kHistoRed * 256], @@ -264,8 +277,24 @@ static int AnalyzeEntropy(const uint32_t* argb, entropy_comp[kHistoRedPredSubGreen] + entropy_comp[kHistoGreenPred] + entropy_comp[kHistoBluePredSubGreen]; - // Palette mode seems more efficient in a breakeven case. Bias with 1.0. - entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0; + entropy[kPalette] = entropy_comp[kHistoPalette]; + + // When including transforms, there is an overhead in bits from + // storing them. This overhead is small but matters for small images. + // For spatial, there are 14 transformations. + entropy[kSpatial] += VP8LSubSampleSize(width, transform_bits) * + VP8LSubSampleSize(height, transform_bits) * + VP8LFastLog2(14); + // For color transforms: 24 as only 3 channels are considered in a + // ColorTransformElement. + entropy[kSpatialSubGreen] += VP8LSubSampleSize(width, transform_bits) * + VP8LSubSampleSize(height, transform_bits) * + VP8LFastLog2(24); + // For palettes, add the cost of storing the palette. + // We empirically estimate the cost of a compressed entry as 8 bits. + // The palette is differential-coded when compressed hence a much + // lower cost than sizeof(uint32_t)*8. + entropy[kPalette] += palette_size * 8; *min_entropy_ix = kDirect; for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) { @@ -273,6 +302,7 @@ static int AnalyzeEntropy(const uint32_t* argb, *min_entropy_ix = (EntropyIx)k; } } + assert((int)*min_entropy_ix <= last_mode_to_analyze); *red_and_blue_always_zero = 1; // Let's check if the histogram of the chosen entropy mode has // non-zero red and blue values. If all are zero, we can later skip @@ -325,60 +355,95 @@ static int GetTransformBits(int method, int histo_bits) { return res; } -static int AnalyzeAndInit(VP8LEncoder* const enc) { +// Set of parameters to be used in each iteration of the cruncher. +#define CRUNCH_CONFIGS_LZ77_MAX 2 +typedef struct { + int entropy_idx_; + int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX]; + int lz77s_types_to_try_size_; +} CrunchConfig; + +#define CRUNCH_CONFIGS_MAX kNumEntropyIx + +static int EncoderAnalyze(VP8LEncoder* const enc, + CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX], + int* const crunch_configs_size, + int* const red_and_blue_always_zero) { const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; - const int pix_cnt = width * height; const WebPConfig* const config = enc->config_; const int method = config->method; const int low_effort = (config->method == 0); - // we round the block size up, so we're guaranteed to have - // at max MAX_REFS_BLOCK_PER_IMAGE blocks used: - int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; + int i; + int use_palette; + int n_lz77s; assert(pic != NULL && pic->argb != NULL); - enc->use_cross_color_ = 0; - enc->use_predict_ = 0; - enc->use_subtract_green_ = 0; - enc->use_palette_ = + use_palette = AnalyzeAndCreatePalette(pic, low_effort, enc->palette_, &enc->palette_size_); // TODO(jyrki): replace the decision to be based on an actual estimate // of entropy, or even spatial variance of entropy. - enc->histo_bits_ = GetHistoBits(method, enc->use_palette_, + enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height); enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); if (low_effort) { // AnalyzeEntropy is somewhat slow. - enc->use_predict_ = !enc->use_palette_; - enc->use_subtract_green_ = !enc->use_palette_; - enc->use_cross_color_ = 0; + crunch_configs[0].entropy_idx_ = use_palette ? kPalette : kSpatialSubGreen; + n_lz77s = 1; + *crunch_configs_size = 1; } else { - int red_and_blue_always_zero; EntropyIx min_entropy_ix; - if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, - enc->use_palette_, &min_entropy_ix, - &red_and_blue_always_zero)) { + // Try out multiple LZ77 on images with few colors. + n_lz77s = (enc->palette_size_ > 0 && enc->palette_size_ <= 16) ? 2 : 1; + if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, use_palette, + enc->palette_size_, enc->transform_bits_, + &min_entropy_ix, red_and_blue_always_zero)) { return 0; } - enc->use_palette_ = (min_entropy_ix == kPalette); - enc->use_subtract_green_ = - (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen); - enc->use_predict_ = - (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen); - enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; + if (method == 6 && config->quality == 100) { + // Go brute force on all transforms. + *crunch_configs_size = 0; + for (i = 0; i < kNumEntropyIx; ++i) { + if (i != kPalette || use_palette) { + assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX); + crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i; + } + } + } else { + // Only choose the guessed best transform. + *crunch_configs_size = 1; + crunch_configs[0].entropy_idx_ = min_entropy_ix; + } + } + // Fill in the different LZ77s. + assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX); + for (i = 0; i < *crunch_configs_size; ++i) { + int j; + for (j = 0; j < n_lz77s; ++j) { + crunch_configs[i].lz77s_types_to_try_[j] = + (j == 0) ? kLZ77Standard | kLZ77RLE : kLZ77Box; + } + crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s; } + return 1; +} +static int EncoderInit(VP8LEncoder* const enc) { + const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const int pix_cnt = width * height; + // we round the block size up, so we're guaranteed to have + // at most MAX_REFS_BLOCK_PER_IMAGE blocks used: + const int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; + int i; if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; - // palette-friendly input typically uses less literals - // -> reduce block size a bit - if (enc->use_palette_) refs_block_size /= 2; - VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size); - VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size); + for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); return 1; } @@ -571,11 +636,16 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw, length = write_trimmed_length ? trimmed_length : num_tokens; VP8LPutBits(bw, write_trimmed_length, 1); if (write_trimmed_length) { - const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1); - const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2; - VP8LPutBits(bw, nbitpairs - 1, 3); - assert(trimmed_length >= 2); - VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2); + if (trimmed_length == 2) { + VP8LPutBits(bw, 0, 3 + 2); // nbitpairs=1, trimmed_length=2 + } else { + const int nbits = BitsLog2Floor(trimmed_length - 2); + const int nbitpairs = nbits / 2 + 1; + assert(trimmed_length > 2); + assert(nbitpairs - 1 < 8); + VP8LPutBits(bw, nbitpairs - 1, 3); + VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2); + } } StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code); } @@ -642,7 +712,7 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( static WebPEncodingError StoreImageToBitMask( VP8LBitWriter* const bw, int width, int histo_bits, - VP8LBackwardRefs* const refs, + const VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, const HuffmanTreeCode* const huffman_codes) { const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; @@ -665,7 +735,7 @@ static WebPEncodingError StoreImageToBitMask( codes = huffman_codes + 5 * histogram_ix; } if (PixOrCopyIsLiteral(v)) { - static const int order[] = { 1, 2, 0, 3 }; + static const uint8_t order[] = { 1, 2, 0, 3 }; int k; for (k = 0; k < 4; ++k) { const int code = PixOrCopyLiteral(v, order[k]); @@ -705,7 +775,8 @@ static WebPEncodingError StoreImageToBitMask( static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, const uint32_t* const argb, VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2], + VP8LBackwardRefs* const refs_tmp1, + VP8LBackwardRefs* const refs_tmp2, int width, int height, int quality, int low_effort) { int i; @@ -730,8 +801,9 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits, - hash_chain, refs_array); + refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, + kLZ77Standard | kLZ77RLE, &cache_bits, + hash_chain, refs_tmp1, refs_tmp2); if (refs == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; @@ -788,39 +860,37 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, return err; } -static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, - const uint32_t* const argb, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs refs_array[2], - int width, int height, int quality, - int low_effort, - int use_cache, int* cache_bits, - int histogram_bits, - size_t init_byte_position, - int* const hdr_size, - int* const data_size) { +static WebPEncodingError EncodeImageInternal( + VP8LBitWriter* const bw, const uint32_t* const argb, + VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width, + int height, int quality, int low_effort, int use_cache, + const CrunchConfig* const config, int* cache_bits, int histogram_bits, + size_t init_byte_position, int* const hdr_size, int* const data_size) { WebPEncodingError err = VP8_ENC_OK; const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); VP8LHistogramSet* histogram_image = NULL; - VP8LHistogramSet* tmp_histos = NULL; + VP8LHistogram* tmp_histo = NULL; int histogram_image_size = 0; size_t bit_array_size = 0; - HuffmanTree* huff_tree = NULL; + HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; - VP8LBackwardRefs refs; - VP8LBackwardRefs* best_refs; + VP8LBackwardRefs* refs_best; + VP8LBackwardRefs* refs_tmp; uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols)); + int lz77s_idx; + VP8LBitWriter bw_init = *bw, bw_best; + int hdr_size_tmp; assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); assert(hdr_size != NULL); assert(data_size != NULL); - VP8LBackwardRefsInit(&refs, refs_array[0].block_size_); if (histogram_symbols == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; @@ -836,142 +906,162 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, // 'best_refs' is the reference to the best backward refs and points to one // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. - if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - best_refs = VP8LGetBackwardReferences(width, height, argb, quality, - low_effort, cache_bits, hash_chain, - refs_array); - if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); - tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits); - if (histogram_image == NULL || tmp_histos == NULL) { + if (huff_tree == NULL || + !VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort) || + !VP8LBitWriterInit(&bw_best, 0) || + (config->lz77s_types_to_try_size_ > 1 && + !VP8LBitWriterClone(bw, &bw_best))) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } + for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_; + ++lz77s_idx) { + refs_best = VP8LGetBackwardReferences( + width, height, argb, quality, low_effort, + config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain, + &refs_array[0], &refs_array[1]); + if (refs_best == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Keep the best references aside and use the other element from the first + // two as a temporary for later usage. + refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0]; + + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); + tmp_histo = VP8LAllocateHistogram(*cache_bits); + if (histogram_image == NULL || tmp_histo == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } - // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, - histogram_bits, *cache_bits, histogram_image, - tmp_histos, histogram_symbols)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Create Huffman bit lengths and codes for each histogram image. - histogram_image_size = histogram_image->size; - bit_array_size = 5 * histogram_image_size; - huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, - sizeof(*huffman_codes)); - // Note: some histogram_image entries may point to tmp_histos[], so the latter - // need to outlive the following call to GetHuffBitLengthsAndCodes(). - if (huffman_codes == NULL || - !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Free combined histograms. - VP8LFreeHistogramSet(histogram_image); - histogram_image = NULL; + // Build histogram image and symbols from backward references. + if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort, + histogram_bits, *cache_bits, histogram_image, + tmp_histo, histogram_symbols)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Create Huffman bit lengths and codes for each histogram image. + histogram_image_size = histogram_image->size; + bit_array_size = 5 * histogram_image_size; + huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, + sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the + // latter need to outlive the following call to GetHuffBitLengthsAndCodes(). + if (huffman_codes == NULL || + !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; - // Free scratch histograms. - VP8LFreeHistogramSet(tmp_histos); - tmp_histos = NULL; + // Free scratch histograms. + VP8LFreeHistogram(tmp_histo); + tmp_histo = NULL; - // Color Cache parameters. - if (*cache_bits > 0) { - VP8LPutBits(bw, 1, 1); - VP8LPutBits(bw, *cache_bits, 4); - } else { - VP8LPutBits(bw, 0, 1); - } + // Color Cache parameters. + if (*cache_bits > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, *cache_bits, 4); + } else { + VP8LPutBits(bw, 0, 1); + } - // Huffman image + meta huffman. - { - const int write_histogram_image = (histogram_image_size > 1); - VP8LPutBits(bw, write_histogram_image, 1); - if (write_histogram_image) { - uint32_t* const histogram_argb = - (uint32_t*)WebPSafeMalloc(histogram_image_xysize, - sizeof(*histogram_argb)); - int max_index = 0; - uint32_t i; - if (histogram_argb == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - for (i = 0; i < histogram_image_xysize; ++i) { - const int symbol_index = histogram_symbols[i] & 0xffff; - histogram_argb[i] = (symbol_index << 8); - if (symbol_index >= max_index) { - max_index = symbol_index + 1; + // Huffman image + meta huffman. + { + const int write_histogram_image = (histogram_image_size > 1); + VP8LPutBits(bw, write_histogram_image, 1); + if (write_histogram_image) { + uint32_t* const histogram_argb = + (uint32_t*)WebPSafeMalloc(histogram_image_xysize, + sizeof(*histogram_argb)); + int max_index = 0; + uint32_t i; + if (histogram_argb == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + for (i = 0; i < histogram_image_xysize; ++i) { + const int symbol_index = histogram_symbols[i] & 0xffff; + histogram_argb[i] = (symbol_index << 8); + if (symbol_index >= max_index) { + max_index = symbol_index + 1; + } } + histogram_image_size = max_index; + + VP8LPutBits(bw, histogram_bits - 2, 3); + err = EncodeImageNoHuffman( + bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2], + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), quality, low_effort); + WebPSafeFree(histogram_argb); + if (err != VP8_ENC_OK) goto Error; } - histogram_image_size = max_index; - - VP8LPutBits(bw, histogram_bits - 2, 3); - err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, - VP8LSubSampleSize(width, histogram_bits), - VP8LSubSampleSize(height, histogram_bits), - quality, low_effort); - WebPSafeFree(histogram_argb); - if (err != VP8_ENC_OK) goto Error; } - } - // Store Huffman codes. - { - int i; - int max_tokens = 0; - huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, - sizeof(*huff_tree)); - if (huff_tree == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Find maximum number of symbols for the huffman tree-set. - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - if (max_tokens < codes->num_symbols) { - max_tokens = codes->num_symbols; + // Store Huffman codes. + { + int i; + int max_tokens = 0; + // Find maximum number of symbols for the huffman tree-set. + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); } } - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, - sizeof(*tokens)); - if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + // Store actual literals. + hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); + err = StoreImageToBitMask(bw, width, histogram_bits, refs_best, + histogram_symbols, huffman_codes); + // Keep track of the smallest image so far. + if (lz77s_idx == 0 || + VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) { + *hdr_size = hdr_size_tmp; + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + VP8LBitWriterSwap(bw, &bw_best); } - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - StoreHuffmanCode(bw, huff_tree, tokens, codes); - ClearHuffmanTreeIfOnlyOneSymbol(codes); + // Reset the bit writer for the following iteration if any. + if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw); + WebPSafeFree(tokens); + tokens = NULL; + if (huffman_codes != NULL) { + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); + huffman_codes = NULL; } } - - *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); - // Store actual literals. - err = StoreImageToBitMask(bw, width, histogram_bits, &refs, - histogram_symbols, huffman_codes); - *data_size = - (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + VP8LBitWriterSwap(bw, &bw_best); Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); - VP8LFreeHistogramSet(tmp_histos); - VP8LBackwardRefsClear(&refs); + VP8LFreeHistogram(tmp_histo); if (huffman_codes != NULL) { WebPSafeFree(huffman_codes->codes); WebPSafeFree(huffman_codes); } WebPSafeFree(histogram_symbols); + VP8LBitWriterWipeOut(&bw_best); return err; } @@ -1005,11 +1095,11 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); VP8LPutBits(bw, pred_bits - 2, 3); - return EncodeImageNoHuffman(bw, enc->transform_data_, - (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)enc->refs_, // cast const away - transform_width, transform_height, - quality, low_effort); + return EncodeImageNoHuffman( + bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)&enc->refs_[0], // cast const away + (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + quality, low_effort); } static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, @@ -1026,11 +1116,11 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); assert(ccolor_transform_bits >= 2); VP8LPutBits(bw, ccolor_transform_bits - 2, 3); - return EncodeImageNoHuffman(bw, enc->transform_data_, - (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)enc->refs_, // cast const away - transform_width, transform_height, - quality, low_effort); + return EncodeImageNoHuffman( + bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)&enc->refs_[0], // cast const away + (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + quality, low_effort); } // ----------------------------------------------------------------------------- @@ -1144,6 +1234,7 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, } enc->transform_mem_ = mem; enc->transform_mem_size_ = (size_t)mem_size; + enc->argb_content_ = kEncoderNone; } enc->argb_ = mem; mem = (uint32_t*)WEBP_ALIGN(mem + image_size); @@ -1164,11 +1255,13 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { int y; err = AllocateTransformBuffer(enc, width, height); if (err != VP8_ENC_OK) return err; + if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK; for (y = 0; y < height; ++y) { memcpy(enc->argb_ + y * width, picture->argb + y * picture->argb_stride, width * sizeof(*enc->argb_)); } + enc->argb_content_ = kEncoderARGB; assert(enc->current_width_ == width); return VP8_ENC_OK; } @@ -1215,12 +1308,13 @@ static WEBP_INLINE uint32_t ApplyPaletteHash0(uint32_t color) { static WEBP_INLINE uint32_t ApplyPaletteHash1(uint32_t color) { // Forget about alpha. - return ((color & 0x00ffffffu) * 4222244071u) >> (32 - PALETTE_INV_SIZE_BITS); + return ((uint32_t)((color & 0x00ffffffu) * 4222244071ull)) >> + (32 - PALETTE_INV_SIZE_BITS); } static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { // Forget about alpha. - return (color & 0x00ffffffu) * ((1u << 31) - 1) >> + return ((uint32_t)((color & 0x00ffffffu) * ((1ull << 31) - 1))) >> (32 - PALETTE_INV_SIZE_BITS); } @@ -1346,6 +1440,7 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, err = ApplyPalette(src, src_stride, enc->argb_, enc->current_width_, palette, palette_size, width, height, xbits); + enc->argb_content_ = kEncoderPalette; return err; } @@ -1364,8 +1459,9 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); } tmp_palette[0] = palette[0]; - return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_, - palette_size, 1, 20 /* quality */, low_effort); + return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, + &enc->refs_[0], &enc->refs_[1], palette_size, 1, + 20 /* quality */, low_effort); } #ifdef WEBP_EXPERIMENTAL_FEATURES @@ -1400,10 +1496,11 @@ static WebPEncodingError EncodeDeltaPalettePredictorImage( VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); VP8LPutBits(bw, pred_bits - 2, 3); - err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_, - (VP8LBackwardRefs*)enc->refs_, // cast const away - transform_width, transform_height, - quality, low_effort); + err = EncodeImageNoHuffman( + bw, predictors, &enc->hash_chain_, + (VP8LBackwardRefs*)&enc->refs_[0], // cast const away + (VP8LBackwardRefs*)&enc->refs_[1], + transform_width, transform_height, quality, low_effort); WebPSafeFree(predictors); return err; } @@ -1422,6 +1519,7 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, } enc->config_ = config; enc->pic_ = picture; + enc->argb_content_ = kEncoderNone; VP8LEncDspInit(); @@ -1430,9 +1528,9 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, static void VP8LEncoderDelete(VP8LEncoder* enc) { if (enc != NULL) { + int i; VP8LHashChainClear(&enc->hash_chain_); - VP8LBackwardRefsClear(&enc->refs_[0]); - VP8LBackwardRefsClear(&enc->refs_[1]); + for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); ClearTransformBuffer(enc); WebPSafeFree(enc); } @@ -1441,134 +1539,347 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { // ----------------------------------------------------------------------------- // Main call -WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const WebPPicture* const picture, - VP8LBitWriter* const bw, int use_cache) { +typedef struct { + const WebPConfig* config_; + const WebPPicture* picture_; + VP8LBitWriter* bw_; + VP8LEncoder* enc_; + int use_cache_; + CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX]; + int num_crunch_configs_; + int red_and_blue_always_zero_; + WebPEncodingError err_; + WebPAuxStats* stats_; +} StreamEncodeContext; + +static int EncodeStreamHook(void* input, void* data2) { + StreamEncodeContext* const params = (StreamEncodeContext*)input; + const WebPConfig* const config = params->config_; + const WebPPicture* const picture = params->picture_; + VP8LBitWriter* const bw = params->bw_; + VP8LEncoder* const enc = params->enc_; + const int use_cache = params->use_cache_; + const CrunchConfig* const crunch_configs = params->crunch_configs_; + const int num_crunch_configs = params->num_crunch_configs_; + const int red_and_blue_always_zero = params->red_and_blue_always_zero_; +#if !defined(WEBP_DISABLE_STATS) + WebPAuxStats* const stats = params->stats_; +#endif WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); +#if (WEBP_NEAR_LOSSLESS == 1) || defined(WEBP_EXPERIMENTAL_FEATURES) const int width = picture->width; +#endif const int height = picture->height; - VP8LEncoder* const enc = VP8LEncoderNew(config, picture); const size_t byte_position = VP8LBitWriterNumBytes(bw); +#if (WEBP_NEAR_LOSSLESS == 1) int use_near_lossless = 0; +#endif int hdr_size = 0; int data_size = 0; int use_delta_palette = 0; + int idx; + size_t best_size = 0; + VP8LBitWriter bw_init = *bw, bw_best; + (void)data2; - if (enc == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - - // --------------------------------------------------------------------------- - // Analyze image (entropy, num_palettes etc) - - if (!AnalyzeAndInit(enc)) { + if (!VP8LBitWriterInit(&bw_best, 0) || + (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - // Apply near-lossless preprocessing. - use_near_lossless = - (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; - if (use_near_lossless) { - if (!VP8ApplyNearLossless(width, height, picture->argb, - config->near_lossless)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + for (idx = 0; idx < num_crunch_configs; ++idx) { + const int entropy_idx = crunch_configs[idx].entropy_idx_; + enc->use_palette_ = (entropy_idx == kPalette); + enc->use_subtract_green_ = + (entropy_idx == kSubGreen) || (entropy_idx == kSpatialSubGreen); + enc->use_predict_ = + (entropy_idx == kSpatial) || (entropy_idx == kSpatialSubGreen); + if (low_effort) { + enc->use_cross_color_ = 0; + } else { + enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; } - } + // Reset any parameter in the encoder that is set in the previous iteration. + enc->cache_bits_ = 0; + VP8LBackwardRefsClear(&enc->refs_[0]); + VP8LBackwardRefsClear(&enc->refs_[1]); -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (config->use_delta_palette) { - enc->use_predict_ = 1; - enc->use_cross_color_ = 0; - enc->use_subtract_green_ = 0; - enc->use_palette_ = 1; - err = MakeInputImageCopy(enc); - if (err != VP8_ENC_OK) goto Error; - err = WebPSearchOptimalDeltaPalette(enc); - if (err != VP8_ENC_OK) goto Error; - if (enc->use_palette_) { +#if (WEBP_NEAR_LOSSLESS == 1) + // Apply near-lossless preprocessing. + use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ && + !enc->use_predict_; + if (use_near_lossless) { err = AllocateTransformBuffer(enc, width, height); if (err != VP8_ENC_OK) goto Error; - err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort); + if ((enc->argb_content_ != kEncoderNearLossless) && + !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + enc->argb_content_ = kEncoderNearLossless; + } else { + enc->argb_content_ = kEncoderNone; + } +#else + enc->argb_content_ = kEncoderNone; +#endif + +#ifdef WEBP_EXPERIMENTAL_FEATURES + if (config->use_delta_palette) { + enc->use_predict_ = 1; + enc->use_cross_color_ = 0; + enc->use_subtract_green_ = 0; + enc->use_palette_ = 1; + if (enc->argb_content_ != kEncoderNearLossless && + enc->argb_content_ != kEncoderPalette) { + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; + } + err = WebPSearchOptimalDeltaPalette(enc); if (err != VP8_ENC_OK) goto Error; - use_delta_palette = 1; + if (enc->use_palette_) { + err = AllocateTransformBuffer(enc, width, height); + if (err != VP8_ENC_OK) goto Error; + err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort); + if (err != VP8_ENC_OK) goto Error; + use_delta_palette = 1; + } } - } #endif // WEBP_EXPERIMENTAL_FEATURES - // Encode palette - if (enc->use_palette_) { - err = EncodePalette(bw, low_effort, enc); - if (err != VP8_ENC_OK) goto Error; - err = MapImageFromPalette(enc, use_delta_palette); - if (err != VP8_ENC_OK) goto Error; - // If using a color cache, do not have it bigger than the number of colors. - if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { - enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1; - } - } - if (!use_delta_palette) { - // In case image is not packed. - if (enc->argb_ == NULL) { - err = MakeInputImageCopy(enc); + // Encode palette + if (enc->use_palette_) { + err = EncodePalette(bw, low_effort, enc); + if (err != VP8_ENC_OK) goto Error; + err = MapImageFromPalette(enc, use_delta_palette); if (err != VP8_ENC_OK) goto Error; + // If using a color cache, do not have it bigger than the number of + // colors. + if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { + enc->cache_bits_ = BitsLog2Floor(enc->palette_size_) + 1; + } } + if (!use_delta_palette) { + // In case image is not packed. + if (enc->argb_content_ != kEncoderNearLossless && + enc->argb_content_ != kEncoderPalette) { + err = MakeInputImageCopy(enc); + if (err != VP8_ENC_OK) goto Error; + } - // ------------------------------------------------------------------------- - // Apply transforms and write transform data. + // ----------------------------------------------------------------------- + // Apply transforms and write transform data. - if (enc->use_subtract_green_) { - ApplySubtractGreen(enc, enc->current_width_, height, bw); - } + if (enc->use_subtract_green_) { + ApplySubtractGreen(enc, enc->current_width_, height, bw); + } - if (enc->use_predict_) { - err = ApplyPredictFilter(enc, enc->current_width_, height, quality, - low_effort, enc->use_subtract_green_, bw); - if (err != VP8_ENC_OK) goto Error; + if (enc->use_predict_) { + err = ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, enc->use_subtract_green_, bw); + if (err != VP8_ENC_OK) goto Error; + } + + if (enc->use_cross_color_) { + err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, + low_effort, bw); + if (err != VP8_ENC_OK) goto Error; + } } - if (enc->use_cross_color_) { - err = ApplyCrossColorFilter(enc, enc->current_width_, - height, quality, low_effort, bw); - if (err != VP8_ENC_OK) goto Error; + VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms. + + // ------------------------------------------------------------------------- + // Encode and write the transformed image. + err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, + enc->current_width_, height, quality, low_effort, + use_cache, &crunch_configs[idx], + &enc->cache_bits_, enc->histo_bits_, + byte_position, &hdr_size, &data_size); + if (err != VP8_ENC_OK) goto Error; + + // If we are better than what we already have. + if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) { + best_size = VP8LBitWriterNumBytes(bw); + // Store the BitWriter. + VP8LBitWriterSwap(bw, &bw_best); +#if !defined(WEBP_DISABLE_STATS) + // Update the stats. + if (stats != NULL) { + stats->lossless_features = 0; + if (enc->use_predict_) stats->lossless_features |= 1; + if (enc->use_cross_color_) stats->lossless_features |= 2; + if (enc->use_subtract_green_) stats->lossless_features |= 4; + if (enc->use_palette_) stats->lossless_features |= 8; + stats->histogram_bits = enc->histo_bits_; + stats->transform_bits = enc->transform_bits_; + stats->cache_bits = enc->cache_bits_; + stats->palette_size = enc->palette_size_; + stats->lossless_size = (int)(best_size - byte_position); + stats->lossless_hdr_size = hdr_size; + stats->lossless_data_size = data_size; + } +#endif } + // Reset the bit writer for the following iteration if any. + if (num_crunch_configs > 1) VP8LBitWriterReset(&bw_init, bw); } + VP8LBitWriterSwap(&bw_best, bw); - VP8LPutBits(bw, !TRANSFORM_PRESENT, 1); // No more transforms. +Error: + VP8LBitWriterWipeOut(&bw_best); + params->err_ = err; + // The hook should return false in case of error. + return (err == VP8_ENC_OK); +} - // --------------------------------------------------------------------------- - // Encode and write the transformed image. - err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, - enc->current_width_, height, quality, low_effort, - use_cache, &enc->cache_bits_, enc->histo_bits_, - byte_position, &hdr_size, &data_size); - if (err != VP8_ENC_OK) goto Error; +WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, + VP8LBitWriter* const bw_main, + int use_cache) { + WebPEncodingError err = VP8_ENC_OK; + VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture); + VP8LEncoder* enc_side = NULL; + CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX]; + int num_crunch_configs_main, num_crunch_configs_side = 0; + int idx; + int red_and_blue_always_zero = 0; + WebPWorker worker_main, worker_side; + StreamEncodeContext params_main, params_side; + // The main thread uses picture->stats, the side thread uses stats_side. + WebPAuxStats stats_side; + VP8LBitWriter bw_side; + const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface(); + int ok_main; - if (picture->stats != NULL) { - WebPAuxStats* const stats = picture->stats; - stats->lossless_features = 0; - if (enc->use_predict_) stats->lossless_features |= 1; - if (enc->use_cross_color_) stats->lossless_features |= 2; - if (enc->use_subtract_green_) stats->lossless_features |= 4; - if (enc->use_palette_) stats->lossless_features |= 8; - stats->histogram_bits = enc->histo_bits_; - stats->transform_bits = enc->transform_bits_; - stats->cache_bits = enc->cache_bits_; - stats->palette_size = enc->palette_size_; - stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position); - stats->lossless_hdr_size = hdr_size; - stats->lossless_data_size = data_size; + // Analyze image (entropy, num_palettes etc) + if (enc_main == NULL || + !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, + &red_and_blue_always_zero) || + !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; } - Error: - VP8LEncoderDelete(enc); + // Split the configs between the main and side threads (if any). + if (config->thread_level > 0) { + num_crunch_configs_side = num_crunch_configs_main / 2; + for (idx = 0; idx < num_crunch_configs_side; ++idx) { + params_side.crunch_configs_[idx] = + crunch_configs[num_crunch_configs_main - num_crunch_configs_side + + idx]; + } + params_side.num_crunch_configs_ = num_crunch_configs_side; + } + num_crunch_configs_main -= num_crunch_configs_side; + for (idx = 0; idx < num_crunch_configs_main; ++idx) { + params_main.crunch_configs_[idx] = crunch_configs[idx]; + } + params_main.num_crunch_configs_ = num_crunch_configs_main; + + // Fill in the parameters for the thread workers. + { + const int params_size = (num_crunch_configs_side > 0) ? 2 : 1; + for (idx = 0; idx < params_size; ++idx) { + // Create the parameters for each worker. + WebPWorker* const worker = (idx == 0) ? &worker_main : &worker_side; + StreamEncodeContext* const param = + (idx == 0) ? ¶ms_main : ¶ms_side; + param->config_ = config; + param->picture_ = picture; + param->use_cache_ = use_cache; + param->red_and_blue_always_zero_ = red_and_blue_always_zero; + if (idx == 0) { + param->stats_ = picture->stats; + param->bw_ = bw_main; + param->enc_ = enc_main; + } else { + param->stats_ = (picture->stats == NULL) ? NULL : &stats_side; + // Create a side bit writer. + if (!VP8LBitWriterClone(bw_main, &bw_side)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + param->bw_ = &bw_side; + // Create a side encoder. + enc_side = VP8LEncoderNew(config, picture); + if (enc_side == NULL || !EncoderInit(enc_side)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + // Copy the values that were computed for the main encoder. + enc_side->histo_bits_ = enc_main->histo_bits_; + enc_side->transform_bits_ = enc_main->transform_bits_; + enc_side->palette_size_ = enc_main->palette_size_; + memcpy(enc_side->palette_, enc_main->palette_, + sizeof(enc_main->palette_)); + param->enc_ = enc_side; + } + // Create the workers. + worker_interface->Init(worker); + worker->data1 = param; + worker->data2 = NULL; + worker->hook = (WebPWorkerHook)EncodeStreamHook; + } + } + + // Start the second thread if needed. + if (num_crunch_configs_side != 0) { + if (!worker_interface->Reset(&worker_side)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } +#if !defined(WEBP_DISABLE_STATS) + // This line is here and not in the param initialization above to remove a + // Clang static analyzer warning. + if (picture->stats != NULL) { + memcpy(&stats_side, picture->stats, sizeof(stats_side)); + } +#endif + // This line is only useful to remove a Clang static analyzer warning. + params_side.err_ = VP8_ENC_OK; + worker_interface->Launch(&worker_side); + } + // Execute the main thread. + worker_interface->Execute(&worker_main); + ok_main = worker_interface->Sync(&worker_main); + worker_interface->End(&worker_main); + if (num_crunch_configs_side != 0) { + // Wait for the second thread. + const int ok_side = worker_interface->Sync(&worker_side); + worker_interface->End(&worker_side); + if (!ok_main || !ok_side) { + err = ok_main ? params_side.err_ : params_main.err_; + goto Error; + } + if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) { + VP8LBitWriterSwap(bw_main, &bw_side); +#if !defined(WEBP_DISABLE_STATS) + if (picture->stats != NULL) { + memcpy(picture->stats, &stats_side, sizeof(*picture->stats)); + } +#endif + } + } else { + if (!ok_main) { + err = params_main.err_; + goto Error; + } + } + +Error: + VP8LBitWriterWipeOut(&bw_side); + VP8LEncoderDelete(enc_main); + VP8LEncoderDelete(enc_side); return err; } +#undef CRUNCH_CONFIGS_MAX +#undef CRUNCH_CONFIGS_LZ77_MAX + int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture) { int width, height; @@ -1642,11 +1953,13 @@ int VP8LEncodeImage(const WebPConfig* const config, if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort; +#if !defined(WEBP_DISABLE_STATS) // Save size. if (picture->stats != NULL) { picture->stats->coded_size += (int)coded_size; picture->stats->lossless_size = (int)coded_size; } +#endif if (picture->extra_info != NULL) { const int mb_w = (width + 15) >> 4; diff --git a/src/3rdparty/libwebp/src/enc/vp8li_enc.h b/src/3rdparty/libwebp/src/enc/vp8li_enc.h index 8c5fbcb..298a4a0 100644 --- a/src/3rdparty/libwebp/src/enc/vp8li_enc.h +++ b/src/3rdparty/libwebp/src/enc/vp8li_enc.h @@ -11,14 +11,23 @@ // // Author: Vikas Arora (vikaas.arora@gmail.com) -#ifndef WEBP_ENC_VP8LI_H_ -#define WEBP_ENC_VP8LI_H_ +#ifndef WEBP_ENC_VP8LI_ENC_H_ +#define WEBP_ENC_VP8LI_ENC_H_ -#include "./backward_references_enc.h" -#include "./histogram_enc.h" -#include "../utils/bit_writer_utils.h" -#include "../webp/encode.h" -#include "../webp/format_constants.h" +#ifdef HAVE_CONFIG_H +#include "src/webp/config.h" +#endif +// Either WEBP_NEAR_LOSSLESS is defined as 0 in config.h when compiling to +// disable near-lossless, or it is enabled by default. +#ifndef WEBP_NEAR_LOSSLESS +#define WEBP_NEAR_LOSSLESS 1 +#endif + +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/utils/bit_writer_utils.h" +#include "src/webp/encode.h" +#include "src/webp/format_constants.h" #ifdef __cplusplus extern "C" { @@ -27,16 +36,24 @@ extern "C" { // maximum value of transform_bits_ in VP8LEncoder. #define MAX_TRANSFORM_BITS 6 +typedef enum { + kEncoderNone = 0, + kEncoderARGB, + kEncoderNearLossless, + kEncoderPalette +} VP8LEncoderARGBContent; + typedef struct { const WebPConfig* config_; // user configuration and parameters const WebPPicture* pic_; // input picture. - uint32_t* argb_; // Transformed argb image data. - uint32_t* argb_scratch_; // Scratch memory for argb rows - // (used for prediction). - uint32_t* transform_data_; // Scratch memory for transform data. - uint32_t* transform_mem_; // Currently allocated memory. - size_t transform_mem_size_; // Currently allocated memory size. + uint32_t* argb_; // Transformed argb image data. + VP8LEncoderARGBContent argb_content_; // Content type of the argb buffer. + uint32_t* argb_scratch_; // Scratch memory for argb rows + // (used for prediction). + uint32_t* transform_data_; // Scratch memory for transform data. + uint32_t* transform_mem_; // Currently allocated memory. + size_t transform_mem_size_; // Currently allocated memory size. int current_width_; // Corresponds to packed image width. @@ -54,8 +71,7 @@ typedef struct { uint32_t palette_[MAX_PALETTE_SIZE]; // Some 'scratch' (potentially large) objects. - struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to - // LZ77 & RLE coding. + struct VP8LBackwardRefs refs_[3]; // Backward Refs array for temporaries. VP8LHashChain hash_chain_; // HashChain data for constructing // backward references. } VP8LEncoder; @@ -75,6 +91,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const WebPPicture* const picture, VP8LBitWriter* const bw, int use_cache); +#if (WEBP_NEAR_LOSSLESS == 1) +// in near_lossless.c +// Near lossless preprocessing in RGB color-space. +int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, + uint32_t* const argb_dst); +#endif + //------------------------------------------------------------------------------ // Image transforms in predictor.c. @@ -92,4 +115,4 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality, } // extern "C" #endif -#endif /* WEBP_ENC_VP8LI_H_ */ +#endif /* WEBP_ENC_VP8LI_ENC_H_ */ diff --git a/src/3rdparty/libwebp/src/enc/webp_enc.c b/src/3rdparty/libwebp/src/enc/webp_enc.c index f18461e..283cda8 100644 --- a/src/3rdparty/libwebp/src/enc/webp_enc.c +++ b/src/3rdparty/libwebp/src/enc/webp_enc.c @@ -16,10 +16,10 @@ #include <string.h> #include <math.h> -#include "./cost_enc.h" -#include "./vp8i_enc.h" -#include "./vp8li_enc.h" -#include "../utils/utils.h" +#include "src/enc/cost_enc.h" +#include "src/enc/vp8i_enc.h" +#include "src/enc/vp8li_enc.h" +#include "src/utils/utils.h" // #define PRINT_MEMORY_INFO @@ -207,7 +207,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, enc->preds_w_ = preds_w; enc->mb_info_ = (VP8MBInfo*)mem; mem += info_size; - enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_; + enc->preds_ = mem + 1 + enc->preds_w_; mem += preds_size; enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem); mem += nz_size; @@ -216,7 +216,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, // top samples (all 16-aligned) mem = (uint8_t*)WEBP_ALIGN(mem); - enc->y_top_ = (uint8_t*)mem; + enc->y_top_ = mem; enc->uv_top_ = enc->y_top_ + top_stride; mem += 2 * top_stride; assert(mem <= (uint8_t*)enc + size); @@ -256,6 +256,7 @@ static int DeleteVP8Encoder(VP8Encoder* enc) { //------------------------------------------------------------------------------ +#if !defined(WEBP_DISABLE_STATS) static double GetPSNR(uint64_t err, uint64_t size) { return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.; } @@ -270,8 +271,10 @@ static void FinalizePSNR(const VP8Encoder* const enc) { stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2); stats->PSNR[4] = (float)GetPSNR(sse[3], size); } +#endif // !defined(WEBP_DISABLE_STATS) static void StoreStats(VP8Encoder* const enc) { +#if !defined(WEBP_DISABLE_STATS) WebPAuxStats* const stats = enc->pic_->stats; if (stats != NULL) { int i, s; @@ -288,7 +291,9 @@ static void StoreStats(VP8Encoder* const enc) { stats->block_count[i] = enc->block_count_[i]; } } +#else // defined(WEBP_DISABLE_STATS) WebPReportProgress(enc->pic_, 100, &enc->percent_); // done! +#endif // !defined(WEBP_DISABLE_STATS) } int WebPEncodingSetError(const WebPPicture* const pic, @@ -336,10 +341,6 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (!config->lossless) { VP8Encoder* enc = NULL; - if (!config->exact) { - WebPCleanupTransparentArea(pic); - } - if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { // Make sure we have YUVA samples. if (config->use_sharp_yuv || (config->preprocessing & 4)) { @@ -361,6 +362,10 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { } } + if (!config->exact) { + WebPCleanupTransparentArea(pic); + } + enc = InitVP8Encoder(config, pic); if (enc == NULL) return 0; // pic->error is already set. // Note: each of the tasks below account for 20% in the progress report. |