diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/enc')
24 files changed, 3692 insertions, 2629 deletions
diff --git a/src/3rdparty/libwebp/src/enc/alpha.c b/src/3rdparty/libwebp/src/enc/alpha.c index 21d4b5c..79cb94d 100644 --- a/src/3rdparty/libwebp/src/enc/alpha.c +++ b/src/3rdparty/libwebp/src/enc/alpha.c @@ -17,6 +17,7 @@ #include "./vp8enci.h" #include "../utils/filters.h" #include "../utils/quant_levels.h" +#include "../utils/utils.h" #include "../webp/format_constants.h" // ----------------------------------------------------------------------------- @@ -34,7 +35,7 @@ // // 'output' corresponds to the buffer containing compressed alpha data. // This buffer is allocated by this method and caller should call -// free(*output) when done. +// WebPSafeFree(*output) when done. // 'output_size' corresponds to size of this compressed alpha buffer. // // Returns 1 on successfully encoding the alpha and @@ -46,12 +47,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, int effort_level, // in [0..6] range - VP8BitWriter* const bw, + VP8LBitWriter* const bw, WebPAuxStats* const stats) { int ok = 0; WebPConfig config; WebPPicture picture; - VP8LBitWriter tmp_bw; WebPPictureInit(&picture); picture.width = width; @@ -83,16 +83,15 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, config.quality = 8.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); - ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); - ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK); + ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK); WebPPictureFree(&picture); - if (ok) { - const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw); - const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw); - VP8BitWriterAppend(bw, buffer, buffer_size); + ok = ok && !bw->error_; + if (!ok) { + VP8LBitWriterDestroy(bw); + return 0; } - VP8LBitWriterDestroy(&tmp_bw); - return ok && !bw->error_; + return 1; + } // ----------------------------------------------------------------------------- @@ -114,8 +113,10 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, const uint8_t* alpha_src; WebPFilterFunc filter_func; uint8_t header; - size_t expected_size; const size_t data_size = width * height; + const uint8_t* output = NULL; + size_t output_size = 0; + VP8LBitWriter tmp_bw; assert((uint64_t)data_size == (uint64_t)width * height); // as per spec assert(filter >= 0 && filter < WEBP_FILTER_LAST); @@ -124,15 +125,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, assert(sizeof(header) == ALPHA_HEADER_LEN); // TODO(skal): have a common function and #define's to validate alpha params. - expected_size = - (method == ALPHA_NO_COMPRESSION) ? (ALPHA_HEADER_LEN + data_size) - : (data_size >> 5); - header = method | (filter << 2); - if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - - VP8BitWriterInit(&result->bw, expected_size); - VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); - filter_func = WebPFilters[filter]; if (filter_func != NULL) { filter_func(data, width, height, width, tmp_alpha); @@ -141,14 +133,42 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, alpha_src = data; } + if (method != ALPHA_NO_COMPRESSION) { + ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3); + ok = ok && EncodeLossless(alpha_src, width, height, effort_level, + &tmp_bw, &result->stats); + if (ok) { + output = VP8LBitWriterFinish(&tmp_bw); + output_size = VP8LBitWriterNumBytes(&tmp_bw); + if (output_size > data_size) { + // compressed size is larger than source! Revert to uncompressed mode. + method = ALPHA_NO_COMPRESSION; + VP8LBitWriterDestroy(&tmp_bw); + } + } else { + VP8LBitWriterDestroy(&tmp_bw); + return 0; + } + } + if (method == ALPHA_NO_COMPRESSION) { - ok = VP8BitWriterAppend(&result->bw, alpha_src, width * height); - ok = ok && !result->bw.error_; - } else { - ok = EncodeLossless(alpha_src, width, height, effort_level, - &result->bw, &result->stats); - VP8BitWriterFinish(&result->bw); + output = alpha_src; + output_size = data_size; + ok = 1; + } + + // Emit final result. + header = method | (filter << 2); + if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; + + VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); + ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); + + if (method != ALPHA_NO_COMPRESSION) { + VP8LBitWriterDestroy(&tmp_bw); } + ok = ok && !result->bw.error_; result->score = VP8BitWriterSize(&result->bw); return ok; } @@ -231,7 +251,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, GetFilterMap(alpha, width, height, filter, effort_level); InitFilterTrial(&best); if (try_map != FILTER_TRY_NONE) { - uint8_t* filtered_alpha = (uint8_t*)malloc(data_size); + uint8_t* filtered_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (filtered_alpha == NULL) return 0; for (filter = WEBP_FILTER_NONE; ok && try_map; ++filter, try_map >>= 1) { @@ -248,7 +268,7 @@ static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height, } } } - free(filtered_alpha); + WebPSafeFree(filtered_alpha); } else { ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE, reduce_levels, effort_level, NULL, &best); @@ -298,7 +318,7 @@ static int EncodeAlpha(VP8Encoder* const enc, filter = WEBP_FILTER_NONE; } - quant_alpha = (uint8_t*)malloc(data_size); + quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (quant_alpha == NULL) { return 0; } @@ -325,7 +345,7 @@ static int EncodeAlpha(VP8Encoder* const enc, } } - free(quant_alpha); + WebPSafeFree(quant_alpha); return ok; } @@ -346,7 +366,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { return 0; } if (alpha_size != (uint32_t)alpha_size) { // Sanity check. - free(alpha_data); + WebPSafeFree(alpha_data); return 0; } enc->alpha_data_size_ = (uint32_t)alpha_size; @@ -361,7 +381,7 @@ void VP8EncInitAlpha(VP8Encoder* const enc) { enc->alpha_data_size_ = 0; if (enc->thread_level_ > 0) { WebPWorker* const worker = &enc->alpha_worker_; - WebPWorkerInit(worker); + WebPGetWorkerInterface()->Init(worker); worker->data1 = enc; worker->data2 = NULL; worker->hook = (WebPWorkerHook)CompressAlphaJob; @@ -372,10 +392,11 @@ int VP8EncStartAlpha(VP8Encoder* const enc) { if (enc->has_alpha_) { if (enc->thread_level_ > 0) { WebPWorker* const worker = &enc->alpha_worker_; - if (!WebPWorkerReset(worker)) { // Makes sure worker is good to go. + // Makes sure worker is good to go. + if (!WebPGetWorkerInterface()->Reset(worker)) { return 0; } - WebPWorkerLaunch(worker); + WebPGetWorkerInterface()->Launch(worker); return 1; } else { return CompressAlphaJob(enc, NULL); // just do the job right away @@ -388,7 +409,7 @@ int VP8EncFinishAlpha(VP8Encoder* const enc) { if (enc->has_alpha_) { if (enc->thread_level_ > 0) { WebPWorker* const worker = &enc->alpha_worker_; - if (!WebPWorkerSync(worker)) return 0; // error + if (!WebPGetWorkerInterface()->Sync(worker)) return 0; // error } } return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); @@ -398,10 +419,12 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) { int ok = 1; if (enc->thread_level_ > 0) { WebPWorker* const worker = &enc->alpha_worker_; - ok = WebPWorkerSync(worker); // finish anything left in flight - WebPWorkerEnd(worker); // still need to end the worker, even if !ok + // finish anything left in flight + ok = WebPGetWorkerInterface()->Sync(worker); + // still need to end the worker, even if !ok + WebPGetWorkerInterface()->End(worker); } - free(enc->alpha_data_); + WebPSafeFree(enc->alpha_data_); enc->alpha_data_ = NULL; enc->alpha_data_size_ = 0; enc->has_alpha_ = 0; diff --git a/src/3rdparty/libwebp/src/enc/analysis.c b/src/3rdparty/libwebp/src/enc/analysis.c index 7d4cfdc..e019465 100644 --- a/src/3rdparty/libwebp/src/enc/analysis.c +++ b/src/3rdparty/libwebp/src/enc/analysis.c @@ -30,7 +30,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { const int w = enc->mb_w_; const int h = enc->mb_h_; const int majority_cnt_3_x_3_grid = 5; - uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp)); + uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp)); assert((uint64_t)(w * h) == (uint64_t)w * h); // no overflow, as per spec if (tmp == NULL) return; @@ -63,7 +63,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) { mb->segment_ = tmp[x + y * w]; } } - free(tmp); + WebPSafeFree(tmp); } //------------------------------------------------------------------------------ @@ -141,7 +141,11 @@ static void MergeHistograms(const VP8Histogram* const in, static void AssignSegments(VP8Encoder* const enc, const int alphas[MAX_ALPHA + 1]) { - const int nb = enc->segment_hdr_.num_segments_; + // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an + // explicit check is needed to avoid spurious warning about 'n + 1' exceeding + // array bounds of 'centers' with some compilers (noticed with gcc-4.9). + const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ? + enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS; int centers[NUM_MB_SEGMENTS]; int weighted_average = 0; int map[MAX_ALPHA + 1]; @@ -151,6 +155,7 @@ static void AssignSegments(VP8Encoder* const enc, int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS]; assert(nb >= 1); + assert(nb <= NUM_MB_SEGMENTS); // bracket the input for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {} @@ -225,18 +230,15 @@ static void AssignSegments(VP8Encoder* const enc, // susceptibility and set best modes for this macroblock. // Segment assignment is done later. -// Number of modes to inspect for alpha_ evaluation. For high-quality settings -// (method >= FAST_ANALYSIS_METHOD) we don't need to test all the possible modes -// during the analysis phase. -#define FAST_ANALYSIS_METHOD 4 // method above which we do partial analysis +// Number of modes to inspect for alpha_ evaluation. We don't need to test all +// the possible modes during the analysis phase: we risk falling into a local +// optimum, or be subject to boundary effect #define MAX_INTRA16_MODE 2 #define MAX_INTRA4_MODE 2 #define MAX_UV_MODE 2 static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { - const int max_mode = - (it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA16_MODE - : NUM_PRED_MODES; + const int max_mode = MAX_INTRA16_MODE; int mode; int best_alpha = DEFAULT_ALPHA; int best_mode = 0; @@ -262,9 +264,7 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) { uint8_t modes[16]; - const int max_mode = - (it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA4_MODE - : NUM_BMODES; + const int max_mode = MAX_INTRA4_MODE; int i4_alpha; VP8Histogram total_histo = { { 0 } }; int cur_histo = 0; @@ -306,10 +306,9 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { int best_alpha = DEFAULT_ALPHA; int best_mode = 0; - const int max_mode = - (it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_UV_MODE - : NUM_PRED_MODES; + const int max_mode = MAX_UV_MODE; int mode; + VP8MakeChroma8Preds(it); for (mode = 0; mode < max_mode; ++mode) { VP8Histogram histo = { { 0 } }; @@ -425,7 +424,7 @@ static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { // initialize the job struct with some TODOs static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, int start_row, int end_row) { - WebPWorkerInit(&job->worker); + WebPGetWorkerInterface()->Init(&job->worker); job->worker.data1 = job; job->worker.data2 = &job->it; job->worker.hook = (WebPWorkerHook)DoSegmentsJob; @@ -458,6 +457,8 @@ int VP8EncAnalyze(VP8Encoder* const enc) { #else const int do_mt = 0; #endif + const WebPWorkerInterface* const worker_interface = + WebPGetWorkerInterface(); SegmentJob main_job; if (do_mt) { SegmentJob side_job; @@ -467,23 +468,23 @@ int VP8EncAnalyze(VP8Encoder* const enc) { InitSegmentJob(enc, &side_job, split_row, last_row); // we don't need to call Reset() on main_job.worker, since we're calling // WebPWorkerExecute() on it - ok &= WebPWorkerReset(&side_job.worker); + ok &= worker_interface->Reset(&side_job.worker); // launch the two jobs in parallel if (ok) { - WebPWorkerLaunch(&side_job.worker); - WebPWorkerExecute(&main_job.worker); - ok &= WebPWorkerSync(&side_job.worker); - ok &= WebPWorkerSync(&main_job.worker); + worker_interface->Launch(&side_job.worker); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&side_job.worker); + ok &= worker_interface->Sync(&main_job.worker); } - WebPWorkerEnd(&side_job.worker); + worker_interface->End(&side_job.worker); if (ok) MergeJobs(&side_job, &main_job); // merge results together } else { // Even for single-thread case, we use the generic Worker tools. InitSegmentJob(enc, &main_job, 0, last_row); - WebPWorkerExecute(&main_job.worker); - ok &= WebPWorkerSync(&main_job.worker); + worker_interface->Execute(&main_job.worker); + ok &= worker_interface->Sync(&main_job.worker); } - WebPWorkerEnd(&main_job.worker); + worker_interface->End(&main_job.worker); if (ok) { enc->alpha_ = main_job.alpha / total_mb; enc->uv_alpha_ = main_job.uv_alpha / total_mb; diff --git a/src/3rdparty/libwebp/src/enc/backward_references.c b/src/3rdparty/libwebp/src/enc/backward_references.c index 77b4be7..a3c30aa 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.c +++ b/src/3rdparty/libwebp/src/enc/backward_references.c @@ -12,7 +12,6 @@ #include <assert.h> #include <math.h> -#include <stdio.h> #include "./backward_references.h" #include "./histogram.h" @@ -22,10 +21,12 @@ #define VALUES_IN_BYTE 256 -#define HASH_BITS 18 -#define HASH_SIZE (1 << HASH_BITS) #define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL) +#define MIN_BLOCK_SIZE 256 // minimum block size for backward references + +#define MAX_ENTROPY (1e30f) + // 1M window (4M bytes) minus 120 special codes for short distances. #define WINDOW_SIZE ((1 << 20) - 120) @@ -33,14 +34,6 @@ #define MIN_LENGTH 2 #define MAX_LENGTH 4096 -typedef struct { - // Stores the most recently added position with the given hash value. - int32_t hash_to_first_index_[HASH_SIZE]; - // chain_[pos] stores the previous position with the same hash value - // for every pixel in the image. - int32_t* chain_; -} HashChain; - // ----------------------------------------------------------------------------- static const uint8_t plane_to_code_lut[128] = { @@ -78,65 +71,152 @@ static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, // ----------------------------------------------------------------------------- // VP8LBackwardRefs -void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs) { - if (refs != NULL) { - refs->refs = NULL; - refs->size = 0; - refs->max_size = 0; +struct PixOrCopyBlock { + PixOrCopyBlock* next_; // next block (or NULL) + PixOrCopy* start_; // data start + int size_; // currently used size +}; + +static void ClearBackwardRefs(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + if (refs->tail_ != NULL) { + *refs->tail_ = refs->free_blocks_; // recycle all blocks at once } + refs->free_blocks_ = refs->refs_; + refs->tail_ = &refs->refs_; + refs->last_block_ = NULL; + refs->refs_ = NULL; } -void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) { - if (refs != NULL) { - free(refs->refs); - VP8LInitBackwardRefs(refs); +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { + assert(refs != NULL); + ClearBackwardRefs(refs); + while (refs->free_blocks_ != NULL) { + PixOrCopyBlock* const next = refs->free_blocks_->next_; + WebPSafeFree(refs->free_blocks_); + refs->free_blocks_ = next; } } -int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) { +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { assert(refs != NULL); - refs->size = 0; - refs->max_size = 0; - refs->refs = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size, - sizeof(*refs->refs)); - if (refs->refs == NULL) return 0; - refs->max_size = max_size; + memset(refs, 0, sizeof(*refs)); + refs->tail_ = &refs->refs_; + refs->block_size_ = + (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size; +} + +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c; + c.cur_block_ = refs->refs_; + if (refs->refs_ != NULL) { + c.cur_pos = c.cur_block_->start_; + c.last_pos_ = c.cur_pos + c.cur_block_->size_; + } else { + c.cur_pos = NULL; + c.last_pos_ = NULL; + } + return c; +} + +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) { + PixOrCopyBlock* const b = c->cur_block_->next_; + c->cur_pos = (b == NULL) ? NULL : b->start_; + c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_; + c->cur_block_ = b; +} + +// Create a new block, either from the free list or allocated +static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { + PixOrCopyBlock* b = refs->free_blocks_; + if (b == NULL) { // allocate new memory chunk + const size_t total_size = + sizeof(*b) + refs->block_size_ * sizeof(*b->start_); + b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size); + if (b == NULL) { + refs->error_ |= 1; + return NULL; + } + b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned + } else { // recycle from free-list + refs->free_blocks_ = b->next_; + } + *refs->tail_ = b; + refs->tail_ = &b->next_; + refs->last_block_ = b; + b->next_ = NULL; + b->size_ = 0; + return b; +} + +static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs, + const PixOrCopy v) { + PixOrCopyBlock* b = refs->last_block_; + if (b == NULL || b->size_ == refs->block_size_) { + b = BackwardRefsNewBlock(refs); + if (b == NULL) return; // refs->error_ is set + } + b->start_[b->size_++] = v; +} + +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst) { + const PixOrCopyBlock* b = src->refs_; + ClearBackwardRefs(dst); + assert(src->block_size_ == dst->block_size_); + while (b != NULL) { + PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst); + if (new_b == NULL) return 0; // dst->error_ is set + memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_)); + new_b->size_ = b->size_; + b = b->next_; + } return 1; } // ----------------------------------------------------------------------------- // Hash chains -static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) { - uint64_t key = ((uint64_t)(argb[1]) << 32) | argb[0]; - key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS); - return key; -} - -static int HashChainInit(HashChain* const p, int size) { +// initialize as empty +static void HashChainInit(VP8LHashChain* const p) { int i; - p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_)); - if (p->chain_ == NULL) { - return 0; - } - for (i = 0; i < size; ++i) { + assert(p != NULL); + for (i = 0; i < p->size_; ++i) { p->chain_[i] = -1; } for (i = 0; i < HASH_SIZE; ++i) { p->hash_to_first_index_[i] = -1; } +} + +int VP8LHashChainInit(VP8LHashChain* const p, int size) { + assert(p->size_ == 0); + assert(p->chain_ == NULL); + assert(size > 0); + p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_)); + if (p->chain_ == NULL) return 0; + p->size_ = size; + HashChainInit(p); return 1; } -static void HashChainDelete(HashChain* const p) { - if (p != NULL) { - free(p->chain_); - free(p); - } +void VP8LHashChainClear(VP8LHashChain* const p) { + assert(p != NULL); + WebPSafeFree(p->chain_); + p->size_ = 0; + p->chain_ = NULL; +} + +// ----------------------------------------------------------------------------- + +static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) { + uint64_t key = ((uint64_t)argb[1] << 32) | argb[0]; + key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS); + return key; } // Insertion of two pixels at a time. -static void HashChainInsert(HashChain* const p, +static void HashChainInsert(VP8LHashChain* const p, const uint32_t* const argb, int pos) { const uint64_t hash_code = GetPixPairHash64(argb); p->chain_[pos] = p->hash_to_first_index_[hash_code]; @@ -161,7 +241,7 @@ static void GetParamsForHashChainFindCopy(int quality, int xsize, *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2; } -static int HashChainFindCopy(const HashChain* const p, +static int HashChainFindCopy(const VP8LHashChain* const p, int base_position, int xsize_signed, const uint32_t* const argb, int max_len, int window_size, int iter_pos, int iter_limit, @@ -185,10 +265,8 @@ static int HashChainFindCopy(const HashChain* const p, uint64_t val; uint32_t curr_length; uint32_t distance; - const uint64_t* const ptr1 = - (const uint64_t*)(argb + pos + best_length - 1); - const uint64_t* const ptr2 = - (const uint64_t*)(argb_start + best_length - 1); + const uint32_t* const ptr1 = (argb + pos + best_length - 1); + const uint32_t* const ptr2 = (argb_start + best_length - 1); if (iter_pos < 0) { if (iter_pos < iter_limit || best_val >= 0xff0000) { @@ -199,7 +277,7 @@ static int HashChainFindCopy(const HashChain* const p, // Before 'expensive' linear match, check if the two arrays match at the // current best length index and also for the succeeding elements. - if (*ptr1 != *ptr2) continue; + if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue; curr_length = FindMatchLength(argb + pos, argb_start, max_len); if (curr_length < best_length) continue; @@ -237,64 +315,61 @@ static int HashChainFindCopy(const HashChain* const p, } static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) { - int size = refs->size; while (length >= MAX_LENGTH) { - refs->refs[size++] = PixOrCopyCreateCopy(1, MAX_LENGTH); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH)); length -= MAX_LENGTH; } if (length > 0) { - refs->refs[size++] = PixOrCopyCreateCopy(1, length); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length)); } - refs->size = size; } -static void BackwardReferencesRle(int xsize, int ysize, - const uint32_t* const argb, - VP8LBackwardRefs* const refs) { +static int BackwardReferencesRle(int xsize, int ysize, + const uint32_t* const argb, + VP8LBackwardRefs* const refs) { const int pix_count = xsize * ysize; int match_len = 0; int i; - refs->size = 0; + ClearBackwardRefs(refs); PushBackCopy(refs, match_len); // i=0 case - refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[0]); + BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0])); for (i = 1; i < pix_count; ++i) { if (argb[i] == argb[i - 1]) { ++match_len; } else { PushBackCopy(refs, match_len); match_len = 0; - refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[i]); + BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i])); } } PushBackCopy(refs, match_len); + return !refs->error_; } static int BackwardReferencesHashChain(int xsize, int ysize, const uint32_t* const argb, int cache_bits, int quality, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int i; int ok = 0; int cc_init = 0; const int use_color_cache = (cache_bits > 0); const int pix_count = xsize * ysize; - HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); VP8LColorCache hashers; int window_size = WINDOW_SIZE; int iter_pos = 1; int iter_limit = -1; - if (hash_chain == NULL) return 0; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - if (!HashChainInit(hash_chain, pix_count)) goto Error; - - refs->size = 0; + ClearBackwardRefs(refs); GetParamsForHashChainFindCopy(quality, xsize, cache_bits, &window_size, &iter_pos, &iter_limit); + HashChainInit(hash_chain); for (i = 0; i < pix_count; ) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; @@ -320,14 +395,15 @@ static int BackwardReferencesHashChain(int xsize, int ysize, if (len2 > len + 1) { const uint32_t pixel = argb[i]; // Alternative#2 is a better match. So push pixel at 'i' as literal. + PixOrCopy v; if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix); + v = PixOrCopyCreateCacheIdx(ix); } else { if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel); + v = PixOrCopyCreateLiteral(pixel); } - ++refs->size; + BackwardRefsCursorAdd(refs, v); i++; // Backward reference to be done for next pixel. len = len2; offset = offset2; @@ -336,7 +412,7 @@ static int BackwardReferencesHashChain(int xsize, int ysize, if (len >= MAX_LENGTH) { len = MAX_LENGTH - 1; } - refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); @@ -352,25 +428,25 @@ static int BackwardReferencesHashChain(int xsize, int ysize, i += len; } else { const uint32_t pixel = argb[i]; + PixOrCopy v; if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) { // push pixel as a PixOrCopyCreateCacheIdx pixel const int ix = VP8LColorCacheGetIndex(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix); + v = PixOrCopyCreateCacheIdx(ix); } else { if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel); - refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel); + v = PixOrCopyCreateLiteral(pixel); } - ++refs->size; + BackwardRefsCursorAdd(refs, v); if (i + 1 < pix_count) { HashChainInsert(hash_chain, &argb[i], i); } ++i; } } - ok = 1; + ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); return ok; } @@ -387,11 +463,12 @@ typedef struct { static int BackwardReferencesTraceBackwards( int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, int quality, int cache_bits, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( - int num_symbols, const int population_counts[], double output[]) { - int sum = 0; + int num_symbols, const uint32_t population_counts[], double output[]) { + uint32_t sum = 0; int nonzeros = 0; int i; for (i = 0; i < num_symbols; ++i) { @@ -412,39 +489,45 @@ static void ConvertPopulationCountTableToBitEstimates( static int CostModelBuild(CostModel* const m, int xsize, int ysize, int recursion_level, const uint32_t* const argb, - int quality, int cache_bits) { + int quality, int cache_bits, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs) { int ok = 0; - VP8LHistogram histo; - VP8LBackwardRefs refs; - - if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error; + VP8LHistogram* histo = NULL; + ClearBackwardRefs(refs); if (recursion_level > 0) { if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1, - argb, quality, cache_bits, &refs)) { + argb, quality, cache_bits, hash_chain, + refs)) { goto Error; } } else { if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality, - &refs)) { + hash_chain, refs)) { goto Error; } } - VP8LHistogramCreate(&histo, &refs, cache_bits); + histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; + + VP8LHistogramCreate(histo, refs, cache_bits); + ConvertPopulationCountTableToBitEstimates( - VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_); + VP8LHistogramNumCodes(histo->palette_code_bits_), + histo->literal_, m->literal_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.red_, m->red_); + VALUES_IN_BYTE, histo->red_, m->red_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.blue_, m->blue_); + VALUES_IN_BYTE, histo->blue_, m->blue_); ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo.alpha_, m->alpha_); + VALUES_IN_BYTE, histo->alpha_, m->alpha_); ConvertPopulationCountTableToBitEstimates( - NUM_DISTANCE_CODES, histo.distance_, m->distance_); + NUM_DISTANCE_CODES, histo->distance_, m->distance_); ok = 1; Error: - VP8LClearBackwardRefs(&refs); + VP8LFreeHistogram(histo); return ok; } @@ -476,16 +559,16 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, static int BackwardReferencesHashChainDistanceOnly( int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, - int quality, int cache_bits, uint32_t* const dist_array) { + int quality, int cache_bits, VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, uint32_t* const dist_array) { int i; int ok = 0; int cc_init = 0; const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); float* const cost = - (float*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost)); - CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model)); - HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); + (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); + CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model)); VP8LColorCache hashers; const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68; const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82; @@ -494,9 +577,7 @@ static int BackwardReferencesHashChainDistanceOnly( int iter_pos = 1; int iter_limit = -1; - if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error; - - if (!HashChainInit(hash_chain, pix_count)) goto Error; + if (cost == NULL || cost_model == NULL) goto Error; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); @@ -504,7 +585,7 @@ static int BackwardReferencesHashChainDistanceOnly( } if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb, - quality, cache_bits)) { + quality, cache_bits, hash_chain, refs)) { goto Error; } @@ -515,6 +596,7 @@ static int BackwardReferencesHashChainDistanceOnly( dist_array[0] = 0; GetParamsForHashChainFindCopy(quality, xsize, cache_bits, &window_size, &iter_pos, &iter_limit); + HashChainInit(hash_chain); for (i = 0; i < pix_count; ++i) { double prev_cost = 0.0; int shortmax; @@ -589,12 +671,11 @@ static int BackwardReferencesHashChainDistanceOnly( } // Last pixel still to do, it can only be a single step if not reached // through cheaper means already. - ok = 1; + ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); - free(cost_model); - free(cost); + WebPSafeFree(cost_model); + WebPSafeFree(cost); return ok; } @@ -621,6 +702,7 @@ static int BackwardReferencesHashChainFollowChosenPath( int xsize, int ysize, const uint32_t* const argb, int quality, int cache_bits, const uint32_t* const chosen_path, int chosen_path_size, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); @@ -633,20 +715,17 @@ static int BackwardReferencesHashChainFollowChosenPath( int window_size = WINDOW_SIZE; int iter_pos = 1; int iter_limit = -1; - HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain)); VP8LColorCache hashers; - if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) { - goto Error; - } if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; } - refs->size = 0; + ClearBackwardRefs(refs); GetParamsForHashChainFindCopy(quality, xsize, cache_bits, &window_size, &iter_pos, &iter_limit); + HashChainInit(hash_chain); for (ix = 0; ix < chosen_path_size; ++ix, ++size) { int offset = 0; int len = 0; @@ -656,7 +735,7 @@ static int BackwardReferencesHashChainFollowChosenPath( window_size, iter_pos, iter_limit, &offset, &len); assert(len == max_len); - refs->refs[size] = PixOrCopyCreateCopy(offset, len); + BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len)); if (use_color_cache) { for (k = 0; k < len; ++k) { VP8LColorCacheInsert(&hashers, argb[i + k]); @@ -670,26 +749,25 @@ static int BackwardReferencesHashChainFollowChosenPath( } i += len; } else { + PixOrCopy v; if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { // push pixel as a color cache index const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]); - refs->refs[size] = PixOrCopyCreateCacheIdx(idx); + v = PixOrCopyCreateCacheIdx(idx); } else { if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); - refs->refs[size] = PixOrCopyCreateLiteral(argb[i]); + v = PixOrCopyCreateLiteral(argb[i]); } + BackwardRefsCursorAdd(refs, v); if (i + 1 < pix_count) { HashChainInsert(hash_chain, &argb[i], i); } ++i; } } - assert(size <= refs->max_size); - refs->size = size; - ok = 1; + ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers); - HashChainDelete(hash_chain); return ok; } @@ -698,142 +776,129 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, int quality, int cache_bits, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs) { int ok = 0; const int dist_array_size = xsize * ysize; uint32_t* chosen_path = NULL; int chosen_path_size = 0; uint32_t* dist_array = - (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array)); + (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array)); if (dist_array == NULL) goto Error; if (!BackwardReferencesHashChainDistanceOnly( - xsize, ysize, recursive_cost_model, argb, quality, cache_bits, - dist_array)) { + xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain, + refs, dist_array)) { goto Error; } TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size); if (!BackwardReferencesHashChainFollowChosenPath( xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size, - refs)) { + hash_chain, refs)) { goto Error; } ok = 1; Error: - free(dist_array); + WebPSafeFree(dist_array); return ok; } static void BackwardReferences2DLocality(int xsize, - VP8LBackwardRefs* const refs) { - int i; - for (i = 0; i < refs->size; ++i) { - if (PixOrCopyIsCopy(&refs->refs[i])) { - const int dist = refs->refs[i].argb_or_distance; + const VP8LBackwardRefs* const refs) { + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + if (PixOrCopyIsCopy(c.cur_pos)) { + const int dist = c.cur_pos->argb_or_distance; const int transformed_dist = DistanceToPlaneCode(xsize, dist); - refs->refs[i].argb_or_distance = transformed_dist; + c.cur_pos->argb_or_distance = transformed_dist; } + VP8LRefsCursorNext(&c); } } -int VP8LGetBackwardReferences(int width, int height, - const uint32_t* const argb, - int quality, int cache_bits, int use_2d_locality, - VP8LBackwardRefs* const best) { - int ok = 0; +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2]) { int lz77_is_useful; - VP8LBackwardRefs refs_rle, refs_lz77; const int num_pix = width * height; - - VP8LBackwardRefsAlloc(&refs_rle, num_pix); - VP8LBackwardRefsAlloc(&refs_lz77, num_pix); - VP8LInitBackwardRefs(best); - if (refs_rle.refs == NULL || refs_lz77.refs == NULL) { - Error1: - VP8LClearBackwardRefs(&refs_rle); - VP8LClearBackwardRefs(&refs_lz77); - goto End; - } + VP8LBackwardRefs* best = NULL; + VP8LBackwardRefs* const refs_lz77 = &refs_array[0]; + VP8LBackwardRefs* const refs_rle = &refs_array[1]; if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality, - &refs_lz77)) { - goto End; + hash_chain, refs_lz77)) { + return NULL; + } + if (!BackwardReferencesRle(width, height, argb, refs_rle)) { + return NULL; } - // Backward Reference using RLE only. - BackwardReferencesRle(width, height, argb, &refs_rle); { double bit_cost_lz77, bit_cost_rle; - VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo)); - if (histo == NULL) goto Error1; - // Evaluate lz77 coding - VP8LHistogramCreate(histo, &refs_lz77, cache_bits); + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) return NULL; + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_lz77, cache_bits); bit_cost_lz77 = VP8LHistogramEstimateBits(histo); - // Evaluate RLE coding - VP8LHistogramCreate(histo, &refs_rle, cache_bits); + // Evaluate RLE coding. + VP8LHistogramCreate(histo, refs_rle, cache_bits); bit_cost_rle = VP8LHistogramEstimateBits(histo); // Decide if LZ77 is useful. lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); - free(histo); + VP8LFreeHistogram(histo); } // Choose appropriate backward reference. if (lz77_is_useful) { // TraceBackwards is costly. Don't execute it at lower quality. const int try_lz77_trace_backwards = (quality >= 25); - *best = refs_lz77; // default guess: lz77 is better - VP8LClearBackwardRefs(&refs_rle); + best = refs_lz77; // default guess: lz77 is better if (try_lz77_trace_backwards) { // Set recursion level for large images using a color cache. const int recursion_level = (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0; - VP8LBackwardRefs refs_trace; - if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) { - goto End; - } + VP8LBackwardRefs* const refs_trace = &refs_array[1]; + ClearBackwardRefs(refs_trace); if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb, - quality, cache_bits, &refs_trace)) { - VP8LClearBackwardRefs(&refs_lz77); - *best = refs_trace; + quality, cache_bits, hash_chain, + refs_trace)) { + best = refs_trace; } } } else { - VP8LClearBackwardRefs(&refs_lz77); - *best = refs_rle; + best = refs_rle; } if (use_2d_locality) BackwardReferences2DLocality(width, best); - ok = 1; - - End: - if (!ok) { - VP8LClearBackwardRefs(best); - } - return ok; + return best; } -// Returns 1 on success. -static int ComputeCacheHistogram(const uint32_t* const argb, - int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int cache_bits, - VP8LHistogram* const histo) { +// Returns entropy for the given cache bits. +static double ComputeCacheEntropy(const uint32_t* const argb, + int xsize, int ysize, + const VP8LBackwardRefs* const refs, + int cache_bits) { int pixel_index = 0; - int i; uint32_t k; - VP8LColorCache hashers; const int use_color_cache = (cache_bits > 0); int cc_init = 0; + double entropy = MAX_ENTROPY; + const double kSmallPenaltyForLargeCache = 4.0; + VP8LColorCache hashers; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) goto Error; if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) return 0; + if (!cc_init) goto Error; } - for (i = 0; i < refs->size; ++i) { - const PixOrCopy* const v = &refs->refs[i]; + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; if (PixOrCopyIsLiteral(v)) { if (use_color_cache && VP8LColorCacheContains(&hashers, argb[pixel_index])) { @@ -853,42 +918,58 @@ static int ComputeCacheHistogram(const uint32_t* const argb, } } pixel_index += PixOrCopyLength(v); + VP8LRefsCursorNext(&c); } assert(pixel_index == xsize * ysize); (void)xsize; // xsize is not used in non-debug compilations otherwise. (void)ysize; // ysize is not used in non-debug compilations otherwise. + entropy = VP8LHistogramEstimateBits(histo) + + kSmallPenaltyForLargeCache * cache_bits; + Error: if (cc_init) VP8LColorCacheClear(&hashers); - return 1; + VP8LFreeHistogram(histo); + return entropy; } -// Returns how many bits are to be used for a color cache. +// *best_cache_bits will contain how many bits are to be used for a color cache. +// Returns 0 in case of memory error. int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, + int xsize, int ysize, int quality, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, int* const best_cache_bits) { - int ok = 0; - int cache_bits; - double lowest_entropy = 1e99; - VP8LBackwardRefs refs; - static const double kSmallPenaltyForLargeCache = 4.0; - static const int quality = 30; - if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize) || - !BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, &refs)) { - goto Error; + int eval_low = 1; + int eval_high = 1; + double entropy_low = MAX_ENTROPY; + double entropy_high = MAX_ENTROPY; + int cache_bits_low = 0; + int cache_bits_high = MAX_COLOR_CACHE_BITS; + + if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain, + refs)) { + return 0; } - for (cache_bits = 0; cache_bits <= MAX_COLOR_CACHE_BITS; ++cache_bits) { - double cur_entropy; - VP8LHistogram histo; - VP8LHistogramInit(&histo, cache_bits); - ComputeCacheHistogram(argb, xsize, ysize, &refs, cache_bits, &histo); - cur_entropy = VP8LHistogramEstimateBits(&histo) + - kSmallPenaltyForLargeCache * cache_bits; - if (cache_bits == 0 || cur_entropy < lowest_entropy) { - *best_cache_bits = cache_bits; - lowest_entropy = cur_entropy; + // Do a binary search to find the optimal entropy for cache_bits. + while (cache_bits_high - cache_bits_low > 1) { + if (eval_low) { + entropy_low = + ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low); + eval_low = 0; + } + if (eval_high) { + entropy_high = + ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high); + eval_high = 0; + } + if (entropy_high < entropy_low) { + *best_cache_bits = cache_bits_high; + cache_bits_low = (cache_bits_low + cache_bits_high) / 2; + eval_low = 1; + } else { + *best_cache_bits = cache_bits_low; + cache_bits_high = (cache_bits_low + cache_bits_high) / 2; + eval_high = 1; } } - ok = 1; - Error: - VP8LClearBackwardRefs(&refs); - return ok; + return 1; } diff --git a/src/3rdparty/libwebp/src/enc/backward_references.h b/src/3rdparty/libwebp/src/enc/backward_references.h index e1c75f0..c2c81c5 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references.h +++ b/src/3rdparty/libwebp/src/enc/backward_references.h @@ -113,36 +113,96 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) { } // ----------------------------------------------------------------------------- -// VP8LBackwardRefs +// VP8LHashChain + +#define HASH_BITS 18 +#define HASH_SIZE (1 << HASH_BITS) + +typedef struct VP8LHashChain VP8LHashChain; +struct VP8LHashChain { + // Stores the most recently added position with the given hash value. + int32_t hash_to_first_index_[HASH_SIZE]; + // chain_[pos] stores the previous position with the same hash value + // for every pixel in the image. + int32_t* chain_; + // This is the maximum size of the hash_chain that can be constructed. + // Typically this is the pixel count (width x height) for a given image. + int size_; +}; -typedef struct { - PixOrCopy* refs; - int size; // currently used - int max_size; // maximum capacity -} VP8LBackwardRefs; +// Must be called first, to set size. +int VP8LHashChainInit(VP8LHashChain* const p, int size); +void VP8LHashChainClear(VP8LHashChain* const p); // release memory -// Initialize the object. Must be called first. 'refs' can be NULL. -void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs); +// ----------------------------------------------------------------------------- +// VP8LBackwardRefs (block-based backward-references storage) + +// maximum number of reference blocks the image will be segmented into +#define MAX_REFS_BLOCK_PER_IMAGE 16 + +typedef struct PixOrCopyBlock PixOrCopyBlock; // forward declaration +typedef struct VP8LBackwardRefs VP8LBackwardRefs; + +// Container for blocks chain +struct VP8LBackwardRefs { + int block_size_; // common block-size + int error_; // set to true if some memory error occurred + PixOrCopyBlock* refs_; // list of currently used blocks + PixOrCopyBlock** tail_; // for list recycling + PixOrCopyBlock* free_blocks_; // free-list + PixOrCopyBlock* last_block_; // used for adding new refs (internal) +}; -// Release memory and re-initialize the object. 'refs' can be NULL. -void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs); +// Initialize the object. 'block_size' is the common block size to store +// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE). +void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size); +// Release memory for backward references. +void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs); +// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error. +int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src, + VP8LBackwardRefs* const dst); -// Allocate 'max_size' references. Returns false in case of memory error. -int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size); +// Cursor for iterating on references content +typedef struct { + // public: + PixOrCopy* cur_pos; // current position + // private: + PixOrCopyBlock* cur_block_; // current block in the refs list + const PixOrCopy* last_pos_; // sentinel for switching to next block +} VP8LRefsCursor; + +// Returns a cursor positioned at the beginning of the references list. +VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs); +// Returns true if cursor is pointing at a valid position. +static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) { + return (c->cur_pos != NULL); +} +// Move to next block of references. Internal, not to be called directly. +void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c); +// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk(). +static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) { + assert(c != NULL); + assert(VP8LRefsCursorOk(c)); + if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c); +} // ----------------------------------------------------------------------------- // Main entry points // Evaluates best possible backward references for specified quality. // Further optimize for 2D locality if use_2d_locality flag is set. -int VP8LGetBackwardReferences(int width, int height, - const uint32_t* const argb, - int quality, int cache_bits, int use_2d_locality, - VP8LBackwardRefs* const best); +// The return value is the pointer to the best of the two backward refs viz, +// refs[0] or refs[1]. +VP8LBackwardRefs* VP8LGetBackwardReferences( + int width, int height, const uint32_t* const argb, int quality, + int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs[2]); // Produce an estimate for a good color cache size for the image. int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb, - int xsize, int ysize, + int xsize, int ysize, int quality, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const ref, int* const best_cache_bits); #ifdef __cplusplus diff --git a/src/3rdparty/libwebp/src/enc/config.c b/src/3rdparty/libwebp/src/enc/config.c index af7f0b0..53a3bb2 100644 --- a/src/3rdparty/libwebp/src/enc/config.c +++ b/src/3rdparty/libwebp/src/enc/config.c @@ -111,7 +111,11 @@ int WebPValidateConfig(const WebPConfig* config) { return 0; if (config->show_compressed < 0 || config->show_compressed > 1) return 0; +#if WEBP_ENCODER_ABI_VERSION > 0x0204 + if (config->preprocessing < 0 || config->preprocessing > 7) +#else if (config->preprocessing < 0 || config->preprocessing > 3) +#endif return 0; if (config->partitions < 0 || config->partitions > 3) return 0; @@ -138,3 +142,25 @@ int WebPValidateConfig(const WebPConfig* config) { //------------------------------------------------------------------------------ +#if WEBP_ENCODER_ABI_VERSION > 0x0202 +#define MAX_LEVEL 9 + +// Mapping between -z level and -m / -q parameter settings. +static const struct { + uint8_t method_; + uint8_t quality_; +} kLosslessPresets[MAX_LEVEL + 1] = { + { 0, 0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 }, + { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 } +}; + +int WebPConfigLosslessPreset(WebPConfig* config, int level) { + if (config == NULL || level < 0 || level > MAX_LEVEL) return 0; + config->lossless = 1; + config->method = kLosslessPresets[level].method_; + config->quality = kLosslessPresets[level].quality_; + return 1; +} +#endif + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/cost.c b/src/3rdparty/libwebp/src/enc/cost.c index 09699f8..9d2cc01 100644 --- a/src/3rdparty/libwebp/src/enc/cost.c +++ b/src/3rdparty/libwebp/src/enc/cost.c @@ -360,9 +360,10 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) { for (ctx = 0; ctx < NUM_CTX; ++ctx) { const uint8_t* const p = proba->coeffs_[ctype][band][ctx]; uint16_t* const table = proba->level_cost_[ctype][band][ctx]; - const int cost_base = VP8BitCost(1, p[1]); + const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0; + const int cost_base = VP8BitCost(1, p[1]) + cost0; int v; - table[0] = VP8BitCost(0, p[1]); + table[0] = VP8BitCost(0, p[1]) + cost0; for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) { table[v] = cost_base + VariableLevelCost(v, p); } @@ -486,4 +487,249 @@ const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = { }; //------------------------------------------------------------------------------ +// Mode costs +static int GetResidualCost(int ctx0, const VP8Residual* const res) { + int n = res->first; + // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 + const int p0 = res->prob[n][ctx0][0]; + const uint16_t* t = res->cost[n][ctx0]; + // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 + // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll + // be missing during the loop. + int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; + + if (res->last < 0) { + return VP8BitCost(0, p0); + } + for (; n < res->last; ++n) { + const int v = abs(res->coeffs[n]); + const int b = VP8EncBands[n + 1]; + const int ctx = (v >= 2) ? 2 : v; + cost += VP8LevelCost(t, v); + t = res->cost[b][ctx]; + } + // Last coefficient is always non-zero + { + const int v = abs(res->coeffs[n]); + assert(v != 0); + cost += VP8LevelCost(t, v); + if (n < 15) { + const int b = VP8EncBands[n + 1]; + const int ctx = (v == 1) ? 1 : 2; + const int last_p0 = res->prob[b][ctx][0]; + cost += VP8BitCost(0, last_p0); + } + } + return cost; +} + +//------------------------------------------------------------------------------ +// init function + +#if defined(WEBP_USE_MIPS32) +extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res); +#endif // WEBP_USE_MIPS32 + +// TODO(skal): this, and GetResidualCost(), should probably go somewhere +// under src/dsp/ at some point. +VP8GetResidualCostFunc VP8GetResidualCost; + +void VP8GetResidualCostInit(void) { + VP8GetResidualCost = GetResidualCost; + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + VP8GetResidualCost = VP8GetResidualCostMIPS32; + } +#endif + } +} + +//------------------------------------------------------------------------------ +// helper functions for residuals struct VP8Residual. + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res) { + res->coeff_type = coeff_type; + res->prob = enc->proba_.coeffs_[coeff_type]; + res->stats = enc->proba_.stats_[coeff_type]; + res->cost = enc->proba_.level_cost_[coeff_type]; + res->first = first; +} + +static void SetResidualCoeffs(const int16_t* const coeffs, + VP8Residual* const res) { + int n; + res->last = -1; + assert(res->first == 0 || coeffs[0] == 0); + for (n = 15; n >= 0; --n) { + if (coeffs[n]) { + res->last = n; + break; + } + } + res->coeffs = coeffs; +} + +//------------------------------------------------------------------------------ +// init function + +#if defined(WEBP_USE_SSE2) +extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, + VP8Residual* const res); +#endif // WEBP_USE_SSE2 + +VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; + +void VP8SetResidualCoeffsInit(void) { + VP8SetResidualCoeffs = SetResidualCoeffs; + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2; + } +#endif + } +} + +//------------------------------------------------------------------------------ +// Mode costs + +int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) { + const int x = (it->i4_ & 3), y = (it->i4_ >> 2); + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int R = 0; + int ctx; + + VP8InitResidual(0, 3, enc, &res); + ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(levels, &res); + R += VP8GetResidualCost(ctx, &res); + return R; +} + +int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + // DC + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); + R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res); + + // AC + VP8InitResidual(1, 0, enc, &res); + for (y = 0; y < 4; ++y) { + for (x = 0; x < 4; ++x) { + const int ctx = it->top_nz_[x] + it->left_nz_[y]; + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); + } + } + return R; +} + +int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { + VP8Residual res; + VP8Encoder* const enc = it->enc_; + int ch, x, y; + int R = 0; + + VP8IteratorNzToBytes(it); // re-import the non-zero context + + VP8InitResidual(0, 2, enc, &res); + for (ch = 0; ch <= 2; ch += 2) { + for (y = 0; y < 2; ++y) { + for (x = 0; x < 2; ++x) { + const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + R += VP8GetResidualCost(ctx, &res); + it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); + } + } + } + return R; +} + + +//------------------------------------------------------------------------------ +// Recording of token probabilities. + +// Record proba context used +static int Record(int bit, proba_t* const stats) { + proba_t p = *stats; + if (p >= 0xffff0000u) { // an overflow is inbound. + p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. + } + // record bit count (lower 16 bits) and increment total count (upper 16 bits). + p += 0x00010000u + bit; + *stats = p; + return bit; +} + +// We keep the table-free variant around for reference, in case. +#define USE_LEVEL_CODE_TABLE + +// Simulate block coding, but only record statistics. +// Note: no need to record the fixed probas. +int VP8RecordCoeffs(int ctx, const VP8Residual* const res) { + int n = res->first; + // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1 + proba_t* s = res->stats[n][ctx]; + if (res->last < 0) { + Record(0, s + 0); + return 0; + } + while (n <= res->last) { + int v; + Record(1, s + 0); // order of record doesn't matter + while ((v = res->coeffs[n++]) == 0) { + Record(0, s + 1); + s = res->stats[VP8EncBands[n]][0]; + } + Record(1, s + 1); + if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 + s = res->stats[VP8EncBands[n]][1]; + } else { + v = abs(v); +#if !defined(USE_LEVEL_CODE_TABLE) + if (!Record(v > 4, s + 3)) { + if (Record(v != 2, s + 4)) + Record(v == 4, s + 5); + } else if (!Record(v > 10, s + 6)) { + Record(v > 6, s + 7); + } else if (!Record((v >= 3 + (8 << 2)), s + 8)) { + Record((v >= 3 + (8 << 1)), s + 9); + } else { + Record((v >= 3 + (8 << 3)), s + 10); + } +#else + if (v > MAX_VARIABLE_LEVEL) { + v = MAX_VARIABLE_LEVEL; + } + + { + const int bits = VP8LevelCodes[v - 1][1]; + int pattern = VP8LevelCodes[v - 1][0]; + int i; + for (i = 0; (pattern >>= 1) != 0; ++i) { + const int mask = 2 << i; + if (pattern & 1) Record(!!(bits & mask), s + 3 + i); + } + } +#endif + s = res->stats[VP8EncBands[n]][2]; + } + } + if (n < 16) Record(0, s + 0); + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/cost.h b/src/3rdparty/libwebp/src/enc/cost.h index 3cbad1a..4e55895 100644 --- a/src/3rdparty/libwebp/src/enc/cost.h +++ b/src/3rdparty/libwebp/src/enc/cost.h @@ -14,12 +14,38 @@ #ifndef WEBP_ENC_COST_H_ #define WEBP_ENC_COST_H_ +#include <assert.h> +#include <stdlib.h> #include "./vp8enci.h" #ifdef __cplusplus extern "C" { #endif +// On-the-fly info about the current set of residuals. Handy to avoid +// passing zillions of params. +typedef struct { + int first; + int last; + const int16_t* coeffs; + + int coeff_type; + ProbaArray* prob; + StatsArray* stats; + CostArray* cost; +} VP8Residual; + +void VP8InitResidual(int first, int coeff_type, + VP8Encoder* const enc, VP8Residual* const res); + +typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, + VP8Residual* const res); +extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; + +void VP8SetResidualCoeffsInit(void); // must be called first + +int VP8RecordCoeffs(int ctx, const VP8Residual* const res); + // approximate cost per level: extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1]; extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p) @@ -29,6 +55,12 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; } +// Cost calculation function. +typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res); +extern VP8GetResidualCostFunc VP8GetResidualCost; + +void VP8GetResidualCostInit(void); // must be called first + // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; void VP8CalculateLevelCosts(VP8Proba* const proba); diff --git a/src/3rdparty/libwebp/src/enc/filter.c b/src/3rdparty/libwebp/src/enc/filter.c index dd27804..11db4bd 100644 --- a/src/3rdparty/libwebp/src/enc/filter.c +++ b/src/3rdparty/libwebp/src/enc/filter.c @@ -13,6 +13,7 @@ #include <assert.h> #include "./vp8enci.h" +#include "../dsp/dsp.h" // This table gives, for a given sharpness, the filtering strength to be // used (at least) in order to filter a given edge step delta. @@ -61,180 +62,6 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta) { return kLevelsFromDelta[sharpness][pos]; } -// ----------------------------------------------------------------------------- -// NOTE: clip1, tables and InitTables are repeated entries of dsp.c -static uint8_t abs0[255 + 255 + 1]; // abs(i) -static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1 -static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127] -static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15] -static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] - -static int tables_ok = 0; - -static void InitTables(void) { - if (!tables_ok) { - int i; - for (i = -255; i <= 255; ++i) { - abs0[255 + i] = (i < 0) ? -i : i; - abs1[255 + i] = abs0[255 + i] >> 1; - } - for (i = -1020; i <= 1020; ++i) { - sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i; - } - for (i = -112; i <= 112; ++i) { - sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; - } - for (i = -255; i <= 255 + 255; ++i) { - clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; - } - tables_ok = 1; - } -} - -//------------------------------------------------------------------------------ -// Edge filtering functions - -// 4 pixels in, 2 pixels out -static WEBP_INLINE void do_filter2(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - p[-step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; -} - -// 4 pixels in, 4 pixels out -static WEBP_INLINE void do_filter4(uint8_t* p, int step) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - const int a = 3 * (q0 - p0); - const int a1 = sclip2[112 + ((a + 4) >> 3)]; - const int a2 = sclip2[112 + ((a + 3) >> 3)]; - const int a3 = (a1 + 1) >> 1; - p[-2*step] = clip1[255 + p1 + a3]; - p[- step] = clip1[255 + p0 + a2]; - p[ 0] = clip1[255 + q0 - a1]; - p[ step] = clip1[255 + q1 - a3]; -} - -// high edge-variance -static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); -} - -static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) { - const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; - return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; -} - -static WEBP_INLINE int needs_filter2(const uint8_t* p, - int step, int t, int it) { - const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; - const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; - if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) - return 0; - return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && - abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && - abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; -} - -//------------------------------------------------------------------------------ -// Simple In-loop filtering (Paragraph 15.2) - -static void SimpleVFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i, stride, thresh)) { - do_filter2(p + i, stride); - } - } -} - -static void SimpleHFilter16(uint8_t* p, int stride, int thresh) { - int i; - for (i = 0; i < 16; ++i) { - if (needs_filter(p + i * stride, 1, thresh)) { - do_filter2(p + i * stride, 1); - } - } -} - -static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - SimpleVFilter16(p, stride, thresh); - } -} - -static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - SimpleHFilter16(p, stride, thresh); - } -} - -//------------------------------------------------------------------------------ -// Complex In-loop filtering (Paragraph 15.3) - -static WEBP_INLINE void FilterLoop24(uint8_t* p, - int hstride, int vstride, int size, - int thresh, int ithresh, int hev_thresh) { - while (size-- > 0) { - if (needs_filter2(p, hstride, thresh, ithresh)) { - if (hev(p, hstride, hev_thresh)) { - do_filter2(p, hstride); - } else { - do_filter4(p, hstride); - } - } - p += vstride; - } -} - -// on three inner edges -static void VFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4 * stride; - FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh); - } -} - -static void HFilter16i(uint8_t* p, int stride, - int thresh, int ithresh, int hev_thresh) { - int k; - for (k = 3; k > 0; --k) { - p += 4; - FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh); - } -} - -static void VFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh); -} - -static void HFilter8i(uint8_t* u, uint8_t* v, int stride, - int thresh, int ithresh, int hev_thresh) { - FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); - FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); -} - -//------------------------------------------------------------------------------ - -void (*VP8EncVFilter16i)(uint8_t*, int, int, int, int) = VFilter16i; -void (*VP8EncHFilter16i)(uint8_t*, int, int, int, int) = HFilter16i; -void (*VP8EncVFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i; -void (*VP8EncHFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i; - -void (*VP8EncSimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i; -void (*VP8EncSimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i; - //------------------------------------------------------------------------------ // Paragraph 15.4: compute the inner-edge filtering strength @@ -266,14 +93,14 @@ static void DoFilter(const VP8EncIterator* const it, int level) { memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t)); if (enc->filter_hdr_.simple_ == 1) { // simple - VP8EncSimpleHFilter16i(y_dst, BPS, limit); - VP8EncSimpleVFilter16i(y_dst, BPS, limit); + VP8SimpleHFilter16i(y_dst, BPS, limit); + VP8SimpleVFilter16i(y_dst, BPS, limit); } else { // complex const int hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0; - VP8EncHFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); - VP8EncHFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); - VP8EncVFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); - VP8EncVFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + VP8HFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8HFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter16i(y_dst, BPS, limit, ilevel, hev_thresh); + VP8VFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh); } } @@ -387,7 +214,6 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) { void VP8InitFilter(VP8EncIterator* const it) { if (it->lf_stats_ != NULL) { int s, i; - InitTables(); for (s = 0; s < NUM_MB_SEGMENTS; s++) { for (i = 0; i < MAX_LF_LEVELS; i++) { (*it->lf_stats_)[s][i] = 0; @@ -468,4 +294,3 @@ void VP8AdjustFilterStrength(VP8EncIterator* const it) { } // ----------------------------------------------------------------------------- - diff --git a/src/3rdparty/libwebp/src/enc/frame.c b/src/3rdparty/libwebp/src/enc/frame.c index 2582244..cdf1dab 100644 --- a/src/3rdparty/libwebp/src/enc/frame.c +++ b/src/3rdparty/libwebp/src/enc/frame.c @@ -11,8 +11,6 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include <assert.h> -#include <stdlib.h> #include <string.h> #include <math.h> @@ -23,19 +21,6 @@ #define SEGMENT_VISU 0 #define DEBUG_SEARCH 0 // useful to track search convergence -// On-the-fly info about the current set of residuals. Handy to avoid -// passing zillions of params. -typedef struct { - int first; - int last; - const int16_t* coeffs; - - int coeff_type; - ProbaArray* prob; - StatsArray* stats; - CostArray* cost; -} VP8Residual; - //------------------------------------------------------------------------------ // multi-pass convergence @@ -142,83 +127,6 @@ static int FinalizeSkipProba(VP8Encoder* const enc) { return size; } -//------------------------------------------------------------------------------ -// Recording of token probabilities. - -static void ResetTokenStats(VP8Encoder* const enc) { - VP8Proba* const proba = &enc->proba_; - memset(proba->stats_, 0, sizeof(proba->stats_)); -} - -// Record proba context used -static int Record(int bit, proba_t* const stats) { - proba_t p = *stats; - if (p >= 0xffff0000u) { // an overflow is inbound. - p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. - } - // record bit count (lower 16 bits) and increment total count (upper 16 bits). - p += 0x00010000u + bit; - *stats = p; - return bit; -} - -// We keep the table free variant around for reference, in case. -#define USE_LEVEL_CODE_TABLE - -// Simulate block coding, but only record statistics. -// Note: no need to record the fixed probas. -static int RecordCoeffs(int ctx, const VP8Residual* const res) { - int n = res->first; - // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1 - proba_t* s = res->stats[n][ctx]; - if (res->last < 0) { - Record(0, s + 0); - return 0; - } - while (n <= res->last) { - int v; - Record(1, s + 0); // order of record doesn't matter - while ((v = res->coeffs[n++]) == 0) { - Record(0, s + 1); - s = res->stats[VP8EncBands[n]][0]; - } - Record(1, s + 1); - if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 - s = res->stats[VP8EncBands[n]][1]; - } else { - v = abs(v); -#if !defined(USE_LEVEL_CODE_TABLE) - if (!Record(v > 4, s + 3)) { - if (Record(v != 2, s + 4)) - Record(v == 4, s + 5); - } else if (!Record(v > 10, s + 6)) { - Record(v > 6, s + 7); - } else if (!Record((v >= 3 + (8 << 2)), s + 8)) { - Record((v >= 3 + (8 << 1)), s + 9); - } else { - Record((v >= 3 + (8 << 3)), s + 10); - } -#else - if (v > MAX_VARIABLE_LEVEL) - v = MAX_VARIABLE_LEVEL; - - { - const int bits = VP8LevelCodes[v - 1][1]; - int pattern = VP8LevelCodes[v - 1][0]; - int i; - for (i = 0; (pattern >>= 1) != 0; ++i) { - const int mask = 2 << i; - if (pattern & 1) Record(!!(bits & mask), s + 3 + i); - } - } -#endif - s = res->stats[VP8EncBands[n]][2]; - } - } - if (n < 16) Record(0, s + 0); - return 1; -} - // Collect statistics and deduce probabilities for next coding pass. // Return the total bit-cost for coding the probability updates. static int CalcTokenProba(int nb, int total) { @@ -231,6 +139,11 @@ static int BranchCost(int nb, int total, int proba) { return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); } +static void ResetTokenStats(VP8Encoder* const enc) { + VP8Proba* const proba = &enc->proba_; + memset(proba->stats_, 0, sizeof(proba->stats_)); +} + static int FinalizeTokenProbas(VP8Proba* const proba) { int has_changed = 0; int size = 0; @@ -309,131 +222,6 @@ static void SetSegmentProbas(VP8Encoder* const enc) { } //------------------------------------------------------------------------------ -// helper functions for residuals struct VP8Residual. - -static void InitResidual(int first, int coeff_type, - VP8Encoder* const enc, VP8Residual* const res) { - res->coeff_type = coeff_type; - res->prob = enc->proba_.coeffs_[coeff_type]; - res->stats = enc->proba_.stats_[coeff_type]; - res->cost = enc->proba_.level_cost_[coeff_type]; - res->first = first; -} - -static void SetResidualCoeffs(const int16_t* const coeffs, - VP8Residual* const res) { - int n; - res->last = -1; - for (n = 15; n >= res->first; --n) { - if (coeffs[n]) { - res->last = n; - break; - } - } - res->coeffs = coeffs; -} - -//------------------------------------------------------------------------------ -// Mode costs - -static int GetResidualCost(int ctx0, const VP8Residual* const res) { - int n = res->first; - // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 - int p0 = res->prob[n][ctx0][0]; - const uint16_t* t = res->cost[n][ctx0]; - int cost; - - if (res->last < 0) { - return VP8BitCost(0, p0); - } - cost = VP8BitCost(1, p0); - for (; n < res->last; ++n) { - const int v = abs(res->coeffs[n]); - const int b = VP8EncBands[n + 1]; - const int ctx = (v >= 2) ? 2 : v; - cost += VP8LevelCost(t, v); - t = res->cost[b][ctx]; - // the masking trick is faster than "if (v) cost += ..." with clang - cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]); - } - // Last coefficient is always non-zero - { - const int v = abs(res->coeffs[n]); - assert(v != 0); - cost += VP8LevelCost(t, v); - if (n < 15) { - const int b = VP8EncBands[n + 1]; - const int ctx = (v == 1) ? 1 : 2; - const int last_p0 = res->prob[b][ctx][0]; - cost += VP8BitCost(0, last_p0); - } - } - return cost; -} - -int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) { - const int x = (it->i4_ & 3), y = (it->i4_ >> 2); - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int R = 0; - int ctx; - - InitResidual(0, 3, enc, &res); - ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(levels, &res); - R += GetResidualCost(ctx, &res); - return R; -} - -int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) { - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int x, y; - int R = 0; - - VP8IteratorNzToBytes(it); // re-import the non-zero context - - // DC - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); - R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res); - - // AC - InitResidual(1, 0, enc, &res); - for (y = 0; y < 4; ++y) { - for (x = 0; x < 4; ++x) { - const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); - R += GetResidualCost(ctx, &res); - it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0); - } - } - return R; -} - -int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) { - VP8Residual res; - VP8Encoder* const enc = it->enc_; - int ch, x, y; - int R = 0; - - VP8IteratorNzToBytes(it); // re-import the non-zero context - - InitResidual(0, 2, enc, &res); - for (ch = 0; ch <= 2; ch += 2) { - for (y = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x) { - const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); - R += GetResidualCost(ctx, &res); - it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0); - } - } - } - return R; -} - -//------------------------------------------------------------------------------ // Coefficient coding static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) { @@ -521,32 +309,32 @@ static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it, pos1 = VP8BitWriterPos(bw); if (i16) { - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); it->top_nz_[8] = it->left_nz_[8] = PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res); - InitResidual(1, 0, enc, &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res); } } pos2 = VP8BitWriterPos(bw); // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = PutCoeffs(bw, ctx, &res); } @@ -571,33 +359,33 @@ static void RecordResiduals(VP8EncIterator* const it, VP8IteratorNzToBytes(it); if (it->mb_->type_ == 1) { // i16x16 - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); it->top_nz_[8] = it->left_nz_[8] = - RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); - InitResidual(1, 0, enc, &res); + VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); - it->top_nz_[x] = it->left_nz_[y] = RecordCoeffs(ctx, &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res); } } // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = - RecordCoeffs(ctx, &res); + VP8RecordCoeffs(ctx, &res); } } } @@ -610,8 +398,8 @@ static void RecordResiduals(VP8EncIterator* const it, #if !defined(DISABLE_TOKEN_BUFFER) -static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, - VP8TBuffer* const tokens) { +static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, + VP8TBuffer* const tokens) { int x, y, ch; VP8Residual res; VP8Encoder* const enc = it->enc_; @@ -619,44 +407,45 @@ static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, VP8IteratorNzToBytes(it); if (it->mb_->type_ == 1) { // i16x16 const int ctx = it->top_nz_[8] + it->left_nz_[8]; - InitResidual(0, 1, enc, &res); - SetResidualCoeffs(rd->y_dc_levels, &res); + VP8InitResidual(0, 1, enc, &res); + VP8SetResidualCoeffs(rd->y_dc_levels, &res); it->top_nz_[8] = it->left_nz_[8] = VP8RecordCoeffTokens(ctx, 1, res.first, res.last, res.coeffs, tokens); - RecordCoeffs(ctx, &res); - InitResidual(1, 0, enc, &res); + VP8RecordCoeffs(ctx, &res); + VP8InitResidual(1, 0, enc, &res); } else { - InitResidual(0, 3, enc, &res); + VP8InitResidual(0, 3, enc, &res); } // luma-AC for (y = 0; y < 4; ++y) { for (x = 0; x < 4; ++x) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); + VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffTokens(ctx, res.coeff_type, res.first, res.last, res.coeffs, tokens); - RecordCoeffs(ctx, &res); + VP8RecordCoeffs(ctx, &res); } } // U/V - InitResidual(0, 2, enc, &res); + VP8InitResidual(0, 2, enc, &res); for (ch = 0; ch <= 2; ch += 2) { for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); + VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = VP8RecordCoeffTokens(ctx, 2, res.first, res.last, res.coeffs, tokens); - RecordCoeffs(ctx, &res); + VP8RecordCoeffs(ctx, &res); } } } VP8IteratorBytesToNz(it); + return !tokens->error_; } #endif // !DISABLE_TOKEN_BUFFER @@ -719,7 +508,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) { } case 7: *info = mb->alpha_; break; default: *info = 0; break; - }; + } } #if SEGMENT_VISU // visualize segments and prediction modes SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16); @@ -863,7 +652,10 @@ static int PreLoopInitialize(VP8Encoder* const enc) { for (p = 0; ok && p < enc->num_parts_; ++p) { ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts); } - if (!ok) VP8EncFreeBitWriters(enc); // malloc error occurred + if (!ok) { + VP8EncFreeBitWriters(enc); // malloc error occurred + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } return ok; } @@ -928,11 +720,6 @@ int VP8EncLoop(VP8Encoder* const enc) { } else { // reset predictors after a skip ResetAfterSkip(&it); } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (enc->use_layer_) { - VP8EncCodeLayerBlock(&it); - } -#endif StoreSideInfo(&it); VP8StoreFilterStats(&it); VP8IteratorExport(&it); @@ -997,14 +784,13 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { cnt = max_count; } VP8Decimate(&it, &info, rd_opt); - RecordTokens(&it, &info, &enc->tokens_); + ok = RecordTokens(&it, &info, &enc->tokens_); + if (!ok) { + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + break; + } size_p0 += info.H; distortion += info.D; -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (enc->use_layer_) { - VP8EncCodeLayerBlock(&it); - } -#endif if (is_last_pass) { StoreSideInfo(&it); VP8StoreFilterStats(&it); diff --git a/src/3rdparty/libwebp/src/enc/histogram.c b/src/3rdparty/libwebp/src/enc/histogram.c index abd253b..7c6abb4 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.c +++ b/src/3rdparty/libwebp/src/enc/histogram.c @@ -10,31 +10,64 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // #ifdef HAVE_CONFIG_H -#include "config.h" +#include "../webp/config.h" #endif #include <math.h> -#include <stdio.h> #include "./backward_references.h" #include "./histogram.h" #include "../dsp/lossless.h" #include "../utils/utils.h" +#define MAX_COST 1.e38 + +// Number of partitions for the three dominant (literal, red and blue) symbol +// costs. +#define NUM_PARTITIONS 4 +// The size of the bin-hash corresponding to the three dominant costs. +#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS) + static void HistogramClear(VP8LHistogram* const p) { - memset(p->literal_, 0, sizeof(p->literal_)); - memset(p->red_, 0, sizeof(p->red_)); - memset(p->blue_, 0, sizeof(p->blue_)); - memset(p->alpha_, 0, sizeof(p->alpha_)); - memset(p->distance_, 0, sizeof(p->distance_)); - p->bit_cost_ = 0; + uint32_t* const literal = p->literal_; + const int cache_bits = p->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(cache_bits); + memset(p, 0, histo_size); + p->palette_code_bits_ = cache_bits; + p->literal_ = literal; +} + +static void HistogramCopy(const VP8LHistogram* const src, + VP8LHistogram* const dst) { + uint32_t* const dst_literal = dst->literal_; + const int dst_cache_bits = dst->palette_code_bits_; + const int histo_size = VP8LGetHistogramSize(dst_cache_bits); + assert(src->palette_code_bits_ == dst_cache_bits); + memcpy(dst, src, histo_size); + dst->literal_ = dst_literal; +} + +int VP8LGetHistogramSize(int cache_bits) { + const int literal_size = VP8LHistogramNumCodes(cache_bits); + const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size; + assert(total_size <= (size_t)0x7fffffff); + return (int)total_size; +} + +void VP8LFreeHistogram(VP8LHistogram* const histo) { + WebPSafeFree(histo); +} + +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) { + WebPSafeFree(histo); } void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo) { - int i; - for (i = 0; i < refs->size; ++i) { - VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]); + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos); + VP8LRefsCursorNext(&c); } } @@ -53,13 +86,24 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) { HistogramClear(p); } +VP8LHistogram* VP8LAllocateHistogram(int cache_bits) { + VP8LHistogram* histo = NULL; + const int total_size = VP8LGetHistogramSize(cache_bits); + uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); + if (memory == NULL) return NULL; + histo = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); + VP8LHistogramInit(histo, cache_bits); + return histo; +} + VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { int i; VP8LHistogramSet* set; - VP8LHistogram* bulk; - const uint64_t total_size = sizeof(*set) - + (uint64_t)size * sizeof(*set->histograms) - + (uint64_t)size * sizeof(**set->histograms); + const size_t total_size = sizeof(*set) + + sizeof(*set->histograms) * size + + (size_t)VP8LGetHistogramSize(cache_bits) * size; uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); if (memory == NULL) return NULL; @@ -67,12 +111,15 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { memory += sizeof(*set); set->histograms = (VP8LHistogram**)memory; memory += size * sizeof(*set->histograms); - bulk = (VP8LHistogram*)memory; set->max_size = size; set->size = size; for (i = 0; i < size; ++i) { - set->histograms[i] = bulk + i; + set->histograms[i] = (VP8LHistogram*)memory; + // literal_ won't necessary be aligned. + set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); VP8LHistogramInit(set->histograms[i], cache_bits); + // There's no padding/alignment between successive histograms. + memory += VP8LGetHistogramSize(cache_bits); } return set; } @@ -87,36 +134,21 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, ++histo->literal_[PixOrCopyLiteral(v, 1)]; ++histo->blue_[PixOrCopyLiteral(v, 0)]; } else if (PixOrCopyIsCacheIdx(v)) { - int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + const int literal_ix = + NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); ++histo->literal_[literal_ix]; } else { int code, extra_bits; VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits); - ++histo->literal_[256 + code]; + ++histo->literal_[NUM_LITERAL_CODES + code]; VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits); ++histo->distance_[code]; } } -static double BitsEntropy(const int* const array, int n) { - double retval = 0.; - int sum = 0; - int nonzeros = 0; - int max_val = 0; - int i; +static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val, + double retval) { double mix; - for (i = 0; i < n; ++i) { - if (array[i] != 0) { - sum += array[i]; - ++nonzeros; - retval -= VP8LFastSLog2(array[i]); - if (max_val < array[i]) { - max_val = array[i]; - } - } - } - retval += VP8LFastSLog2(sum); - if (nonzeros < 5) { if (nonzeros <= 1) { return 0; @@ -147,95 +179,142 @@ static double BitsEntropy(const int* const array, int n) { } } -// Returns the cost encode the rle-encoded entropy code. -// The constants in this function are experimental. -static double HuffmanCost(const int* const population, int length) { - // Small bias because Huffman code length is typically not stored in - // full length. - static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; - static const double kSmallBias = 9.1; - double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias; - int streak = 0; - int i = 0; - for (; i < length - 1; ++i) { - ++streak; - if (population[i] == population[i + 1]) { - continue; - } - last_streak_hack: - // population[i] points now to the symbol in the streak of same values. - if (streak > 3) { - if (population[i] == 0) { - retval += 1.5625 + 0.234375 * streak; - } else { - retval += 2.578125 + 0.703125 * streak; - } - } else { - if (population[i] == 0) { - retval += 1.796875 * streak; - } else { - retval += 3.28125 * streak; +static double BitsEntropy(const uint32_t* const array, int n) { + double retval = 0.; + uint32_t sum = 0; + int nonzeros = 0; + uint32_t max_val = 0; + int i; + for (i = 0; i < n; ++i) { + if (array[i] != 0) { + sum += array[i]; + ++nonzeros; + retval -= VP8LFastSLog2(array[i]); + if (max_val < array[i]) { + max_val = array[i]; } } - streak = 0; } - if (i == length - 1) { - ++streak; - goto last_streak_hack; + retval += VP8LFastSLog2(sum); + return BitsEntropyRefine(nonzeros, sum, max_val, retval); +} + +static double BitsEntropyCombined(const uint32_t* const X, + const uint32_t* const Y, int n) { + double retval = 0.; + int sum = 0; + int nonzeros = 0; + int max_val = 0; + int i; + for (i = 0; i < n; ++i) { + const int xy = X[i] + Y[i]; + if (xy != 0) { + sum += xy; + ++nonzeros; + retval -= VP8LFastSLog2(xy); + if (max_val < xy) { + max_val = xy; + } + } } + retval += VP8LFastSLog2(sum); + return BitsEntropyRefine(nonzeros, sum, max_val, retval); +} + +static double InitialHuffmanCost(void) { + // Small bias because Huffman code length is typically not stored in + // full length. + static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; + static const double kSmallBias = 9.1; + return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; +} + +// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) +static double FinalHuffmanCost(const VP8LStreaks* const stats) { + double retval = InitialHuffmanCost(); + retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + retval += 1.796875 * stats->streaks[0][0]; + retval += 3.28125 * stats->streaks[1][0]; return retval; } -static double PopulationCost(const int* const population, int length) { +// Trampolines +static double HuffmanCost(const uint32_t* const population, int length) { + const VP8LStreaks stats = VP8LHuffmanCostCount(population, length); + return FinalHuffmanCost(&stats); +} + +static double HuffmanCostCombined(const uint32_t* const X, + const uint32_t* const Y, int length) { + const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length); + return FinalHuffmanCost(&stats); +} + +// Aggregated costs +static double PopulationCost(const uint32_t* const population, int length) { return BitsEntropy(population, length) + HuffmanCost(population, length); } -static double ExtraCost(const int* const population, int length) { - int i; - double cost = 0.; - for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; - return cost; +static double GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, int length) { + return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); } // Estimates the Entropy + Huffman + other block overhead size cost. double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { - return PopulationCost(p->literal_, VP8LHistogramNumCodes(p)) - + PopulationCost(p->red_, 256) - + PopulationCost(p->blue_, 256) - + PopulationCost(p->alpha_, 256) - + PopulationCost(p->distance_, NUM_DISTANCE_CODES) - + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) - + ExtraCost(p->distance_, NUM_DISTANCE_CODES); + return + PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_)) + + PopulationCost(p->red_, NUM_LITERAL_CODES) + + PopulationCost(p->blue_, NUM_LITERAL_CODES) + + PopulationCost(p->alpha_, NUM_LITERAL_CODES) + + PopulationCost(p->distance_, NUM_DISTANCE_CODES) + + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); } double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { - return BitsEntropy(p->literal_, VP8LHistogramNumCodes(p)) - + BitsEntropy(p->red_, 256) - + BitsEntropy(p->blue_, 256) - + BitsEntropy(p->alpha_, 256) - + BitsEntropy(p->distance_, NUM_DISTANCE_CODES) - + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) - + ExtraCost(p->distance_, NUM_DISTANCE_CODES); + return + BitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_)) + + BitsEntropy(p->red_, NUM_LITERAL_CODES) + + BitsEntropy(p->blue_, NUM_LITERAL_CODES) + + BitsEntropy(p->alpha_, NUM_LITERAL_CODES) + + BitsEntropy(p->distance_, NUM_DISTANCE_CODES) + + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); } // ----------------------------------------------------------------------------- // Various histogram combine/cost-eval functions -// Adds 'in' histogram to 'out' -static void HistogramAdd(const VP8LHistogram* const in, - VP8LHistogram* const out) { - int i; - for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) { - out->literal_[i] += in->literal_[i]; - } - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - out->distance_[i] += in->distance_[i]; - } - for (i = 0; i < 256; ++i) { - out->red_[i] += in->red_[i]; - out->blue_[i] += in->blue_[i]; - out->alpha_[i] += in->alpha_[i]; - } +static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, + const VP8LHistogram* const b, + double cost_threshold, + double* cost) { + const int palette_code_bits = a->palette_code_bits_; + assert(a->palette_code_bits_ == b->palette_code_bits_); + *cost += GetCombinedEntropy(a->literal_, b->literal_, + VP8LHistogramNumCodes(palette_code_bits)); + *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, + b->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); + if (*cost > cost_threshold) return 0; + + *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); + *cost += VP8LExtraCostCombined(a->distance_, b->distance_, + NUM_DISTANCE_CODES); + if (*cost > cost_threshold) return 0; + + return 1; } // Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing @@ -250,41 +329,14 @@ static double HistogramAddEval(const VP8LHistogram* const a, double cost_threshold) { double cost = 0; const double sum_cost = a->bit_cost_ + b->bit_cost_; - int i; - cost_threshold += sum_cost; - // palette_code_bits_ is part of the cost evaluation for literal_. - // TODO(skal): remove/simplify this palette_code_bits_? - out->palette_code_bits_ = - (a->palette_code_bits_ > b->palette_code_bits_) ? a->palette_code_bits_ : - b->palette_code_bits_; - for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) { - out->literal_[i] = a->literal_[i] + b->literal_[i]; - } - cost += PopulationCost(out->literal_, VP8LHistogramNumCodes(out)); - cost += ExtraCost(out->literal_ + 256, NUM_LENGTH_CODES); - if (cost > cost_threshold) return cost; - - for (i = 0; i < 256; ++i) out->red_[i] = a->red_[i] + b->red_[i]; - cost += PopulationCost(out->red_, 256); - if (cost > cost_threshold) return cost; - - for (i = 0; i < 256; ++i) out->blue_[i] = a->blue_[i] + b->blue_[i]; - cost += PopulationCost(out->blue_, 256); - if (cost > cost_threshold) return cost; - - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - out->distance_[i] = a->distance_[i] + b->distance_[i]; + if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { + VP8LHistogramAdd(a, b, out); + out->bit_cost_ = cost; + out->palette_code_bits_ = a->palette_code_bits_; } - cost += PopulationCost(out->distance_, NUM_DISTANCE_CODES); - cost += ExtraCost(out->distance_, NUM_DISTANCE_CODES); - if (cost > cost_threshold) return cost; - for (i = 0; i < 256; ++i) out->alpha_[i] = a->alpha_[i] + b->alpha_[i]; - cost += PopulationCost(out->alpha_, 256); - - out->bit_cost_ = cost; return cost - sum_cost; } @@ -294,52 +346,92 @@ static double HistogramAddEval(const VP8LHistogram* const a, static double HistogramAddThresh(const VP8LHistogram* const a, const VP8LHistogram* const b, double cost_threshold) { - int tmp[PIX_OR_COPY_CODES_MAX]; // <= max storage we'll need - int i; double cost = -a->bit_cost_; + GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); + return cost; +} - for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) { - tmp[i] = a->literal_[i] + b->literal_[i]; - } - // note that the tests are ordered so that the usually largest - // cost shares come first. - cost += PopulationCost(tmp, VP8LHistogramNumCodes(a)); - cost += ExtraCost(tmp + 256, NUM_LENGTH_CODES); - if (cost > cost_threshold) return cost; - - for (i = 0; i < 256; ++i) tmp[i] = a->red_[i] + b->red_[i]; - cost += PopulationCost(tmp, 256); - if (cost > cost_threshold) return cost; - - for (i = 0; i < 256; ++i) tmp[i] = a->blue_[i] + b->blue_[i]; - cost += PopulationCost(tmp, 256); - if (cost > cost_threshold) return cost; - - for (i = 0; i < NUM_DISTANCE_CODES; ++i) { - tmp[i] = a->distance_[i] + b->distance_[i]; - } - cost += PopulationCost(tmp, NUM_DISTANCE_CODES); - cost += ExtraCost(tmp, NUM_DISTANCE_CODES); - if (cost > cost_threshold) return cost; +// ----------------------------------------------------------------------------- - for (i = 0; i < 256; ++i) tmp[i] = a->alpha_[i] + b->alpha_[i]; - cost += PopulationCost(tmp, 256); +// The structure to keep track of cost range for the three dominant entropy +// symbols. +// TODO(skal): Evaluate if float can be used here instead of double for +// representing the entropy costs. +typedef struct { + double literal_max_; + double literal_min_; + double red_max_; + double red_min_; + double blue_max_; + double blue_min_; +} DominantCostRange; + +static void DominantCostRangeInit(DominantCostRange* const c) { + c->literal_max_ = 0.; + c->literal_min_ = MAX_COST; + c->red_max_ = 0.; + c->red_min_ = MAX_COST; + c->blue_max_ = 0.; + c->blue_min_ = MAX_COST; +} - return cost; +static void UpdateDominantCostRange( + const VP8LHistogram* const h, DominantCostRange* const c) { + if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_; + if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_; + if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_; + if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_; + if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_; + if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_; } -// ----------------------------------------------------------------------------- +static void UpdateHistogramCost(VP8LHistogram* const h) { + const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES); + const double distance_cost = + PopulationCost(h->distance_, NUM_DISTANCE_CODES) + + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); + const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); + h->literal_cost_ = PopulationCost(h->literal_, num_codes) + + VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES); + h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES); + h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES); + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + + alpha_cost + distance_cost; +} -static void HistogramBuildImage(int xsize, int histo_bits, - const VP8LBackwardRefs* const backward_refs, - VP8LHistogramSet* const image) { - int i; +static int GetBinIdForEntropy(double min, double max, double val) { + const double range = max - min + 1e-6; + const double delta = val - min; + return (int)(NUM_PARTITIONS * delta / range); +} + +// TODO(vikasa): Evaluate, if there's any correlation between red & blue. +static int GetHistoBinIndex( + const VP8LHistogram* const h, const DominantCostRange* const c) { + const int bin_id = + GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) + + NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_, + h->red_cost_) + + NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_, + c->literal_max_, + h->literal_cost_); + assert(bin_id < BIN_SIZE); + return bin_id; +} + +// Construct the histograms from backward references. +static void HistogramBuild( + int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs, + VP8LHistogramSet* const image_histo) { int x = 0, y = 0; const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits); - VP8LHistogram** const histograms = image->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs); assert(histo_bits > 0); - for (i = 0; i < backward_refs->size; ++i) { - const PixOrCopy* const v = &backward_refs->refs[i]; + // Construct the Histo from a given backward references. + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); VP8LHistogramAddSinglePixOrCopy(histograms[ix], v); x += PixOrCopyLength(v); @@ -347,9 +439,119 @@ static void HistogramBuildImage(int xsize, int histo_bits, x -= xsize; ++y; } + VP8LRefsCursorNext(&c); } } +// Copies the histograms and computes its bit_cost. +static void HistogramCopyAndAnalyze( + VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) { + int i; + const int histo_size = orig_histo->size; + VP8LHistogram** const orig_histograms = orig_histo->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = orig_histograms[i]; + UpdateHistogramCost(histo); + // Copy histograms from orig_histo[] to image_histo[]. + HistogramCopy(histo, histograms[i]); + } +} + +// Partition histograms to different entropy bins for three dominant (literal, +// red and blue) symbol costs and compute the histogram aggregate bit_cost. +static void HistogramAnalyzeEntropyBin( + VP8LHistogramSet* const image_histo, int16_t* const bin_map) { + int i; + VP8LHistogram** const histograms = image_histo->histograms; + const int histo_size = image_histo->size; + const int bin_depth = histo_size + 1; + DominantCostRange cost_range; + DominantCostRangeInit(&cost_range); + + // Analyze the dominant (literal, red and blue) entropy costs. + for (i = 0; i < histo_size; ++i) { + VP8LHistogram* const histo = histograms[i]; + UpdateDominantCostRange(histo, &cost_range); + } + + // bin-hash histograms on three of the dominant (literal, red and blue) + // symbol costs. + for (i = 0; i < histo_size; ++i) { + int num_histos; + VP8LHistogram* const histo = histograms[i]; + const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range); + const int bin_offset = bin_id * bin_depth; + // bin_map[n][0] for every bin 'n' maintains the counter for the number of + // histograms in that bin. + // Get and increment the num_histos in that bin. + num_histos = ++bin_map[bin_offset]; + assert(bin_offset + num_histos < bin_depth * BIN_SIZE); + // Add histogram i'th index at num_histos (last) position in the bin_map. + bin_map[bin_offset + num_histos] = i; + } +} + +// Compact the histogram set by moving the valid one left in the set to the +// head and moving the ones that have been merged to other histograms towards +// the end. +// TODO(vikasa): Evaluate if this method can be avoided by altering the code +// logic of HistogramCombineEntropyBin main loop. +static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { + int start = 0; + int end = image_histo->size - 1; + VP8LHistogram** const histograms = image_histo->histograms; + while (start < end) { + while (start <= end && histograms[start] != NULL && + histograms[start]->bit_cost_ != 0.) { + ++start; + } + while (start <= end && histograms[end]->bit_cost_ == 0.) { + histograms[end] = NULL; + --end; + } + if (start < end) { + assert(histograms[start] != NULL); + assert(histograms[end] != NULL); + HistogramCopy(histograms[end], histograms[start]); + histograms[end] = NULL; + --end; + } + } + image_histo->size = end + 1; +} + +static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, + VP8LHistogram* const histos, + int16_t* const bin_map, int bin_depth, + double combine_cost_factor) { + int bin_id; + VP8LHistogram* cur_combo = histos; + VP8LHistogram** const histograms = image_histo->histograms; + + for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) { + const int bin_offset = bin_id * bin_depth; + const int num_histos = bin_map[bin_offset]; + const int idx1 = bin_map[bin_offset + 1]; + int n; + for (n = 2; n <= num_histos; ++n) { + const int idx2 = bin_map[bin_offset + n]; + const double bit_cost_idx2 = histograms[idx2]->bit_cost_; + if (bit_cost_idx2 > 0.) { + const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + HistogramCopy(cur_combo, histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } + } + } + } + HistogramCompactBins(image_histo); +} + static uint32_t MyRand(uint32_t *seed) { *seed *= 16807U; if (*seed == 0) { @@ -358,48 +560,45 @@ static uint32_t MyRand(uint32_t *seed) { return *seed; } -static int HistogramCombine(const VP8LHistogramSet* const in, - VP8LHistogramSet* const out, int iter_mult, - int num_pairs, int num_tries_no_success) { - int ok = 0; - int i, iter; +static void HistogramCombine(VP8LHistogramSet* const image_histo, + VP8LHistogramSet* const histos, int quality) { + int iter; uint32_t seed = 0; int tries_with_no_success = 0; - int out_size = in->size; - const int outer_iters = in->size * iter_mult; + int image_histo_size = image_histo->size; + const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8; + const int outer_iters = image_histo_size * iter_mult; + const int num_pairs = image_histo_size / 2; + const int num_tries_no_success = outer_iters / 2; const int min_cluster_size = 2; - VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos)); - VP8LHistogram* cur_combo = histos + 0; // trial merged histogram - VP8LHistogram* best_combo = histos + 1; // best merged histogram so far - if (histos == NULL) goto End; - - // Copy histograms from in[] to out[]. - assert(in->size <= out->size); - for (i = 0; i < in->size; ++i) { - in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]); - *out->histograms[i] = *in->histograms[i]; - } - - // Collapse similar histograms in 'out'. - for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) { + VP8LHistogram** const histograms = image_histo->histograms; + VP8LHistogram* cur_combo = histos->histograms[0]; // trial histogram + VP8LHistogram* best_combo = histos->histograms[1]; // best histogram so far + + // Collapse similar histograms in 'image_histo'. + for (iter = 0; + iter < outer_iters && image_histo_size >= min_cluster_size; + ++iter) { double best_cost_diff = 0.; int best_idx1 = -1, best_idx2 = 1; int j; - const int num_tries = (num_pairs < out_size) ? num_pairs : out_size; + const int num_tries = + (num_pairs < image_histo_size) ? num_pairs : image_histo_size; seed += iter; for (j = 0; j < num_tries; ++j) { double curr_cost_diff; // Choose two histograms at random and try to combine them. - const uint32_t idx1 = MyRand(&seed) % out_size; + const uint32_t idx1 = MyRand(&seed) % image_histo_size; const uint32_t tmp = (j & 7) + 1; - const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1); - const uint32_t idx2 = (idx1 + diff + 1) % out_size; + const uint32_t diff = + (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1); + const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size; if (idx1 == idx2) { continue; } + // Calculate cost reduction on combining. - curr_cost_diff = HistogramAddEval(out->histograms[idx1], - out->histograms[idx2], + curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2], cur_combo, best_cost_diff); if (curr_cost_diff < best_cost_diff) { // found a better pair? { // swap cur/best combo histograms @@ -414,12 +613,12 @@ static int HistogramCombine(const VP8LHistogramSet* const in, } if (best_idx1 >= 0) { - *out->histograms[best_idx1] = *best_combo; + HistogramCopy(best_combo, histograms[best_idx1]); // swap best_idx2 slot with last one (which is now unused) - --out_size; - if (best_idx2 != out_size) { - out->histograms[best_idx2] = out->histograms[out_size]; - out->histograms[out_size] = NULL; // just for sanity check. + --image_histo_size; + if (best_idx2 != image_histo_size) { + HistogramCopy(histograms[image_histo_size], histograms[best_idx2]); + histograms[image_histo_size] = NULL; } tries_with_no_success = 0; } @@ -427,38 +626,28 @@ static int HistogramCombine(const VP8LHistogramSet* const in, break; } } - out->size = out_size; - ok = 1; - - End: - free(histos); - return ok; + image_histo->size = image_histo_size; } // ----------------------------------------------------------------------------- // Histogram refinement -// What is the bit cost of moving square_histogram from cur_symbol to candidate. -static double HistogramDistance(const VP8LHistogram* const square_histogram, - const VP8LHistogram* const candidate, - double cost_threshold) { - return HistogramAddThresh(candidate, square_histogram, cost_threshold); -} - // Find the best 'out' histogram for each of the 'in' histograms. // Note: we assume that out[]->bit_cost_ is already up-to-date. -static void HistogramRemap(const VP8LHistogramSet* const in, - const VP8LHistogramSet* const out, +static void HistogramRemap(const VP8LHistogramSet* const orig_histo, + const VP8LHistogramSet* const image_histo, uint16_t* const symbols) { int i; - for (i = 0; i < in->size; ++i) { + VP8LHistogram** const orig_histograms = orig_histo->histograms; + VP8LHistogram** const histograms = image_histo->histograms; + for (i = 0; i < orig_histo->size; ++i) { int best_out = 0; double best_bits = - HistogramDistance(in->histograms[i], out->histograms[0], 1.e38); + HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST); int k; - for (k = 1; k < out->size; ++k) { + for (k = 1; k < image_histo->size; ++k) { const double cur_bits = - HistogramDistance(in->histograms[i], out->histograms[k], best_bits); + HistogramAddThresh(histograms[k], orig_histograms[i], best_bits); if (cur_bits < best_bits) { best_bits = cur_bits; best_out = k; @@ -468,45 +657,85 @@ static void HistogramRemap(const VP8LHistogramSet* const in, } // Recompute each out based on raw and symbols. - for (i = 0; i < out->size; ++i) { - HistogramClear(out->histograms[i]); + for (i = 0; i < image_histo->size; ++i) { + HistogramClear(histograms[i]); } - for (i = 0; i < in->size; ++i) { - HistogramAdd(in->histograms[i], out->histograms[symbols[i]]); + + for (i = 0; i < orig_histo->size; ++i) { + const int idx = symbols[i]; + VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]); } } +static double GetCombineCostFactor(int histo_size, int quality) { + double combine_cost_factor = 0.16; + if (histo_size > 256) combine_cost_factor /= 2.; + if (histo_size > 512) combine_cost_factor /= 2.; + if (histo_size > 1024) combine_cost_factor /= 2.; + if (quality <= 50) combine_cost_factor /= 2.; + return combine_cost_factor; +} + int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, int quality, int histo_bits, int cache_bits, - VP8LHistogramSet* const image_in, + VP8LHistogramSet* const image_histo, uint16_t* const histogram_symbols) { int ok = 0; const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; - const int histo_image_raw_size = histo_xsize * histo_ysize; - - // Heuristic params for HistogramCombine(). - const int num_tries_no_success = 8 + (quality >> 1); - const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4); - const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3; - - VP8LHistogramSet* const image_out = - VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits); - if (image_out == NULL) return 0; - - // Build histogram image. - HistogramBuildImage(xsize, histo_bits, refs, image_out); - // Collapse similar histograms. - if (!HistogramCombine(image_out, image_in, iter_mult, num_pairs, - num_tries_no_success)) { + const int image_histo_raw_size = histo_xsize * histo_ysize; + + // The bin_map for every bin follows following semantics: + // bin_map[n][0] = num_histo; // The number of histograms in that bin. + // bin_map[n][1] = index of first histogram in that bin; + // bin_map[n][num_histo] = index of last histogram in that bin; + // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices. + const int bin_depth = image_histo_raw_size + 1; + int16_t* bin_map = NULL; + VP8LHistogramSet* const histos = VP8LAllocateHistogramSet(2, cache_bits); + VP8LHistogramSet* const orig_histo = + VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); + + if (orig_histo == NULL || histos == NULL) { goto Error; } + + // Don't attempt linear bin-partition heuristic for: + // histograms of small sizes, as bin_map will be very sparse and; + // Higher qualities (> 90), to preserve the compression gains at those + // quality settings. + if (orig_histo->size > 2 * BIN_SIZE && quality < 90) { + const int bin_map_size = bin_depth * BIN_SIZE; + bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); + if (bin_map == NULL) goto Error; + } + + // Construct the histograms from backward references. + HistogramBuild(xsize, histo_bits, refs, orig_histo); + // Copies the histograms and computes its bit_cost. + HistogramCopyAndAnalyze(orig_histo, image_histo); + + if (bin_map != NULL) { + const double combine_cost_factor = + GetCombineCostFactor(image_histo_raw_size, quality); + HistogramAnalyzeEntropyBin(orig_histo, bin_map); + // Collapse histograms with similar entropy. + HistogramCombineEntropyBin(image_histo, histos->histograms[0], + bin_map, bin_depth, combine_cost_factor); + } + + // Collapse similar histograms by random histogram-pair compares. + HistogramCombine(image_histo, histos, quality); + // Find the optimal map from original histograms to the final ones. - HistogramRemap(image_out, image_in, histogram_symbols); + HistogramRemap(orig_histo, image_histo, histogram_symbols); + ok = 1; -Error: - free(image_out); + Error: + WebPSafeFree(bin_map); + VP8LFreeHistogramSet(orig_histo); + VP8LFreeHistogramSet(histos); return ok; } diff --git a/src/3rdparty/libwebp/src/enc/histogram.h b/src/3rdparty/libwebp/src/enc/histogram.h index 4d346a8..1cf4c54 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.h +++ b/src/3rdparty/libwebp/src/enc/histogram.h @@ -32,18 +32,21 @@ extern "C" { typedef struct { // literal_ contains green literal, palette-code and // copy-length-prefix histogram - int literal_[PIX_OR_COPY_CODES_MAX]; - int red_[256]; - int blue_[256]; - int alpha_[256]; + uint32_t* literal_; // Pointer to the allocated buffer for literal. + uint32_t red_[NUM_LITERAL_CODES]; + uint32_t blue_[NUM_LITERAL_CODES]; + uint32_t alpha_[NUM_LITERAL_CODES]; // Backward reference prefix-code histogram. - int distance_[NUM_DISTANCE_CODES]; + uint32_t distance_[NUM_DISTANCE_CODES]; int palette_code_bits_; - double bit_cost_; // cached value of VP8LHistogramEstimateBits(this) + double bit_cost_; // cached value of VP8LHistogramEstimateBits(this) + double literal_cost_; // Cached values of dominant entropy costs: + double red_cost_; // literal, red & blue. + double blue_cost_; } VP8LHistogram; // Collection of histograms with fixed capacity, allocated as one -// big memory chunk. Can be destroyed by simply calling 'free()'. +// big memory chunk. Can be destroyed by calling WebPSafeFree(). typedef struct { int size; // number of slots currently in use int max_size; // maximum capacity @@ -59,6 +62,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p, const VP8LBackwardRefs* const refs, int palette_code_bits); +// Return the size of the histogram for a given palette_code_bits. +int VP8LGetHistogramSize(int palette_code_bits); + // Set the palette_code_bits and reset the stats. void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits); @@ -66,10 +72,21 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits); void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs, VP8LHistogram* const histo); +// Free the memory allocated for the histogram. +void VP8LFreeHistogram(VP8LHistogram* const histo); + +// Free the memory allocated for the histogram set. +void VP8LFreeHistogramSet(VP8LHistogramSet* const histo); + // Allocate an array of pointer to histograms, allocated and initialized // using 'cache_bits'. Return NULL in case of memory error. VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits); +// Allocate and initialize histogram object with specified 'cache_bits'. +// Returns NULL in case of memory error. +// Special case of VP8LAllocateHistogramSet, with size equals 1. +VP8LHistogram* VP8LAllocateHistogram(int cache_bits); + // Accumulate a token 'v' into a histogram. void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, const PixOrCopy* const v); @@ -82,9 +99,9 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p); // represent the entropy code itself. double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p); -static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) { - return 256 + NUM_LENGTH_CODES + - ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0); +static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { + return NUM_LITERAL_CODES + NUM_LENGTH_CODES + + ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); } // Builds the histogram image. diff --git a/src/3rdparty/libwebp/src/enc/layer.c b/src/3rdparty/libwebp/src/enc/layer.c deleted file mode 100644 index 2402362..0000000 --- a/src/3rdparty/libwebp/src/enc/layer.c +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Enhancement layer (for YUV444/422) -// -// Author: Skal (pascal.massimino@gmail.com) - -#include <stdlib.h> - -#include "./vp8enci.h" - -//------------------------------------------------------------------------------ - -void VP8EncInitLayer(VP8Encoder* const enc) { - enc->use_layer_ = (enc->pic_->u0 != NULL); - enc->layer_data_size_ = 0; - enc->layer_data_ = NULL; - if (enc->use_layer_) { - VP8BitWriterInit(&enc->layer_bw_, enc->mb_w_ * enc->mb_h_ * 3); - } -} - -void VP8EncCodeLayerBlock(VP8EncIterator* it) { - (void)it; // remove a warning -} - -int VP8EncFinishLayer(VP8Encoder* const enc) { - if (enc->use_layer_) { - enc->layer_data_ = VP8BitWriterFinish(&enc->layer_bw_); - enc->layer_data_size_ = VP8BitWriterSize(&enc->layer_bw_); - } - return 1; -} - -void VP8EncDeleteLayer(VP8Encoder* enc) { - free(enc->layer_data_); -} - diff --git a/src/3rdparty/libwebp/src/enc/picture.c b/src/3rdparty/libwebp/src/enc/picture.c index 011690d..9a66fbe 100644 --- a/src/3rdparty/libwebp/src/enc/picture.c +++ b/src/3rdparty/libwebp/src/enc/picture.c @@ -7,506 +7,170 @@ // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // -// WebPPicture utils: colorspace conversion, crop, ... +// WebPPicture class basis // // Author: Skal (pascal.massimino@gmail.com) #include <assert.h> #include <stdlib.h> -#include <math.h> #include "./vp8enci.h" -#include "../utils/alpha_processing.h" -#include "../utils/random.h" -#include "../utils/rescaler.h" #include "../utils/utils.h" -#include "../dsp/dsp.h" -#include "../dsp/yuv.h" - -// Uncomment to disable gamma-compression during RGB->U/V averaging -#define USE_GAMMA_COMPRESSION - -#define HALVE(x) (((x) + 1) >> 1) -#define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP)) - -static const union { - uint32_t argb; - uint8_t bytes[4]; -} test_endian = { 0xff000000u }; -#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) - -static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { - return (0xff000000u | (r << 16) | (g << 8) | b); -} //------------------------------------------------------------------------------ // WebPPicture //------------------------------------------------------------------------------ -int WebPPictureAlloc(WebPPicture* picture) { - if (picture != NULL) { - const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; - const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; - const int width = picture->width; - const int height = picture->height; - - if (!picture->use_argb) { - const int y_stride = width; - const int uv_width = HALVE(width); - const int uv_height = HALVE(height); - const int uv_stride = uv_width; - int uv0_stride = 0; - int a_width, a_stride; - uint64_t y_size, uv_size, uv0_size, a_size, total_size; - uint8_t* mem; - - // U/V - switch (uv_csp) { - case WEBP_YUV420: - break; -#ifdef WEBP_EXPERIMENTAL_FEATURES - case WEBP_YUV400: // for now, we'll just reset the U/V samples - break; - case WEBP_YUV422: - uv0_stride = uv_width; - break; - case WEBP_YUV444: - uv0_stride = width; - break; -#endif - default: - return 0; - } - uv0_size = height * uv0_stride; - - // alpha - a_width = has_alpha ? width : 0; - a_stride = a_width; - y_size = (uint64_t)y_stride * height; - uv_size = (uint64_t)uv_stride * uv_height; - a_size = (uint64_t)a_stride * height; - - total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size; - - // Security and validation checks - if (width <= 0 || height <= 0 || // luma/alpha param error - uv_width < 0 || uv_height < 0) { // u/v param error - return 0; - } - // Clear previous buffer and allocate a new one. - WebPPictureFree(picture); // erase previous buffer - mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem)); - if (mem == NULL) return 0; - - // From now on, we're in the clear, we can no longer fail... - picture->memory_ = (void*)mem; - picture->y_stride = y_stride; - picture->uv_stride = uv_stride; - picture->a_stride = a_stride; - picture->uv0_stride = uv0_stride; - // TODO(skal): we could align the y/u/v planes and adjust stride. - picture->y = mem; - mem += y_size; - - picture->u = mem; - mem += uv_size; - picture->v = mem; - mem += uv_size; - - if (a_size) { - picture->a = mem; - mem += a_size; - } - if (uv0_size) { - picture->u0 = mem; - mem += uv0_size; - picture->v0 = mem; - mem += uv0_size; - } - (void)mem; // makes the static analyzer happy - } else { - void* memory; - const uint64_t argb_size = (uint64_t)width * height; - if (width <= 0 || height <= 0) { - return 0; - } - // Clear previous buffer and allocate a new one. - WebPPictureFree(picture); // erase previous buffer - memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); - if (memory == NULL) return 0; +static int DummyWriter(const uint8_t* data, size_t data_size, + const WebPPicture* const picture) { + // The following are to prevent 'unused variable' error message. + (void)data; + (void)data_size; + (void)picture; + return 1; +} - // TODO(skal): align plane to cache line? - picture->memory_argb_ = memory; - picture->argb = (uint32_t*)memory; - picture->argb_stride = width; - } +int WebPPictureInitInternal(WebPPicture* picture, int version) { + if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { + return 0; // caller/system version mismatch! + } + if (picture != NULL) { + memset(picture, 0, sizeof(*picture)); + picture->writer = DummyWriter; + WebPEncodingSetError(picture, VP8_ENC_OK); } return 1; } -// Remove reference to the ARGB buffer (doesn't free anything). -static void PictureResetARGB(WebPPicture* const picture) { +//------------------------------------------------------------------------------ + +static void WebPPictureResetBufferARGB(WebPPicture* const picture) { picture->memory_argb_ = NULL; picture->argb = NULL; picture->argb_stride = 0; } -// Remove reference to the YUVA buffer (doesn't free anything). -static void PictureResetYUVA(WebPPicture* const picture) { +static void WebPPictureResetBufferYUVA(WebPPicture* const picture) { picture->memory_ = NULL; picture->y = picture->u = picture->v = picture->a = NULL; - picture->u0 = picture->v0 = NULL; picture->y_stride = picture->uv_stride = 0; picture->a_stride = 0; - picture->uv0_stride = 0; } -// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them -// into 'dst'. Mark 'dst' as not owning any memory. -static void WebPPictureGrabSpecs(const WebPPicture* const src, - WebPPicture* const dst) { - assert(src != NULL && dst != NULL); - *dst = *src; - PictureResetYUVA(dst); - PictureResetARGB(dst); +void WebPPictureResetBuffers(WebPPicture* const picture) { + WebPPictureResetBufferARGB(picture); + WebPPictureResetBufferYUVA(picture); } -// Allocate a new argb buffer, discarding any existing one and preserving -// the other YUV(A) buffer. -static int PictureAllocARGB(WebPPicture* const picture) { - WebPPicture tmp; - free(picture->memory_argb_); - PictureResetARGB(picture); - picture->use_argb = 1; - WebPPictureGrabSpecs(picture, &tmp); - if (!WebPPictureAlloc(&tmp)) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - picture->memory_argb_ = tmp.memory_argb_; - picture->argb = tmp.argb; - picture->argb_stride = tmp.argb_stride; - return 1; -} +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { + void* memory; + const uint64_t argb_size = (uint64_t)width * height; -// Release memory owned by 'picture' (both YUV and ARGB buffers). -void WebPPictureFree(WebPPicture* picture) { - if (picture != NULL) { - free(picture->memory_); - free(picture->memory_argb_); - PictureResetYUVA(picture); - PictureResetARGB(picture); - } -} + assert(picture != NULL); -//------------------------------------------------------------------------------ -// Picture copying + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBufferARGB(picture); -// Not worth moving to dsp/enc.c (only used here). -static void CopyPlane(const uint8_t* src, int src_stride, - uint8_t* dst, int dst_stride, int width, int height) { - while (height-- > 0) { - memcpy(dst, src, width); - src += src_stride; - dst += dst_stride; + if (width <= 0 || height <= 0) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } -} - -// Adjust top-left corner to chroma sample position. -static void SnapTopLeftPosition(const WebPPicture* const pic, - int* const left, int* const top) { - if (!pic->use_argb) { - const int is_yuv422 = IS_YUV_CSP(pic->colorspace, WEBP_YUV422); - if (IS_YUV_CSP(pic->colorspace, WEBP_YUV420) || is_yuv422) { - *left &= ~1; - if (!is_yuv422) *top &= ~1; - } + // allocate a new buffer. + memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb)); + if (memory == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } -} - -// Adjust top-left corner and verify that the sub-rectangle is valid. -static int AdjustAndCheckRectangle(const WebPPicture* const pic, - int* const left, int* const top, - int width, int height) { - SnapTopLeftPosition(pic, left, top); - if ((*left) < 0 || (*top) < 0) return 0; - if (width <= 0 || height <= 0) return 0; - if ((*left) + width > pic->width) return 0; - if ((*top) + height > pic->height) return 0; + // TODO(skal): align plane to cache line? + picture->memory_argb_ = memory; + picture->argb = (uint32_t*)memory; + picture->argb_stride = width; return 1; } -int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { - if (src == NULL || dst == NULL) return 0; - if (src == dst) return 1; +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { + const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; + const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; + const int y_stride = width; + const int uv_width = (width + 1) >> 1; + const int uv_height = (height + 1) >> 1; + const int uv_stride = uv_width; + int a_width, a_stride; + uint64_t y_size, uv_size, a_size, total_size; + uint8_t* mem; - WebPPictureGrabSpecs(src, dst); - if (!WebPPictureAlloc(dst)) return 0; + assert(picture != NULL); - if (!src->use_argb) { - CopyPlane(src->y, src->y_stride, - dst->y, dst->y_stride, dst->width, dst->height); - CopyPlane(src->u, src->uv_stride, - dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); - CopyPlane(src->v, src->uv_stride, - dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); - if (dst->a != NULL) { - CopyPlane(src->a, src->a_stride, - dst->a, dst->a_stride, dst->width, dst->height); - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (dst->u0 != NULL) { - int uv0_width = src->width; - if (IS_YUV_CSP(dst->colorspace, WEBP_YUV422)) { - uv0_width = HALVE(uv0_width); - } - CopyPlane(src->u0, src->uv0_stride, - dst->u0, dst->uv0_stride, uv0_width, dst->height); - CopyPlane(src->v0, src->uv0_stride, - dst->v0, dst->uv0_stride, uv0_width, dst->height); - } -#endif - } else { - CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, - (uint8_t*)dst->argb, 4 * dst->argb_stride, - 4 * dst->width, dst->height); - } - return 1; -} + WebPSafeFree(picture->memory_); + WebPPictureResetBufferYUVA(picture); -int WebPPictureIsView(const WebPPicture* picture) { - if (picture == NULL) return 0; - if (picture->use_argb) { - return (picture->memory_argb_ == NULL); + if (uv_csp != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); } - return (picture->memory_ == NULL); -} -int WebPPictureView(const WebPPicture* src, - int left, int top, int width, int height, - WebPPicture* dst) { - if (src == NULL || dst == NULL) return 0; + // alpha + a_width = has_alpha ? width : 0; + a_stride = a_width; + y_size = (uint64_t)y_stride * height; + uv_size = (uint64_t)uv_stride * uv_height; + a_size = (uint64_t)a_stride * height; - // verify rectangle position. - if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0; + total_size = y_size + a_size + 2 * uv_size; - if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'. - WebPPictureGrabSpecs(src, dst); + // Security and validation checks + if (width <= 0 || height <= 0 || // luma/alpha param error + uv_width < 0 || uv_height < 0) { // u/v param error + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } - dst->width = width; - dst->height = height; - if (!src->use_argb) { - dst->y = src->y + top * src->y_stride + left; - dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1); - dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1); - dst->y_stride = src->y_stride; - dst->uv_stride = src->uv_stride; - if (src->a != NULL) { - dst->a = src->a + top * src->a_stride + left; - dst->a_stride = src->a_stride; - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (src->u0 != NULL) { - const int left_pos = - IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left; - dst->u0 = src->u0 + top * src->uv0_stride + left_pos; - dst->v0 = src->v0 + top * src->uv0_stride + left_pos; - dst->uv0_stride = src->uv0_stride; - } -#endif - } else { - dst->argb = src->argb + top * src->argb_stride + left; - dst->argb_stride = src->argb_stride; + // allocate a new buffer. + mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem)); + if (mem == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - return 1; -} - -//------------------------------------------------------------------------------ -// Picture cropping -int WebPPictureCrop(WebPPicture* pic, - int left, int top, int width, int height) { - WebPPicture tmp; + // From now on, we're in the clear, we can no longer fail... + picture->memory_ = (void*)mem; + picture->y_stride = y_stride; + picture->uv_stride = uv_stride; + picture->a_stride = a_stride; - if (pic == NULL) return 0; - if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0; + // TODO(skal): we could align the y/u/v planes and adjust stride. + picture->y = mem; + mem += y_size; - WebPPictureGrabSpecs(pic, &tmp); - tmp.width = width; - tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; + picture->u = mem; + mem += uv_size; + picture->v = mem; + mem += uv_size; - if (!pic->use_argb) { - const int y_offset = top * pic->y_stride + left; - const int uv_offset = (top / 2) * pic->uv_stride + left / 2; - CopyPlane(pic->y + y_offset, pic->y_stride, - tmp.y, tmp.y_stride, width, height); - CopyPlane(pic->u + uv_offset, pic->uv_stride, - tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); - CopyPlane(pic->v + uv_offset, pic->uv_stride, - tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); - - if (tmp.a != NULL) { - const int a_offset = top * pic->a_stride + left; - CopyPlane(pic->a + a_offset, pic->a_stride, - tmp.a, tmp.a_stride, width, height); - } -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (tmp.u0 != NULL) { - int w = width; - int left_pos = left; - if (IS_YUV_CSP(tmp.colorspace, WEBP_YUV422)) { - w = HALVE(w); - left_pos = HALVE(left_pos); - } - CopyPlane(pic->u0 + top * pic->uv0_stride + left_pos, pic->uv0_stride, - tmp.u0, tmp.uv0_stride, w, height); - CopyPlane(pic->v0 + top * pic->uv0_stride + left_pos, pic->uv0_stride, - tmp.v0, tmp.uv0_stride, w, height); - } -#endif - } else { - const uint8_t* const src = - (const uint8_t*)(pic->argb + top * pic->argb_stride + left); - CopyPlane(src, pic->argb_stride * 4, - (uint8_t*)tmp.argb, tmp.argb_stride * 4, - width * 4, height); + if (a_size > 0) { + picture->a = mem; + mem += a_size; } - WebPPictureFree(pic); - *pic = tmp; + (void)mem; // makes the static analyzer happy return 1; } -//------------------------------------------------------------------------------ -// Simple picture rescaler - -static void RescalePlane(const uint8_t* src, - int src_width, int src_height, int src_stride, - uint8_t* dst, - int dst_width, int dst_height, int dst_stride, - int32_t* const work, - int num_channels) { - WebPRescaler rescaler; - int y = 0; - WebPRescalerInit(&rescaler, src_width, src_height, - dst, dst_width, dst_height, dst_stride, - num_channels, - src_width, dst_width, - src_height, dst_height, - work); - memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); - while (y < src_height) { - y += WebPRescalerImport(&rescaler, src_height - y, - src + y * src_stride, src_stride); - WebPRescalerExport(&rescaler); - } -} +int WebPPictureAlloc(WebPPicture* picture) { + if (picture != NULL) { + const int width = picture->width; + const int height = picture->height; -static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { - uint32_t* ptr = pic->argb; - int y; - for (y = 0; y < pic->height; ++y) { - WebPMultARGBRow(ptr, pic->width, inverse); - ptr += pic->argb_stride; - } -} + WebPPictureFree(picture); // erase previous buffer -static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { - const uint8_t* ptr_a = pic->a; - if (ptr_a != NULL) { - uint8_t* ptr_y = pic->y; - int y; - for (y = 0; y < pic->height; ++y) { - WebPMultRow(ptr_y, ptr_a, pic->width, inverse); - ptr_y += pic->y_stride; - ptr_a += pic->a_stride; + if (!picture->use_argb) { + return WebPPictureAllocYUVA(picture, width, height); + } else { + return WebPPictureAllocARGB(picture, width, height); } } + return 1; } -int WebPPictureRescale(WebPPicture* pic, int width, int height) { - WebPPicture tmp; - int prev_width, prev_height; - int32_t* work; - - if (pic == NULL) return 0; - prev_width = pic->width; - prev_height = pic->height; - // if width is unspecified, scale original proportionally to height ratio. - if (width == 0) { - width = (prev_width * height + prev_height / 2) / prev_height; - } - // if height is unspecified, scale original proportionally to width ratio. - if (height == 0) { - height = (prev_height * width + prev_width / 2) / prev_width; - } - // Check if the overall dimensions still make sense. - if (width <= 0 || height <= 0) return 0; - - WebPPictureGrabSpecs(pic, &tmp); - tmp.width = width; - tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; - - if (!pic->use_argb) { - work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); - return 0; - } - // If present, we need to rescale alpha first (for AlphaMultiplyY). - if (pic->a != NULL) { - RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, - tmp.a, width, height, tmp.a_stride, work, 1); - } - - // We take transparency into account on the luma plane only. That's not - // totally exact blending, but still is a good approximation. - AlphaMultiplyY(pic, 0); - RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, - tmp.y, width, height, tmp.y_stride, work, 1); - AlphaMultiplyY(&tmp, 1); - - RescalePlane(pic->u, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.u, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - RescalePlane(pic->v, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.v, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (tmp.u0 != NULL) { - const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1; - RescalePlane( - pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride, - tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1); - RescalePlane( - pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride, - tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1); - } -#endif - } else { - work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); - return 0; - } - // In order to correctly interpolate colors, we need to apply the alpha - // weighting first (black-matting), scale the RGB values, and remove - // the premultiplication afterward (while preserving the alpha channel). - AlphaMultiplyARGB(pic, 0); - RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, - pic->argb_stride * 4, - (uint8_t*)tmp.argb, width, height, - tmp.argb_stride * 4, - work, 4); - AlphaMultiplyARGB(&tmp, 1); +void WebPPictureFree(WebPPicture* picture) { + if (picture != NULL) { + WebPSafeFree(picture->memory_); + WebPSafeFree(picture->memory_argb_); + WebPPictureResetBuffers(picture); } - WebPPictureFree(pic); - free(work); - *pic = tmp; - return 1; } //------------------------------------------------------------------------------ @@ -538,7 +202,7 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size, if (w->size > 0) { memcpy(new_mem, w->mem, w->size); } - free(w->mem); + WebPSafeFree(w->mem); w->mem = new_mem; // down-cast is ok, thanks to WebPSafeMalloc w->max_size = (size_t)next_max_size; @@ -550,713 +214,15 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size, return 1; } -//------------------------------------------------------------------------------ -// Detection of non-trivial transparency - -// Returns true if alpha[] has non-0xff values. -static int CheckNonOpaque(const uint8_t* alpha, int width, int height, - int x_step, int y_step) { - if (alpha == NULL) return 0; - while (height-- > 0) { - int x; - for (x = 0; x < width * x_step; x += x_step) { - if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time. - } - alpha += y_step; - } - return 0; -} - -// Checking for the presence of non-opaque alpha. -int WebPPictureHasTransparency(const WebPPicture* picture) { - if (picture == NULL) return 0; - if (!picture->use_argb) { - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); - } else { - int x, y; - const uint32_t* argb = picture->argb; - if (argb == NULL) return 0; - for (y = 0; y < picture->height; ++y) { - for (x = 0; x < picture->width; ++x) { - if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff - } - argb += picture->argb_stride; - } - } - return 0; -} - -//------------------------------------------------------------------------------ -// RGB -> YUV conversion - -static int RGBToY(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); -} - -static int RGBToU(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); -} - -static int RGBToV(int r, int g, int b, VP8Random* const rg) { - return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); -} - -//------------------------------------------------------------------------------ - -#if defined(USE_GAMMA_COMPRESSION) - -// gamma-compensates loss of resolution during chroma subsampling -#define kGamma 0.80 -#define kGammaFix 12 // fixed-point precision for linear values -#define kGammaScale ((1 << kGammaFix) - 1) -#define kGammaTabFix 7 // fixed-point fractional bits precision -#define kGammaTabScale (1 << kGammaTabFix) -#define kGammaTabRounder (kGammaTabScale >> 1) -#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) - -static int kLinearToGammaTab[kGammaTabSize + 1]; -static uint16_t kGammaToLinearTab[256]; -static int kGammaTablesOk = 0; - -static void InitGammaTables(void) { - if (!kGammaTablesOk) { - int v; - const double scale = 1. / kGammaScale; - for (v = 0; v <= 255; ++v) { - kGammaToLinearTab[v] = - (uint16_t)(pow(v / 255., kGamma) * kGammaScale + .5); - } - for (v = 0; v <= kGammaTabSize; ++v) { - const double x = scale * (v << kGammaTabFix); - kLinearToGammaTab[v] = (int)(pow(x, 1. / kGamma) * 255. + .5); - } - kGammaTablesOk = 1; +void WebPMemoryWriterClear(WebPMemoryWriter* writer) { + if (writer != NULL) { + WebPSafeFree(writer->mem); + writer->mem = NULL; + writer->size = 0; + writer->max_size = 0; } } -static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { - return kGammaToLinearTab[v]; -} - -// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision -// U/V value, suitable for RGBToU/V calls. -static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { - const int v = base_value << shift; // final uplifted value - const int tab_pos = v >> (kGammaTabFix + 2); // integer part - const int x = v & ((kGammaTabScale << 2) - 1); // fractional part - const int v0 = kLinearToGammaTab[tab_pos]; - const int v1 = kLinearToGammaTab[tab_pos + 1]; - const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate - return (y + kGammaTabRounder) >> kGammaTabFix; // descale -} - -#else - -static void InitGammaTables(void) {} -static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } -static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { - (void)shift; - return v; -} - -#endif // USE_GAMMA_COMPRESSION - -//------------------------------------------------------------------------------ - -#define SUM4(ptr) LinearToGamma( \ - GammaToLinear((ptr)[0]) + \ - GammaToLinear((ptr)[step]) + \ - GammaToLinear((ptr)[rgb_stride]) + \ - GammaToLinear((ptr)[rgb_stride + step]), 0) \ - -#define SUM2H(ptr) \ - LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[step]), 1) -#define SUM2V(ptr) \ - LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) -#define SUM1(ptr) \ - LinearToGamma(GammaToLinear((ptr)[0]), 2) - -#define RGB_TO_UV(x, y, SUM) { \ - const int src = (2 * (step * (x) + (y) * rgb_stride)); \ - const int dst = (x) + (y) * picture->uv_stride; \ - const int r = SUM(r_ptr + src); \ - const int g = SUM(g_ptr + src); \ - const int b = SUM(b_ptr + src); \ - picture->u[dst] = RGBToU(r, g, b, &rg); \ - picture->v[dst] = RGBToV(r, g, b, &rg); \ -} - -#define RGB_TO_UV0(x_in, x_out, y, SUM) { \ - const int src = (step * (x_in) + (y) * rgb_stride); \ - const int dst = (x_out) + (y) * picture->uv0_stride; \ - const int r = SUM(r_ptr + src); \ - const int g = SUM(g_ptr + src); \ - const int b = SUM(b_ptr + src); \ - picture->u0[dst] = RGBToU(r, g, b, &rg); \ - picture->v0[dst] = RGBToV(r, g, b, &rg); \ -} - -static void MakeGray(WebPPicture* const picture) { - int y; - const int uv_width = HALVE(picture->width); - const int uv_height = HALVE(picture->height); - for (y = 0; y < uv_height; ++y) { - memset(picture->u + y * picture->uv_stride, 128, uv_width); - memset(picture->v + y * picture->uv_stride, 128, uv_width); - } -} - -static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - const uint8_t* const a_ptr, - int step, // bytes per pixel - int rgb_stride, // bytes per scanline - float dithering, - WebPPicture* const picture) { - const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK; - int x, y; - const int width = picture->width; - const int height = picture->height; - const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); - VP8Random rg; - - picture->colorspace = uv_csp; - picture->use_argb = 0; - if (has_alpha) { - picture->colorspace |= WEBP_CSP_ALPHA_BIT; - } - if (!WebPPictureAlloc(picture)) return 0; - - VP8InitRandom(&rg, dithering); - InitGammaTables(); - - // Import luma plane - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - picture->y[x + y * picture->y_stride] = - RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset], &rg); - } - } - - // Downsample U/V plane - if (uv_csp != WEBP_YUV400) { - for (y = 0; y < (height >> 1); ++y) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV(x, y, SUM4); - } - if (width & 1) { - RGB_TO_UV(x, y, SUM2V); - } - } - if (height & 1) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV(x, y, SUM2H); - } - if (width & 1) { - RGB_TO_UV(x, y, SUM1); - } - } - -#ifdef WEBP_EXPERIMENTAL_FEATURES - // Store original U/V samples too - if (uv_csp == WEBP_YUV422) { - for (y = 0; y < height; ++y) { - for (x = 0; x < (width >> 1); ++x) { - RGB_TO_UV0(2 * x, x, y, SUM2H); - } - if (width & 1) { - RGB_TO_UV0(2 * x, x, y, SUM1); - } - } - } else if (uv_csp == WEBP_YUV444) { - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - RGB_TO_UV0(x, x, y, SUM1); - } - } - } -#endif - } else { - MakeGray(picture); - } - - if (has_alpha) { - assert(step >= 4); - assert(picture->a != NULL); - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - picture->a[x + y * picture->a_stride] = - a_ptr[step * x + y * rgb_stride]; - } - } - } - return 1; -} - -static int Import(WebPPicture* const picture, - const uint8_t* const rgb, int rgb_stride, - int step, int swap_rb, int import_alpha) { - const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0); - const uint8_t* const g_ptr = rgb + 1; - const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2); - const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL; - const int width = picture->width; - const int height = picture->height; - - if (!picture->use_argb) { - return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, - 0.f /* no dithering */, picture); - } - if (import_alpha) { - picture->colorspace |= WEBP_CSP_ALPHA_BIT; - } else { - picture->colorspace &= ~WEBP_CSP_ALPHA_BIT; - } - if (!WebPPictureAlloc(picture)) return 0; - - if (!import_alpha) { - int x, y; - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - const uint32_t argb = - MakeARGB32(r_ptr[offset], g_ptr[offset], b_ptr[offset]); - picture->argb[x + y * picture->argb_stride] = argb; - } - } - } else { - int x, y; - assert(step >= 4); - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - const uint32_t argb = ((uint32_t)a_ptr[offset] << 24) | - (r_ptr[offset] << 16) | - (g_ptr[offset] << 8) | - (b_ptr[offset]); - picture->argb[x + y * picture->argb_stride] = argb; - } - } - } - return 1; -} -#undef SUM4 -#undef SUM2V -#undef SUM2H -#undef SUM1 -#undef RGB_TO_UV - -int WebPPictureImportRGB(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return Import(picture, rgb, rgb_stride, 3, 0, 0); -} - -int WebPPictureImportBGR(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return Import(picture, rgb, rgb_stride, 3, 1, 0); -} - -int WebPPictureImportRGBA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 0, 1); -} - -int WebPPictureImportBGRA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 1, 1); -} - -int WebPPictureImportRGBX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 0, 0); -} - -int WebPPictureImportBGRX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return Import(picture, rgba, rgba_stride, 4, 1, 0); -} - -//------------------------------------------------------------------------------ -// Automatic YUV <-> ARGB conversions. - -int WebPPictureYUVAToARGB(WebPPicture* picture) { - if (picture == NULL) return 0; - if (picture->y == NULL || picture->u == NULL || picture->v == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } - if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } - if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - // Allocate a new argb buffer (discarding the previous one). - if (!PictureAllocARGB(picture)) return 0; - - // Convert - { - int y; - const int width = picture->width; - const int height = picture->height; - const int argb_stride = 4 * picture->argb_stride; - uint8_t* dst = (uint8_t*)picture->argb; - const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; - WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); - - // First row, with replicated top samples. - upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); - cur_y += picture->y_stride; - dst += argb_stride; - // Center rows. - for (y = 1; y + 1 < height; y += 2) { - const uint8_t* const top_u = cur_u; - const uint8_t* const top_v = cur_v; - cur_u += picture->uv_stride; - cur_v += picture->uv_stride; - upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, - dst, dst + argb_stride, width); - cur_y += 2 * picture->y_stride; - dst += 2 * argb_stride; - } - // Last row (if needed), with replicated bottom samples. - if (height > 1 && !(height & 1)) { - upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); - } - // Insert alpha values if needed, in replacement for the default 0xff ones. - if (picture->colorspace & WEBP_CSP_ALPHA_BIT) { - for (y = 0; y < height; ++y) { - uint32_t* const argb_dst = picture->argb + y * picture->argb_stride; - const uint8_t* const src = picture->a + y * picture->a_stride; - int x; - for (x = 0; x < width; ++x) { - argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24); - } - } - } - } - return 1; -} - -int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, - float dithering) { - if (picture == NULL) return 0; - if (picture->argb == NULL) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } else { - const uint8_t* const argb = (const uint8_t*)picture->argb; - const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; - const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; - const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; - const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; - // We work on a tmp copy of 'picture', because ImportYUVAFromRGBA() - // would be calling WebPPictureFree(picture) otherwise. - WebPPicture tmp = *picture; - PictureResetARGB(&tmp); // reset ARGB buffer so that it's not free()'d. - tmp.use_argb = 0; - tmp.colorspace = colorspace & WEBP_CSP_UV_MASK; - if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, dithering, - &tmp)) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - // Copy back the YUV specs into 'picture'. - tmp.argb = picture->argb; - tmp.argb_stride = picture->argb_stride; - tmp.memory_argb_ = picture->memory_argb_; - *picture = tmp; - } - return 1; -} - -int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { - return WebPPictureARGBToYUVADithered(picture, colorspace, 0.f); -} - -//------------------------------------------------------------------------------ -// Helper: clean up fully transparent area to help compressibility. - -#define SIZE 8 -#define SIZE2 (SIZE / 2) -static int is_transparent_area(const uint8_t* ptr, int stride, int size) { - int y, x; - for (y = 0; y < size; ++y) { - for (x = 0; x < size; ++x) { - if (ptr[x]) { - return 0; - } - } - ptr += stride; - } - return 1; -} - -static WEBP_INLINE void flatten(uint8_t* ptr, int v, int stride, int size) { - int y; - for (y = 0; y < size; ++y) { - memset(ptr, v, size); - ptr += stride; - } -} - -void WebPCleanupTransparentArea(WebPPicture* pic) { - int x, y, w, h; - const uint8_t* a_ptr; - int values[3] = { 0 }; - - if (pic == NULL) return; - - a_ptr = pic->a; - if (a_ptr == NULL) return; // nothing to do - - w = pic->width / SIZE; - h = pic->height / SIZE; - for (y = 0; y < h; ++y) { - int need_reset = 1; - for (x = 0; x < w; ++x) { - const int off_a = (y * pic->a_stride + x) * SIZE; - const int off_y = (y * pic->y_stride + x) * SIZE; - const int off_uv = (y * pic->uv_stride + x) * SIZE2; - if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) { - if (need_reset) { - values[0] = pic->y[off_y]; - values[1] = pic->u[off_uv]; - values[2] = pic->v[off_uv]; - need_reset = 0; - } - flatten(pic->y + off_y, values[0], pic->y_stride, SIZE); - flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2); - flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2); - } else { - need_reset = 1; - } - } - // ignore the left-overs on right/bottom - } -} - -#undef SIZE -#undef SIZE2 - -//------------------------------------------------------------------------------ -// Blend color and remove transparency info - -#define BLEND(V0, V1, ALPHA) \ - ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16) -#define BLEND_10BIT(V0, V1, ALPHA) \ - ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18) - -void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { - const int red = (background_rgb >> 16) & 0xff; - const int green = (background_rgb >> 8) & 0xff; - const int blue = (background_rgb >> 0) & 0xff; - VP8Random rg; - int x, y; - if (pic == NULL) return; - VP8InitRandom(&rg, 0.f); - if (!pic->use_argb) { - const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop - const int Y0 = RGBToY(red, green, blue, &rg); - // VP8RGBToU/V expects the u/v values summed over four pixels - const int U0 = RGBToU(4 * red, 4 * green, 4 * blue, &rg); - const int V0 = RGBToV(4 * red, 4 * green, 4 * blue, &rg); - const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; - if (!has_alpha || pic->a == NULL) return; // nothing to do - for (y = 0; y < pic->height; ++y) { - // Luma blending - uint8_t* const y_ptr = pic->y + y * pic->y_stride; - uint8_t* const a_ptr = pic->a + y * pic->a_stride; - for (x = 0; x < pic->width; ++x) { - const int alpha = a_ptr[x]; - if (alpha < 0xff) { - y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]); - } - } - // Chroma blending every even line - if ((y & 1) == 0) { - uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride; - uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride; - uint8_t* const a_ptr2 = - (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; - for (x = 0; x < uv_width; ++x) { - // Average four alpha values into a single blending weight. - // TODO(skal): might lead to visible contouring. Can we do better? - const int alpha = - a_ptr[2 * x + 0] + a_ptr[2 * x + 1] + - a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1]; - u[x] = BLEND_10BIT(U0, u[x], alpha); - v[x] = BLEND_10BIT(V0, v[x], alpha); - } - if (pic->width & 1) { // rightmost pixel - const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); - u[x] = BLEND_10BIT(U0, u[x], alpha); - v[x] = BLEND_10BIT(V0, v[x], alpha); - } - } - memset(a_ptr, 0xff, pic->width); - } - } else { - uint32_t* argb = pic->argb; - const uint32_t background = MakeARGB32(red, green, blue); - for (y = 0; y < pic->height; ++y) { - for (x = 0; x < pic->width; ++x) { - const int alpha = (argb[x] >> 24) & 0xff; - if (alpha != 0xff) { - if (alpha > 0) { - int r = (argb[x] >> 16) & 0xff; - int g = (argb[x] >> 8) & 0xff; - int b = (argb[x] >> 0) & 0xff; - r = BLEND(red, r, alpha); - g = BLEND(green, g, alpha); - b = BLEND(blue, b, alpha); - argb[x] = MakeARGB32(r, g, b); - } else { - argb[x] = background; - } - } - } - argb += pic->argb_stride; - } - } -} - -#undef BLEND -#undef BLEND_10BIT - -//------------------------------------------------------------------------------ -// local-min distortion -// -// For every pixel in the *reference* picture, we search for the local best -// match in the compressed image. This is not a symmetrical measure. - -// search radius. Shouldn't be too large. -#define RADIUS 2 - -static float AccumulateLSIM(const uint8_t* src, int src_stride, - const uint8_t* ref, int ref_stride, - int w, int h) { - int x, y; - double total_sse = 0.; - for (y = 0; y < h; ++y) { - const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS; - const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1; - for (x = 0; x < w; ++x) { - const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS; - const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1; - double best_sse = 255. * 255.; - const double value = (double)ref[y * ref_stride + x]; - int i, j; - for (j = y_0; j < y_1; ++j) { - const uint8_t* s = src + j * src_stride; - for (i = x_0; i < x_1; ++i) { - const double sse = (double)(s[i] - value) * (s[i] - value); - if (sse < best_sse) best_sse = sse; - } - } - total_sse += best_sse; - } - } - return (float)total_sse; -} -#undef RADIUS - -//------------------------------------------------------------------------------ -// Distortion - -// Max value returned in case of exact similarity. -static const double kMinDistortion_dB = 99.; -static float GetPSNR(const double v) { - return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) - : kMinDistortion_dB); -} - -int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, - int type, float result[5]) { - DistoStats stats[5]; - int has_alpha; - int uv_w, uv_h; - - if (src == NULL || ref == NULL || - src->width != ref->width || src->height != ref->height || - src->y == NULL || ref->y == NULL || - src->u == NULL || ref->u == NULL || - src->v == NULL || ref->v == NULL || - result == NULL) { - return 0; - } - // TODO(skal): provide distortion for ARGB too. - if (src->use_argb == 1 || src->use_argb != ref->use_argb) { - return 0; - } - - has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); - if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || - (has_alpha && (src->a == NULL || ref->a == NULL))) { - return 0; - } - - memset(stats, 0, sizeof(stats)); - - uv_w = HALVE(src->width); - uv_h = HALVE(src->height); - if (type >= 2) { - float sse[4]; - sse[0] = AccumulateLSIM(src->y, src->y_stride, - ref->y, ref->y_stride, src->width, src->height); - sse[1] = AccumulateLSIM(src->u, src->uv_stride, - ref->u, ref->uv_stride, uv_w, uv_h); - sse[2] = AccumulateLSIM(src->v, src->uv_stride, - ref->v, ref->uv_stride, uv_w, uv_h); - sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride, - ref->a, ref->a_stride, - src->width, src->height) - : 0.f; - result[0] = GetPSNR(sse[0] / (src->width * src->height)); - result[1] = GetPSNR(sse[1] / (uv_w * uv_h)); - result[2] = GetPSNR(sse[2] / (uv_w * uv_h)); - result[3] = GetPSNR(sse[3] / (src->width * src->height)); - { - double total_sse = sse[0] + sse[1] + sse[2]; - int total_pixels = src->width * src->height + 2 * uv_w * uv_h; - if (has_alpha) { - total_pixels += src->width * src->height; - total_sse += sse[3]; - } - result[4] = GetPSNR(total_sse / total_pixels); - } - } else { - int c; - VP8SSIMAccumulatePlane(src->y, src->y_stride, - ref->y, ref->y_stride, - src->width, src->height, &stats[0]); - VP8SSIMAccumulatePlane(src->u, src->uv_stride, - ref->u, ref->uv_stride, - uv_w, uv_h, &stats[1]); - VP8SSIMAccumulatePlane(src->v, src->uv_stride, - ref->v, ref->uv_stride, - uv_w, uv_h, &stats[2]); - if (has_alpha) { - VP8SSIMAccumulatePlane(src->a, src->a_stride, - ref->a, ref->a_stride, - src->width, src->height, &stats[3]); - } - for (c = 0; c <= 4; ++c) { - if (type == 1) { - const double v = VP8SSIMGet(&stats[c]); - result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) - : kMinDistortion_dB); - } else { - const double v = VP8SSIMGetSquaredError(&stats[c]); - result[c] = GetPSNR(v); - } - // Accumulate forward - if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); - } - } - return 1; -} - //------------------------------------------------------------------------------ // Simplest high-level calls: @@ -1286,7 +252,7 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride, ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic); WebPPictureFree(&pic); if (!ok) { - free(wrt.mem); + WebPMemoryWriterClear(&wrt); *output = NULL; return 0; } @@ -1321,4 +287,3 @@ LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA) #undef LOSSLESS_ENCODE_FUNC //------------------------------------------------------------------------------ - diff --git a/src/3rdparty/libwebp/src/enc/picture_csp.c b/src/3rdparty/libwebp/src/enc/picture_csp.c new file mode 100644 index 0000000..7875f62 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/picture_csp.c @@ -0,0 +1,1114 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture utils for colorspace conversion +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> +#include <math.h> + +#include "./vp8enci.h" +#include "../utils/random.h" +#include "../utils/utils.h" +#include "../dsp/yuv.h" + +// Uncomment to disable gamma-compression during RGB->U/V averaging +#define USE_GAMMA_COMPRESSION + +// If defined, use table to compute x / alpha. +#define USE_INVERSE_ALPHA_TABLE + +static const union { + uint32_t argb; + uint8_t bytes[4]; +} test_endian = { 0xff000000u }; +#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) + +static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { + return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); +} + +//------------------------------------------------------------------------------ +// Detection of non-trivial transparency + +// Returns true if alpha[] has non-0xff values. +static int CheckNonOpaque(const uint8_t* alpha, int width, int height, + int x_step, int y_step) { + if (alpha == NULL) return 0; + while (height-- > 0) { + int x; + for (x = 0; x < width * x_step; x += x_step) { + if (alpha[x] != 0xff) return 1; // TODO(skal): check 4/8 bytes at a time. + } + alpha += y_step; + } + return 0; +} + +// Checking for the presence of non-opaque alpha. +int WebPPictureHasTransparency(const WebPPicture* picture) { + if (picture == NULL) return 0; + if (!picture->use_argb) { + return CheckNonOpaque(picture->a, picture->width, picture->height, + 1, picture->a_stride); + } else { + int x, y; + const uint32_t* argb = picture->argb; + if (argb == NULL) return 0; + for (y = 0; y < picture->height; ++y) { + for (x = 0; x < picture->width; ++x) { + if (argb[x] < 0xff000000u) return 1; // test any alpha values != 0xff + } + argb += picture->argb_stride; + } + } + return 0; +} + +//------------------------------------------------------------------------------ +// Code for gamma correction + +#if defined(USE_GAMMA_COMPRESSION) + +// gamma-compensates loss of resolution during chroma subsampling +#define kGamma 0.80 // for now we use a different gamma value than kGammaF +#define kGammaFix 12 // fixed-point precision for linear values +#define kGammaScale ((1 << kGammaFix) - 1) +#define kGammaTabFix 7 // fixed-point fractional bits precision +#define kGammaTabScale (1 << kGammaTabFix) +#define kGammaTabRounder (kGammaTabScale >> 1) +#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) + +static int kLinearToGammaTab[kGammaTabSize + 1]; +static uint16_t kGammaToLinearTab[256]; +static int kGammaTablesOk = 0; + +static void InitGammaTables(void) { + if (!kGammaTablesOk) { + int v; + const double scale = (double)(1 << kGammaTabFix) / kGammaScale; + const double norm = 1. / 255.; + for (v = 0; v <= 255; ++v) { + kGammaToLinearTab[v] = + (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); + } + for (v = 0; v <= kGammaTabSize; ++v) { + kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); + } + kGammaTablesOk = 1; + } +} + +static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { + return kGammaToLinearTab[v]; +} + +static WEBP_INLINE int Interpolate(int v) { + const int tab_pos = v >> (kGammaTabFix + 2); // integer part + const int x = v & ((kGammaTabScale << 2) - 1); // fractional part + const int v0 = kLinearToGammaTab[tab_pos]; + const int v1 = kLinearToGammaTab[tab_pos + 1]; + const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate + assert(tab_pos + 1 < kGammaTabSize + 1); + return y; +} + +// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision +// U/V value, suitable for RGBToU/V calls. +static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { + const int y = Interpolate(base_value << shift); // final uplifted value + return (y + kGammaTabRounder) >> kGammaTabFix; // descale +} + +#else + +static void InitGammaTables(void) {} +static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } +static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { + return (int)(base_value << shift); +} + +#endif // USE_GAMMA_COMPRESSION + +//------------------------------------------------------------------------------ +// RGB -> YUV conversion + +static int RGBToY(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF) + : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); +} + +static int RGBToU(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2) + : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +static int RGBToV(int r, int g, int b, VP8Random* const rg) { + return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2) + : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); +} + +//------------------------------------------------------------------------------ +// Smart RGB->YUV conversion + +static const int kNumIterations = 6; +static const int kMinDimensionIterativeConversion = 4; + +// We use a-priori a different precision for storing RGB and Y/W components +// We could use YFIX=0 and only uint8_t for fixed_y_t, but it produces some +// banding sometimes. Better use extra precision. +// TODO(skal): cleanup once TFIX/YFIX values are fixed. + +typedef int16_t fixed_t; // signed type with extra TFIX precision for UV +typedef uint16_t fixed_y_t; // unsigned type with extra YFIX precision for W +#define TFIX 6 // fixed-point precision of RGB +#define YFIX 2 // fixed point precision for Y/W + +#define THALF ((1 << TFIX) >> 1) +#define MAX_Y_T ((256 << YFIX) - 1) +#define TROUNDER (1 << (YUV_FIX + TFIX - 1)) + +#if defined(USE_GAMMA_COMPRESSION) + +// float variant of gamma-correction +// We use tables of different size and precision, along with a 'real-world' +// Gamma value close to ~2. +#define kGammaF 2.2 +static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX +static float kLinearToGammaTabF[kGammaTabSize + 2]; +static int kGammaTablesFOk = 0; + +static void InitGammaTablesF(void) { + if (!kGammaTablesFOk) { + int v; + const double norm = 1. / MAX_Y_T; + const double scale = 1. / kGammaTabSize; + for (v = 0; v <= MAX_Y_T; ++v) { + kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF); + } + for (v = 0; v <= kGammaTabSize; ++v) { + kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF)); + } + // to prevent small rounding errors to cause read-overflow: + kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize]; + kGammaTablesFOk = 1; + } +} + +static WEBP_INLINE float GammaToLinearF(int v) { + return kGammaToLinearTabF[v]; +} + +static WEBP_INLINE float LinearToGammaF(float value) { + const float v = value * kGammaTabSize; + const int tab_pos = (int)v; + const float x = v - (float)tab_pos; // fractional part + const float v0 = kLinearToGammaTabF[tab_pos + 0]; + const float v1 = kLinearToGammaTabF[tab_pos + 1]; + const float y = v1 * x + v0 * (1.f - x); // interpolate + return y; +} + +#else + +static void InitGammaTablesF(void) {} +static WEBP_INLINE float GammaToLinearF(int v) { + const float norm = 1.f / MAX_Y_T; + return norm * v; +} +static WEBP_INLINE float LinearToGammaF(float value) { + return MAX_Y_T * value; +} + +#endif // USE_GAMMA_COMPRESSION + +//------------------------------------------------------------------------------ + +// precision: YFIX -> TFIX +static WEBP_INLINE int FixedYToW(int v) { +#if TFIX == YFIX + return v; +#elif TFIX >= YFIX + return v << (TFIX - YFIX); +#else + return v >> (YFIX - TFIX); +#endif +} + +static WEBP_INLINE int FixedWToY(int v) { +#if TFIX == YFIX + return v; +#elif YFIX >= TFIX + return v << (YFIX - TFIX); +#else + return v >> (TFIX - YFIX); +#endif +} + +static uint8_t clip_8b(fixed_t v) { + return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; +} + +static fixed_y_t clip_y(int y) { + return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; +} + +// precision: TFIX -> YFIX +static fixed_y_t clip_fixed_t(fixed_t v) { + const int y = FixedWToY(v); + const fixed_y_t w = clip_y(y); + return w; +} + +//------------------------------------------------------------------------------ + +static int RGBToGray(int r, int g, int b) { + const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF; + return (luma >> YUV_FIX); +} + +static float RGBToGrayF(float r, float g, float b) { + return 0.299f * r + 0.587f * g + 0.114f * b; +} + +static float ScaleDown(int a, int b, int c, int d) { + const float A = GammaToLinearF(a); + const float B = GammaToLinearF(b); + const float C = GammaToLinearF(c); + const float D = GammaToLinearF(d); + return LinearToGammaF(0.25f * (A + B + C + D)); +} + +static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) { + while (len-- > 0) { + const float R = GammaToLinearF(src[0]); + const float G = GammaToLinearF(src[1]); + const float B = GammaToLinearF(src[2]); + const float Y = RGBToGrayF(R, G, B); + *dst++ = (fixed_y_t)(LinearToGammaF(Y) + .5); + src += 3; + } +} + +static WEBP_INLINE void UpdateChroma(const fixed_y_t* src1, + const fixed_y_t* src2, + fixed_t* dst, fixed_y_t* tmp, int len) { + while (len--> 0) { + const float r = ScaleDown(src1[0], src1[3], src2[0], src2[3]); + const float g = ScaleDown(src1[1], src1[4], src2[1], src2[4]); + const float b = ScaleDown(src1[2], src1[5], src2[2], src2[5]); + const float W = RGBToGrayF(r, g, b); + dst[0] = (fixed_t)FixedYToW((int)(r - W)); + dst[1] = (fixed_t)FixedYToW((int)(g - W)); + dst[2] = (fixed_t)FixedYToW((int)(b - W)); + dst += 3; + src1 += 6; + src2 += 6; + if (tmp != NULL) { + tmp[0] = tmp[1] = clip_y((int)(W + .5)); + tmp += 2; + } + } +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B, + int rightwise) { + int v; + if (!rightwise) { + v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]); + } else { + v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]); + } + return (v + 8) >> 4; +} + +static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; } + +//------------------------------------------------------------------------------ + +// 8bit -> YFIX +static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { + return ((fixed_y_t)a << YFIX) | (1 << (YFIX - 1)); +} + +static void ImportOneRow(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + int pic_width, + fixed_y_t* const dst) { + int i; + for (i = 0; i < pic_width; ++i) { + const int off = i * step; + dst[3 * i + 0] = UpLift(r_ptr[off]); + dst[3 * i + 1] = UpLift(g_ptr[off]); + dst[3 * i + 2] = UpLift(b_ptr[off]); + } + if (pic_width & 1) { // replicate rightmost pixel + memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst)); + } +} + +static void InterpolateTwoRows(const fixed_y_t* const best_y, + const fixed_t* const prev_uv, + const fixed_t* const cur_uv, + const fixed_t* const next_uv, + int w, + fixed_y_t* const out1, + fixed_y_t* const out2) { + int i, k; + { // special boundary case for i==0 + const int W0 = FixedYToW(best_y[0]); + const int W1 = FixedYToW(best_y[w]); + for (k = 0; k <= 2; ++k) { + out1[k] = clip_fixed_t(Filter2(cur_uv[k], prev_uv[k]) + W0); + out2[k] = clip_fixed_t(Filter2(cur_uv[k], next_uv[k]) + W1); + } + } + for (i = 1; i < w - 1; ++i) { + const int W0 = FixedYToW(best_y[i + 0]); + const int W1 = FixedYToW(best_y[i + w]); + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1); + const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1); + out1[3 * i + k] = clip_fixed_t(tmp0 + W0); + out2[3 * i + k] = clip_fixed_t(tmp1 + W1); + } + } + { // special boundary case for i == w - 1 + const int W0 = FixedYToW(best_y[i + 0]); + const int W1 = FixedYToW(best_y[i + w]); + const int off = 3 * (i >> 1); + for (k = 0; k <= 2; ++k) { + out1[3 * i + k] = + clip_fixed_t(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0); + out2[3 * i + k] = + clip_fixed_t(Filter2(cur_uv[off + k], next_uv[off + k]) + W1); + } + } +} + +static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { + const int luma = 16839 * r + 33059 * g + 6420 * b + TROUNDER; + return clip_8b(16 + (luma >> (YUV_FIX + TFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { + const int u = -9719 * r - 19081 * g + 28800 * b + TROUNDER; + return clip_8b(128 + (u >> (YUV_FIX + TFIX))); +} + +static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { + const int v = +28800 * r - 24116 * g - 4684 * b + TROUNDER; + return clip_8b(128 + (v >> (YUV_FIX + TFIX))); +} + +static int ConvertWRGBToYUV(const fixed_y_t* const best_y, + const fixed_t* const best_uv, + WebPPicture* const picture) { + int i, j; + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + for (j = 0; j < picture->height; ++j) { + for (i = 0; i < picture->width; ++i) { + const int off = 3 * ((i >> 1) + (j >> 1) * uv_w); + const int off2 = i + j * picture->y_stride; + const int W = FixedYToW(best_y[i + j * w]); + const int r = best_uv[off + 0] + W; + const int g = best_uv[off + 1] + W; + const int b = best_uv[off + 2] + W; + picture->y[off2] = ConvertRGBToY(r, g, b); + } + } + for (j = 0; j < uv_h; ++j) { + uint8_t* const dst_u = picture->u + j * picture->uv_stride; + uint8_t* const dst_v = picture->v + j * picture->uv_stride; + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + j * uv_w); + const int r = best_uv[off + 0]; + const int g = best_uv[off + 1]; + const int b = best_uv[off + 2]; + dst_u[i] = ConvertRGBToU(r, g, b); + dst_v[i] = ConvertRGBToV(r, g, b); + } + } + return 1; +} + +//------------------------------------------------------------------------------ +// Main function + +#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) + +static int PreprocessARGB(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + WebPPicture* const picture) { + // we expand the right/bottom border if needed + const int w = (picture->width + 1) & ~1; + const int h = (picture->height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + int i, j, iter; + + // TODO(skal): allocate one big memory chunk. But for now, it's easier + // for valgrind debugging to have several chunks. + fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch + fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); + fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); + int ok; + + if (best_y == NULL || best_uv == NULL || + target_y == NULL || target_uv == NULL || + best_rgb_y == NULL || best_rgb_uv == NULL || + tmp_buffer == NULL) { + ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto End; + } + assert(picture->width >= kMinDimensionIterativeConversion); + assert(picture->height >= kMinDimensionIterativeConversion); + + // Import RGB samples to W/RGB representation. + for (j = 0; j < picture->height; j += 2) { + const int is_last_row = (j == picture->height - 1); + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + const int off1 = j * rgb_stride; + const int off2 = off1 + rgb_stride; + const int uv_off = (j >> 1) * 3 * uv_w; + fixed_y_t* const dst_y = best_y + j * w; + + // prepare two rows of input + ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, picture->width, src1); + if (!is_last_row) { + ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2, + step, picture->width, src2); + } else { + memcpy(src2, src1, 3 * w * sizeof(*src2)); + } + UpdateW(src1, target_y + (j + 0) * w, w); + UpdateW(src2, target_y + (j + 1) * w, w); + UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w); + memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv)); + memcpy(dst_y + w, dst_y, w * sizeof(*dst_y)); + } + + // Iterate and resolve clipping conflicts. + for (iter = 0; iter < kNumIterations; ++iter) { + int k; + const fixed_t* cur_uv = best_uv; + const fixed_t* prev_uv = best_uv; + for (j = 0; j < h; j += 2) { + fixed_y_t* const src1 = tmp_buffer; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + + { + const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); + InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv, + w, src1, src2); + prev_uv = cur_uv; + cur_uv = next_uv; + } + + UpdateW(src1, best_rgb_y + 0 * w, w); + UpdateW(src2, best_rgb_y + 1 * w, w); + UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w); + + // update two rows of Y and one row of RGB + for (i = 0; i < 2 * w; ++i) { + const int off = i + j * w; + const int diff_y = target_y[off] - best_rgb_y[i]; + const int new_y = (int)best_y[off] + diff_y; + best_y[off] = clip_y(new_y); + } + for (i = 0; i < uv_w; ++i) { + const int off = 3 * (i + (j >> 1) * uv_w); + int W; + for (k = 0; k <= 2; ++k) { + const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k]; + best_uv[off + k] += diff_uv; + } + W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]); + for (k = 0; k <= 2; ++k) { + best_uv[off + k] -= W; + } + } + } + // TODO(skal): add early-termination criterion + } + + // final reconstruction + ok = ConvertWRGBToYUV(best_y, best_uv, picture); + + End: + WebPSafeFree(best_y); + WebPSafeFree(best_uv); + WebPSafeFree(target_y); + WebPSafeFree(target_uv); + WebPSafeFree(best_rgb_y); + WebPSafeFree(best_rgb_uv); + WebPSafeFree(tmp_buffer); + return ok; +} +#undef SAFE_ALLOC + +//------------------------------------------------------------------------------ +// "Fast" regular RGB->YUV + +#define SUM4(ptr, step) LinearToGamma( \ + GammaToLinear((ptr)[0]) + \ + GammaToLinear((ptr)[(step)]) + \ + GammaToLinear((ptr)[rgb_stride]) + \ + GammaToLinear((ptr)[rgb_stride + (step)]), 0) \ + +#define SUM2(ptr) \ + LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) + +#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride]) +#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4)) + +#if defined(USE_INVERSE_ALPHA_TABLE) + +static const int kAlphaFix = 19; +// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix +// formula is then equal to v / a in most (99.6%) cases. Note that this table +// and constant are adjusted very tightly to fit 32b arithmetic. +// In particular, they use the fact that the operands for 'v / a' are actually +// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 +// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid +// overflow is: kGammaFix + kAlphaFix <= 31. +static const uint32_t kInvAlpha[4 * 0xff + 1] = { + 0, /* alpha = 0 */ + 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536, + 58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768, + 30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845, + 20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384, + 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107, + 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922, + 10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362, + 9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192, + 8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281, + 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553, + 6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957, + 5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461, + 5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041, + 4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681, + 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369, + 4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096, + 4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855, + 3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640, + 3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449, + 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276, + 3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120, + 3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978, + 2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849, + 2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730, + 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621, + 2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520, + 2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427, + 2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340, + 2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259, + 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184, + 2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114, + 2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048, + 2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985, + 1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927, + 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872, + 1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820, + 1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771, + 1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724, + 1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680, + 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638, + 1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598, + 1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560, + 1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524, + 1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489, + 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456, + 1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424, + 1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394, + 1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365, + 1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337, + 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310, + 1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285, + 1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260, + 1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236, + 1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213, + 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191, + 1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170, + 1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149, + 1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129, + 1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110, + 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092, + 1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074, + 1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057, + 1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040, + 1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024, + 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008, + 1006, 1004, 1002, 1000, 998, 996, 994, 992, + 991, 989, 987, 985, 983, 981, 979, 978, + 976, 974, 972, 970, 969, 967, 965, 963, + 961, 960, 958, 956, 954, 953, 951, 949, + 948, 946, 944, 942, 941, 939, 937, 936, + 934, 932, 931, 929, 927, 926, 924, 923, + 921, 919, 918, 916, 914, 913, 911, 910, + 908, 907, 905, 903, 902, 900, 899, 897, + 896, 894, 893, 891, 890, 888, 887, 885, + 884, 882, 881, 879, 878, 876, 875, 873, + 872, 870, 869, 868, 866, 865, 863, 862, + 860, 859, 858, 856, 855, 853, 852, 851, + 849, 848, 846, 845, 844, 842, 841, 840, + 838, 837, 836, 834, 833, 832, 830, 829, + 828, 826, 825, 824, 823, 821, 820, 819, + 817, 816, 815, 814, 812, 811, 810, 809, + 807, 806, 805, 804, 802, 801, 800, 799, + 798, 796, 795, 794, 793, 791, 790, 789, + 788, 787, 786, 784, 783, 782, 781, 780, + 779, 777, 776, 775, 774, 773, 772, 771, + 769, 768, 767, 766, 765, 764, 763, 762, + 760, 759, 758, 757, 756, 755, 754, 753, + 752, 751, 750, 748, 747, 746, 745, 744, + 743, 742, 741, 740, 739, 738, 737, 736, + 735, 734, 733, 732, 731, 730, 729, 728, + 727, 726, 725, 724, 723, 722, 721, 720, + 719, 718, 717, 716, 715, 714, 713, 712, + 711, 710, 709, 708, 707, 706, 705, 704, + 703, 702, 701, 700, 699, 699, 698, 697, + 696, 695, 694, 693, 692, 691, 690, 689, + 688, 688, 687, 686, 685, 684, 683, 682, + 681, 680, 680, 679, 678, 677, 676, 675, + 674, 673, 673, 672, 671, 670, 669, 668, + 667, 667, 666, 665, 664, 663, 662, 661, + 661, 660, 659, 658, 657, 657, 656, 655, + 654, 653, 652, 652, 651, 650, 649, 648, + 648, 647, 646, 645, 644, 644, 643, 642, + 641, 640, 640, 639, 638, 637, 637, 636, + 635, 634, 633, 633, 632, 631, 630, 630, + 629, 628, 627, 627, 626, 625, 624, 624, + 623, 622, 621, 621, 620, 619, 618, 618, + 617, 616, 616, 615, 614, 613, 613, 612, + 611, 611, 610, 609, 608, 608, 607, 606, + 606, 605, 604, 604, 603, 602, 601, 601, + 600, 599, 599, 598, 597, 597, 596, 595, + 595, 594, 593, 593, 592, 591, 591, 590, + 589, 589, 588, 587, 587, 586, 585, 585, + 584, 583, 583, 582, 581, 581, 580, 579, + 579, 578, 578, 577, 576, 576, 575, 574, + 574, 573, 572, 572, 571, 571, 570, 569, + 569, 568, 568, 567, 566, 566, 565, 564, + 564, 563, 563, 562, 561, 561, 560, 560, + 559, 558, 558, 557, 557, 556, 555, 555, + 554, 554, 553, 553, 552, 551, 551, 550, + 550, 549, 548, 548, 547, 547, 546, 546, + 545, 544, 544, 543, 543, 542, 542, 541, + 541, 540, 539, 539, 538, 538, 537, 537, + 536, 536, 535, 534, 534, 533, 533, 532, + 532, 531, 531, 530, 530, 529, 529, 528, + 527, 527, 526, 526, 525, 525, 524, 524, + 523, 523, 522, 522, 521, 521, 520, 520, + 519, 519, 518, 518, 517, 517, 516, 516, + 515, 515, 514, 514 +}; + +// Note that LinearToGamma() expects the values to be premultiplied by 4, +// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly. +#define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2)) + +#else + +#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a)) + +#endif // USE_INVERSE_ALPHA_TABLE + +static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, + const uint8_t* a_ptr, + uint32_t total_a, int step, + int rgb_stride) { + const uint32_t sum = + a_ptr[0] * GammaToLinear(src[0]) + + a_ptr[step] * GammaToLinear(src[step]) + + a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + + a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); + assert(total_a > 0 && total_a <= 4 * 0xff); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); +#endif + return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); +} + +static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, + uint8_t* const dst_y, + int width, + VP8Random* const rg) { + int i, j; + for (i = 0, j = 0; i < width; ++i, j += step) { + dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg); + } +} + +static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int rgb_stride, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i, j; + // we loop over 2x2 blocks and produce one U/V value for each. + for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) { + const uint32_t a = SUM4ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM4(r_ptr + j, 4); + g = SUM4(g_ptr + j, 4); + b = SUM4(b_ptr + j, 4); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } + if (width & 1) { + const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM2(r_ptr + j); + g = SUM2(g_ptr + j); + b = SUM2(b_ptr + j); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } +} + +static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int step, int rgb_stride, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i, j; + for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) { + const int r = SUM4(r_ptr + j, step); + const int g = SUM4(g_ptr + j, step); + const int b = SUM4(b_ptr + j, step); + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } + if (width & 1) { + const int r = SUM2(r_ptr + j); + const int g = SUM2(g_ptr + j); + const int b = SUM2(b_ptr + j); + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } +} + +static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int step, // bytes per pixel + int rgb_stride, // bytes per scanline + float dithering, + int use_iterative_conversion, + WebPPicture* const picture) { + int y; + const int width = picture->width; + const int height = picture->height; + const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); + + picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420; + picture->use_argb = 0; + + // disable smart conversion if source is too small (overkill). + if (width < kMinDimensionIterativeConversion || + height < kMinDimensionIterativeConversion) { + use_iterative_conversion = 0; + } + + if (!WebPPictureAllocYUVA(picture, width, height)) { + return 0; + } + if (has_alpha) { + WebPInitAlphaProcessing(); + assert(step == 4); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert(kAlphaFix + kGammaFix <= 31); +#endif + } + + if (use_iterative_conversion) { + InitGammaTablesF(); + if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { + return 0; + } + if (has_alpha) { + WebPExtractAlpha(a_ptr, rgb_stride, width, height, + picture->a, picture->a_stride); + } + } else { + uint8_t* dst_y = picture->y; + uint8_t* dst_u = picture->u; + uint8_t* dst_v = picture->v; + uint8_t* dst_a = picture->a; + + VP8Random base_rg; + VP8Random* rg = NULL; + if (dithering > 0.) { + VP8InitRandom(&base_rg, dithering); + rg = &base_rg; + } + + InitGammaTables(); + + // Downsample Y/U/V planes, two rows at a time + for (y = 0; y < (height >> 1); ++y) { + int rows_have_alpha = has_alpha; + const int off1 = (2 * y + 0) * rgb_stride; + const int off2 = (2 * y + 1) * rgb_stride; + ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, + dst_y, width, rg); + ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, + dst_y + picture->y_stride, width, rg); + dst_y += 2 * picture->y_stride; + if (has_alpha) { + rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride, + width, 2, + dst_a, picture->a_stride); + dst_a += 2 * picture->a_stride; + } + if (!rows_have_alpha) { + ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, rgb_stride, dst_u, dst_v, width, rg); + } else { + ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1, + a_ptr + off1, rgb_stride, + dst_u, dst_v, width, rg); + } + dst_u += picture->uv_stride; + dst_v += picture->uv_stride; + } + if (height & 1) { // extra last row + const int off = 2 * y * rgb_stride; + int row_has_alpha = has_alpha; + ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, + dst_y, width, rg); + if (row_has_alpha) { + row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0); + } + if (!row_has_alpha) { + ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off, + step, 0, dst_u, dst_v, width, rg); + } else { + ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off, + a_ptr + off, 0, + dst_u, dst_v, width, rg); + } + } + } + return 1; +} + +#undef SUM4 +#undef SUM2 +#undef SUM4ALPHA +#undef SUM2ALPHA + +//------------------------------------------------------------------------------ +// call for ARGB->YUVA conversion + +static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace, + float dithering, int use_iterative_conversion) { + if (picture == NULL) return 0; + if (picture->argb == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } else { + const uint8_t* const argb = (const uint8_t*)picture->argb; + const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; + const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; + const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; + const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; + + picture->colorspace = WEBP_YUV420; + return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, + dithering, use_iterative_conversion, picture); + } +} + +int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, + float dithering) { + return PictureARGBToYUVA(picture, colorspace, dithering, 0); +} + +int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { + return PictureARGBToYUVA(picture, colorspace, 0.f, 0); +} + +#if WEBP_ENCODER_ABI_VERSION > 0x0204 +int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { + return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); +} +#endif + +//------------------------------------------------------------------------------ +// call for YUVA -> ARGB conversion + +int WebPPictureYUVAToARGB(WebPPicture* picture) { + if (picture == NULL) return 0; + if (picture->y == NULL || picture->u == NULL || picture->v == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } + if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); + } + if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } + // Allocate a new argb buffer (discarding the previous one). + if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0; + picture->use_argb = 1; + + // Convert + { + int y; + const int width = picture->width; + const int height = picture->height; + const int argb_stride = 4 * picture->argb_stride; + uint8_t* dst = (uint8_t*)picture->argb; + const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; + WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); + + // First row, with replicated top samples. + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); + cur_y += picture->y_stride; + dst += argb_stride; + // Center rows. + for (y = 1; y + 1 < height; y += 2) { + const uint8_t* const top_u = cur_u; + const uint8_t* const top_v = cur_v; + cur_u += picture->uv_stride; + cur_v += picture->uv_stride; + upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, + dst, dst + argb_stride, width); + cur_y += 2 * picture->y_stride; + dst += 2 * argb_stride; + } + // Last row (if needed), with replicated bottom samples. + if (height > 1 && !(height & 1)) { + upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); + } + // Insert alpha values if needed, in replacement for the default 0xff ones. + if (picture->colorspace & WEBP_CSP_ALPHA_BIT) { + for (y = 0; y < height; ++y) { + uint32_t* const argb_dst = picture->argb + y * picture->argb_stride; + const uint8_t* const src = picture->a + y * picture->a_stride; + int x; + for (x = 0; x < width; ++x) { + argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24); + } + } + } + } + return 1; +} + +//------------------------------------------------------------------------------ +// automatic import / conversion + +static int Import(WebPPicture* const picture, + const uint8_t* const rgb, int rgb_stride, + int step, int swap_rb, int import_alpha) { + int y; + const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0); + const uint8_t* const g_ptr = rgb + 1; + const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2); + const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL; + const int width = picture->width; + const int height = picture->height; + + if (!picture->use_argb) { + return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, + 0.f /* no dithering */, 0, picture); + } + if (!WebPPictureAlloc(picture)) return 0; + + assert(step >= (import_alpha ? 4 : 3)); + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + int x; + for (x = 0; x < width; ++x) { + const int offset = step * x + y * rgb_stride; + dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff, + r_ptr[offset], g_ptr[offset], b_ptr[offset]); + } + } + return 1; +} + +// Public API + +int WebPPictureImportRGB(WebPPicture* picture, + const uint8_t* rgb, int rgb_stride) { + return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0; +} + +int WebPPictureImportBGR(WebPPicture* picture, + const uint8_t* rgb, int rgb_stride) { + return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0; +} + +int WebPPictureImportRGBA(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0; +} + +int WebPPictureImportBGRA(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0; +} + +int WebPPictureImportRGBX(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0; +} + +int WebPPictureImportBGRX(WebPPicture* picture, + const uint8_t* rgba, int rgba_stride) { + return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0; +} + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/picture_psnr.c b/src/3rdparty/libwebp/src/enc/picture_psnr.c new file mode 100644 index 0000000..2254b7e --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/picture_psnr.c @@ -0,0 +1,150 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools for measuring distortion +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <math.h> + +#include "./vp8enci.h" + +//------------------------------------------------------------------------------ +// local-min distortion +// +// For every pixel in the *reference* picture, we search for the local best +// match in the compressed image. This is not a symmetrical measure. + +#define RADIUS 2 // search radius. Shouldn't be too large. + +static float AccumulateLSIM(const uint8_t* src, int src_stride, + const uint8_t* ref, int ref_stride, + int w, int h) { + int x, y; + double total_sse = 0.; + for (y = 0; y < h; ++y) { + const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS; + const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1; + for (x = 0; x < w; ++x) { + const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS; + const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1; + double best_sse = 255. * 255.; + const double value = (double)ref[y * ref_stride + x]; + int i, j; + for (j = y_0; j < y_1; ++j) { + const uint8_t* s = src + j * src_stride; + for (i = x_0; i < x_1; ++i) { + const double sse = (double)(s[i] - value) * (s[i] - value); + if (sse < best_sse) best_sse = sse; + } + } + total_sse += best_sse; + } + } + return (float)total_sse; +} +#undef RADIUS + +//------------------------------------------------------------------------------ +// Distortion + +// Max value returned in case of exact similarity. +static const double kMinDistortion_dB = 99.; +static float GetPSNR(const double v) { + return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) + : kMinDistortion_dB); +} + +int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, + int type, float result[5]) { + DistoStats stats[5]; + int has_alpha; + int uv_w, uv_h; + + if (src == NULL || ref == NULL || + src->width != ref->width || src->height != ref->height || + src->y == NULL || ref->y == NULL || + src->u == NULL || ref->u == NULL || + src->v == NULL || ref->v == NULL || + result == NULL) { + return 0; + } + // TODO(skal): provide distortion for ARGB too. + if (src->use_argb == 1 || src->use_argb != ref->use_argb) { + return 0; + } + + has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT); + if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) || + (has_alpha && (src->a == NULL || ref->a == NULL))) { + return 0; + } + + memset(stats, 0, sizeof(stats)); + + uv_w = (src->width + 1) >> 1; + uv_h = (src->height + 1) >> 1; + if (type >= 2) { + float sse[4]; + sse[0] = AccumulateLSIM(src->y, src->y_stride, + ref->y, ref->y_stride, src->width, src->height); + sse[1] = AccumulateLSIM(src->u, src->uv_stride, + ref->u, ref->uv_stride, uv_w, uv_h); + sse[2] = AccumulateLSIM(src->v, src->uv_stride, + ref->v, ref->uv_stride, uv_w, uv_h); + sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride, + ref->a, ref->a_stride, + src->width, src->height) + : 0.f; + result[0] = GetPSNR(sse[0] / (src->width * src->height)); + result[1] = GetPSNR(sse[1] / (uv_w * uv_h)); + result[2] = GetPSNR(sse[2] / (uv_w * uv_h)); + result[3] = GetPSNR(sse[3] / (src->width * src->height)); + { + double total_sse = sse[0] + sse[1] + sse[2]; + int total_pixels = src->width * src->height + 2 * uv_w * uv_h; + if (has_alpha) { + total_pixels += src->width * src->height; + total_sse += sse[3]; + } + result[4] = GetPSNR(total_sse / total_pixels); + } + } else { + int c; + VP8SSIMAccumulatePlane(src->y, src->y_stride, + ref->y, ref->y_stride, + src->width, src->height, &stats[0]); + VP8SSIMAccumulatePlane(src->u, src->uv_stride, + ref->u, ref->uv_stride, + uv_w, uv_h, &stats[1]); + VP8SSIMAccumulatePlane(src->v, src->uv_stride, + ref->v, ref->uv_stride, + uv_w, uv_h, &stats[2]); + if (has_alpha) { + VP8SSIMAccumulatePlane(src->a, src->a_stride, + ref->a, ref->a_stride, + src->width, src->height, &stats[3]); + } + for (c = 0; c <= 4; ++c) { + if (type == 1) { + const double v = VP8SSIMGet(&stats[c]); + result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) + : kMinDistortion_dB); + } else { + const double v = VP8SSIMGetSquaredError(&stats[c]); + result[c] = GetPSNR(v); + } + // Accumulate forward + if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); + } + } + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale.c b/src/3rdparty/libwebp/src/enc/picture_rescale.c new file mode 100644 index 0000000..de52848 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/picture_rescale.c @@ -0,0 +1,285 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools: copy, crop, rescaling and view. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include <assert.h> +#include <stdlib.h> + +#include "./vp8enci.h" +#include "../utils/rescaler.h" +#include "../utils/utils.h" + +#define HALVE(x) (((x) + 1) >> 1) + +// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them +// into 'dst'. Mark 'dst' as not owning any memory. +static void PictureGrabSpecs(const WebPPicture* const src, + WebPPicture* const dst) { + assert(src != NULL && dst != NULL); + *dst = *src; + WebPPictureResetBuffers(dst); +} + +//------------------------------------------------------------------------------ +// Picture copying + +static void CopyPlane(const uint8_t* src, int src_stride, + uint8_t* dst, int dst_stride, int width, int height) { + while (height-- > 0) { + memcpy(dst, src, width); + src += src_stride; + dst += dst_stride; + } +} + +// Adjust top-left corner to chroma sample position. +static void SnapTopLeftPosition(const WebPPicture* const pic, + int* const left, int* const top) { + if (!pic->use_argb) { + *left &= ~1; + *top &= ~1; + } +} + +// Adjust top-left corner and verify that the sub-rectangle is valid. +static int AdjustAndCheckRectangle(const WebPPicture* const pic, + int* const left, int* const top, + int width, int height) { + SnapTopLeftPosition(pic, left, top); + if ((*left) < 0 || (*top) < 0) return 0; + if (width <= 0 || height <= 0) return 0; + if ((*left) + width > pic->width) return 0; + if ((*top) + height > pic->height) return 0; + return 1; +} + +int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { + if (src == NULL || dst == NULL) return 0; + if (src == dst) return 1; + + PictureGrabSpecs(src, dst); + if (!WebPPictureAlloc(dst)) return 0; + + if (!src->use_argb) { + CopyPlane(src->y, src->y_stride, + dst->y, dst->y_stride, dst->width, dst->height); + CopyPlane(src->u, src->uv_stride, + dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); + CopyPlane(src->v, src->uv_stride, + dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height)); + if (dst->a != NULL) { + CopyPlane(src->a, src->a_stride, + dst->a, dst->a_stride, dst->width, dst->height); + } + } else { + CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride, + (uint8_t*)dst->argb, 4 * dst->argb_stride, + 4 * dst->width, dst->height); + } + return 1; +} + +int WebPPictureIsView(const WebPPicture* picture) { + if (picture == NULL) return 0; + if (picture->use_argb) { + return (picture->memory_argb_ == NULL); + } + return (picture->memory_ == NULL); +} + +int WebPPictureView(const WebPPicture* src, + int left, int top, int width, int height, + WebPPicture* dst) { + if (src == NULL || dst == NULL) return 0; + + // verify rectangle position. + if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0; + + if (src != dst) { // beware of aliasing! We don't want to leak 'memory_'. + PictureGrabSpecs(src, dst); + } + dst->width = width; + dst->height = height; + if (!src->use_argb) { + dst->y = src->y + top * src->y_stride + left; + dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1); + dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1); + dst->y_stride = src->y_stride; + dst->uv_stride = src->uv_stride; + if (src->a != NULL) { + dst->a = src->a + top * src->a_stride + left; + dst->a_stride = src->a_stride; + } + } else { + dst->argb = src->argb + top * src->argb_stride + left; + dst->argb_stride = src->argb_stride; + } + return 1; +} + +//------------------------------------------------------------------------------ +// Picture cropping + +int WebPPictureCrop(WebPPicture* pic, + int left, int top, int width, int height) { + WebPPicture tmp; + + if (pic == NULL) return 0; + if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0; + + PictureGrabSpecs(pic, &tmp); + tmp.width = width; + tmp.height = height; + if (!WebPPictureAlloc(&tmp)) return 0; + + if (!pic->use_argb) { + const int y_offset = top * pic->y_stride + left; + const int uv_offset = (top / 2) * pic->uv_stride + left / 2; + CopyPlane(pic->y + y_offset, pic->y_stride, + tmp.y, tmp.y_stride, width, height); + CopyPlane(pic->u + uv_offset, pic->uv_stride, + tmp.u, tmp.uv_stride, HALVE(width), HALVE(height)); + CopyPlane(pic->v + uv_offset, pic->uv_stride, + tmp.v, tmp.uv_stride, HALVE(width), HALVE(height)); + + if (tmp.a != NULL) { + const int a_offset = top * pic->a_stride + left; + CopyPlane(pic->a + a_offset, pic->a_stride, + tmp.a, tmp.a_stride, width, height); + } + } else { + const uint8_t* const src = + (const uint8_t*)(pic->argb + top * pic->argb_stride + left); + CopyPlane(src, pic->argb_stride * 4, + (uint8_t*)tmp.argb, tmp.argb_stride * 4, + width * 4, height); + } + WebPPictureFree(pic); + *pic = tmp; + return 1; +} + +//------------------------------------------------------------------------------ +// Simple picture rescaler + +static void RescalePlane(const uint8_t* src, + int src_width, int src_height, int src_stride, + uint8_t* dst, + int dst_width, int dst_height, int dst_stride, + int32_t* const work, + int num_channels) { + WebPRescaler rescaler; + int y = 0; + WebPRescalerInit(&rescaler, src_width, src_height, + dst, dst_width, dst_height, dst_stride, + num_channels, + src_width, dst_width, + src_height, dst_height, + work); + memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); + while (y < src_height) { + y += WebPRescalerImport(&rescaler, src_height - y, + src + y * src_stride, src_stride); + WebPRescalerExport(&rescaler); + } +} + +static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { + assert(pic->argb != NULL); + WebPMultARGBRows((uint8_t*)pic->argb, pic->argb_stride * sizeof(*pic->argb), + pic->width, pic->height, inverse); +} + +static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { + if (pic->a != NULL) { + WebPMultRows(pic->y, pic->y_stride, pic->a, pic->a_stride, + pic->width, pic->height, inverse); + } +} + +int WebPPictureRescale(WebPPicture* pic, int width, int height) { + WebPPicture tmp; + int prev_width, prev_height; + int32_t* work; + + if (pic == NULL) return 0; + prev_width = pic->width; + prev_height = pic->height; + // if width is unspecified, scale original proportionally to height ratio. + if (width == 0) { + width = (prev_width * height + prev_height / 2) / prev_height; + } + // if height is unspecified, scale original proportionally to width ratio. + if (height == 0) { + height = (prev_height * width + prev_width / 2) / prev_width; + } + // Check if the overall dimensions still make sense. + if (width <= 0 || height <= 0) return 0; + + PictureGrabSpecs(pic, &tmp); + tmp.width = width; + tmp.height = height; + if (!WebPPictureAlloc(&tmp)) return 0; + + if (!pic->use_argb) { + work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); + if (work == NULL) { + WebPPictureFree(&tmp); + return 0; + } + // If present, we need to rescale alpha first (for AlphaMultiplyY). + if (pic->a != NULL) { + WebPInitAlphaProcessing(); + RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, + tmp.a, width, height, tmp.a_stride, work, 1); + } + + // We take transparency into account on the luma plane only. That's not + // totally exact blending, but still is a good approximation. + AlphaMultiplyY(pic, 0); + RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, + tmp.y, width, height, tmp.y_stride, work, 1); + AlphaMultiplyY(&tmp, 1); + + RescalePlane(pic->u, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.u, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1); + RescalePlane(pic->v, + HALVE(prev_width), HALVE(prev_height), pic->uv_stride, + tmp.v, + HALVE(width), HALVE(height), tmp.uv_stride, work, 1); + } else { + work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); + if (work == NULL) { + WebPPictureFree(&tmp); + return 0; + } + // In order to correctly interpolate colors, we need to apply the alpha + // weighting first (black-matting), scale the RGB values, and remove + // the premultiplication afterward (while preserving the alpha channel). + WebPInitAlphaProcessing(); + AlphaMultiplyARGB(pic, 0); + RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, + pic->argb_stride * 4, + (uint8_t*)tmp.argb, width, height, + tmp.argb_stride * 4, + work, 4); + AlphaMultiplyARGB(&tmp, 1); + } + WebPPictureFree(pic); + WebPSafeFree(work); + *pic = tmp; + return 1; +} + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/picture_tools.c b/src/3rdparty/libwebp/src/enc/picture_tools.c new file mode 100644 index 0000000..7c73646 --- /dev/null +++ b/src/3rdparty/libwebp/src/enc/picture_tools.c @@ -0,0 +1,206 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// WebPPicture tools: alpha handling, etc. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "./vp8enci.h" +#include "../dsp/yuv.h" + +static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { + return (0xff000000u | (r << 16) | (g << 8) | b); +} + +//------------------------------------------------------------------------------ +// Helper: clean up fully transparent area to help compressibility. + +#define SIZE 8 +#define SIZE2 (SIZE / 2) +static int is_transparent_area(const uint8_t* ptr, int stride, int size) { + int y, x; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) { + if (ptr[x]) { + return 0; + } + } + ptr += stride; + } + return 1; +} + +static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) { + int y, x; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) { + if (ptr[x] & 0xff000000u) { + return 0; + } + } + ptr += stride; + } + return 1; +} + +static void flatten(uint8_t* ptr, int v, int stride, int size) { + int y; + for (y = 0; y < size; ++y) { + memset(ptr, v, size); + ptr += stride; + } +} + +static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) { + int x, y; + for (y = 0; y < size; ++y) { + for (x = 0; x < size; ++x) ptr[x] = v; + ptr += stride; + } +} + +void WebPCleanupTransparentArea(WebPPicture* pic) { + int x, y, w, h; + if (pic == NULL) return; + w = pic->width / SIZE; + h = pic->height / SIZE; + + // note: we ignore the left-overs on right/bottom + if (pic->use_argb) { + uint32_t argb_value = 0; + for (y = 0; y < h; ++y) { + int need_reset = 1; + for (x = 0; x < w; ++x) { + const int off = (y * pic->argb_stride + x) * SIZE; + if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) { + if (need_reset) { + argb_value = pic->argb[off]; + need_reset = 0; + } + flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE); + } else { + need_reset = 1; + } + } + } + } else { + const uint8_t* const a_ptr = pic->a; + int values[3] = { 0 }; + if (a_ptr == NULL) return; // nothing to do + for (y = 0; y < h; ++y) { + int need_reset = 1; + for (x = 0; x < w; ++x) { + const int off_a = (y * pic->a_stride + x) * SIZE; + const int off_y = (y * pic->y_stride + x) * SIZE; + const int off_uv = (y * pic->uv_stride + x) * SIZE2; + if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) { + if (need_reset) { + values[0] = pic->y[off_y]; + values[1] = pic->u[off_uv]; + values[2] = pic->v[off_uv]; + need_reset = 0; + } + flatten(pic->y + off_y, values[0], pic->y_stride, SIZE); + flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2); + flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2); + } else { + need_reset = 1; + } + } + } + } +} + +#undef SIZE +#undef SIZE2 + +//------------------------------------------------------------------------------ +// Blend color and remove transparency info + +#define BLEND(V0, V1, ALPHA) \ + ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16) +#define BLEND_10BIT(V0, V1, ALPHA) \ + ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18) + +void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { + const int red = (background_rgb >> 16) & 0xff; + const int green = (background_rgb >> 8) & 0xff; + const int blue = (background_rgb >> 0) & 0xff; + int x, y; + if (pic == NULL) return; + if (!pic->use_argb) { + const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop + const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); + // VP8RGBToU/V expects the u/v values summed over four pixels + const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); + const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); + const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; + if (!has_alpha || pic->a == NULL) return; // nothing to do + for (y = 0; y < pic->height; ++y) { + // Luma blending + uint8_t* const y_ptr = pic->y + y * pic->y_stride; + uint8_t* const a_ptr = pic->a + y * pic->a_stride; + for (x = 0; x < pic->width; ++x) { + const int alpha = a_ptr[x]; + if (alpha < 0xff) { + y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]); + } + } + // Chroma blending every even line + if ((y & 1) == 0) { + uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride; + uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride; + uint8_t* const a_ptr2 = + (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; + for (x = 0; x < uv_width; ++x) { + // Average four alpha values into a single blending weight. + // TODO(skal): might lead to visible contouring. Can we do better? + const int alpha = + a_ptr[2 * x + 0] + a_ptr[2 * x + 1] + + a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1]; + u[x] = BLEND_10BIT(U0, u[x], alpha); + v[x] = BLEND_10BIT(V0, v[x], alpha); + } + if (pic->width & 1) { // rightmost pixel + const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); + u[x] = BLEND_10BIT(U0, u[x], alpha); + v[x] = BLEND_10BIT(V0, v[x], alpha); + } + } + memset(a_ptr, 0xff, pic->width); + } + } else { + uint32_t* argb = pic->argb; + const uint32_t background = MakeARGB32(red, green, blue); + for (y = 0; y < pic->height; ++y) { + for (x = 0; x < pic->width; ++x) { + const int alpha = (argb[x] >> 24) & 0xff; + if (alpha != 0xff) { + if (alpha > 0) { + int r = (argb[x] >> 16) & 0xff; + int g = (argb[x] >> 8) & 0xff; + int b = (argb[x] >> 0) & 0xff; + r = BLEND(red, r, alpha); + g = BLEND(green, g, alpha); + b = BLEND(blue, b, alpha); + argb[x] = MakeARGB32(r, g, b); + } else { + argb[x] = background; + } + } + } + argb += pic->argb_stride; + } + } +} + +#undef BLEND +#undef BLEND_10BIT + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c index e1d202b..9130a41 100644 --- a/src/3rdparty/libwebp/src/enc/quant.c +++ b/src/3rdparty/libwebp/src/enc/quant.c @@ -395,7 +395,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV); // We also boost the dc-uv-quant a little, based on sns-strength, since // U/V channels are quite more reactive to high quants (flat DC-blocks - // tend to appear, and are displeasant). + // tend to appear, and are unpleasant). dq_uv_dc = -4 * enc->config_->sns_strength / 100; dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed @@ -454,13 +454,14 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // |UUVV| 20 // +----+ -const int VP8Scan[16 + 4 + 4] = { - // Luma +const int VP8Scan[16] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, +}; +static const int VP8ScanUV[4 + 4] = { 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; @@ -514,24 +515,27 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { //------------------------------------------------------------------------------ // Performs trellis-optimized quantization. -// Trellis - +// Trellis node typedef struct { - int prev; // best previous - int level; // level - int sign; // sign of coeff_i - score_t cost; // bit cost - score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2 - int ctx; // context (only depends on 'level'. Could be spared.) + int8_t prev; // best previous node + int8_t sign; // sign of coeff_i + int16_t level; // level } Node; +// Score state +typedef struct { + score_t score; // partial RD score + const uint16_t* costs; // shortcut to cost tables +} ScoreState; + // If a coefficient was quantized to a value Q (using a neutral bias), // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] // We don't test negative values though. #define MIN_DELTA 0 // how much lower level to try #define MAX_DELTA 1 // how much higher #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) -#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA]) +#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) +#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { // TODO: incorporate the "* 256" in the tables? @@ -543,34 +547,36 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, return rate * lambda + 256 * distortion; } -static int TrellisQuantizeBlock(const VP8EncIterator* const it, +static int TrellisQuantizeBlock(const VP8Encoder* const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, const VP8Matrix* const mtx, int lambda) { - ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type]; - CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type]; + const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; + const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; const int first = (coeff_type == 0) ? 1 : 0; - Node nodes[17][NUM_NODES]; + Node nodes[16][NUM_NODES]; + ScoreState score_states[2][NUM_NODES]; + ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); + ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous score_t best_score; - int best_node; - int last = first - 1; - int n, m, p, nz; + int n, m, p, last; { score_t cost; - score_t max_error; const int thresh = mtx->q_[1] * mtx->q_[1] / 4; - const int last_proba = last_costs[VP8EncBands[first]][ctx0][0]; + const int last_proba = probas[VP8EncBands[first]][ctx0][0]; - // compute maximal distortion. - max_error = 0; - for (n = first; n < 16; ++n) { - const int j = kZigzag[n]; + // compute the position of the last interesting coefficient + last = first - 1; + for (n = 15; n >= first; --n) { + const int j = kZigzag[n]; const int err = in[j] * in[j]; - max_error += kWeightTrellis[j] * err; - if (err > thresh) last = n; + if (err > thresh) { + last = n; + break; + } } // we don't need to go inspect up to n = 16 coeffs. We can just go up // to last + 1 (inclusive) without losing much. @@ -578,92 +584,95 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it, // compute 'skip' score. This is the max score one can do. cost = VP8BitCost(0, last_proba); - best_score = RDScoreTrellis(lambda, cost, max_error); + best_score = RDScoreTrellis(lambda, cost, 0); // initialize source node. - n = first - 1; for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { - NODE(n, m).cost = 0; - NODE(n, m).error = max_error; - NODE(n, m).ctx = ctx0; + const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; + ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); + ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; } } // traverse trellis. for (n = first; n <= last; ++n) { - const int j = kZigzag[n]; - const int Q = mtx->q_[j]; - const int iQ = mtx->iq_[j]; - const int B = BIAS(0x00); // neutral bias + const int j = kZigzag[n]; + const uint32_t Q = mtx->q_[j]; + const uint32_t iQ = mtx->iq_[j]; + const uint32_t B = BIAS(0x00); // neutral bias // note: it's important to take sign of the _original_ coeff, // so we don't have to consider level < 0 afterward. const int sign = (in[j] < 0); - const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; + const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; int level0 = QUANTDIV(coeff0, iQ, B); if (level0 > MAX_LEVEL) level0 = MAX_LEVEL; + { // Swap current and previous score states + ScoreState* const tmp = ss_cur; + ss_cur = ss_prev; + ss_prev = tmp; + } + // test all alternate level values around level0. for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { Node* const cur = &NODE(n, m); - int delta_error, new_error; - score_t cur_score = MAX_COST; int level = level0 + m; - int last_proba; - - cur->sign = sign; - cur->level = level; - cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2; + const int ctx = (level > 2) ? 2 : level; + const int band = VP8EncBands[n + 1]; + score_t base_score, last_pos_score; + score_t best_cur_score = MAX_COST; + int best_prev = 0; // default, in case + + ss_cur[m].score = MAX_COST; + ss_cur[m].costs = costs[band][ctx]; if (level > MAX_LEVEL || level < 0) { // node is dead? - cur->cost = MAX_COST; continue; } - last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0]; - // Compute delta_error = how much coding this level will - // subtract as distortion to max_error - new_error = coeff0 - level * Q; - delta_error = - kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error); + // Compute extra rate cost if last coeff's position is < 15 + { + const score_t last_pos_cost = + (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; + last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); + } + + { + // Compute delta_error = how much coding this level will + // subtract to max_error as distortion. + // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2 + const int new_error = coeff0 - level * Q; + const int delta_error = + kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0); + base_score = RDScoreTrellis(lambda, 0, delta_error); + } // Inspect all possible non-dead predecessors. Retain only the best one. for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { - const Node* const prev = &NODE(n - 1, p); - const int prev_ctx = prev->ctx; - const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx]; - const score_t total_error = prev->error - delta_error; - score_t cost, base_cost, score; - - if (prev->cost >= MAX_COST) { // dead node? - continue; - } - - // Base cost of both terminal/non-terminal - base_cost = prev->cost + VP8LevelCost(tcost, level); - + // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically + // eliminated since their score can't be better than the current best. + const score_t cost = VP8LevelCost(ss_prev[p].costs, level); // Examine node assuming it's a non-terminal one. - cost = base_cost; - if (level && n < 15) { - cost += VP8BitCost(1, last_proba); + const score_t score = + base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + if (score < best_cur_score) { + best_cur_score = score; + best_prev = p; } - score = RDScoreTrellis(lambda, cost, total_error); - if (score < cur_score) { - cur_score = score; - cur->cost = cost; - cur->error = total_error; - cur->prev = p; - } - - // Now, record best terminal node (and thus best entry in the graph). - if (level) { - cost = base_cost; - if (n < 15) cost += VP8BitCost(0, last_proba); - score = RDScoreTrellis(lambda, cost, total_error); - if (score < best_score) { - best_score = score; - best_path[0] = n; // best eob position - best_path[1] = m; // best level - best_path[2] = p; // best predecessor - } + } + // Store best finding in current node. + cur->sign = sign; + cur->level = level; + cur->prev = best_prev; + ss_cur[m].score = best_cur_score; + + // Now, record best terminal node (and thus best entry in the graph). + if (level != 0) { + const score_t score = best_cur_score + last_pos_score; + if (score < best_score) { + best_score = score; + best_path[0] = n; // best eob position + best_path[1] = m; // best node index + best_path[2] = best_prev; // best predecessor } } } @@ -676,23 +685,25 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it, return 0; // skip! } - // Unwind the best path. - // Note: best-prev on terminal node is not necessarily equal to the - // best_prev for non-terminal. So we patch best_path[2] in. - n = best_path[0]; - best_node = best_path[1]; - NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal - nz = 0; - - for (; n >= first; --n) { - const Node* const node = &NODE(n, best_node); - const int j = kZigzag[n]; - out[n] = node->sign ? -node->level : node->level; - nz |= (node->level != 0); - in[j] = out[n] * mtx->q_[j]; - best_node = node->prev; + { + // Unwind the best path. + // Note: best-prev on terminal node is not necessarily equal to the + // best_prev for non-terminal. So we patch best_path[2] in. + int nz = 0; + int best_node = best_path[1]; + n = best_path[0]; + NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal + + for (; n >= first; --n) { + const Node* const node = &NODE(n, best_node); + const int j = kZigzag[n]; + out[n] = node->sign ? -node->level : node->level; + nz |= node->level; + in[j] = out[n] * mtx->q_[j]; + best_node = node->prev; + } + return (nz != 0); } - return nz; } #undef NODE @@ -706,10 +717,10 @@ static int ReconstructIntra16(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { - VP8Encoder* const enc = it->enc_; + const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; const uint8_t* const src = it->yuv_in_ + Y_OFF; - VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; int nz = 0; int n; int16_t tmp[16][16], dc_tmp[16]; @@ -727,20 +738,25 @@ static int ReconstructIntra16(VP8EncIterator* const it, for (x = 0; x < 4; ++x, ++n) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; const int non_zero = - TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0, - &dqm->y1_, dqm->lambda_trellis_i16_); + TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, + &dqm->y1_, dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; + rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; } } } else { for (n = 0; n < 16; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n; + // Zero-out the first coeff, so that: a) nz is correct below, and + // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. + tmp[n][0] = 0; + nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n][0] == 0); } } // Transform back - VP8ITransformWHT(dc_tmp, tmp[0]); + VP8TransformWHT(dc_tmp, tmp[0]); for (n = 0; n < 16; n += 2) { VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); } @@ -763,10 +779,10 @@ static int ReconstructIntra4(VP8EncIterator* const it, if (DO_TRELLIS_I4 && it->do_trellis_) { const int x = it->i4_ & 3, y = it->i4_ >> 2; const int ctx = it->top_nz_[x] + it->left_nz_[y]; - nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_, + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, dqm->lambda_trellis_i4_); } else { - nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_); + nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); } VP8ITransform(ref, tmp, yuv_out, 0); return nz; @@ -783,7 +799,7 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, int16_t tmp[8][16]; for (n = 0; n < 8; ++n) { - VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]); + VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; @@ -792,8 +808,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (x = 0; x < 2; ++x, ++n) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; const int non_zero = - TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2, - &dqm->uv_, dqm->lambda_trellis_uv_); + TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, + &dqm->uv_, dqm->lambda_trellis_uv_); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; nz |= non_zero << n; } @@ -801,12 +817,12 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, } } else { for (n = 0; n < 8; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n; + nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; } } for (n = 0; n < 8; n += 2) { - VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1); + VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); } return (nz << 16); } @@ -851,8 +867,7 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { const int kNumBlocks = 16; - VP8Encoder* const enc = it->enc_; - VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i16_; const int tlambda = dqm->tlambda_; const uint8_t* const src = it->yuv_in_ + Y_OFF; @@ -999,8 +1014,7 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { const int kNumBlocks = 8; - const VP8Encoder* const enc = it->enc_; - const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; + const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_uv_; const uint8_t* const src = it->yuv_in_ + U_OFF; uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer diff --git a/src/3rdparty/libwebp/src/enc/syntax.c b/src/3rdparty/libwebp/src/enc/syntax.c index 08cfe79..d1ff0a5 100644 --- a/src/3rdparty/libwebp/src/enc/syntax.c +++ b/src/3rdparty/libwebp/src/enc/syntax.c @@ -263,53 +263,16 @@ static int EmitPartitionsSize(const VP8Encoder* const enc, //------------------------------------------------------------------------------ -#ifdef WEBP_EXPERIMENTAL_FEATURES - -#define KTRAILER_SIZE 8 - -static int WriteExtensions(VP8Encoder* const enc) { - uint8_t buffer[KTRAILER_SIZE]; - VP8BitWriter* const bw = &enc->bw_; - WebPPicture* const pic = enc->pic_; - - // Layer (bytes 0..3) - PutLE24(buffer + 0, enc->layer_data_size_); - buffer[3] = enc->pic_->colorspace & WEBP_CSP_UV_MASK; - if (enc->layer_data_size_ > 0) { - assert(enc->use_layer_); - // append layer data to last partition - if (!VP8BitWriterAppend(&enc->parts_[enc->num_parts_ - 1], - enc->layer_data_, enc->layer_data_size_)) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY); - } - } - - buffer[KTRAILER_SIZE - 1] = 0x01; // marker - if (!VP8BitWriterAppend(bw, buffer, KTRAILER_SIZE)) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY); - } - return 1; -} - -#endif /* WEBP_EXPERIMENTAL_FEATURES */ - -//------------------------------------------------------------------------------ - -static size_t GeneratePartition0(VP8Encoder* const enc) { +static int GeneratePartition0(VP8Encoder* const enc) { VP8BitWriter* const bw = &enc->bw_; const int mb_size = enc->mb_w_ * enc->mb_h_; uint64_t pos1, pos2, pos3; -#ifdef WEBP_EXPERIMENTAL_FEATURES - const int need_extensions = enc->use_layer_; -#endif pos1 = VP8BitWriterPos(bw); - VP8BitWriterInit(bw, mb_size * 7 / 8); // ~7 bits per macroblock -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8PutBitUniform(bw, need_extensions); // extensions -#else + if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) { // ~7 bits per macroblock + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } VP8PutBitUniform(bw, 0); // colorspace -#endif VP8PutBitUniform(bw, 0); // clamp type PutSegmentHeader(bw, enc); @@ -324,21 +287,17 @@ static size_t GeneratePartition0(VP8Encoder* const enc) { VP8CodeIntraModes(enc); VP8BitWriterFinish(bw); -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (need_extensions && !WriteExtensions(enc)) { - return 0; - } -#endif - pos3 = VP8BitWriterPos(bw); if (enc->pic_->stats) { enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3); enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3); enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_; - enc->pic_->stats->layer_data_size = (int)enc->layer_data_size_; } - return !bw->error_; + if (bw->error_) { + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + } + return 1; } void VP8EncFreeBitWriters(VP8Encoder* const enc) { @@ -360,7 +319,8 @@ int VP8EncWrite(VP8Encoder* const enc) { int p; // Partition #0 with header and partition sizes - ok = !!GeneratePartition0(enc); + ok = GeneratePartition0(enc); + if (!ok) return 0; // Compute VP8 size vp8_size = VP8_FRAME_HEADER_SIZE + diff --git a/src/3rdparty/libwebp/src/enc/token.c b/src/3rdparty/libwebp/src/enc/token.c index e696642..8af13a0 100644 --- a/src/3rdparty/libwebp/src/enc/token.c +++ b/src/3rdparty/libwebp/src/enc/token.c @@ -22,27 +22,32 @@ #include "./cost.h" #include "./vp8enci.h" +#include "../utils/utils.h" #if !defined(DISABLE_TOKEN_BUFFER) // we use pages to reduce the number of memcpy() -#define MAX_NUM_TOKEN 8192 // max number of token per page +#define MIN_PAGE_SIZE 8192 // minimum number of token per page #define FIXED_PROBA_BIT (1u << 14) +typedef uint16_t token_t; // bit#15: bit + // bit #14: constant proba or idx + // bits 0..13: slot or constant proba struct VP8Tokens { - uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit - // bit #14: constant proba or idx - // bits 0..13: slot or constant proba - VP8Tokens* next_; + VP8Tokens* next_; // pointer to next page }; +// Token data is located in memory just after the next_ field. +// This macro is used to return their address and hide the trick. +#define TOKEN_DATA(p) ((token_t*)&(p)[1]) //------------------------------------------------------------------------------ -void VP8TBufferInit(VP8TBuffer* const b) { +void VP8TBufferInit(VP8TBuffer* const b, int page_size) { b->tokens_ = NULL; b->pages_ = NULL; b->last_page_ = &b->pages_; b->left_ = 0; + b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size; b->error_ = 0; } @@ -51,24 +56,29 @@ void VP8TBufferClear(VP8TBuffer* const b) { const VP8Tokens* p = b->pages_; while (p != NULL) { const VP8Tokens* const next = p->next_; - free((void*)p); + WebPSafeFree((void*)p); p = next; } - VP8TBufferInit(b); + VP8TBufferInit(b, b->page_size_); } } static int TBufferNewPage(VP8TBuffer* const b) { - VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page)); + VP8Tokens* page = NULL; + const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t); + if (!b->error_) { + page = (VP8Tokens*)WebPSafeMalloc(1ULL, size); + } if (page == NULL) { b->error_ = 1; return 0; } + page->next_ = NULL; + *b->last_page_ = page; b->last_page_ = &page->next_; - b->left_ = MAX_NUM_TOKEN; - b->tokens_ = page->tokens_; - page->next_ = NULL; + b->left_ = b->page_size_; + b->tokens_ = TOKEN_DATA(page); return 1; } @@ -195,8 +205,9 @@ void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) { while (p != NULL) { const int N = (p->next_ == NULL) ? b->left_ : 0; int n = MAX_NUM_TOKEN; + const token_t* const tokens = TOKEN_DATA(p); while (n-- > N) { - const uint16_t token = p->tokens_[n]; + const token_t token = tokens[n]; if (!(token & FIXED_PROBA_BIT)) { Record((token >> 15) & 1, stats + (token & 0x3fffu)); } @@ -214,13 +225,14 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, const uint8_t* const probas, int final_pass) { const VP8Tokens* p = b->pages_; (void)final_pass; - if (b->error_) return 0; + assert(!b->error_); while (p != NULL) { const VP8Tokens* const next = p->next_; const int N = (next == NULL) ? b->left_ : 0; - int n = MAX_NUM_TOKEN; + int n = b->page_size_; + const token_t* const tokens = TOKEN_DATA(p); while (n-- > N) { - const uint16_t token = p->tokens_[n]; + const token_t token = tokens[n]; const int bit = (token >> 15) & 1; if (token & FIXED_PROBA_BIT) { VP8PutBit(bw, bit, token & 0xffu); // constant proba @@ -228,7 +240,7 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, VP8PutBit(bw, bit, probas[token & 0x3fffu]); } } - if (final_pass) free((void*)p); + if (final_pass) WebPSafeFree((void*)p); p = next; } if (final_pass) b->pages_ = NULL; @@ -239,13 +251,14 @@ int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw, size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) { size_t size = 0; const VP8Tokens* p = b->pages_; - if (b->error_) return 0; + assert(!b->error_); while (p != NULL) { const VP8Tokens* const next = p->next_; const int N = (next == NULL) ? b->left_ : 0; - int n = MAX_NUM_TOKEN; + int n = b->page_size_; + const token_t* const tokens = TOKEN_DATA(p); while (n-- > N) { - const uint16_t token = p->tokens_[n]; + const token_t token = tokens[n]; const int bit = token & (1 << 15); if (token & FIXED_PROBA_BIT) { size += VP8BitCost(bit, token & 0xffu); diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h index 71adf6c..74c8f70 100644 --- a/src/3rdparty/libwebp/src/enc/vp8enci.h +++ b/src/3rdparty/libwebp/src/enc/vp8enci.h @@ -30,7 +30,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 #define ENC_MIN_VERSION 4 -#define ENC_REV_VERSION 0 +#define ENC_REV_VERSION 3 // intra prediction modes enum { B_DC_PRED = 0, // 4x4 modes @@ -130,8 +130,8 @@ typedef enum { // Rate-distortion optimization levels #define ALIGN_CST 15 #define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) -extern const int VP8Scan[16 + 4 + 4]; // in quant.c -extern const int VP8UVModeOffsets[4]; // in analyze.c +extern const int VP8Scan[16]; // in quant.c +extern const int VP8UVModeOffsets[4]; // in analyze.c extern const int VP8I16ModeOffsets[4]; extern const int VP8I4ModeOffsets[NUM_BMODES]; @@ -160,14 +160,16 @@ extern const int VP8I4ModeOffsets[NUM_BMODES]; #define I4TMP (6 * 16 * BPS + 8 * BPS + 8) typedef int64_t score_t; // type used for scores, rate, distortion +// Note that MAX_COST is not the maximum allowed by sizeof(score_t), +// in order to allow overflowing computations. #define MAX_COST ((score_t)0x7fffffffffffffLL) #define QFIX 17 #define BIAS(b) ((b) << (QFIX - 8)) // Fun fact: this is the _only_ line where we're actually being lossy and // discarding bits. -static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) { - return (n * iQ + B) >> QFIX; +static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { + return (int)((n * iQ + B) >> QFIX); } // size of histogram used by CollectHistogram. @@ -204,9 +206,9 @@ typedef struct { typedef struct { uint8_t segments_[3]; // probabilities for segment tree uint8_t skip_proba_; // final probability of being skipped. - ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes + ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes - CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k + CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes int dirty_; // if true, need to call VP8CalculateLevelCosts() int use_skip_proba_; // Note: we always use skip_proba for now. int nb_skip_; // number of skipped blocks @@ -236,8 +238,8 @@ typedef struct { typedef struct VP8Matrix { uint16_t q_[16]; // quantizer steps uint16_t iq_[16]; // reciprocals, fixed point. - uint16_t bias_[16]; // rounding bias - uint16_t zthresh_[16]; // value under which a coefficient is zeroed + uint32_t bias_[16]; // rounding bias + uint32_t zthresh_[16]; // value below which a coefficient is zeroed uint16_t sharpen_[16]; // frequency boosters for slight sharpening } VP8Matrix; @@ -361,12 +363,14 @@ typedef struct { VP8Tokens* pages_; // first page VP8Tokens** last_page_; // last page uint16_t* tokens_; // set to (*last_page_)->tokens_ - int left_; // how many free tokens left before the page is full. + int left_; // how many free tokens left before the page is full + int page_size_; // number of tokens per page #endif int error_; // true in case of malloc error } VP8TBuffer; -void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer +// initialize an empty buffer +void VP8TBufferInit(VP8TBuffer* const b, int page_size); void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory #if !defined(DISABLE_TOKEN_BUFFER) @@ -422,12 +426,6 @@ struct VP8Encoder { uint32_t alpha_data_size_; WebPWorker alpha_worker_; - // enhancement layer - int use_layer_; - VP8BitWriter layer_bw_; - uint8_t* layer_data_; - size_t layer_data_size_; - // quantization info (one set of DC/AC dequant factor per segment) VP8SegmentInfo dqm_[NUM_MB_SEGMENTS]; int base_quant_; // nominal quantizer value. Only used @@ -459,10 +457,10 @@ struct VP8Encoder { VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1) uint32_t* nz_; // non-zero bit context: mb_w+1 - uint8_t *y_top_; // top luma samples. - uint8_t *uv_top_; // top u/v samples. + uint8_t* y_top_; // top luma samples. + uint8_t* uv_top_; // top u/v samples. // U and V are packed into 16 bytes (8 U + 8 V) - LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off) + LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) }; //------------------------------------------------------------------------------ @@ -533,12 +531,6 @@ int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data - // in layer.c -void VP8EncInitLayer(VP8Encoder* const enc); // init everything -void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock -int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding -void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory - // in filter.c // SSIM utils @@ -561,8 +553,28 @@ void VP8AdjustFilterStrength(VP8EncIterator* const it); // step of 'delta', given a sharpness parameter 'sharpness'. int VP8FilterStrengthFromDelta(int sharpness, int delta); + // misc utils for picture_*.c: + +// Remove reference to the ARGB/YUVA buffer (doesn't free anything). +void WebPPictureResetBuffers(WebPPicture* const picture); + +// Allocates ARGB buffer of given dimension (previous one is always free'd). +// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, +// out-of-memory). +int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); + +// Allocates YUVA buffer of given dimension (previous one is always free'd). +// Uses picture->csp to determine whether an alpha buffer is needed. +// Preserves the ARGB buffer. +// Returns false in case of error (invalid param, out-of-memory). +int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); + //------------------------------------------------------------------------------ +#if WEBP_ENCODER_ABI_VERSION <= 0x0203 +void WebPMemoryWriterClear(WebPMemoryWriter* writer); +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/3rdparty/libwebp/src/enc/vp8l.c b/src/3rdparty/libwebp/src/enc/vp8l.c index 1572631..c2bb13d 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l.c +++ b/src/3rdparty/libwebp/src/enc/vp8l.c @@ -106,14 +106,9 @@ static int AnalyzeEntropy(const uint32_t* argb, const uint32_t* last_line = NULL; uint32_t last_pix = argb[0]; // so we're sure that pix_diff == 0 - VP8LHistogram* nonpredicted = NULL; - VP8LHistogram* predicted = - (VP8LHistogram*)malloc(2 * sizeof(*predicted)); - if (predicted == NULL) return 0; - nonpredicted = predicted + 1; - - VP8LHistogramInit(predicted, 0); - VP8LHistogramInit(nonpredicted, 0); + VP8LHistogramSet* const histo_set = VP8LAllocateHistogramSet(2, 0); + if (histo_set == NULL) return 0; + for (y = 0; y < height; ++y) { for (x = 0; x < width; ++x) { const uint32_t pix = argb[x]; @@ -126,21 +121,28 @@ static int AnalyzeEntropy(const uint32_t* argb, { const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix); const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff); - VP8LHistogramAddSinglePixOrCopy(nonpredicted, &pix_token); - VP8LHistogramAddSinglePixOrCopy(predicted, &pix_diff_token); + VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[0], &pix_token); + VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[1], + &pix_diff_token); } } last_line = argb; argb += argb_stride; } - *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted); - *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted); - free(predicted); + *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[0]); + *predicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[1]); + VP8LFreeHistogramSet(histo_set); return 1; } -static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) { +static int AnalyzeAndInit(VP8LEncoder* const enc, WebPImageHint image_hint) { const WebPPicture* const pic = enc->pic_; + const int width = pic->width; + const int height = pic->height; + const int pix_cnt = width * height; + // we round the block size up, so we're guaranteed to have + // at max MAX_REFS_BLOCK_PER_IMAGE blocks used: + int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1; assert(pic != NULL && pic->argb != NULL); enc->use_palette_ = @@ -158,7 +160,7 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) { enc->use_cross_color_ = 1; } else { double non_pred_entropy, pred_entropy; - if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride, + if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride, &non_pred_entropy, &pred_entropy)) { return 0; } @@ -168,27 +170,38 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) { } } } + if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; + + // palette-friendly input typically uses less literals + // -> reduce block size a bit + if (enc->use_palette_) refs_block_size /= 2; + VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size); + VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size); return 1; } +// Returns false in case of memory error. static int GetHuffBitLengthsAndCodes( const VP8LHistogramSet* const histogram_image, HuffmanTreeCode* const huffman_codes) { int i, k; - int ok = 1; + int ok = 0; uint64_t total_length_size = 0; uint8_t* mem_buf = NULL; const int histogram_image_size = histogram_image->size; + int max_num_symbols = 0; + uint8_t* buf_rle = NULL; + HuffmanTree* huff_tree = NULL; // Iterate over all histograms and get the aggregate number of codes used. for (i = 0; i < histogram_image_size; ++i) { const VP8LHistogram* const histo = histogram_image->histograms[i]; HuffmanTreeCode* const codes = &huffman_codes[5 * i]; for (k = 0; k < 5; ++k) { - const int num_symbols = (k == 0) ? VP8LHistogramNumCodes(histo) - : (k == 4) ? NUM_DISTANCE_CODES - : 256; + const int num_symbols = + (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) : + (k == 4) ? NUM_DISTANCE_CODES : 256; codes[k].num_symbols = num_symbols; total_length_size += num_symbols; } @@ -200,10 +213,8 @@ static int GetHuffBitLengthsAndCodes( uint8_t* lengths; mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size, sizeof(*lengths) + sizeof(*codes)); - if (mem_buf == NULL) { - ok = 0; - goto End; - } + if (mem_buf == NULL) goto End; + codes = (uint16_t*)mem_buf; lengths = (uint8_t*)&codes[total_length_size]; for (i = 0; i < 5 * histogram_image_size; ++i) { @@ -212,24 +223,33 @@ static int GetHuffBitLengthsAndCodes( huffman_codes[i].code_lengths = lengths; codes += bit_length; lengths += bit_length; + if (max_num_symbols < bit_length) { + max_num_symbols = bit_length; + } } } + buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols); + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols, + sizeof(*huff_tree)); + if (buf_rle == NULL || huff_tree == NULL) goto End; + // Create Huffman trees. - for (i = 0; ok && (i < histogram_image_size); ++i) { + for (i = 0; i < histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[5 * i]; VP8LHistogram* const histo = histogram_image->histograms[i]; - ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0); - ok = ok && VP8LCreateHuffmanTree(histo->red_, 15, codes + 1); - ok = ok && VP8LCreateHuffmanTree(histo->blue_, 15, codes + 2); - ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 15, codes + 3); - ok = ok && VP8LCreateHuffmanTree(histo->distance_, 15, codes + 4); + VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0); + VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1); + VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2); + VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3); + VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4); } - + ok = 1; End: + WebPSafeFree(huff_tree); + WebPSafeFree(buf_rle); if (!ok) { - free(mem_buf); - // If one VP8LCreateHuffmanTree() above fails, we need to clean up behind. + WebPSafeFree(mem_buf); memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes)); } return ok; @@ -296,18 +316,16 @@ static void StoreHuffmanTreeToBitMask( } } -static int StoreFullHuffmanCode(VP8LBitWriter* const bw, - const HuffmanTreeCode* const tree) { - int ok = 0; +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreFullHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const tree) { uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 }; uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 }; const int max_tokens = tree->num_symbols; int num_tokens; HuffmanTreeCode huffman_code; - HuffmanTreeToken* const tokens = - (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens)); - if (tokens == NULL) return 0; - huffman_code.num_symbols = CODE_LENGTH_CODES; huffman_code.code_lengths = code_length_bitdepth; huffman_code.codes = code_length_bitdepth_symbols; @@ -315,15 +333,14 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw, VP8LWriteBits(bw, 1, 0); num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens); { - int histogram[CODE_LENGTH_CODES] = { 0 }; + uint32_t histogram[CODE_LENGTH_CODES] = { 0 }; + uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 }; int i; for (i = 0; i < num_tokens; ++i) { ++histogram[tokens[i].code]; } - if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) { - goto End; - } + VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code); } StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth); @@ -360,14 +377,13 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw, } StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code); } - ok = 1; - End: - free(tokens); - return ok; } -static int StoreHuffmanCode(VP8LBitWriter* const bw, - const HuffmanTreeCode* const huffman_code) { +// 'huff_tree' and 'tokens' are pre-alloacted buffers. +static void StoreHuffmanCode(VP8LBitWriter* const bw, + HuffmanTree* const huff_tree, + HuffmanTreeToken* const tokens, + const HuffmanTreeCode* const huffman_code) { int i; int count = 0; int symbols[2] = { 0, 0 }; @@ -385,7 +401,6 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw, if (count == 0) { // emit minimal tree for empty cases // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0 VP8LWriteBits(bw, 4, 0x01); - return 1; } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) { VP8LWriteBits(bw, 1, 1); // Small tree marker to encode 1 or 2 symbols. VP8LWriteBits(bw, 1, count - 1); @@ -399,9 +414,8 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw, if (count == 2) { VP8LWriteBits(bw, 8, symbols[1]); } - return 1; } else { - return StoreFullHuffmanCode(bw, huffman_code); + StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code); } } @@ -413,18 +427,18 @@ static void WriteHuffmanCode(VP8LBitWriter* const bw, VP8LWriteBits(bw, depth, symbol); } -static void StoreImageToBitMask( +static WebPEncodingError StoreImageToBitMask( VP8LBitWriter* const bw, int width, int histo_bits, - const VP8LBackwardRefs* const refs, + VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, const HuffmanTreeCode* const huffman_codes) { // x and y trace the position in the image. int x = 0; int y = 0; const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; - int i; - for (i = 0; i < refs->size; ++i) { - const PixOrCopy* const v = &refs->refs[i]; + VP8LRefsCursor c = VP8LRefsCursorInit(refs); + while (VP8LRefsCursorOk(&c)) { + const PixOrCopy* const v = c.cur_pos; const int histogram_ix = histogram_symbols[histo_bits ? (y >> histo_bits) * histo_xsize + (x >> histo_bits) : 0]; @@ -458,88 +472,128 @@ static void StoreImageToBitMask( x -= width; ++y; } + VP8LRefsCursorNext(&c); } + return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; } // Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static int EncodeImageNoHuffman(VP8LBitWriter* const bw, - const uint32_t* const argb, - int width, int height, int quality) { +static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, + int quality) { int i; - int ok = 0; - VP8LBackwardRefs refs; + int max_tokens = 0; + WebPEncodingError err = VP8_ENC_OK; + VP8LBackwardRefs* refs; + HuffmanTreeToken* tokens = NULL; HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0); - if (histogram_image == NULL) return 0; + HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); + if (histogram_image == NULL || huff_tree == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } // Calculate backward references from ARGB image. - if (!VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, &refs)) { + refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, + hash_chain, refs_array); + if (refs == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // Build histogram image and symbols from backward references. - VP8LHistogramStoreRefs(&refs, histogram_image->histograms[0]); + VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]); // Create Huffman bit lengths and codes for each histogram image. assert(histogram_image->size == 1); if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } // No color cache, no Huffman image. VP8LWriteBits(bw, 1, 0); - // Store Huffman codes. + // Find maximum number of symbols for the huffman tree-set. for (i = 0; i < 5; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; - if (!StoreHuffmanCode(bw, codes)) { - goto Error; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; } + } + + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; + goto Error; + } + + // Store Huffman codes. + for (i = 0; i < 5; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); ClearHuffmanTreeIfOnlyOneSymbol(codes); } // Store actual literals. - StoreImageToBitMask(bw, width, 0, &refs, histogram_symbols, huffman_codes); - ok = 1; + err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, + huffman_codes); Error: - free(histogram_image); - VP8LClearBackwardRefs(&refs); - free(huffman_codes[0].codes); - return ok; + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + WebPSafeFree(huffman_codes[0].codes); + return err; } -static int EncodeImageInternal(VP8LBitWriter* const bw, - const uint32_t* const argb, - int width, int height, int quality, - int cache_bits, int histogram_bits) { - int ok = 0; +static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs refs_array[2], + int width, int height, int quality, + int cache_bits, + int histogram_bits) { + WebPEncodingError err = VP8_ENC_OK; const int use_2d_locality = 1; const int use_color_cache = (cache_bits > 0); const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); VP8LHistogramSet* histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, 0); + VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits); int histogram_image_size = 0; size_t bit_array_size = 0; + HuffmanTree* huff_tree = NULL; + HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; VP8LBackwardRefs refs; + VP8LBackwardRefs* best_refs; uint16_t* const histogram_symbols = - (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize, + (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols)); assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); + VP8LBackwardRefsInit(&refs, refs_array[0].block_size_); if (histogram_image == NULL || histogram_symbols == NULL) { - free(histogram_image); - free(histogram_symbols); + VP8LFreeHistogramSet(histogram_image); + WebPSafeFree(histogram_symbols); return 0; } + // 'best_refs' is the reference to the best backward refs and points to one + // of refs_array[0] or refs_array[1]. // Calculate backward references from ARGB image. - if (!VP8LGetBackwardReferences(width, height, argb, quality, cache_bits, - use_2d_locality, &refs)) { + best_refs = VP8LGetBackwardReferences(width, height, argb, quality, + cache_bits, use_2d_locality, + hash_chain, refs_array); + if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) { goto Error; } // Build histogram image and symbols from backward references. @@ -559,7 +613,7 @@ static int EncodeImageInternal(VP8LBitWriter* const bw, goto Error; } // Free combined histograms. - free(histogram_image); + VP8LFreeHistogramSet(histogram_image); histogram_image = NULL; // Color Cache parameters. @@ -574,7 +628,7 @@ static int EncodeImageInternal(VP8LBitWriter* const bw, VP8LWriteBits(bw, 1, write_histogram_image); if (write_histogram_image) { uint32_t* const histogram_argb = - (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize, + (uint32_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_argb)); int max_index = 0; uint32_t i; @@ -589,40 +643,54 @@ static int EncodeImageInternal(VP8LBitWriter* const bw, histogram_image_size = max_index; VP8LWriteBits(bw, 3, histogram_bits - 2); - ok = EncodeImageNoHuffman(bw, histogram_argb, - VP8LSubSampleSize(width, histogram_bits), - VP8LSubSampleSize(height, histogram_bits), - quality); - free(histogram_argb); - if (!ok) goto Error; + err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array, + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), + quality); + WebPSafeFree(histogram_argb); + if (err != VP8_ENC_OK) goto Error; } } // Store Huffman codes. { int i; + int max_tokens = 0; + huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES, + sizeof(*huff_tree)); + if (huff_tree == NULL) goto Error; + // Find maximum number of symbols for the huffman tree-set. + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, + sizeof(*tokens)); + if (tokens == NULL) goto Error; for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; - if (!StoreHuffmanCode(bw, codes)) goto Error; + StoreHuffmanCode(bw, huff_tree, tokens, codes); ClearHuffmanTreeIfOnlyOneSymbol(codes); } } // Store actual literals. - StoreImageToBitMask(bw, width, histogram_bits, &refs, - histogram_symbols, huffman_codes); - ok = 1; + err = StoreImageToBitMask(bw, width, histogram_bits, &refs, + histogram_symbols, huffman_codes); Error: - free(histogram_image); - - VP8LClearBackwardRefs(&refs); + WebPSafeFree(tokens); + WebPSafeFree(huff_tree); + VP8LFreeHistogramSet(histogram_image); + VP8LBackwardRefsClear(&refs); if (huffman_codes != NULL) { - free(huffman_codes->codes); - free(huffman_codes); + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); } - free(histogram_symbols); - return ok; + WebPSafeFree(histogram_symbols); + return err; } // ----------------------------------------------------------------------------- @@ -630,17 +698,16 @@ static int EncodeImageInternal(VP8LBitWriter* const bw, // Check if it would be a good idea to subtract green from red and blue. We // only impact entropy in red/blue components, don't bother to look at others. -static int EvalAndApplySubtractGreen(VP8LEncoder* const enc, - int width, int height, - VP8LBitWriter* const bw) { +static WebPEncodingError EvalAndApplySubtractGreen(VP8LEncoder* const enc, + int width, int height, + VP8LBitWriter* const bw) { if (!enc->use_palette_) { int i; const uint32_t* const argb = enc->argb_; double bit_cost_before, bit_cost_after; - VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo)); - if (histo == NULL) return 0; - - VP8LHistogramInit(histo, 1); + // Allocate histogram with cache_bits = 1. + VP8LHistogram* const histo = VP8LAllocateHistogram(1); + if (histo == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; for (i = 0; i < width * height; ++i) { const uint32_t c = argb[i]; ++histo->red_[(c >> 16) & 0xff]; @@ -656,7 +723,7 @@ static int EvalAndApplySubtractGreen(VP8LEncoder* const enc, ++histo->blue_[((c >> 0) - green) & 0xff]; } bit_cost_after = VP8LHistogramEstimateBits(histo); - free(histo); + VP8LFreeHistogram(histo); // Check if subtracting green yields low entropy. enc->use_subtract_green_ = (bit_cost_after < bit_cost_before); @@ -666,12 +733,12 @@ static int EvalAndApplySubtractGreen(VP8LEncoder* const enc, VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } } - return 1; + return VP8_ENC_OK; } -static int ApplyPredictFilter(const VP8LEncoder* const enc, - int width, int height, int quality, - VP8LBitWriter* const bw) { +static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, + int width, int height, int quality, + VP8LBitWriter* const bw) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); @@ -682,32 +749,32 @@ static int ApplyPredictFilter(const VP8LEncoder* const enc, VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM); assert(pred_bits >= 2); VP8LWriteBits(bw, 3, pred_bits - 2); - if (!EncodeImageNoHuffman(bw, enc->transform_data_, - transform_width, transform_height, quality)) { - return 0; - } - return 1; + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); } -static int ApplyCrossColorFilter(const VP8LEncoder* const enc, - int width, int height, int quality, - VP8LBitWriter* const bw) { +static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, + int width, int height, + int quality, + VP8LBitWriter* const bw) { const int ccolor_transform_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); - const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16; - VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step, + VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, enc->argb_, enc->transform_data_); VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM); assert(ccolor_transform_bits >= 2); VP8LWriteBits(bw, 3, ccolor_transform_bits - 2); - if (!EncodeImageNoHuffman(bw, enc->transform_data_, - transform_width, transform_height, quality)) { - return 0; - } - return 1; + return EncodeImageNoHuffman(bw, enc->transform_data_, + (VP8LHashChain*)&enc->hash_chain_, + (VP8LBackwardRefs*)enc->refs_, // cast const away + transform_width, transform_height, + quality); } // ----------------------------------------------------------------------------- @@ -785,11 +852,11 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, const int tile_size = 1 << enc->transform_bits_; const uint64_t image_size = width * height; const uint64_t argb_scratch_size = tile_size * width + width; - const uint64_t transform_data_size = - (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) * - (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_); + const int transform_data_size = + VP8LSubSampleSize(width, enc->transform_bits_) * + VP8LSubSampleSize(height, enc->transform_bits_); const uint64_t total_size = - image_size + argb_scratch_size + transform_data_size; + image_size + argb_scratch_size + (uint64_t)transform_data_size; uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem)); if (mem == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; @@ -888,7 +955,7 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, if (err != VP8_ENC_OK) goto Error; dst = enc->argb_; - row = (uint8_t*)WebPSafeMalloc((uint64_t)width, sizeof(*row)); + row = (uint8_t*)WebPSafeMalloc(width, sizeof(*row)); if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; ApplyPalette(src, dst, pic->argb_stride, enc->current_width_, @@ -902,42 +969,48 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, for (i = palette_size - 1; i >= 1; --i) { palette[i] = VP8LSubPixels(palette[i], palette[i - 1]); } - if (!EncodeImageNoHuffman(bw, palette, palette_size, 1, quality)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; - } + err = EncodeImageNoHuffman(bw, palette, &enc->hash_chain_, enc->refs_, + palette_size, 1, quality); Error: - free(row); + WebPSafeFree(row); return err; } // ----------------------------------------------------------------------------- static int GetHistoBits(int method, int use_palette, int width, int height) { - const uint64_t hist_size = sizeof(VP8LHistogram); + const int hist_size = VP8LGetHistogramSize(MAX_COLOR_CACHE_BITS); // Make tile size a function of encoding method (Range: 0 to 6). int histo_bits = (use_palette ? 9 : 7) - method; while (1) { - const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) * - VP8LSubSampleSize(height, histo_bits) * - hist_size; - if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break; + const int huff_image_size = VP8LSubSampleSize(width, histo_bits) * + VP8LSubSampleSize(height, histo_bits); + if ((uint64_t)huff_image_size * hist_size <= MAX_HUFF_IMAGE_SIZE) break; ++histo_bits; } return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS : (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; } +static int GetTransformBits(int method, int histo_bits) { + const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5; + return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits; +} + +static int GetCacheBits(float quality) { + return (quality <= 25.f) ? 0 : 7; +} + static void FinishEncParams(VP8LEncoder* const enc) { const WebPConfig* const config = enc->config_; const WebPPicture* const pic = enc->pic_; const int method = config->method; const float quality = config->quality; const int use_palette = enc->use_palette_; - enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4; enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height); - enc->cache_bits_ = (quality <= 25.f) ? 0 : 7; + enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); + enc->cache_bits_ = GetCacheBits(quality); } // ----------------------------------------------------------------------------- @@ -945,7 +1018,7 @@ static void FinishEncParams(VP8LEncoder* const enc) { static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, const WebPPicture* const picture) { - VP8LEncoder* const enc = (VP8LEncoder*)calloc(1, sizeof(*enc)); + VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc)); if (enc == NULL) { WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); return NULL; @@ -959,8 +1032,13 @@ static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config, } static void VP8LEncoderDelete(VP8LEncoder* enc) { - free(enc->argb_); - free(enc); + if (enc != NULL) { + VP8LHashChainClear(&enc->hash_chain_); + VP8LBackwardRefsClear(&enc->refs_[0]); + VP8LBackwardRefsClear(&enc->refs_[1]); + WebPSafeFree(enc->argb_); + WebPSafeFree(enc); + } } // ----------------------------------------------------------------------------- @@ -984,7 +1062,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // --------------------------------------------------------------------------- // Analyze image (entropy, num_palettes etc) - if (!VP8LEncAnalyze(enc, config->image_hint)) { + if (!AnalyzeAndInit(enc, config->image_hint)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -1003,6 +1081,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, int y; err = AllocateTransformBuffer(enc, width, height); if (err != VP8_ENC_OK) goto Error; + assert(enc->argb_ != NULL); for (y = 0; y < height; ++y) { memcpy(enc->argb_ + y * width, picture->argb + y * picture->argb_stride, @@ -1014,23 +1093,17 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // --------------------------------------------------------------------------- // Apply transforms and write transform data. - if (!EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + err = EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw); + if (err != VP8_ENC_OK) goto Error; if (enc->use_predict_) { - if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, bw)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; - } + err = ApplyPredictFilter(enc, enc->current_width_, height, quality, bw); + if (err != VP8_ENC_OK) goto Error; } if (enc->use_cross_color_) { - if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; - goto Error; - } + err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw); + if (err != VP8_ENC_OK) goto Error; } VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT); // No more transforms. @@ -1040,8 +1113,9 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, if (enc->cache_bits_ > 0) { if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_, - height, &enc->cache_bits_)) { - err = VP8_ENC_ERROR_INVALID_CONFIGURATION; + height, quality, &enc->hash_chain_, + &enc->refs_[0], &enc->cache_bits_)) { + err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } } @@ -1049,11 +1123,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // --------------------------------------------------------------------------- // Encode and write the transformed image. - if (!EncodeImageInternal(bw, enc->argb_, enc->current_width_, height, - quality, enc->cache_bits_, enc->histo_bits_)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, + enc->current_width_, height, quality, + enc->cache_bits_, enc->histo_bits_); + if (err != VP8_ENC_OK) goto Error; if (picture->stats != NULL) { WebPAuxStats* const stats = picture->stats; @@ -1080,6 +1153,7 @@ int VP8LEncodeImage(const WebPConfig* const config, int has_alpha; size_t coded_size; int percent = 0; + int initial_size; WebPEncodingError err = VP8_ENC_OK; VP8LBitWriter bw; @@ -1093,7 +1167,11 @@ int VP8LEncodeImage(const WebPConfig* const config, width = picture->width; height = picture->height; - if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) { + // Initialize BitWriter with size corresponding to 16 bpp to photo images and + // 8 bpp for graphical images. + initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? + width * height : width * height * 2; + if (!VP8LBitWriterInit(&bw, initial_size)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -1165,4 +1243,3 @@ int VP8LEncodeImage(const WebPConfig* const config, } //------------------------------------------------------------------------------ - diff --git a/src/3rdparty/libwebp/src/enc/vp8li.h b/src/3rdparty/libwebp/src/enc/vp8li.h index 96d6fae..6b6db12 100644 --- a/src/3rdparty/libwebp/src/enc/vp8li.h +++ b/src/3rdparty/libwebp/src/enc/vp8li.h @@ -14,6 +14,7 @@ #ifndef WEBP_ENC_VP8LI_H_ #define WEBP_ENC_VP8LI_H_ +#include "./backward_references.h" #include "./histogram.h" #include "../utils/bit_writer.h" #include "../webp/encode.h" @@ -45,6 +46,12 @@ typedef struct { int use_palette_; int palette_size_; uint32_t palette_[MAX_PALETTE_SIZE]; + + // Some 'scratch' (potentially large) objects. + struct VP8LBackwardRefs refs_[2]; // Backward Refs array corresponding to + // LZ77 & RLE coding. + VP8LHashChain hash_chain_; // HashChain data for constructing + // backward references. } VP8LEncoder; //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/enc/webpenc.c b/src/3rdparty/libwebp/src/enc/webpenc.c index 207cce6..ca85e0b 100644 --- a/src/3rdparty/libwebp/src/enc/webpenc.c +++ b/src/3rdparty/libwebp/src/enc/webpenc.c @@ -18,6 +18,7 @@ #include "./vp8enci.h" #include "./vp8li.h" +#include "./cost.h" #include "../utils/utils.h" // #define PRINT_MEMORY_INFO @@ -33,31 +34,6 @@ int WebPGetEncoderVersion(void) { } //------------------------------------------------------------------------------ -// WebPPicture -//------------------------------------------------------------------------------ - -static int DummyWriter(const uint8_t* data, size_t data_size, - const WebPPicture* const picture) { - // The following are to prevent 'unused variable' error message. - (void)data; - (void)data_size; - (void)picture; - return 1; -} - -int WebPPictureInitInternal(WebPPicture* picture, int version) { - if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) { - return 0; // caller/system version mismatch! - } - if (picture != NULL) { - memset(picture, 0, sizeof(*picture)); - picture->writer = DummyWriter; - WebPEncodingSetError(picture, VP8_ENC_OK); - } - return 1; -} - -//------------------------------------------------------------------------------ // VP8Encoder //------------------------------------------------------------------------------ @@ -143,23 +119,21 @@ static void MapConfigToTools(VP8Encoder* const enc) { // Memory scaling with dimensions: // memory (bytes) ~= 2.25 * w + 0.0625 * w * h // -// Typical memory footprint (768x510 picture) -// Memory used: -// encoder: 33919 -// block cache: 2880 -// info: 3072 -// preds: 24897 -// top samples: 1623 -// non-zero: 196 -// lf-stats: 2048 -// total: 68635 +// Typical memory footprint (614x440 picture) +// encoder: 22111 +// info: 4368 +// preds: 17741 +// top samples: 1263 +// non-zero: 175 +// lf-stats: 0 +// total: 45658 // Transient object sizes: -// VP8EncIterator: 352 -// VP8ModeScore: 912 -// VP8SegmentInfo: 532 -// VP8Proba: 31032 +// VP8EncIterator: 3360 +// VP8ModeScore: 872 +// VP8SegmentInfo: 732 +// VP8Proba: 18352 // LFStats: 2048 -// Picture size (yuv): 589824 +// Picture size (yuv): 419328 static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, WebPPicture* const picture) { @@ -251,13 +225,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, ResetSegmentHeader(enc); ResetFilterHeader(enc); ResetBoundaryPredictions(enc); - + VP8GetResidualCostInit(); + VP8SetResidualCoeffsInit(); VP8EncInitAlpha(enc); -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8EncInitLayer(enc); -#endif - VP8TBufferInit(&enc->tokens_); + // lower quality means smaller output -> we modulate a little the page + // size based on quality. This is just a crude 1rst-order prediction. + { + const float scale = 1.f + config->quality * 5.f / 100.f; // in [1,6] + VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale)); + } return enc; } @@ -265,11 +242,8 @@ static int DeleteVP8Encoder(VP8Encoder* enc) { int ok = 1; if (enc != NULL) { ok = VP8EncDeleteAlpha(enc); -#ifdef WEBP_EXPERIMENTAL_FEATURES - VP8EncDeleteLayer(enc); -#endif VP8TBufferClear(&enc->tokens_); - free(enc); + WebPSafeFree(enc); } return ok; } @@ -352,18 +326,26 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (!config->lossless) { VP8Encoder* enc = NULL; - if (pic->y == NULL || pic->u == NULL || pic->v == NULL) { + if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) { // Make sure we have YUVA samples. - float dithering = 0.f; - if (config->preprocessing & 2) { - const float x = config->quality / 100.f; - const float x2 = x * x; - // slowly decreasing from max dithering at low quality (q->0) - // to 0.5 dithering amplitude at high quality (q->100) - dithering = 1.0f + (0.5f - 1.0f) * x2 * x2; - } - if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) { - return 0; + if (config->preprocessing & 4) { +#if WEBP_ENCODER_ABI_VERSION > 0x0204 + if (!WebPPictureSmartARGBToYUVA(pic)) { + return 0; + } +#endif + } else { + float dithering = 0.f; + if (config->preprocessing & 2) { + const float x = config->quality / 100.f; + const float x2 = x * x; + // slowly decreasing from max dithering at low quality (q->0) + // to 0.5 dithering amplitude at high quality (q->100) + dithering = 1.0f + (0.5f - 1.0f) * x2 * x2; + } + if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) { + return 0; + } } } @@ -380,9 +362,6 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { ok = ok && VP8EncTokenLoop(enc); } ok = ok && VP8EncFinishAlpha(enc); -#ifdef WEBP_EXPERIMENTAL_FEATURES - ok = ok && VP8EncFinishLayer(enc); -#endif ok = ok && VP8EncWrite(enc); StoreStats(enc); @@ -401,4 +380,3 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { return ok; } - |