diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/enc/quant_enc.c')
-rw-r--r-- | src/3rdparty/libwebp/src/enc/quant_enc.c | 143 |
1 files changed, 116 insertions, 27 deletions
diff --git a/src/3rdparty/libwebp/src/enc/quant_enc.c b/src/3rdparty/libwebp/src/enc/quant_enc.c index b118fb2..01eb565 100644 --- a/src/3rdparty/libwebp/src/enc/quant_enc.c +++ b/src/3rdparty/libwebp/src/enc/quant_enc.c @@ -15,8 +15,9 @@ #include <math.h> #include <stdlib.h> // for abs() -#include "./vp8i_enc.h" -#include "./cost_enc.h" +#include "src/dsp/quant.h" +#include "src/enc/vp8i_enc.h" +#include "src/enc/cost_enc.h" #define DO_TRELLIS_I4 1 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate. @@ -32,7 +33,7 @@ // number of non-zero coeffs below which we consider the block very flat // (and apply a penalty to complex predictions) -#define FLATNESS_LIMIT_I16 10 // I16 mode +#define FLATNESS_LIMIT_I16 0 // I16 mode (special case) #define FLATNESS_LIMIT_I4 3 // I4 mode #define FLATNESS_LIMIT_UV 2 // UV mode #define FLATNESS_PENALTY 140 // roughly ~1bit per block @@ -457,11 +458,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { // Form the predictions in cache // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index -const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; -const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; +const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; +const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index -const int VP8I4ModeOffsets[NUM_BMODES] = { +const uint16_t VP8I4ModeOffsets[NUM_BMODES] = { I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 }; @@ -492,14 +493,14 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { // |YYYY|....| 12 // +----+----+ -const int VP8Scan[16] = { // Luma +const uint16_t VP8Scan[16] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, }; -static const int VP8ScanUV[4 + 4] = { +static const uint16_t VP8ScanUV[4 + 4] = { 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; @@ -826,6 +827,85 @@ static int ReconstructIntra4(VP8EncIterator* const it, return nz; } +//------------------------------------------------------------------------------ +// DC-error diffusion + +// Diffusion weights. We under-correct a bit (15/16th of the error is actually +// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. +#define C1 7 // fraction of error sent to the 4x4 block below +#define C2 8 // fraction of error sent to the 4x4 block on the right +#define DSHIFT 4 +#define DSCALE 1 // storage descaling, needed to make the error fit int8_t + +// Quantize as usual, but also compute and return the quantization error. +// Error is already divided by DSHIFT. +static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { + int V = *v; + const int sign = (V < 0); + if (sign) V = -V; + if (V > (int)mtx->zthresh_[0]) { + const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0]; + const int err = (V - qV); + *v = sign ? -qV : qV; + return (sign ? -err : err) >> DSCALE; + } + *v = 0; + return (sign ? -V : V) >> DSCALE; +} + +static void CorrectDCValues(const VP8EncIterator* const it, + const VP8Matrix* const mtx, + int16_t tmp[][16], VP8ModeScore* const rd) { + // | top[0] | top[1] + // --------+--------+--------- + // left[0] | tmp[0] tmp[1] <-> err0 err1 + // left[1] | tmp[2] tmp[3] err2 err3 + // + // Final errors {err1,err2,err3} are preserved and later restored + // as top[]/left[] on the next block. + int ch; + for (ch = 0; ch <= 1; ++ch) { + const int8_t* const top = it->top_derr_[it->x_][ch]; + const int8_t* const left = it->left_derr_[ch]; + int16_t (* const c)[16] = &tmp[ch * 4]; + int err0, err1, err2, err3; + c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE); + err0 = QuantizeSingle(&c[0][0], mtx); + c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE); + err1 = QuantizeSingle(&c[1][0], mtx); + c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE); + err2 = QuantizeSingle(&c[2][0], mtx); + c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE); + err3 = QuantizeSingle(&c[3][0], mtx); + // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence + // err >> DSCALE will fit in an int8_t type if DSCALE>=1. + assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127); + rd->derr[ch][0] = (int8_t)err1; + rd->derr[ch][1] = (int8_t)err2; + rd->derr[ch][2] = (int8_t)err3; + } +} + +static void StoreDiffusionErrors(VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int ch; + for (ch = 0; ch <= 1; ++ch) { + int8_t* const top = it->top_derr_[it->x_][ch]; + int8_t* const left = it->left_derr_[ch]; + left[0] = rd->derr[ch][0]; // restore err1 + left[1] = 3 * rd->derr[ch][2] >> 2; // ... 3/4th of err3 + top[0] = rd->derr[ch][1]; // ... err2 + top[1] = rd->derr[ch][2] - left[1]; // ... 1/4th of err3. + } +} + +#undef C1 +#undef C2 +#undef DSHIFT +#undef DSCALE + +//------------------------------------------------------------------------------ + static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; @@ -839,6 +919,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (n = 0; n < 8; n += 2) { VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } + if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd); + if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; for (ch = 0, n = 0; ch <= 2; ch += 2) { @@ -896,19 +978,6 @@ static void SwapOut(VP8EncIterator* const it) { SwapPtr(&it->yuv_out_, &it->yuv_out2_); } -static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { - score_t score = 0; - while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? - int i; - for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC - score += (levels[i] != 0); - if (score > thresh) return 0; - } - levels += 16; - } - return 1; -} - static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { const int kNumBlocks = 16; VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; @@ -919,6 +988,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { VP8ModeScore* rd_cur = &rd_tmp; VP8ModeScore* rd_best = rd; int mode; + int is_flat = IsFlatSource16(it->yuv_in_ + Y_OFF_ENC); rd->mode_i16 = -1; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { @@ -934,10 +1004,14 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; rd_cur->H = VP8FixedCostsI16[mode]; rd_cur->R = VP8GetCostLuma16(it, rd_cur); - if (mode > 0 && - IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { - // penalty to avoid flat area to be mispredicted by complex mode - rd_cur->R += FLATNESS_PENALTY * kNumBlocks; + if (is_flat) { + // refine the first impression (which was in pixel space) + is_flat = IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16); + if (is_flat) { + // Block is very flat. We put emphasis on the distortion being very low! + rd_cur->D *= 2; + rd_cur->SD *= 2; + } } // Since we always examine Intra16 first, we can overwrite *rd directly. @@ -1018,7 +1092,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { : 0; rd_tmp.H = mode_costs[mode]; - // Add flatness penalty + // Add flatness penalty, to avoid flat area to be mispredicted + // by a complex mode. if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; } else { @@ -1101,6 +1176,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { CopyScore(&rd_best, &rd_uv); rd->mode_uv = mode; memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); + if (it->top_derr_ != NULL) { + memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr)); + } SwapPtr(&dst, &tmp_dst); } } @@ -1109,6 +1187,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { if (dst != dst0) { // copy 16x8 block if needed VP8Copy16x8(dst, dst0); } + if (it->top_derr_ != NULL) { // store diffusion errors for next block + StoreDiffusionErrors(it, rd); + } } //------------------------------------------------------------------------------ @@ -1162,16 +1243,24 @@ static void RefineUsingDistortion(VP8EncIterator* const it, const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; - const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT + const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT + VP8FixedCostsI16[mode] * lambda_d_i16; if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { continue; } + if (score < best_score) { best_mode = mode; best_score = score; } } + if (it->x_ == 0 || it->y_ == 0) { + // avoid starting a checkerboard resonance from the border. See bug #432. + if (IsFlatSource16(src)) { + best_mode = (it->x_ == 0) ? 0 : 2; + try_both_modes = 0; // stick to i16 + } + } VP8SetIntra16Mode(it, best_mode); // we'll reconstruct later, if i16 mode actually gets selected } |