path: root/src/3rdparty/libwebp/src/enc/quant_enc.c
Diffstat (limited to 'src/3rdparty/libwebp/src/enc/quant_enc.c')
-rw-r--r--  src/3rdparty/libwebp/src/enc/quant_enc.c | 143
1 file changed, 116 insertions, 27 deletions
diff --git a/src/3rdparty/libwebp/src/enc/quant_enc.c b/src/3rdparty/libwebp/src/enc/quant_enc.c
index b118fb2..01eb565 100644
--- a/src/3rdparty/libwebp/src/enc/quant_enc.c
+++ b/src/3rdparty/libwebp/src/enc/quant_enc.c
@@ -15,8 +15,9 @@
#include <math.h>
#include <stdlib.h> // for abs()
-#include "./vp8i_enc.h"
-#include "./cost_enc.h"
+#include "src/dsp/quant.h"
+#include "src/enc/vp8i_enc.h"
+#include "src/enc/cost_enc.h"
#define DO_TRELLIS_I4 1
#define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
@@ -32,7 +33,7 @@
// number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions)
-#define FLATNESS_LIMIT_I16 10 // I16 mode
+#define FLATNESS_LIMIT_I16 0 // I16 mode (special case)
#define FLATNESS_LIMIT_I4 3 // I4 mode
#define FLATNESS_LIMIT_UV 2 // UV mode
#define FLATNESS_PENALTY 140 // roughly ~1bit per block
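With the limit now 0, the I16 flatness test degenerates to "no non-zero AC coefficient at all": IsFlat() counts non-zero AC levels and fails as soon as the count exceeds the threshold. The helper itself is moved by this patch out of this file into the newly included src/dsp/quant.h; its old body, removed in a hunk further down, reads:

    static score_t IsFlat(const int16_t* levels, int num_blocks,
                          score_t thresh) {
      score_t score = 0;
      while (num_blocks-- > 0) {
        int i;
        for (i = 1; i < 16; ++i) {        // omit DC: only AC coefficients count
          score += (levels[i] != 0);
          if (score > thresh) return 0;   // thresh == 0 tolerates no AC at all
        }
        levels += 16;
      }
      return 1;
    }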
@@ -457,11 +458,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
// Form the predictions in cache
// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
-const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
-const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
+const uint16_t VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
+const uint16_t VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
-const int VP8I4ModeOffsets[NUM_BMODES] = {
+const uint16_t VP8I4ModeOffsets[NUM_BMODES] = {
I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
};
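Narrowing these tables from int to uint16_t is safe: each entry is a small byte offset selecting one prediction block inside the encoder's per-macroblock prediction cache (it->yuv_p_). A minimal sketch of how such a table is consumed, mirroring the RefineUsingDistortion hunk at the end of this diff (the helper name here is hypothetical):

    // Hypothetical helper, for illustration: pick the cached prediction
    // for an I16 mode, 'mode' indexing the {DC, TM, V, H} order above.
    static const uint8_t* I16Pred(const VP8EncIterator* const it, int mode) {
      return it->yuv_p_ + VP8I16ModeOffsets[mode];
    }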
@@ -492,14 +493,14 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
// |YYYY|....| 12
// +----+----+
-const int VP8Scan[16] = { // Luma
+const uint16_t VP8Scan[16] = { // Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
};
-static const int VP8ScanUV[4 + 4] = {
+static const uint16_t VP8ScanUV[4 + 4] = {
0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
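Each VP8Scan entry encodes the x + y * BPS offset of one 4x4 sub-block within the macroblock, in the raster order pictured above; VP8ScanUV does the same for the side-by-side U and V 8x8 areas. A sketch of the usual traversal (illustrative function only, cf. the VP8FTransform2 loop in the ReconstructUV hunk below):

    // Illustrative traversal of the 16 luma 4x4 sub-blocks: 'src' points
    // at the macroblock's top-left luma sample in the cache (stride BPS).
    static void ForEachLuma4(const uint8_t* const src) {
      int n;
      for (n = 0; n < 16; ++n) {
        const uint8_t* const block = src + VP8Scan[n];
        (void)block;   // transform / quantize the 4x4 block here
      }
    }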
@@ -826,6 +827,85 @@ static int ReconstructIntra4(VP8EncIterator* const it,
return nz;
}
+//------------------------------------------------------------------------------
+// DC-error diffusion
+
+// Diffusion weights. We under-correct a bit (15/16th of the error is actually
+// diffused) to avoid a 'rainbow' chessboard pattern of blocks at q~=0.
+#define C1 7 // fraction of error sent to the 4x4 block below
+#define C2 8 // fraction of error sent to the 4x4 block on the right
+#define DSHIFT 4
+#define DSCALE 1 // storage descaling, needed to make the error fit int8_t
+
+// Quantize as usual, but also compute and return the quantization error.
+// Error is already divided by DSHIFT.
+static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
+ int V = *v;
+ const int sign = (V < 0);
+ if (sign) V = -V;
+ if (V > (int)mtx->zthresh_[0]) {
+ const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
+ const int err = (V - qV);
+ *v = sign ? -qV : qV;
+ return (sign ? -err : err) >> DSCALE;
+ }
+ *v = 0;
+ return (sign ? -V : V) >> DSCALE;
+}
+
+static void CorrectDCValues(const VP8EncIterator* const it,
+ const VP8Matrix* const mtx,
+ int16_t tmp[][16], VP8ModeScore* const rd) {
+ //         | top[0] | top[1]
+ // --------+--------+---------
+ // left[0] | tmp[0]   tmp[1]   <->  err0 err1
+ // left[1] | tmp[2]   tmp[3]        err2 err3
+ //
+ // Final errors {err1,err2,err3} are preserved and later restored
+ // as top[]/left[] on the next block.
+ int ch;
+ for (ch = 0; ch <= 1; ++ch) {
+ const int8_t* const top = it->top_derr_[it->x_][ch];
+ const int8_t* const left = it->left_derr_[ch];
+ int16_t (* const c)[16] = &tmp[ch * 4];
+ int err0, err1, err2, err3;
+ c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
+ err0 = QuantizeSingle(&c[0][0], mtx);
+ c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE);
+ err1 = QuantizeSingle(&c[1][0], mtx);
+ c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE);
+ err2 = QuantizeSingle(&c[2][0], mtx);
+ c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
+ err3 = QuantizeSingle(&c[3][0], mtx);
+ // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence
+ // err >> DSCALE will fit in an int8_t type if DSCALE>=1.
+ assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127);
+ rd->derr[ch][0] = (int8_t)err1;
+ rd->derr[ch][1] = (int8_t)err2;
+ rd->derr[ch][2] = (int8_t)err3;
+ }
+}
+
+static void StoreDiffusionErrors(VP8EncIterator* const it,
+ const VP8ModeScore* const rd) {
+ int ch;
+ for (ch = 0; ch <= 1; ++ch) {
+ int8_t* const top = it->top_derr_[it->x_][ch];
+ int8_t* const left = it->left_derr_[ch];
+ left[0] = rd->derr[ch][0]; // restore err1
+ left[1] = 3 * rd->derr[ch][2] >> 2; // ... 3/4th of err3
+ top[0] = rd->derr[ch][1]; // ... err2
+ top[1] = rd->derr[ch][2] - left[1]; // ... 1/4th of err3.
+ }
+}
+
+#undef C1
+#undef C2
+#undef DSHIFT
+#undef DSCALE
+
+//------------------------------------------------------------------------------
+
static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
uint8_t* const yuv_out, int mode) {
const VP8Encoder* const enc = it->enc_;
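The CorrectDCValues()/StoreDiffusionErrors() pair added above implements a small error-diffusion scheme over the 2x2 grid of 4x4 blocks: each DC coefficient is first corrected by C1/16 of the error coming from the block above and C2/16 of the error coming from the left, then quantized, and its own error is forwarded. Since C1 + C2 == 15 rather than 16, only 15/16th of each error is propagated, the deliberate under-correction called out in the comment. A self-contained toy rendering of the same arithmetic (the round-to-nearest quantizer and all numbers are illustrative, not libwebp's):

    #include <stdio.h>

    enum { C1 = 7, C2 = 8, DSHIFT = 4, DSCALE = 1 };

    /* Toy quantizer: snap to the nearest multiple of 'q' and return the
     * error, pre-shifted by DSCALE as in QuantizeSingle() above. */
    static int QuantizeToy(int* const v, int q) {
      const int qV = ((*v + q / 2) / q) * q;
      const int err = *v - qV;
      *v = qV;
      return err >> DSCALE;
    }

    int main(void) {
      /* DC coefficients of four 4x4 blocks in a 2x2 grid (cf. the
       * tmp[0..3] diagram above); top[]/left[] carry errors in from
       * neighbouring blocks, zero here for simplicity. */
      int dc[4] = { 100, 108, 99, 103 };
      const int top[2] = { 0, 0 }, left[2] = { 0, 0 };
      const int q = 17;
      int err0, err1, err2, err3;

      dc[0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE);
      err0 = QuantizeToy(&dc[0], q);   /* flows right (C2) and down (C1) */
      dc[1] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE);
      err1 = QuantizeToy(&dc[1], q);
      dc[2] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE);
      err2 = QuantizeToy(&dc[2], q);
      dc[3] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE);
      err3 = QuantizeToy(&dc[3], q);

      printf("dc = {%d %d %d %d}, err = {%d %d %d %d}\n",
             dc[0], dc[1], dc[2], dc[3], err0, err1, err2, err3);
      return 0;
    }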
@@ -839,6 +919,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
for (n = 0; n < 8; n += 2) {
VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
}
+ if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd);
+
if (DO_TRELLIS_UV && it->do_trellis_) {
int ch, x, y;
for (ch = 0, n = 0; ch <= 2; ch += 2) {
@@ -896,19 +978,6 @@ static void SwapOut(VP8EncIterator* const it) {
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}
-static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
- score_t score = 0;
- while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
- int i;
- for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
- score += (levels[i] != 0);
- if (score > thresh) return 0;
- }
- levels += 16;
- }
- return 1;
-}
-
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
@@ -919,6 +988,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
VP8ModeScore* rd_cur = &rd_tmp;
VP8ModeScore* rd_best = rd;
int mode;
+ int is_flat = IsFlatSource16(it->yuv_in_ + Y_OFF_ENC);
rd->mode_i16 = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
@@ -934,10 +1004,14 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
rd_cur->H = VP8FixedCostsI16[mode];
rd_cur->R = VP8GetCostLuma16(it, rd_cur);
- if (mode > 0 &&
- IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
- // penalty to avoid flat area to be mispredicted by complex mode
- rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
+ if (is_flat) {
+ // refine the first impression (which was in pixel space)
+ is_flat = IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16);
+ if (is_flat) {
+ // Block is very flat. We put emphasis on the distortion being very low!
+ rd_cur->D *= 2;
+ rd_cur->SD *= 2;
+ }
}
// Since we always examine Intra16 first, we can overwrite *rd directly.
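Doubling D (the SSE distortion) and SD (the spectral disto term) is meaningful because of how the partial scores are later folded into a single figure; elsewhere in quant_enc.c the combination is, in essence (a paraphrase of the existing SetRDScore() helper, not part of this patch):

    // Paraphrase of quant_enc.c's score combination: rate terms (R, H)
    // are weighted by lambda, distortion terms (D, SD) by RD_DISTO_MULT.
    // Doubling D and SD for a flat block therefore steers the search
    // toward the mode with genuinely low distortion.
    static void SetRDScoreSketch(int lambda, VP8ModeScore* const rd) {
      rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
    }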
@@ -1018,7 +1092,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
: 0;
rd_tmp.H = mode_costs[mode];
- // Add flatness penalty
+ // Add flatness penalty, to avoid a flat area being mispredicted
+ // by a complex mode.
if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
} else {
@@ -1101,6 +1176,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
CopyScore(&rd_best, &rd_uv);
rd->mode_uv = mode;
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
+ if (it->top_derr_ != NULL) {
+ memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
+ }
SwapPtr(&dst, &tmp_dst);
}
}
@@ -1109,6 +1187,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
if (dst != dst0) { // copy 16x8 block if needed
VP8Copy16x8(dst, dst0);
}
+ if (it->top_derr_ != NULL) { // store diffusion errors for next block
+ StoreDiffusionErrors(it, rd);
+ }
}
//------------------------------------------------------------------------------
@@ -1162,16 +1243,24 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT
+ const score_t score = (score_t)VP8SSE16x16(src, ref) * RD_DISTO_MULT
+ VP8FixedCostsI16[mode] * lambda_d_i16;
if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
continue;
}
+
if (score < best_score) {
best_mode = mode;
best_score = score;
}
}
+ if (it->x_ == 0 || it->y_ == 0) {
+ // avoid starting a checkerboard resonance from the border. See bug #432.
+ if (IsFlatSource16(src)) {
+ best_mode = (it->x_ == 0) ? 0 : 2;
+ try_both_modes = 0; // stick to i16
+ }
+ }
VP8SetIntra16Mode(it, best_mode);
// we'll reconstruct later, if i16 mode actually gets selected
}
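The pinned values follow the {DC_PRED, TM_PRED, V_PRED, H_PRED} ordering documented earlier in this diff: a flat source macroblock in the leftmost column is forced to DC prediction, and one in the top row to vertical prediction, so an alternating-mode checkerboard cannot seed itself from the frame border. Spelled out (illustrative restatement only):

    // Illustrative restatement, assuming mode indices follow the
    // {DC_PRED, TM_PRED, V_PRED, H_PRED} table above. Reached only
    // when x == 0 or y == 0 and the source block is flat.
    static int PinnedBorderMode(int x, int y) {
      (void)y;
      return (x == 0) ? 0 /* DC_PRED */ : 2 /* V_PRED */;
    }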