summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/libwebp/src/enc/quant.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/libwebp/src/enc/quant.c')
-rw-r--r--src/3rdparty/libwebp/src/enc/quant.c277
1 files changed, 180 insertions, 97 deletions
diff --git a/src/3rdparty/libwebp/src/enc/quant.c b/src/3rdparty/libwebp/src/enc/quant.c
index 9130a41..dd6885a 100644
--- a/src/3rdparty/libwebp/src/enc/quant.c
+++ b/src/3rdparty/libwebp/src/enc/quant.c
@@ -30,7 +30,7 @@
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
// power-law modulation. Must be strictly less than 1.
-#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
+#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision
// number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions)
@@ -41,6 +41,8 @@
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
+#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)
+
// #define DEBUG_BLOCK
//------------------------------------------------------------------------------
@@ -54,15 +56,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int i, j;
const int is_i16 = (it->mb_->type_ == 1);
+ const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
+ const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
+ const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
+ const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
printf("SOURCE / OUTPUT / ABS DELTA\n");
- for (j = 0; j < 24; ++j) {
- if (j == 16) printf("\n"); // newline before the U/V block
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
+ for (j = 0; j < 16; ++j) {
+ for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
printf(" ");
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
+ for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
printf(" ");
for (i = 0; i < 16; ++i) {
- printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
+ printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
+ }
+ printf("\n");
+ }
+ printf("\n"); // newline before the U/V block
+ for (j = 0; j < 8; ++j) {
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
+ printf(" ");
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
+ printf(" ");
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
+ printf(" ");
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
+ printf(" ");
+ for (i = 0; i < 8; ++i) {
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
+ }
+ printf(" ");
+ for (i = 8; i < 16; ++i) {
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
}
printf("\n");
}
@@ -444,15 +468,12 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
// Quantize
// Layout:
-// +----+
-// |YYYY| 0
-// |YYYY| 4
-// |YYYY| 8
-// |YYYY| 12
-// +----+
-// |UUVV| 16
-// |UUVV| 20
-// +----+
+// +----+----+
+// |YYYY|UUVV| 0
+// |YYYY|UUVV| 4
+// |YYYY|....| 8
+// |YYYY|....| 12
+// +----+----+
const int VP8Scan[16] = { // Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
@@ -538,13 +559,12 @@ typedef struct {
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
- // TODO: incorporate the "* 256" in the tables?
- rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
+ rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
}
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
score_t distortion) {
- return rate * lambda + 256 * distortion;
+ return rate * lambda + RD_DISTO_MULT * distortion;
}
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
@@ -553,7 +573,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
const VP8Matrix* const mtx,
int lambda) {
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
- const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
+ CostArrayPtr const costs =
+ (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
const int first = (coeff_type == 0) ? 1 : 0;
Node nodes[16][NUM_NODES];
ScoreState score_states[2][NUM_NODES];
@@ -590,7 +611,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
- ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
+ ss_cur[m].costs = costs[first][ctx0];
}
}
@@ -624,7 +645,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
int best_prev = 0; // default, in case
ss_cur[m].score = MAX_COST;
- ss_cur[m].costs = costs[band][ctx];
+ ss_cur[m].costs = costs[n + 1][ctx];
if (level > MAX_LEVEL || level < 0) { // node is dead?
continue;
}
@@ -719,14 +740,14 @@ static int ReconstructIntra16(VP8EncIterator* const it,
int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
int nz = 0;
int n;
int16_t tmp[16][16], dc_tmp[16];
- for (n = 0; n < 16; ++n) {
- VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+ for (n = 0; n < 16; n += 2) {
+ VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
}
VP8FTransformWHT(tmp[0], dc_tmp);
nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
@@ -746,12 +767,13 @@ static int ReconstructIntra16(VP8EncIterator* const it,
}
}
} else {
- for (n = 0; n < 16; ++n) {
+ for (n = 0; n < 16; n += 2) {
// Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
- tmp[n][0] = 0;
- nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
- assert(rd->y_ac_levels[n][0] == 0);
+ tmp[n][0] = tmp[n + 1][0] = 0;
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+ assert(rd->y_ac_levels[n + 0][0] == 0);
+ assert(rd->y_ac_levels[n + 1][0] == 0);
}
}
@@ -792,14 +814,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
uint8_t* const yuv_out, int mode) {
const VP8Encoder* const enc = it->enc_;
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + U_OFF;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
int nz = 0;
int n;
int16_t tmp[8][16];
- for (n = 0; n < 8; ++n) {
- VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
+ for (n = 0; n < 8; n += 2) {
+ VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
}
if (DO_TRELLIS_UV && it->do_trellis_) {
int ch, x, y;
@@ -816,8 +838,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
}
}
} else {
- for (n = 0; n < 8; ++n) {
- nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+ for (n = 0; n < 8; n += 2) {
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
}
}
@@ -842,6 +864,12 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
}
+static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
+ VP8ModeScore* const tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
static void SwapPtr(uint8_t** a, uint8_t** b) {
uint8_t* const tmp = *a;
*a = *b;
@@ -865,46 +893,47 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
return 1;
}
-static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i16_;
const int tlambda = dqm->tlambda_;
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
- VP8ModeScore rd16;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+ VP8ModeScore rd_tmp;
+ VP8ModeScore* rd_cur = &rd_tmp;
+ VP8ModeScore* rd_best = rd;
int mode;
rd->mode_i16 = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
- uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
- int nz;
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
+ rd_cur->mode_i16 = mode;
// Reconstruct
- nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
+ rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
// Measure RD-score
- rd16.D = VP8SSE16x16(src, tmp_dst);
- rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
- : 0;
- rd16.H = VP8FixedCostsI16[mode];
- rd16.R = VP8GetCostLuma16(it, &rd16);
+ rd_cur->D = VP8SSE16x16(src, tmp_dst);
+ rd_cur->SD =
+ tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
+ rd_cur->H = VP8FixedCostsI16[mode];
+ rd_cur->R = VP8GetCostLuma16(it, rd_cur);
if (mode > 0 &&
- IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
+ IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
// penalty to avoid flat area to be mispredicted by complex mode
- rd16.R += FLATNESS_PENALTY * kNumBlocks;
+ rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
}
// Since we always examine Intra16 first, we can overwrite *rd directly.
- SetRDScore(lambda, &rd16);
- if (mode == 0 || rd16.score < rd->score) {
- CopyScore(rd, &rd16);
- rd->mode_i16 = mode;
- rd->nz = nz;
- memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
- memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
+ SetRDScore(lambda, rd_cur);
+ if (mode == 0 || rd_cur->score < rd_best->score) {
+ SwapModeScore(&rd_cur, &rd_best);
SwapOut(it);
}
}
+ if (rd_best != rd) {
+ memcpy(rd, rd_best, sizeof(*rd));
+ }
SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
VP8SetIntra16Mode(it, rd->mode_i16);
@@ -933,8 +962,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_i4_;
const int tlambda = dqm->tlambda_;
- const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
- uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+ const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
+ uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
int total_header_bits = 0;
VP8ModeScore rd_best;
@@ -972,17 +1001,28 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
: 0;
rd_tmp.H = mode_costs[mode];
- rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
+
+ // Add flatness penalty
if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
- rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
+ rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
+ } else {
+ rd_tmp.R = 0;
}
+ // early-out check
SetRDScore(lambda, &rd_tmp);
+ if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
+
+ // finish computing score
+ rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
+ SetRDScore(lambda, &rd_tmp);
+
if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
CopyScore(&rd_i4, &rd_tmp);
best_mode = mode;
SwapPtr(&tmp_dst, &best_block);
- memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
+ memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
+ sizeof(rd_best.y_ac_levels[it->i4_]));
}
}
SetRDScore(dqm->lambda_mode_, &rd_i4);
@@ -1016,9 +1056,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
const int kNumBlocks = 8;
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
const int lambda = dqm->lambda_uv_;
- const uint8_t* const src = it->yuv_in_ + U_OFF;
- uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
- uint8_t* const dst0 = it->yuv_out_ + U_OFF;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+ uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
+ uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
+ uint8_t* dst = dst0;
VP8ModeScore rd_best;
int mode;
@@ -1032,7 +1073,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
// Compute RD-score
rd_uv.D = VP8SSE16x8(src, tmp_dst);
- rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
+ rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rd_uv.H = VP8FixedCostsUV[mode];
rd_uv.R = VP8GetCostUV(it, &rd_uv);
if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
@@ -1044,11 +1085,14 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
CopyScore(&rd_best, &rd_uv);
rd->mode_uv = mode;
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
- memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
+ SwapPtr(&dst, &tmp_dst);
}
}
VP8SetIntraUVMode(it, rd->mode_uv);
AddScore(rd, &rd_best);
+ if (dst != dst0) { // copy 16x8 block if needed
+ VP8Copy16x8(dst, dst0);
+ }
}
//------------------------------------------------------------------------------
@@ -1060,35 +1104,41 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
int nz = 0;
if (is_i16) {
- nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
} else {
VP8IteratorStartI4(it);
do {
const int mode =
it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
- uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+ uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
VP8MakeIntra4Preds(it);
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
src, dst, mode) << it->i4_;
- } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
+ } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
}
- nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
rd->nz = nz;
}
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
-static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
- const int is_i16 = (it->mb_->type_ == 1);
+static void RefineUsingDistortion(VP8EncIterator* const it,
+ int try_both_modes, int refine_uv_mode,
+ VP8ModeScore* const rd) {
score_t best_score = MAX_COST;
+ score_t score_i4 = (score_t)I4_PENALTY;
+ int16_t tmp_levels[16][16];
+ uint8_t modes_i4[16];
+ int nz = 0;
+ int mode;
+ int is_i16 = try_both_modes || (it->mb_->type_ == 1);
- if (try_both_i4_i16 || is_i16) {
- int mode;
+ if (is_i16) { // First, evaluate Intra16 distortion
int best_mode = -1;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
- const uint8_t* const src = it->yuv_in_ + Y_OFF;
const score_t score = VP8SSE16x16(src, ref);
if (score < best_score) {
best_mode = mode;
@@ -1096,39 +1146,72 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
}
}
VP8SetIntra16Mode(it, best_mode);
+ // we'll reconstruct later, if i16 mode actually gets selected
}
- if (try_both_i4_i16 || !is_i16) {
- uint8_t modes_i4[16];
+
+ // Next, evaluate Intra4
+ if (try_both_modes || !is_i16) {
// We don't evaluate the rate here, but just account for it through a
// constant penalty (i4 mode usually needs more bits compared to i16).
- score_t score_i4 = (score_t)I4_PENALTY;
-
+ is_i16 = 0;
VP8IteratorStartI4(it);
do {
- int mode;
- int best_sub_mode = -1;
- score_t best_sub_score = MAX_COST;
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+ int best_i4_mode = -1;
+ score_t best_i4_score = MAX_COST;
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
- // TODO(skal): we don't really need the prediction pixels here,
- // but just the distortion against 'src'.
VP8MakeIntra4Preds(it);
for (mode = 0; mode < NUM_BMODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
const score_t score = VP8SSE4x4(src, ref);
- if (score < best_sub_score) {
- best_sub_mode = mode;
- best_sub_score = score;
+ if (score < best_i4_score) {
+ best_i4_mode = mode;
+ best_i4_score = score;
}
}
- modes_i4[it->i4_] = best_sub_mode;
- score_i4 += best_sub_score;
- if (score_i4 >= best_score) break;
- } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
- if (score_i4 < best_score) {
- VP8SetIntra4Mode(it, modes_i4);
+ modes_i4[it->i4_] = best_i4_mode;
+ score_i4 += best_i4_score;
+ if (score_i4 >= best_score) {
+ // Intra4 won't be better than Intra16. Bail out and pick Intra16.
+ is_i16 = 1;
+ break;
+ } else { // reconstruct partial block inside yuv_out2_ buffer
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
+ nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
+ src, tmp_dst, best_i4_mode) << it->i4_;
+ }
+ } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
+ }
+
+ // Final reconstruction, depending on which mode is selected.
+ if (!is_i16) {
+ VP8SetIntra4Mode(it, modes_i4);
+ memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
+ SwapOut(it);
+ best_score = score_i4;
+ } else {
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
+ }
+
+ // ... and UV!
+ if (refine_uv_mode) {
+ int best_mode = -1;
+ score_t best_uv_score = MAX_COST;
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+ const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+ const score_t score = VP8SSE16x8(src, ref);
+ if (score < best_uv_score) {
+ best_mode = mode;
+ best_uv_score = score;
+ }
}
+ VP8SetIntraUVMode(it, best_mode);
}
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
+
+ rd->nz = nz;
+ rd->score = best_score;
}
//------------------------------------------------------------------------------
@@ -1158,13 +1241,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
SimpleQuantize(it, rd);
}
} else {
- // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
- // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
- DistoRefine(it, (method >= 2));
- SimpleQuantize(it, rd);
+ // At this point we have heuristically decided intra16 / intra4.
+ // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
+ // For method <= 1, we don't re-examine the decision but just go ahead with
+ // quantization/reconstruction.
+ RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
}
is_skipped = (rd->nz == 0);
VP8SetSkip(it, is_skipped);
return is_skipped;
}
-