diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc.c')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/enc.c | 292 |
1 files changed, 102 insertions, 190 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/enc.c b/src/3rdparty/libwebp/src/dsp/enc.c index f31bc6d..1c807f1 100644 --- a/src/3rdparty/libwebp/src/dsp/enc.c +++ b/src/3rdparty/libwebp/src/dsp/enc.c @@ -14,16 +14,18 @@ #include <assert.h> #include <stdlib.h> // for abs() -#include "./dsp.h" -#include "../enc/vp8i_enc.h" +#include "src/dsp/dsp.h" +#include "src/enc/vp8i_enc.h" static WEBP_INLINE uint8_t clip_8b(int v) { return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; } +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE int clip_max(int v, int max) { return (v > max) ? max : v; } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Compute susceptibility based on DCT-coeff histograms: @@ -56,9 +58,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], histo->last_non_zero = last_non_zero; } -static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, - int start_block, int end_block, - VP8Histogram* const histo) { +#if !WEBP_NEON_OMIT_C_CODE +static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred, + int start_block, int end_block, + VP8Histogram* const histo) { int j; int distribution[MAX_COEFF_THRESH + 1] = { 0 }; for (j = start_block; j < end_block; ++j) { @@ -76,6 +79,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, } VP8SetHistogramData(distribution, histo); } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // run-time tables (~4k) @@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) +#if !WEBP_NEON_OMIT_C_CODE + #define STORE(x, y, v) \ dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) @@ -140,15 +146,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, } } -static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, - int do_two) { +static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst, + int do_two) { ITransformOne(ref, in, dst); if (do_two) { ITransformOne(ref + 4, in + 16, dst + 4); } } -static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { +static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) { int i; int tmp[16]; for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { @@ -176,13 +182,16 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); } } +#endif // !WEBP_NEON_OMIT_C_CODE -static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) { +static void FTransform2_C(const uint8_t* src, const uint8_t* ref, + int16_t* out) { VP8FTransform(src, ref, out); VP8FTransform(src + 4, ref + 4, out + 16); } -static void FTransformWHT(const int16_t* in, int16_t* out) { +#if !WEBP_NEON_OMIT_C_CODE +static void FTransformWHT_C(const int16_t* in, int16_t* out) { // input is 12b signed int32_t tmp[16]; int i; @@ -211,6 +220,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) { out[12 + i] = b3 >> 1; } } +#endif // !WEBP_NEON_OMIT_C_CODE #undef MUL #undef STORE @@ -303,8 +313,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, //------------------------------------------------------------------------------ // Chroma 8x8 prediction (paragraph 12.2) -static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, - const uint8_t* top) { +static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left, + const uint8_t* top) { // U block DCMode(C8DC8 + dst, left, top, 8, 8, 4); VerticalPred(C8VE8 + dst, top, 8); @@ -323,8 +333,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, //------------------------------------------------------------------------------ // luma 16x16 prediction (paragraph 12.3) -static void Intra16Preds(uint8_t* dst, - const uint8_t* left, const uint8_t* top) { +static void Intra16Preds_C(uint8_t* dst, + const uint8_t* left, const uint8_t* top) { DCMode(I16DC16 + dst, left, top, 16, 16, 5); VerticalPred(I16VE16 + dst, top, 16); HorizontalPred(I16HE16 + dst, left, 16); @@ -507,7 +517,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) { // Left samples are top[-5 .. -2], top_left is top[-1], top are // located at top[0..3], and top right is top[4..7] -static void Intra4Preds(uint8_t* dst, const uint8_t* top) { +static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) { DC4(I4DC4 + dst, top); TM4(I4TM4 + dst, top); VE4(I4VE4 + dst, top); @@ -523,6 +533,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) { //------------------------------------------------------------------------------ // Metric +#if !WEBP_NEON_OMIT_C_CODE static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) { int count = 0; @@ -538,20 +549,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, return count; } -static int SSE16x16(const uint8_t* a, const uint8_t* b) { +static int SSE16x16_C(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 16, 16); } -static int SSE16x8(const uint8_t* a, const uint8_t* b) { +static int SSE16x8_C(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 16, 8); } -static int SSE8x8(const uint8_t* a, const uint8_t* b) { +static int SSE8x8_C(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 8, 8); } -static int SSE4x4(const uint8_t* a, const uint8_t* b) { +static int SSE4x4_C(const uint8_t* a, const uint8_t* b) { return GetSSE(a, b, 4, 4); } +#endif // !WEBP_NEON_OMIT_C_CODE -static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) { +static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) { int k, x, y; for (k = 0; k < 4; ++k) { uint32_t avg = 0; @@ -571,6 +583,7 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) { // We try to match the spectral content (weighted) between source and // reconstructed samples. +#if !WEBP_NEON_OMIT_C_CODE // Hadamard transform // Returns the weighted sum of the absolute value of transformed coefficients. // w[] contains a row-major 4 by 4 symmetric matrix. @@ -608,24 +621,25 @@ static int TTransform(const uint8_t* in, const uint16_t* w) { return sum; } -static int Disto4x4(const uint8_t* const a, const uint8_t* const b, - const uint16_t* const w) { +static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b, + const uint16_t* const w) { const int sum1 = TTransform(a, w); const int sum2 = TTransform(b, w); return abs(sum2 - sum1) >> 5; } -static int Disto16x16(const uint8_t* const a, const uint8_t* const b, - const uint16_t* const w) { +static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b, + const uint16_t* const w) { int D = 0; int x, y; for (y = 0; y < 16 * BPS; y += 4 * BPS) { for (x = 0; x < 16; x += 4) { - D += Disto4x4(a + x + y, b + x + y, w); + D += Disto4x4_C(a + x + y, b + x + y, w); } } return D; } +#endif // !WEBP_NEON_OMIT_C_CODE //------------------------------------------------------------------------------ // Quantization @@ -636,8 +650,8 @@ static const uint8_t kZigzag[16] = { }; // Simple quantization -static int QuantizeBlock(int16_t in[16], int16_t out[16], - const VP8Matrix* const mtx) { +static int QuantizeBlock_C(int16_t in[16], int16_t out[16], + const VP8Matrix* const mtx) { int last = -1; int n; for (n = 0; n < 16; ++n) { @@ -662,13 +676,15 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return (last >= 0); } -static int Quantize2Blocks(int16_t in[32], int16_t out[32], - const VP8Matrix* const mtx) { +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC +static int Quantize2Blocks_C(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { int nz; nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; return nz; } +#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC //------------------------------------------------------------------------------ // Block copy @@ -682,149 +698,15 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { } } -static void Copy4x4(const uint8_t* src, uint8_t* dst) { +static void Copy4x4_C(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4, 4); } -static void Copy16x8(const uint8_t* src, uint8_t* dst) { +static void Copy16x8_C(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 16, 8); } //------------------------------------------------------------------------------ -// SSIM / PSNR - -// hat-shaped filter. Sum of coefficients is equal to 16. -static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = { - 1, 2, 3, 4, 3, 2, 1 -}; -static const uint32_t kWeightSum = 16 * 16; // sum{kWeight}^2 - -static WEBP_INLINE double SSIMCalculation( - const VP8DistoStats* const stats, uint32_t N /*num samples*/) { - const uint32_t w2 = N * N; - const uint32_t C1 = 20 * w2; - const uint32_t C2 = 60 * w2; - const uint32_t C3 = 8 * 8 * w2; // 'dark' limit ~= 6 - const uint64_t xmxm = (uint64_t)stats->xm * stats->xm; - const uint64_t ymym = (uint64_t)stats->ym * stats->ym; - if (xmxm + ymym >= C3) { - const int64_t xmym = (int64_t)stats->xm * stats->ym; - const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative - const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm; - const uint64_t syy = (uint64_t)stats->yym * N - ymym; - // we descale by 8 to prevent overflow during the fnum/fden multiply. - const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8; - const uint64_t den_S = (sxx + syy + C2) >> 8; - const uint64_t fnum = (2 * xmym + C1) * num_S; - const uint64_t fden = (xmxm + ymym + C1) * den_S; - const double r = (double)fnum / fden; - assert(r >= 0. && r <= 1.0); - return r; - } - return 1.; // area is too dark to contribute meaningfully -} - -double VP8SSIMFromStats(const VP8DistoStats* const stats) { - return SSIMCalculation(stats, kWeightSum); -} - -double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) { - return SSIMCalculation(stats, stats->w); -} - -static double SSIMGetClipped_C(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2, - int xo, int yo, int W, int H) { - VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 }; - const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL; - const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1 - : yo + VP8_SSIM_KERNEL; - const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL; - const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1 - : xo + VP8_SSIM_KERNEL; - int x, y; - src1 += ymin * stride1; - src2 += ymin * stride2; - for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) { - for (x = xmin; x <= xmax; ++x) { - const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo] - * kWeight[VP8_SSIM_KERNEL + y - yo]; - const uint32_t s1 = src1[x]; - const uint32_t s2 = src2[x]; - stats.w += w; - stats.xm += w * s1; - stats.ym += w * s2; - stats.xxm += w * s1 * s1; - stats.xym += w * s1 * s2; - stats.yym += w * s2 * s2; - } - } - return VP8SSIMFromStatsClipped(&stats); -} - -static double SSIMGet_C(const uint8_t* src1, int stride1, - const uint8_t* src2, int stride2) { - VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 }; - int x, y; - for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) { - for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) { - const uint32_t w = kWeight[x] * kWeight[y]; - const uint32_t s1 = src1[x]; - const uint32_t s2 = src2[x]; - stats.xm += w * s1; - stats.ym += w * s2; - stats.xxm += w * s1 * s1; - stats.xym += w * s1 * s2; - stats.yym += w * s2 * s2; - } - } - return VP8SSIMFromStats(&stats); -} - -//------------------------------------------------------------------------------ - -static uint32_t AccumulateSSE(const uint8_t* src1, - const uint8_t* src2, int len) { - int i; - uint32_t sse2 = 0; - assert(len <= 65535); // to ensure that accumulation fits within uint32_t - for (i = 0; i < len; ++i) { - const int32_t diff = src1[i] - src2[i]; - sse2 += diff * diff; - } - return sse2; -} - -//------------------------------------------------------------------------------ - -VP8SSIMGetFunc VP8SSIMGet; -VP8SSIMGetClippedFunc VP8SSIMGetClipped; -VP8AccumulateSSEFunc VP8AccumulateSSE; - -extern void VP8SSIMDspInitSSE2(void); - -static volatile VP8CPUInfo ssim_last_cpuinfo_used = - (VP8CPUInfo)&ssim_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) { - if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return; - - VP8SSIMGetClipped = SSIMGetClipped_C; - VP8SSIMGet = SSIMGet_C; - - VP8AccumulateSSE = AccumulateSSE; - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8SSIMDspInitSSE2(); - } -#endif - } - - ssim_last_cpuinfo_used = VP8GetCPUInfo; -} - -//------------------------------------------------------------------------------ // Initialization // Speed-critical function pointers. We have to initialize them to the default @@ -868,26 +750,32 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { InitTables(); // default C implementations - VP8CollectHistogram = CollectHistogram; - VP8ITransform = ITransform; - VP8FTransform = FTransform; - VP8FTransform2 = FTransform2; - VP8FTransformWHT = FTransformWHT; - VP8EncPredLuma4 = Intra4Preds; - VP8EncPredLuma16 = Intra16Preds; - VP8EncPredChroma8 = IntraChromaPreds; - VP8SSE16x16 = SSE16x16; - VP8SSE8x8 = SSE8x8; - VP8SSE16x8 = SSE16x8; - VP8SSE4x4 = SSE4x4; - VP8TDisto4x4 = Disto4x4; - VP8TDisto16x16 = Disto16x16; - VP8Mean16x4 = Mean16x4; - VP8EncQuantizeBlock = QuantizeBlock; - VP8EncQuantize2Blocks = Quantize2Blocks; - VP8EncQuantizeBlockWHT = QuantizeBlock; - VP8Copy4x4 = Copy4x4; - VP8Copy16x8 = Copy16x8; +#if !WEBP_NEON_OMIT_C_CODE + VP8ITransform = ITransform_C; + VP8FTransform = FTransform_C; + VP8FTransformWHT = FTransformWHT_C; + VP8TDisto4x4 = Disto4x4_C; + VP8TDisto16x16 = Disto16x16_C; + VP8CollectHistogram = CollectHistogram_C; + VP8SSE16x16 = SSE16x16_C; + VP8SSE16x8 = SSE16x8_C; + VP8SSE8x8 = SSE8x8_C; + VP8SSE4x4 = SSE4x4_C; +#endif + +#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC + VP8EncQuantizeBlock = QuantizeBlock_C; + VP8EncQuantize2Blocks = Quantize2Blocks_C; +#endif + + VP8FTransform2 = FTransform2_C; + VP8EncPredLuma4 = Intra4Preds_C; + VP8EncPredLuma16 = Intra16Preds_C; + VP8EncPredChroma8 = IntraChromaPreds_C; + VP8Mean16x4 = Mean16x4_C; + VP8EncQuantizeBlockWHT = QuantizeBlock_C; + VP8Copy4x4 = Copy4x4_C; + VP8Copy16x8 = Copy16x8_C; // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { @@ -906,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { VP8EncDspInitAVX2(); } #endif -#if defined(WEBP_USE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8EncDspInitNEON(); - } -#endif #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { VP8EncDspInitMIPS32(); @@ -927,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { } #endif } + +#if defined(WEBP_USE_NEON) + if (WEBP_NEON_OMIT_C_CODE || + (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { + VP8EncDspInitNEON(); + } +#endif + + assert(VP8ITransform != NULL); + assert(VP8FTransform != NULL); + assert(VP8FTransformWHT != NULL); + assert(VP8TDisto4x4 != NULL); + assert(VP8TDisto16x16 != NULL); + assert(VP8CollectHistogram != NULL); + assert(VP8SSE16x16 != NULL); + assert(VP8SSE16x8 != NULL); + assert(VP8SSE8x8 != NULL); + assert(VP8SSE4x4 != NULL); + assert(VP8EncQuantizeBlock != NULL); + assert(VP8EncQuantize2Blocks != NULL); + assert(VP8FTransform2 != NULL); + assert(VP8EncPredLuma4 != NULL); + assert(VP8EncPredLuma16 != NULL); + assert(VP8EncPredChroma8 != NULL); + assert(VP8Mean16x4 != NULL); + assert(VP8EncQuantizeBlockWHT != NULL); + assert(VP8Copy4x4 != NULL); + assert(VP8Copy16x8 != NULL); + enc_last_cpuinfo_used = VP8GetCPUInfo; } |