Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc.c')
-rw-r--r--  src/3rdparty/libwebp/src/dsp/enc.c  292
1 file changed, 102 insertions, 190 deletions
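
In outline, the patch does four things: it switches the includes to paths rooted at the source tree (src/dsp/dsp.h, src/enc/vp8i_enc.h); it renames the plain-C kernels with a _C suffix and fences them with !WEBP_NEON_OMIT_C_CODE so they can be compiled out where the NEON versions are mandatory; it removes the SSIM/PSNR helpers from this file; and it moves the NEON initialization outside the VP8GetCPUInfo check, asserting afterwards that every dispatch pointer ended up non-NULL. The fragment below is a minimal sketch of that dispatch pattern, not code from the patch: only WEBP_NEON_OMIT_C_CODE and WEBP_USE_NEON are the real macros, every other name is illustrative.

/* Minimal, self-contained sketch of the C-vs-NEON dispatch pattern this
 * change adopts. Illustrative names; not part of the patch. */
#include <assert.h>
#include <stdint.h>

#ifndef WEBP_NEON_OMIT_C_CODE
#define WEBP_NEON_OMIT_C_CODE 0   /* the real value comes from the dsp headers */
#endif

typedef int (*SseFunc)(const uint8_t* a, const uint8_t* b);
static SseFunc Sse4x4;            /* speed-critical dispatch pointer */

#if !WEBP_NEON_OMIT_C_CODE
static int Sse4x4_C(const uint8_t* a, const uint8_t* b) {
  int i, sum = 0;                 /* flat stand-in for the real BPS-strided kernel */
  for (i = 0; i < 16; ++i) {
    const int diff = a[i] - b[i];
    sum += diff * diff;
  }
  return sum;
}
#endif  /* !WEBP_NEON_OMIT_C_CODE */

#if defined(WEBP_USE_NEON)
extern int Sse4x4_NEON(const uint8_t* a, const uint8_t* b);
extern int CpuHasNEON(void);      /* stand-in for VP8GetCPUInfo(kNEON) */
#endif

void SseInit(void) {
#if !WEBP_NEON_OMIT_C_CODE
  Sse4x4 = Sse4x4_C;              /* C default, only assigned when it was compiled */
#endif
#if defined(WEBP_USE_NEON)
  if (WEBP_NEON_OMIT_C_CODE || CpuHasNEON()) {
    Sse4x4 = Sse4x4_NEON;         /* taken unconditionally when the C code is omitted */
  }
#endif
  assert(Sse4x4 != NULL);         /* mirrors the new asserts in VP8EncDspInit() */
}

The apparent intent is that NEON-only builds drop the C fallbacks from the binary entirely while the function pointers are still guaranteed to be populated, which is what the trailing asserts in the new VP8EncDspInit() check.
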
diff --git a/src/3rdparty/libwebp/src/dsp/enc.c b/src/3rdparty/libwebp/src/dsp/enc.c
index f31bc6d..1c807f1 100644
--- a/src/3rdparty/libwebp/src/dsp/enc.c
+++ b/src/3rdparty/libwebp/src/dsp/enc.c
@@ -14,16 +14,18 @@
#include <assert.h>
#include <stdlib.h> // for abs()
-#include "./dsp.h"
-#include "../enc/vp8i_enc.h"
+#include "src/dsp/dsp.h"
+#include "src/enc/vp8i_enc.h"
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
+#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int clip_max(int v, int max) {
return (v > max) ? max : v;
}
+#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
@@ -56,9 +58,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
histo->last_non_zero = last_non_zero;
}
-static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
- int start_block, int end_block,
- VP8Histogram* const histo) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
+ int start_block, int end_block,
+ VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
@@ -76,6 +79,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
}
VP8SetHistogramData(distribution, histo);
}
+#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// run-time tables (~4k)
@@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
+#if !WEBP_NEON_OMIT_C_CODE
+
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
@@ -140,15 +146,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
}
}
-static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
- int do_two) {
+static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+ int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
-static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
@@ -176,13 +182,16 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
}
}
+#endif // !WEBP_NEON_OMIT_C_CODE
-static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
+ int16_t* out) {
VP8FTransform(src, ref, out);
VP8FTransform(src + 4, ref + 4, out + 16);
}
-static void FTransformWHT(const int16_t* in, int16_t* out) {
+#if !WEBP_NEON_OMIT_C_CODE
+static void FTransformWHT_C(const int16_t* in, int16_t* out) {
// input is 12b signed
int32_t tmp[16];
int i;
@@ -211,6 +220,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
out[12 + i] = b3 >> 1;
}
}
+#endif // !WEBP_NEON_OMIT_C_CODE
#undef MUL
#undef STORE
@@ -303,8 +313,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
-static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
- const uint8_t* top) {
+static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
+ const uint8_t* top) {
// U block
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
VerticalPred(C8VE8 + dst, top, 8);
@@ -323,8 +333,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
-static void Intra16Preds(uint8_t* dst,
- const uint8_t* left, const uint8_t* top) {
+static void Intra16Preds_C(uint8_t* dst,
+ const uint8_t* left, const uint8_t* top) {
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
VerticalPred(I16VE16 + dst, top, 16);
HorizontalPred(I16HE16 + dst, left, 16);
@@ -507,7 +517,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
-static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@@ -523,6 +533,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
//------------------------------------------------------------------------------
// Metric
+#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
int w, int h) {
int count = 0;
@@ -538,20 +549,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
return count;
}
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 16, 16);
}
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 16, 8);
}
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 8, 8);
}
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 4, 4);
}
+#endif // !WEBP_NEON_OMIT_C_CODE
-static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
+static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
int k, x, y;
for (k = 0; k < 4; ++k) {
uint32_t avg = 0;
@@ -571,6 +583,7 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
+#if !WEBP_NEON_OMIT_C_CODE
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
@@ -608,24 +621,25 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
return sum;
}
-static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
- const uint16_t* const w) {
+static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
+ const uint16_t* const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
return abs(sum2 - sum1) >> 5;
}
-static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
- const uint16_t* const w) {
+static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
+ const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
- D += Disto4x4(a + x + y, b + x + y, w);
+ D += Disto4x4_C(a + x + y, b + x + y, w);
}
}
return D;
}
+#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Quantization
@@ -636,8 +650,8 @@ static const uint8_t kZigzag[16] = {
};
// Simple quantization
-static int QuantizeBlock(int16_t in[16], int16_t out[16],
- const VP8Matrix* const mtx) {
+static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
+ const VP8Matrix* const mtx) {
int last = -1;
int n;
for (n = 0; n < 16; ++n) {
@@ -662,13 +676,15 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (last >= 0);
}
-static int Quantize2Blocks(int16_t in[32], int16_t out[32],
- const VP8Matrix* const mtx) {
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
+ const VP8Matrix* const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
+#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Block copy
@@ -682,149 +698,15 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
}
}
-static void Copy4x4(const uint8_t* src, uint8_t* dst) {
+static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 4, 4);
}
-static void Copy16x8(const uint8_t* src, uint8_t* dst) {
+static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 16, 8);
}
//------------------------------------------------------------------------------
-// SSIM / PSNR
-
-// hat-shaped filter. Sum of coefficients is equal to 16.
-static const uint32_t kWeight[2 * VP8_SSIM_KERNEL + 1] = {
- 1, 2, 3, 4, 3, 2, 1
-};
-static const uint32_t kWeightSum = 16 * 16; // sum{kWeight}^2
-
-static WEBP_INLINE double SSIMCalculation(
- const VP8DistoStats* const stats, uint32_t N /*num samples*/) {
- const uint32_t w2 = N * N;
- const uint32_t C1 = 20 * w2;
- const uint32_t C2 = 60 * w2;
- const uint32_t C3 = 8 * 8 * w2; // 'dark' limit ~= 6
- const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
- const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
- if (xmxm + ymym >= C3) {
- const int64_t xmym = (int64_t)stats->xm * stats->ym;
- const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative
- const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
- const uint64_t syy = (uint64_t)stats->yym * N - ymym;
- // we descale by 8 to prevent overflow during the fnum/fden multiply.
- const uint64_t num_S = (2 * (uint64_t)(sxy < 0 ? 0 : sxy) + C2) >> 8;
- const uint64_t den_S = (sxx + syy + C2) >> 8;
- const uint64_t fnum = (2 * xmym + C1) * num_S;
- const uint64_t fden = (xmxm + ymym + C1) * den_S;
- const double r = (double)fnum / fden;
- assert(r >= 0. && r <= 1.0);
- return r;
- }
- return 1.; // area is too dark to contribute meaningfully
-}
-
-double VP8SSIMFromStats(const VP8DistoStats* const stats) {
- return SSIMCalculation(stats, kWeightSum);
-}
-
-double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
- return SSIMCalculation(stats, stats->w);
-}
-
-static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
- const uint8_t* src2, int stride2,
- int xo, int yo, int W, int H) {
- VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
- const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;
- const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
- : yo + VP8_SSIM_KERNEL;
- const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;
- const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
- : xo + VP8_SSIM_KERNEL;
- int x, y;
- src1 += ymin * stride1;
- src2 += ymin * stride2;
- for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
- for (x = xmin; x <= xmax; ++x) {
- const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]
- * kWeight[VP8_SSIM_KERNEL + y - yo];
- const uint32_t s1 = src1[x];
- const uint32_t s2 = src2[x];
- stats.w += w;
- stats.xm += w * s1;
- stats.ym += w * s2;
- stats.xxm += w * s1 * s1;
- stats.xym += w * s1 * s2;
- stats.yym += w * s2 * s2;
- }
- }
- return VP8SSIMFromStatsClipped(&stats);
-}
-
-static double SSIMGet_C(const uint8_t* src1, int stride1,
- const uint8_t* src2, int stride2) {
- VP8DistoStats stats = { 0, 0, 0, 0, 0, 0 };
- int x, y;
- for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
- for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {
- const uint32_t w = kWeight[x] * kWeight[y];
- const uint32_t s1 = src1[x];
- const uint32_t s2 = src2[x];
- stats.xm += w * s1;
- stats.ym += w * s2;
- stats.xxm += w * s1 * s1;
- stats.xym += w * s1 * s2;
- stats.yym += w * s2 * s2;
- }
- }
- return VP8SSIMFromStats(&stats);
-}
-
-//------------------------------------------------------------------------------
-
-static uint32_t AccumulateSSE(const uint8_t* src1,
- const uint8_t* src2, int len) {
- int i;
- uint32_t sse2 = 0;
- assert(len <= 65535); // to ensure that accumulation fits within uint32_t
- for (i = 0; i < len; ++i) {
- const int32_t diff = src1[i] - src2[i];
- sse2 += diff * diff;
- }
- return sse2;
-}
-
-//------------------------------------------------------------------------------
-
-VP8SSIMGetFunc VP8SSIMGet;
-VP8SSIMGetClippedFunc VP8SSIMGetClipped;
-VP8AccumulateSSEFunc VP8AccumulateSSE;
-
-extern void VP8SSIMDspInitSSE2(void);
-
-static volatile VP8CPUInfo ssim_last_cpuinfo_used =
- (VP8CPUInfo)&ssim_last_cpuinfo_used;
-
-WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
- if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;
-
- VP8SSIMGetClipped = SSIMGetClipped_C;
- VP8SSIMGet = SSIMGet_C;
-
- VP8AccumulateSSE = AccumulateSSE;
- if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_SSE2)
- if (VP8GetCPUInfo(kSSE2)) {
- VP8SSIMDspInitSSE2();
- }
-#endif
- }
-
- ssim_last_cpuinfo_used = VP8GetCPUInfo;
-}
-
-//------------------------------------------------------------------------------
// Initialization
// Speed-critical function pointers. We have to initialize them to the default
@@ -868,26 +750,32 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
InitTables();
// default C implementations
- VP8CollectHistogram = CollectHistogram;
- VP8ITransform = ITransform;
- VP8FTransform = FTransform;
- VP8FTransform2 = FTransform2;
- VP8FTransformWHT = FTransformWHT;
- VP8EncPredLuma4 = Intra4Preds;
- VP8EncPredLuma16 = Intra16Preds;
- VP8EncPredChroma8 = IntraChromaPreds;
- VP8SSE16x16 = SSE16x16;
- VP8SSE8x8 = SSE8x8;
- VP8SSE16x8 = SSE16x8;
- VP8SSE4x4 = SSE4x4;
- VP8TDisto4x4 = Disto4x4;
- VP8TDisto16x16 = Disto16x16;
- VP8Mean16x4 = Mean16x4;
- VP8EncQuantizeBlock = QuantizeBlock;
- VP8EncQuantize2Blocks = Quantize2Blocks;
- VP8EncQuantizeBlockWHT = QuantizeBlock;
- VP8Copy4x4 = Copy4x4;
- VP8Copy16x8 = Copy16x8;
+#if !WEBP_NEON_OMIT_C_CODE
+ VP8ITransform = ITransform_C;
+ VP8FTransform = FTransform_C;
+ VP8FTransformWHT = FTransformWHT_C;
+ VP8TDisto4x4 = Disto4x4_C;
+ VP8TDisto16x16 = Disto16x16_C;
+ VP8CollectHistogram = CollectHistogram_C;
+ VP8SSE16x16 = SSE16x16_C;
+ VP8SSE16x8 = SSE16x8_C;
+ VP8SSE8x8 = SSE8x8_C;
+ VP8SSE4x4 = SSE4x4_C;
+#endif
+
+#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
+ VP8EncQuantizeBlock = QuantizeBlock_C;
+ VP8EncQuantize2Blocks = Quantize2Blocks_C;
+#endif
+
+ VP8FTransform2 = FTransform2_C;
+ VP8EncPredLuma4 = Intra4Preds_C;
+ VP8EncPredLuma16 = Intra16Preds_C;
+ VP8EncPredChroma8 = IntraChromaPreds_C;
+ VP8Mean16x4 = Mean16x4_C;
+ VP8EncQuantizeBlockWHT = QuantizeBlock_C;
+ VP8Copy4x4 = Copy4x4_C;
+ VP8Copy16x8 = Copy16x8_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@@ -906,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
VP8EncDspInitAVX2();
}
#endif
-#if defined(WEBP_USE_NEON)
- if (VP8GetCPUInfo(kNEON)) {
- VP8EncDspInitNEON();
- }
-#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspInitMIPS32();
@@ -927,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
}
#endif
}
+
+#if defined(WEBP_USE_NEON)
+ if (WEBP_NEON_OMIT_C_CODE ||
+ (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
+ VP8EncDspInitNEON();
+ }
+#endif
+
+ assert(VP8ITransform != NULL);
+ assert(VP8FTransform != NULL);
+ assert(VP8FTransformWHT != NULL);
+ assert(VP8TDisto4x4 != NULL);
+ assert(VP8TDisto16x16 != NULL);
+ assert(VP8CollectHistogram != NULL);
+ assert(VP8SSE16x16 != NULL);
+ assert(VP8SSE16x8 != NULL);
+ assert(VP8SSE8x8 != NULL);
+ assert(VP8SSE4x4 != NULL);
+ assert(VP8EncQuantizeBlock != NULL);
+ assert(VP8EncQuantize2Blocks != NULL);
+ assert(VP8FTransform2 != NULL);
+ assert(VP8EncPredLuma4 != NULL);
+ assert(VP8EncPredLuma16 != NULL);
+ assert(VP8EncPredChroma8 != NULL);
+ assert(VP8Mean16x4 != NULL);
+ assert(VP8EncQuantizeBlockWHT != NULL);
+ assert(VP8Copy4x4 != NULL);
+ assert(VP8Copy16x8 != NULL);
+
enc_last_cpuinfo_used = VP8GetCPUInfo;
}
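
For context, a hypothetical caller-side sketch (not part of the patch): once VP8EncDspInit() has run, the asserts above guarantee that every dispatched pointer is non-NULL, so a 4x4 block can be transformed and quantized through the pointers directly. The extern declarations are paraphrased for the sketch and the VP8Matrix typedef is an opaque stand-in; a real caller would include src/enc/vp8i_enc.h instead, and EncodeOneBlock is an invented name.

#include <stdint.h>

typedef struct VP8Matrix VP8Matrix;   /* opaque stand-in for the real quantizer type */

extern void VP8EncDspInit(void);
extern void (*VP8FTransform)(const uint8_t* src, const uint8_t* ref,
                             int16_t* out);
extern int (*VP8EncQuantizeBlock)(int16_t in[16], int16_t out[16],
                                  const VP8Matrix* const mtx);

/* src and ref point to BPS-strided 4x4 blocks, as in the kernels above. */
int EncodeOneBlock(const uint8_t* src, const uint8_t* ref,
                   const VP8Matrix* mtx, int16_t coeffs[16]) {
  int16_t raw[16];
  VP8EncDspInit();                /* safe to call repeatedly; guarded by the cpuinfo check */
  VP8FTransform(src, ref, raw);   /* forward 4x4 transform */
  return VP8EncQuantizeBlock(raw, coeffs, mtx);  /* non-zero-coefficient flag */
}
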