diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/dsp.h')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/dsp.h | 237 |
1 files changed, 221 insertions, 16 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/dsp.h b/src/3rdparty/libwebp/src/dsp/dsp.h index a2c3951..95f1ce0 100644 --- a/src/3rdparty/libwebp/src/dsp/dsp.h +++ b/src/3rdparty/libwebp/src/dsp/dsp.h @@ -14,16 +14,15 @@ #ifndef WEBP_DSP_DSP_H_ #define WEBP_DSP_DSP_H_ -#ifdef HAVE_CONFIG_H -#include "../webp/config.h" -#endif - #include "../webp/types.h" +#include "../utils/utils.h" #ifdef __cplusplus extern "C" { #endif +#define BPS 32 // this is the common stride for enc/dec + //------------------------------------------------------------------------------ // CPU detection @@ -45,6 +44,11 @@ extern "C" { #define WEBP_MSC_SSE2 // Visual C++ SSE2 targets #endif +#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets +#endif + // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp // files without intrinsics, allowing the corresponding Init() to be called. // Files containing intrinsics will need to be built targeting the instruction @@ -53,6 +57,10 @@ extern "C" { #define WEBP_USE_SSE2 #endif +#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41) +#define WEBP_USE_SSE41 +#endif + #if defined(__AVX2__) || defined(WEBP_HAVE_AVX2) #define WEBP_USE_AVX2 #endif @@ -68,25 +76,53 @@ extern "C" { #define WEBP_USE_NEON #endif +#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) +#define WEBP_USE_NEON +#define WEBP_USE_INTRINSICS +#endif + #if defined(__mips__) && !defined(__mips64) && \ defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 #if (__mips_isa_rev >= 2) #define WEBP_USE_MIPS32_R2 +#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2) +#define WEBP_USE_MIPS_DSP_R2 +#endif +#endif +#endif + +// This macro prevents thread_sanitizer from reporting known concurrent writes. 
+#define WEBP_TSAN_IGNORE_FUNCTION +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef WEBP_TSAN_IGNORE_FUNCTION +#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) #endif #endif typedef enum { kSSE2, kSSE3, + kSSE4_1, kAVX, kAVX2, kNEON, - kMIPS32 + kMIPS32, + kMIPSdspR2 } CPUFeature; // returns true if the CPU supports the feature. typedef int (*VP8CPUInfo)(CPUFeature feature); -extern VP8CPUInfo VP8GetCPUInfo; +WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo; + +//------------------------------------------------------------------------------ +// Init stub generator + +// Defines an init function stub to ensure each module exposes a symbol, +// avoiding a compiler warning. +#define WEBP_DSP_INIT_STUB(func) \ + extern void func(void); \ + WEBP_TSAN_IGNORE_FUNCTION void func(void) {} //------------------------------------------------------------------------------ // Encoding @@ -100,6 +136,7 @@ typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out); typedef void (*VP8WHT)(const int16_t* in, int16_t* out); extern VP8Idct VP8ITransform; extern VP8Fdct VP8FTransform; +extern VP8Fdct VP8FTransform2; // performs two transforms at a time extern VP8WHT VP8FTransformWHT; // Predictions // *dst is the destination block. *top and *left can be NULL. @@ -118,26 +155,63 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16; typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst); extern VP8BlockCopy VP8Copy4x4; +extern VP8BlockCopy VP8Copy16x8; // Quantization struct VP8Matrix; // forward declaration typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], const struct VP8Matrix* const mtx); +// Same as VP8QuantizeBlock, but quantizes two consecutive blocks. 
+typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32], + const struct VP8Matrix* const mtx); + extern VP8QuantizeBlock VP8EncQuantizeBlock; +extern VP8Quantize2Blocks VP8EncQuantize2Blocks; // specific to 2nd transform: typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16], const struct VP8Matrix* const mtx); extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; -// Collect histogram for susceptibility calculation and accumulate in histo[]. -struct VP8Histogram; +extern const int VP8DspScan[16 + 4 + 4]; + +// Collect histogram for susceptibility calculation. +#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram. +typedef struct { + // We only need to store max_value and last_non_zero, not the distribution. + int max_value; + int last_non_zero; +} VP8Histogram; typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred, int start_block, int end_block, - struct VP8Histogram* const histo); -extern const int VP8DspScan[16 + 4 + 4]; + VP8Histogram* const histo); extern VP8CHisto VP8CollectHistogram; +// General-purpose util function to help VP8CollectHistogram(). +void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], + VP8Histogram* const histo); -void VP8EncDspInit(void); // must be called before using any of the above +// must be called before using any of the above +void VP8EncDspInit(void); + +//------------------------------------------------------------------------------ +// cost functions (encoding) + +extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p) +// approximate cost per level: +extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1]; +extern const uint8_t VP8EncBands[16 + 1]; + +struct VP8Residual; +typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs, + struct VP8Residual* const res); +extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; + +// Cost calculation function. 
+typedef int (*VP8GetResidualCostFunc)(int ctx0, + const struct VP8Residual* const res); +extern VP8GetResidualCostFunc VP8GetResidualCost; + +// must be called before anything using the above +void VP8EncDspCostInit(void); //------------------------------------------------------------------------------ // Decoding @@ -155,16 +229,17 @@ extern VP8WHT VP8TransformWHT; // *dst is the destination block, with stride BPS. Boundary samples are // assumed accessible when needed. typedef void (*VP8PredFunc)(uint8_t* dst); -extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */]; -extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */]; -extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */]; +extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */]; +extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */]; +extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */]; // clipping tables (for filtering) extern const int8_t* const VP8ksclip1; // clips [-1020, 1020] to [-128, 127] extern const int8_t* const VP8ksclip2; // clips [-112, 112] to [-16, 15] extern const uint8_t* const VP8kclip1; // clips [-255,511] to [0,255] extern const uint8_t* const VP8kabs0; // abs(x) for x in [-255,255] -void VP8InitClipTables(void); // must be called first +// must be called first +void VP8InitClipTables(void); // simple filter (only for luma) typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh); @@ -236,13 +311,81 @@ typedef void (*WebPYUV444Converter)(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int len); -extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; +extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; // Must be called before using the WebPUpsamplers[] (and for premultiplied // colorspaces like rgbA, rgbA4444, etc) void WebPInitUpsamplers(void); // Must be called before using WebPSamplers[] void WebPInitSamplers(void); +// Must be called before using WebPYUV444Converters[] +void 
WebPInitYUV444Converters(void); + +//------------------------------------------------------------------------------ +// ARGB -> YUV converters + +// Convert ARGB samples to luma Y. +extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); +// Convert ARGB samples to U/V with downsampling. do_store should be '1' for +// even lines and '0' for odd ones. 'src_width' is the original width, not +// the U/V one. +extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, + int src_width, int do_store); + +// Convert a row of accumulated (four-values) of rgba32 toward U/V +extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, + uint8_t* u, uint8_t* v, int width); + +// Convert RGB or BGR to Y +extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); +extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); + +// used for plain-C fallback. +extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, + int src_width, int do_store); +extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, + uint8_t* u, uint8_t* v, int width); + +// Must be called before using the above. +void WebPInitConvertARGBToYUV(void); + +//------------------------------------------------------------------------------ +// Rescaler + +struct WebPRescaler; + +// Import a row of data and save its contribution in the rescaler. +// 'channel' denotes the channel number to be imported. 'Expand' corresponds to +// the wrk->x_expand case. Otherwise, 'Shrink' is to be used. +typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk, + const uint8_t* src); + +extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand; +extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink; + +// Export one row (starting at x_out position) from rescaler. +// 'Expand' corresponds to the wrk->y_expand case. 
+// Otherwise 'Shrink' is to be used +typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk); +extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand; +extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink; + +// Plain-C implementation, as fall-back. +extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk, + const uint8_t* src); +extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk, + const uint8_t* src); +extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk); +extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk); + +// Main entry calls: +extern void WebPRescalerImportRow(struct WebPRescaler* const wrk, + const uint8_t* src); +// Export one row (starting at x_out position) from rescaler. +extern void WebPRescalerExportRow(struct WebPRescaler* const wrk); + +// Must be called first before using the above. +void WebPRescalerDspInit(void); //------------------------------------------------------------------------------ // Utilities for processing transparent channel. @@ -256,6 +399,18 @@ extern void (*WebPApplyAlphaMultiply)( extern void (*WebPApplyAlphaMultiply4444)( uint8_t* rgba4444, int w, int h, int stride); +// Dispatch the values from alpha[] plane to the ARGB destination 'dst'. +// Returns true if alpha[] plane has non-trivial values different from 0xff. +extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint8_t* dst, int dst_stride); + +// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the +// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. +extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint32_t* dst, int dst_stride); + // Extract the alpha values from 32b values in argb[] and pack them into alpha[] // (this is the opposite of WebPDispatchAlpha). 
// Returns true if there's only trivial 0xff alpha values. @@ -282,9 +437,59 @@ void WebPMultRows(uint8_t* ptr, int stride, const uint8_t* alpha, int alpha_stride, int width, int num_rows, int inverse); +// Plain-C versions, used as fallback by some implementations. +void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha, + int width, int inverse); +void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse); + // To be called first before using the above. void WebPInitAlphaProcessing(void); +// ARGB packing function: a/r/g/b input is rgba or bgra order. +extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out); + +// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. +extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out); + +// To be called first before using the above. +void VP8EncDspARGBInit(void); + +//------------------------------------------------------------------------------ +// Filter functions + +typedef enum { // Filter types. + WEBP_FILTER_NONE = 0, + WEBP_FILTER_HORIZONTAL, + WEBP_FILTER_VERTICAL, + WEBP_FILTER_GRADIENT, + WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker + WEBP_FILTER_BEST, // meta-types + WEBP_FILTER_FAST +} WEBP_FILTER_TYPE; + +typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, + int stride, uint8_t* out); +typedef void (*WebPUnfilterFunc)(int width, int height, int stride, + int row, int num_rows, uint8_t* data); + +// Filter the given data using the given predictor. +// 'in' corresponds to a 2-dimensional pixel array of size (stride * height) +// in raster order. +// 'stride' is number of bytes per scan line (with possible padding). +// 'out' should be pre-allocated. +extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; + +// In-place reconstruct the original data from the given filtered data. 
+// The reconstruction will be done for 'num_rows' rows starting from 'row' +// (assuming rows up to 'row - 1' are already reconstructed). +extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; + +// To be called first before using the above. +void VP8FiltersInit(void); + #ifdef __cplusplus } // extern "C" #endif |