diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/utils')
22 files changed, 1495 insertions, 865 deletions
diff --git a/src/3rdparty/libwebp/src/utils/alpha_processing.c b/src/3rdparty/libwebp/src/utils/alpha_processing.c deleted file mode 100644 index 7362ff9..0000000 --- a/src/3rdparty/libwebp/src/utils/alpha_processing.c +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Utilities for processing transparent channel. -// -// Author: Skal (pascal.massimino@gmail.com) - -#include <assert.h> -#include "./alpha_processing.h" - -// Tables can be faster on some platform but incur some extra binary size (~2k). -// #define USE_TABLES_FOR_ALPHA_MULT - -// ----------------------------------------------------------------------------- - -#define MFIX 24 // 24bit fixed-point arithmetic -#define HALF ((1u << MFIX) >> 1) -#define KINV_255 ((1u << MFIX) / 255u) - -static uint32_t Mult(uint8_t x, uint32_t mult) { - const uint32_t v = (x * mult + HALF) >> MFIX; - assert(v <= 255); // <- 24bit precision is enough to ensure that. - return v; -} - -#ifdef USE_TABLES_FOR_ALPHA_MULT - -static const uint32_t kMultTables[2][256] = { - { // (255u << MFIX) / alpha - 0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000, - 0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2, - 0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000, - 0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8, - 0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3, - 0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492, - 0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3, - 0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8, - 0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7, - 0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0, - 0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4, - 0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6, - 0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace, - 0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a, - 0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf, - 0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b, - 0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d, - 0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec, - 0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9, - 0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249, - 0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70, - 0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213, - 0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10, - 0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5, - 0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed, - 0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a, - 0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741, - 0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0, - 0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e, - 0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157, - 0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67, - 0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4, - 0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc, - 0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b, - 0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830, - 0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be, - 0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276, - 0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc, - 0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2, - 0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358, - 0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0, - 0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465, - 0x01030c30, 0x01020612, 0x01010204, 0x01000000 }, - { // alpha * KINV_255 - 0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505, - 0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b, - 0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111, - 0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717, - 0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d, - 0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323, - 0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929, - 0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f, - 0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535, - 0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b, - 0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141, - 0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747, - 0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d, - 0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353, - 0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959, - 0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f, - 0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565, - 0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b, - 0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171, - 0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777, - 0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d, - 0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383, - 0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989, - 0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f, - 0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595, - 0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b, - 0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1, - 0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7, - 0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad, - 0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3, - 0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9, - 0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf, - 0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5, - 0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb, - 0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1, - 0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7, - 0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd, - 0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3, - 0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9, - 0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef, - 0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5, - 0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb, - 0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff } -}; - -static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) { - return kMultTables[!inverse][a]; -} - -#else - -static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) { - return inverse ? (255u << MFIX) / a : a * KINV_255; -} - -#endif // USE_TABLES_FOR_ALPHA_MULT - -void WebPMultARGBRow(uint32_t* const ptr, int width, int inverse) { - int x; - for (x = 0; x < width; ++x) { - const uint32_t argb = ptr[x]; - if (argb < 0xff000000u) { // alpha < 255 - if (argb <= 0x00ffffffu) { // alpha == 0 - ptr[x] = 0; - } else { - const uint32_t alpha = (argb >> 24) & 0xff; - const uint32_t scale = GetScale(alpha, inverse); - uint32_t out = argb & 0xff000000u; - out |= Mult(argb >> 0, scale) << 0; - out |= Mult(argb >> 8, scale) << 8; - out |= Mult(argb >> 16, scale) << 16; - ptr[x] = out; - } - } - } -} - -void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, - int inverse) { - int n; - for (n = 0; n < num_rows; ++n) { - WebPMultARGBRow((uint32_t*)ptr, width, inverse); - ptr += stride; - } -} - -void WebPMultRow(uint8_t* const ptr, const uint8_t* const alpha, - int width, int inverse) { - int x; - for (x = 0; x < width; ++x) { - const uint32_t a = alpha[x]; - if (a != 255) { - if (a == 0) { - ptr[x] = 0; - } else { - const uint32_t scale = GetScale(a, inverse); - ptr[x] = Mult(ptr[x], scale); - } - } - } -} - -void WebPMultRows(uint8_t* ptr, int stride, - const uint8_t* alpha, int alpha_stride, - int width, int num_rows, int inverse) { - int n; - for (n = 0; n < num_rows; ++n) { - WebPMultRow(ptr, alpha, width, inverse); - ptr += stride; - alpha += alpha_stride; - } -} - -#undef KINV_255 -#undef HALF -#undef MFIX - diff --git a/src/3rdparty/libwebp/src/utils/alpha_processing.h b/src/3rdparty/libwebp/src/utils/alpha_processing.h deleted file mode 100644 index 80e1ae4..0000000 --- a/src/3rdparty/libwebp/src/utils/alpha_processing.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Utilities for processing transparent channel. -// -// Author: Skal (pascal.massimino@gmail.com) - -#ifndef WEBP_UTILS_ALPHA_PROCESSING_H_ -#define WEBP_UTILS_ALPHA_PROCESSING_H_ - -#include "../webp/types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). -// Un-Multiply operation transforms x into x * 255 / A. - -// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row. -void WebPMultARGBRow(uint32_t* const ptr, int width, int inverse); - -// Same a WebPMultARGBRow(), but for several rows. -void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, - int inverse); - -// Same for a row of single values, with side alpha values. -void WebPMultRow(uint8_t* const ptr, const uint8_t* const alpha, - int width, int inverse); - -// Same a WebPMultRow(), but for several 'num_rows' rows. -void WebPMultRows(uint8_t* ptr, int stride, - const uint8_t* alpha, int alpha_stride, - int width, int num_rows, int inverse); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // WEBP_UTILS_ALPHA_PROCESSING_H_ diff --git a/src/3rdparty/libwebp/src/utils/bit_reader.c b/src/3rdparty/libwebp/src/utils/bit_reader.c index bfa4d7d..64503e6 100644 --- a/src/3rdparty/libwebp/src/utils/bit_reader.c +++ b/src/3rdparty/libwebp/src/utils/bit_reader.c @@ -7,18 +7,16 @@ // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // -// Boolean decoder +// Boolean decoder non-inlined methods // // Author: Skal (pascal.massimino@gmail.com) -#include "./bit_reader.h" - -#ifndef USE_RIGHT_JUSTIFY -#define MK(X) (((range_t)(X) << (BITS)) | (MASK)) -#else -#define MK(X) ((range_t)(X)) +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" #endif +#include "./bit_reader_inl.h" + //------------------------------------------------------------------------------ // VP8BitReader @@ -27,12 +25,20 @@ void VP8InitBitReader(VP8BitReader* const br, assert(br != NULL); assert(start != NULL); assert(start <= end); - br->range_ = MK(255 - 1); + br->range_ = 255 - 1; br->buf_ = start; br->buf_end_ = end; br->value_ = 0; br->bits_ = -8; // to load the very first 8bits br->eof_ = 0; + VP8LoadNewBytes(br); +} + +void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) { + if (br->buf_ != NULL) { + br->buf_ += offset; + br->buf_end_ += offset; + } } const uint8_t kVP8Log2Range[128] = { @@ -47,45 +53,35 @@ const uint8_t kVP8Log2Range[128] = { 0 }; -// range = (range << kVP8Log2Range[range]) + trailing 1's +// range = ((range - 1) << kVP8Log2Range[range]) + 1 const range_t kVP8NewRange[128] = { - MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127), - MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127), - MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191), - MK(199), MK(207), MK(215), MK(223), MK(231), MK(239), MK(247), MK(127), - MK(131), MK(135), MK(139), MK(143), MK(147), MK(151), MK(155), MK(159), - MK(163), MK(167), MK(171), MK(175), MK(179), MK(183), MK(187), MK(191), - MK(195), MK(199), MK(203), MK(207), MK(211), MK(215), MK(219), MK(223), - MK(227), MK(231), MK(235), MK(239), MK(243), MK(247), MK(251), MK(127), - MK(129), MK(131), MK(133), MK(135), MK(137), MK(139), MK(141), MK(143), - MK(145), MK(147), MK(149), MK(151), MK(153), MK(155), MK(157), MK(159), - MK(161), MK(163), MK(165), MK(167), MK(169), MK(171), MK(173), MK(175), - MK(177), MK(179), MK(181), MK(183), MK(185), MK(187), MK(189), MK(191), - MK(193), MK(195), MK(197), MK(199), MK(201), MK(203), MK(205), MK(207), - MK(209), MK(211), MK(213), MK(215), MK(217), MK(219), MK(221), MK(223), - MK(225), MK(227), MK(229), MK(231), MK(233), MK(235), MK(237), MK(239), - MK(241), MK(243), MK(245), MK(247), MK(249), MK(251), MK(253), MK(127) + 127, 127, 191, 127, 159, 191, 223, 127, + 143, 159, 175, 191, 207, 223, 239, 127, + 135, 143, 151, 159, 167, 175, 183, 191, + 199, 207, 215, 223, 231, 239, 247, 127, + 131, 135, 139, 143, 147, 151, 155, 159, + 163, 167, 171, 175, 179, 183, 187, 191, + 195, 199, 203, 207, 211, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 127, + 129, 131, 133, 135, 137, 139, 141, 143, + 145, 147, 149, 151, 153, 155, 157, 159, + 161, 163, 165, 167, 169, 171, 173, 175, + 177, 179, 181, 183, 185, 187, 189, 191, + 193, 195, 197, 199, 201, 203, 205, 207, + 209, 211, 213, 215, 217, 219, 221, 223, + 225, 227, 229, 231, 233, 235, 237, 239, + 241, 243, 245, 247, 249, 251, 253, 127 }; -#undef MK - void VP8LoadFinalBytes(VP8BitReader* const br) { assert(br != NULL && br->buf_ != NULL); // Only read 8bits at a time if (br->buf_ < br->buf_end_) { -#ifndef USE_RIGHT_JUSTIFY - br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 - br->bits_); -#else - br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8); -#endif br->bits_ += 8; + br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8); } else if (!br->eof_) { -#ifdef USE_RIGHT_JUSTIFY - // These are not strictly needed, but it makes the behaviour - // consistent for both USE_RIGHT_JUSTIFY and !USE_RIGHT_JUSTIFY. br->value_ <<= 8; br->bits_ += 8; -#endif br->eof_ = 1; } } @@ -109,36 +105,48 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) { //------------------------------------------------------------------------------ // VP8LBitReader -#define MAX_NUM_BIT_READ 25 +#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. -#define LBITS 64 // Number of bits prefetched. -#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow. -#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits. +#if !defined(WEBP_FORCE_ALIGNED) && \ + (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ + defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64)) +#define VP8L_USE_UNALIGNED_LOAD +#endif -static const uint32_t kBitMask[MAX_NUM_BIT_READ] = { - 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, - 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215 +static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = { + 0, + 0x000001, 0x000003, 0x000007, 0x00000f, + 0x00001f, 0x00003f, 0x00007f, 0x0000ff, + 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff, + 0x001fff, 0x003fff, 0x007fff, 0x00ffff, + 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff, + 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff }; -void VP8LInitBitReader(VP8LBitReader* const br, - const uint8_t* const start, +void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start, size_t length) { size_t i; + vp8l_val_t value = 0; assert(br != NULL); assert(start != NULL); assert(length < 0xfffffff8u); // can't happen with a RIFF chunk. - br->buf_ = start; br->len_ = length; br->val_ = 0; - br->pos_ = 0; br->bit_pos_ = 0; br->eos_ = 0; br->error_ = 0; - for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) { - br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i); - ++br->pos_; + + if (length > sizeof(br->val_)) { + length = sizeof(br->val_); + } + for (i = 0; i < length; ++i) { + value |= (vp8l_val_t)start[i] << (8 * i); } + br->val_ = value; + br->pos_ = length; + br->buf_ = start; } void VP8LBitReaderSetBuffer(VP8LBitReader* const br, @@ -146,55 +154,51 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br, assert(br != NULL); assert(buf != NULL); assert(len < 0xfffffff8u); // can't happen with a RIFF chunk. - br->eos_ = (br->pos_ >= len); br->buf_ = buf; br->len_ = len; + // pos_ > len_ should be considered a param error. + br->error_ = (br->pos_ > br->len_); + br->eos_ = br->error_ || VP8LIsEndOfStream(br); } -// If not at EOS, reload up to LBITS byte-by-byte +// If not at EOS, reload up to VP8L_LBITS byte-by-byte static void ShiftBytes(VP8LBitReader* const br) { while (br->bit_pos_ >= 8 && br->pos_ < br->len_) { br->val_ >>= 8; - br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8); + br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8); ++br->pos_; br->bit_pos_ -= 8; } + br->eos_ = VP8LIsEndOfStream(br); } -void VP8LFillBitWindow(VP8LBitReader* const br) { - if (br->bit_pos_ >= WBITS) { -#if (defined(__x86_64__) || defined(_M_X64)) - if (br->pos_ + sizeof(br->val_) < br->len_) { - br->val_ >>= WBITS; - br->bit_pos_ -= WBITS; - // The expression below needs a little-endian arch to work correctly. - // This gives a large speedup for decoding speed. - br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS); - br->pos_ += LOG8_WBITS; - return; - } -#endif - ShiftBytes(br); // Slow path. - if (br->pos_ == br->len_ && br->bit_pos_ >= LBITS) { - br->eos_ = 1; - } +void VP8LDoFillBitWindow(VP8LBitReader* const br) { + assert(br->bit_pos_ >= VP8L_WBITS); + // TODO(jzern): given the fixed read size it may be possible to force + // alignment in this block. +#if defined(VP8L_USE_UNALIGNED_LOAD) + if (br->pos_ + sizeof(br->val_) < br->len_) { + br->val_ >>= VP8L_WBITS; + br->bit_pos_ -= VP8L_WBITS; + // The expression below needs a little-endian arch to work correctly. + // This gives a large speedup for decoding speed. + br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) << + (VP8L_LBITS - VP8L_WBITS); + br->pos_ += VP8L_LOG8_WBITS; + return; } +#endif + ShiftBytes(br); // Slow path. } uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) { assert(n_bits >= 0); // Flag an error if end_of_stream or n_bits is more than allowed limit. - if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) { + if (!br->eos_ && n_bits <= VP8L_MAX_NUM_BIT_READ) { const uint32_t val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits]; const int new_bits = br->bit_pos_ + n_bits; br->bit_pos_ = new_bits; - // If this read is going to cross the read buffer, set the eos flag. - if (br->pos_ == br->len_) { - if (new_bits >= LBITS) { - br->eos_ = 1; - } - } ShiftBytes(br); return val; } else { @@ -204,4 +208,3 @@ uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) { } //------------------------------------------------------------------------------ - diff --git a/src/3rdparty/libwebp/src/utils/bit_reader.h b/src/3rdparty/libwebp/src/utils/bit_reader.h index 98df98a..f569734 100644 --- a/src/3rdparty/libwebp/src/utils/bit_reader.h +++ b/src/3rdparty/libwebp/src/utils/bit_reader.h @@ -29,110 +29,62 @@ extern "C" { // However, since range_ is only 8bit, we only need an active window of 8 bits // for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls // below 128, range_ is updated, and fresh bits read from the bitstream are -// brought in as LSB. -// To avoid reading the fresh bits one by one (slow), we cache a few of them -// ahead (actually, we cache BITS of them ahead. See below). There's two -// strategies regarding how to shift these looked-ahead fresh bits into the -// 8bit window of value_: either we shift them in, while keeping the position of -// the window fixed. Or we slide the window to the right while keeping the cache -// bits at a fixed, right-justified, position. +// brought in as LSB. To avoid reading the fresh bits one by one (slow), we +// cache BITS of them ahead. The total of (BITS + 8) bits must fit into a +// natural register (with type bit_t). To fetch BITS bits from bitstream we +// use a type lbit_t. // -// Example, for BITS=16: here is the content of value_ for both strategies: -// -// !USE_RIGHT_JUSTIFY || USE_RIGHT_JUSTIFY -// || -// <- 8b -><- 8b -><- BITS bits -> || <- 8b+3b -><- 8b -><- 13 bits -> -// [unused][value_][cached bits][0] || [unused...][value_][cached bits] -// [........00vvvvvvBBBBBBBBBBBBB000]LSB || [...........00vvvvvvBBBBBBBBBBBBB] -// || -// After calling VP8Shift(), where we need to shift away two zeros: -// [........vvvvvvvvBBBBBBBBBBB00000]LSB || [.............vvvvvvvvBBBBBBBBBBB] -// || -// Just before we need to call VP8LoadNewBytes(), the situation is: -// [........vvvvvv000000000000000000]LSB || [..........................vvvvvv] -// || -// And just after calling VP8LoadNewBytes(): -// [........vvvvvvvvBBBBBBBBBBBBBBBB]LSB || [........vvvvvvvvBBBBBBBBBBBBBBBB] -// -// -> we're back to eight active 'value_' bits (marked 'v') and BITS cached -// bits (marked 'B') -// -// The right-justify strategy tends to use less shifts and is often faster. - -//------------------------------------------------------------------------------ // BITS can be any multiple of 8 from 8 to 56 (inclusive). // Pick values that fit natural register size. -#if !defined(WEBP_REFERENCE_IMPLEMENTATION) - -#define USE_RIGHT_JUSTIFY - #if defined(__i386__) || defined(_M_IX86) // x86 32bit -#define BITS 16 +#define BITS 24 #elif defined(__x86_64__) || defined(_M_X64) // x86 64bit #define BITS 56 #elif defined(__arm__) || defined(_M_ARM) // ARM #define BITS 24 -#else // reasonable default +#elif defined(__mips__) // MIPS #define BITS 24 -#endif - -#else // reference choices - -#define USE_RIGHT_JUSTIFY -#define BITS 8 - +#else // reasonable default +#define BITS 24 // TODO(skal): test aarch64 and find the proper BITS value. #endif //------------------------------------------------------------------------------ -// Derived types and constants +// Derived types and constants: +// bit_t = natural register type for storing 'value_' (which is BITS+8 bits) +// range_t = register for 'range_' (which is 8bits only) -// bit_t = natural register type -// lbit_t = natural type for memory I/O - -#if (BITS > 32) -typedef uint64_t bit_t; -typedef uint64_t lbit_t; -#elif (BITS == 32) +#if (BITS > 24) typedef uint64_t bit_t; -typedef uint32_t lbit_t; -#elif (BITS == 24) -typedef uint32_t bit_t; -typedef uint32_t lbit_t; -#elif (BITS == 16) -typedef uint32_t bit_t; -typedef uint16_t lbit_t; #else typedef uint32_t bit_t; -typedef uint8_t lbit_t; #endif -#ifndef USE_RIGHT_JUSTIFY -typedef bit_t range_t; // type for storing range_ -#define MASK ((((bit_t)1) << (BITS)) - 1) -#else -typedef uint32_t range_t; // range_ only uses 8bits here. No need for bit_t. -#endif +typedef uint32_t range_t; //------------------------------------------------------------------------------ // Bitreader typedef struct VP8BitReader VP8BitReader; struct VP8BitReader { + // boolean decoder (keep the field ordering as is!) + bit_t value_; // current value + range_t range_; // current range minus 1. In [127, 254] interval. + int bits_; // number of valid bits left + // read buffer const uint8_t* buf_; // next byte to be read const uint8_t* buf_end_; // end of read buffer int eof_; // true if input is exhausted - - // boolean decoder - range_t range_; // current range minus 1. In [127, 254] interval. - bit_t value_; // current value - int bits_; // number of valid bits left }; // Initialize the bit reader and the boolean decoder. void VP8InitBitReader(VP8BitReader* const br, const uint8_t* const start, const uint8_t* const end); +// Update internal pointers to displace the byte buffer by the +// relative offset 'offset'. +void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset); + // return the next value made of 'num_bits' bits uint32_t VP8GetValue(VP8BitReader* const br, int num_bits); static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) { @@ -142,152 +94,22 @@ static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) { // return the next value with sign-extension. int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits); -// Read a bit with proba 'prob'. Speed-critical function! -extern const uint8_t kVP8Log2Range[128]; -extern const range_t kVP8NewRange[128]; - -void VP8LoadFinalBytes(VP8BitReader* const br); // special case for the tail - -static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) { - assert(br != NULL && br->buf_ != NULL); - // Read 'BITS' bits at a time if possible. - if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) { - // convert memory type to register type (with some zero'ing!) - bit_t bits; - const lbit_t in_bits = *(const lbit_t*)br->buf_; - br->buf_ += (BITS) >> 3; -#if !defined(__BIG_ENDIAN__) -#if (BITS > 32) -// gcc 4.3 has builtin functions for swap32/swap64 -#if defined(__GNUC__) && \ - (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - bits = (bit_t)__builtin_bswap64(in_bits); -#elif defined(_MSC_VER) - bits = (bit_t)_byteswap_uint64(in_bits); -#elif defined(__x86_64__) - __asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits)); -#else // generic code for swapping 64-bit values (suggested by bdb@) - bits = (bit_t)in_bits; - bits = ((bits & 0xffffffff00000000ull) >> 32) | - ((bits & 0x00000000ffffffffull) << 32); - bits = ((bits & 0xffff0000ffff0000ull) >> 16) | - ((bits & 0x0000ffff0000ffffull) << 16); - bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) | - ((bits & 0x00ff00ff00ff00ffull) << 8); -#endif - bits >>= 64 - BITS; -#elif (BITS >= 24) -#if defined(__i386__) || defined(__x86_64__) - { - lbit_t swapped_in_bits; - __asm__ volatile("bswap %k0" : "=r"(swapped_in_bits) : "0"(in_bits)); - bits = (bit_t)swapped_in_bits; // 24b/32b -> 32b/64b zero-extension - } -#elif defined(_MSC_VER) - bits = (bit_t)_byteswap_ulong(in_bits); -#else - bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00) - | ((in_bits << 8) & 0xff0000) | (in_bits << 24); -#endif // x86 - bits >>= (32 - BITS); -#elif (BITS == 16) - // gcc will recognize a 'rorw $8, ...' here: - bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8); -#else // BITS == 8 - bits = (bit_t)in_bits; -#endif -#else // BIG_ENDIAN - bits = (bit_t)in_bits; - if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); -#endif -#ifndef USE_RIGHT_JUSTIFY - br->value_ |= bits << (-br->bits_); -#else - br->value_ = bits | (br->value_ << (BITS)); -#endif - br->bits_ += (BITS); - } else { - VP8LoadFinalBytes(br); // no need to be inlined - } -} - -static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, range_t split) { - if (br->bits_ < 0) { // Make sure we have a least BITS bits in 'value_' - VP8LoadNewBytes(br); - } -#ifndef USE_RIGHT_JUSTIFY - split |= (MASK); - if (br->value_ > split) { - br->range_ -= split + 1; - br->value_ -= split + 1; - return 1; - } else { - br->range_ = split; - return 0; - } -#else - { - const int pos = br->bits_; - const range_t value = (range_t)(br->value_ >> pos); - if (value > split) { - br->range_ -= split + 1; - br->value_ -= (bit_t)(split + 1) << pos; - return 1; - } else { - br->range_ = split; - return 0; - } - } -#endif -} - -static WEBP_INLINE void VP8Shift(VP8BitReader* const br) { -#ifndef USE_RIGHT_JUSTIFY - // range_ is in [0..127] interval here. - const bit_t idx = br->range_ >> (BITS); - const int shift = kVP8Log2Range[idx]; - br->range_ = kVP8NewRange[idx]; - br->value_ <<= shift; - br->bits_ -= shift; -#else - const int shift = kVP8Log2Range[br->range_]; - assert(br->range_ < (range_t)128); - br->range_ = kVP8NewRange[br->range_]; - br->bits_ -= shift; -#endif -} - -static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) { -#ifndef USE_RIGHT_JUSTIFY - // It's important to avoid generating a 64bit x 64bit multiply here. - // We just need an 8b x 8b after all. - const range_t split = - (range_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8); - const int bit = VP8BitUpdate(br, split); - if (br->range_ <= (((range_t)0x7e << (BITS)) | (MASK))) { - VP8Shift(br); - } - return bit; -#else - const range_t split = (br->range_ * prob) >> 8; - const int bit = VP8BitUpdate(br, split); - if (br->range_ <= (range_t)0x7e) { - VP8Shift(br); - } - return bit; -#endif -} - -static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) { - const range_t split = (br->range_ >> 1); - const int bit = VP8BitUpdate(br, split); - VP8Shift(br); - return bit ? -v : v; -} +// bit_reader_inl.h will implement the following methods: +// static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) +// static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) +// and should be included by the .c files that actually need them. +// This is to avoid recompiling the whole library whenever this file is touched, +// and also allowing platform-specific ad-hoc hacks. // ----------------------------------------------------------------------------- // Bitreader for lossless format +// maximum number of bits (inclusive) the bit-reader can handle: +#define VP8L_MAX_NUM_BIT_READ 24 + +#define VP8L_LBITS 64 // Number of bits prefetched. +#define VP8L_WBITS 32 // Minimum number of bytes ready after VP8LFillBitWindow. + typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit. typedef struct { @@ -308,9 +130,10 @@ void VP8LInitBitReader(VP8LBitReader* const br, void VP8LBitReaderSetBuffer(VP8LBitReader* const br, const uint8_t* const buffer, size_t length); -// Reads the specified number of bits from Read Buffer. -// Flags an error in case end_of_stream or n_bits is more than allowed limit. -// Flags eos if this read attempt is going to cross the read buffer. +// Reads the specified number of bits from read buffer. +// Flags an error in case end_of_stream or n_bits is more than the allowed limit +// of VP8L_MAX_NUM_BIT_READ (inclusive). +// Flags eos_ if this read attempt is going to cross the read buffer. uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits); // Return the prefetched bits, so they can be looked up. @@ -318,14 +141,26 @@ static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) { return (uint32_t)(br->val_ >> br->bit_pos_); } +// Returns true if there was an attempt at reading bit past the end of +// the buffer. Doesn't set br->eos_ flag. +static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) { + assert(br->pos_ <= br->len_); + return (br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS); +} + // For jumping over a number of bits in the bit stream when accessed with // VP8LPrefetchBits and VP8LFillBitWindow. static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) { br->bit_pos_ = val; + br->eos_ = VP8LIsEndOfStream(br); } // Advances the read buffer by 4 bytes to make room for reading next 32 bits. -void VP8LFillBitWindow(VP8LBitReader* const br); +// Speed critical, but infrequent part of the code can be non-inlined. +extern void VP8LDoFillBitWindow(VP8LBitReader* const br); +static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) { + if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br); +} #ifdef __cplusplus } // extern "C" diff --git a/src/3rdparty/libwebp/src/utils/bit_reader_inl.h b/src/3rdparty/libwebp/src/utils/bit_reader_inl.h new file mode 100644 index 0000000..81427c6 --- /dev/null +++ b/src/3rdparty/libwebp/src/utils/bit_reader_inl.h @@ -0,0 +1,172 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Specific inlined methods for boolean decoder [VP8GetBit() ...] +// This file should be included by the .c sources that actually need to call +// these methods. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_UTILS_BIT_READER_INL_H_ +#define WEBP_UTILS_BIT_READER_INL_H_ + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#ifdef WEBP_FORCE_ALIGNED +#include <string.h> // memcpy +#endif + +#include "../dsp/dsp.h" +#include "./bit_reader.h" +#include "./endian_inl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//------------------------------------------------------------------------------ +// Derived type lbit_t = natural type for memory I/O + +#if (BITS > 32) +typedef uint64_t lbit_t; +#elif (BITS > 16) +typedef uint32_t lbit_t; +#elif (BITS > 8) +typedef uint16_t lbit_t; +#else +typedef uint8_t lbit_t; +#endif + +extern const uint8_t kVP8Log2Range[128]; +extern const range_t kVP8NewRange[128]; + +// special case for the tail byte-reading +void VP8LoadFinalBytes(VP8BitReader* const br); + +//------------------------------------------------------------------------------ +// Inlined critical functions + +// makes sure br->value_ has at least BITS bits worth of data +static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) { + assert(br != NULL && br->buf_ != NULL); + // Read 'BITS' bits at a time if possible. + if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) { + // convert memory type to register type (with some zero'ing!) + bit_t bits; +#if defined(WEBP_FORCE_ALIGNED) + lbit_t in_bits; + memcpy(&in_bits, br->buf_, sizeof(in_bits)); +#elif defined(WEBP_USE_MIPS32) + // This is needed because of un-aligned read. + lbit_t in_bits; + lbit_t* p_buf_ = (lbit_t*)br->buf_; + __asm__ volatile( + ".set push \n\t" + ".set at \n\t" + ".set macro \n\t" + "ulw %[in_bits], 0(%[p_buf_]) \n\t" + ".set pop \n\t" + : [in_bits]"=r"(in_bits) + : [p_buf_]"r"(p_buf_) + : "memory", "at" + ); +#else + const lbit_t in_bits = *(const lbit_t*)br->buf_; +#endif + br->buf_ += BITS >> 3; +#if !defined(WORDS_BIGENDIAN) +#if (BITS > 32) + bits = BSwap64(in_bits); + bits >>= 64 - BITS; +#elif (BITS >= 24) + bits = BSwap32(in_bits); + bits >>= (32 - BITS); +#elif (BITS == 16) + bits = BSwap16(in_bits); +#else // BITS == 8 + bits = (bit_t)in_bits; +#endif // BITS > 32 +#else // WORDS_BIGENDIAN + bits = (bit_t)in_bits; + if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); +#endif + br->value_ = bits | (br->value_ << BITS); + br->bits_ += BITS; + } else { + VP8LoadFinalBytes(br); // no need to be inlined + } +} + +// Read a bit with proba 'prob'. Speed-critical function! +static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) { + // Don't move this declaration! It makes a big speed difference to store + // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't + // alter br->range_ value. + range_t range = br->range_; + if (br->bits_ < 0) { + VP8LoadNewBytes(br); + } + { + const int pos = br->bits_; + const range_t split = (range * prob) >> 8; + const range_t value = (range_t)(br->value_ >> pos); +#if defined(__arm__) || defined(_M_ARM) // ARM-specific + const int bit = ((int)(split - value) >> 31) & 1; + if (value > split) { + range -= split + 1; + br->value_ -= (bit_t)(split + 1) << pos; + } else { + range = split; + } +#else // faster version on x86 + int bit; // Don't use 'const int bit = (value > split);", it's slower. + if (value > split) { + range -= split + 1; + br->value_ -= (bit_t)(split + 1) << pos; + bit = 1; + } else { + range = split; + bit = 0; + } +#endif + if (range <= (range_t)0x7e) { + const int shift = kVP8Log2Range[range]; + range = kVP8NewRange[range]; + br->bits_ -= shift; + } + br->range_ = range; + return bit; + } +} + +// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) +static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) { + if (br->bits_ < 0) { + VP8LoadNewBytes(br); + } + { + const int pos = br->bits_; + const range_t split = br->range_ >> 1; + const range_t value = (range_t)(br->value_ >> pos); + const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 + br->bits_ -= 1; + br->range_ += mask; + br->range_ |= 1; + br->value_ -= (bit_t)((split + 1) & mask) << pos; + return (v ^ mask) - mask; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_UTILS_BIT_READER_INL_H_ diff --git a/src/3rdparty/libwebp/src/utils/bit_writer.c b/src/3rdparty/libwebp/src/utils/bit_writer.c index 29810a1..9875ca6 100644 --- a/src/3rdparty/libwebp/src/utils/bit_writer.c +++ b/src/3rdparty/libwebp/src/utils/bit_writer.c @@ -15,7 +15,10 @@ #include <assert.h> #include <string.h> // for memcpy() #include <stdlib.h> + #include "./bit_writer.h" +#include "./endian_inl.h" +#include "./utils.h" //------------------------------------------------------------------------------ // VP8BitWriter @@ -34,7 +37,7 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) { new_size = 2 * bw->max_pos_; if (new_size < needed_size) new_size = needed_size; if (new_size < 1024) new_size = 1024; - new_buf = (uint8_t*)malloc(new_size); + new_buf = (uint8_t*)WebPSafeMalloc(1ULL, new_size); if (new_buf == NULL) { bw->error_ = 1; return 0; @@ -43,13 +46,13 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) { assert(bw->buf_ != NULL); memcpy(new_buf, bw->buf_, bw->pos_); } - free(bw->buf_); + WebPSafeFree(bw->buf_); bw->buf_ = new_buf; bw->max_pos_ = new_size; return 1; } -static void kFlush(VP8BitWriter* const bw) { +static void Flush(VP8BitWriter* const bw) { const int s = 8 + bw->nb_bits_; const int32_t bits = bw->value_ >> s; assert(bw->nb_bits_ >= 0); @@ -115,7 +118,7 @@ int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) { bw->range_ = kNewRange[bw->range_]; bw->value_ <<= shift; bw->nb_bits_ += shift; - if (bw->nb_bits_ > 0) kFlush(bw); + if (bw->nb_bits_ > 0) Flush(bw); } return bit; } @@ -132,7 +135,7 @@ int VP8PutBitUniform(VP8BitWriter* const bw, int bit) { bw->range_ = kNewRange[bw->range_]; bw->value_ <<= 1; bw->nb_bits_ += 1; - if (bw->nb_bits_ > 0) kFlush(bw); + if (bw->nb_bits_ > 0) Flush(bw); } return bit; } @@ -170,14 +173,14 @@ int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) { uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) { VP8PutValue(bw, 0, 9 - bw->nb_bits_); bw->nb_bits_ = 0; // pad with zeroes - kFlush(bw); + Flush(bw); return bw->buf_; } int VP8BitWriterAppend(VP8BitWriter* const bw, const uint8_t* data, size_t size) { - assert(data); - if (bw->nb_bits_ != -8) return 0; // kFlush() must have been called + assert(data != NULL); + if (bw->nb_bits_ != -8) return 0; // Flush() must have been called if (!BitWriterResize(bw, size)) return 0; memcpy(bw->buf_ + bw->pos_, data, size); bw->pos_ += size; @@ -185,8 +188,8 @@ int VP8BitWriterAppend(VP8BitWriter* const bw, } void VP8BitWriterWipeOut(VP8BitWriter* const bw) { - if (bw) { - free(bw->buf_); + if (bw != NULL) { + WebPSafeFree(bw->buf_); memset(bw, 0, sizeof(*bw)); } } @@ -194,32 +197,43 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) { //------------------------------------------------------------------------------ // VP8LBitWriter +// This is the minimum amount of size the memory buffer is guaranteed to grow +// when extra space is needed. +#define MIN_EXTRA_SIZE (32768ULL) + +#define VP8L_WRITER_BYTES ((int)sizeof(vp8l_wtype_t)) +#define VP8L_WRITER_BITS (VP8L_WRITER_BYTES * 8) +#define VP8L_WRITER_MAX_BITS (8 * (int)sizeof(vp8l_atype_t)) + // Returns 1 on success. static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) { uint8_t* allocated_buf; size_t allocated_size; - const size_t current_size = VP8LBitWriterNumBytes(bw); + const size_t max_bytes = bw->end_ - bw->buf_; + const size_t current_size = bw->cur_ - bw->buf_; const uint64_t size_required_64b = (uint64_t)current_size + extra_size; const size_t size_required = (size_t)size_required_64b; if (size_required != size_required_64b) { bw->error_ = 1; return 0; } - if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1; - allocated_size = (3 * bw->max_bytes_) >> 1; + if (max_bytes > 0 && size_required <= max_bytes) return 1; + allocated_size = (3 * max_bytes) >> 1; if (allocated_size < size_required) allocated_size = size_required; // make allocated size multiple of 1k allocated_size = (((allocated_size >> 10) + 1) << 10); - allocated_buf = (uint8_t*)malloc(allocated_size); + allocated_buf = (uint8_t*)WebPSafeMalloc(1ULL, allocated_size); if (allocated_buf == NULL) { bw->error_ = 1; return 0; } - memcpy(allocated_buf, bw->buf_, current_size); - free(bw->buf_); + if (current_size > 0) { + memcpy(allocated_buf, bw->buf_, current_size); + } + WebPSafeFree(bw->buf_); bw->buf_ = allocated_buf; - bw->max_bytes_ = allocated_size; - memset(allocated_buf + current_size, 0, allocated_size - current_size); + bw->cur_ = bw->buf_ + current_size; + bw->end_ = bw->buf_ + allocated_size; return 1; } @@ -230,53 +244,64 @@ int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) { void VP8LBitWriterDestroy(VP8LBitWriter* const bw) { if (bw != NULL) { - free(bw->buf_); + WebPSafeFree(bw->buf_); memset(bw, 0, sizeof(*bw)); } } void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) { - if (n_bits < 1) return; -#if !defined(__BIG_ENDIAN__) - // Technically, this branch of the code can write up to 25 bits at a time, - // but in prefix encoding, the maximum number of bits written is 18 at a time. - { - uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3]; - uint32_t v = *(const uint32_t*)p; - v |= bits << (bw->bit_pos_ & 7); - *(uint32_t*)p = v; - bw->bit_pos_ += n_bits; - } -#else // BIG_ENDIAN - { - uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3]; - const int bits_reserved_in_first_byte = bw->bit_pos_ & 7; - const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte; - // implicit & 0xff is assumed for uint8_t arithmetic - *p++ |= bits << bits_reserved_in_first_byte; - bits >>= 8 - bits_reserved_in_first_byte; - if (bits_left_to_write >= 1) { - *p++ = bits; - bits >>= 8; - if (bits_left_to_write >= 9) { - *p++ = bits; - bits >>= 8; + assert(n_bits <= 32); + // That's the max we can handle: + assert(bw->used_ + n_bits <= 2 * VP8L_WRITER_MAX_BITS); + if (n_bits > 0) { + // Local field copy. + vp8l_atype_t lbits = bw->bits_; + int used = bw->used_; + // Special case of overflow handling for 32bit accumulator (2-steps flush). + if (VP8L_WRITER_BITS == 16) { + if (used + n_bits >= VP8L_WRITER_MAX_BITS) { + // Fill up all the VP8L_WRITER_MAX_BITS so it can be flushed out below. + const int shift = VP8L_WRITER_MAX_BITS - used; + lbits |= (vp8l_atype_t)bits << used; + used = VP8L_WRITER_MAX_BITS; + n_bits -= shift; + bits >>= shift; + assert(n_bits <= VP8L_WRITER_MAX_BITS); } } - assert(n_bits <= 25); - *p = bits; - bw->bit_pos_ += n_bits; + // If needed, make some room by flushing some bits out. + while (used >= VP8L_WRITER_BITS) { + if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { + const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; + if (extra_size != (size_t)extra_size || + !VP8LBitWriterResize(bw, (size_t)extra_size)) { + bw->cur_ = bw->buf_; + bw->error_ = 1; + return; + } + } + *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)lbits); + bw->cur_ += VP8L_WRITER_BYTES; + lbits >>= VP8L_WRITER_BITS; + used -= VP8L_WRITER_BITS; + } + // Eventually, insert new bits. + bw->bits_ = lbits | ((vp8l_atype_t)bits << used); + bw->used_ = used + n_bits; } -#endif - if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) { - const uint64_t extra_size = 32768ULL + bw->max_bytes_; - if (extra_size != (size_t)extra_size || - !VP8LBitWriterResize(bw, (size_t)extra_size)) { - bw->bit_pos_ = 0; - bw->error_ = 1; +} + +uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) { + // flush leftover bits + if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) { + while (bw->used_ > 0) { + *bw->cur_++ = (uint8_t)bw->bits_; + bw->bits_ >>= 8; + bw->used_ -= 8; } + bw->used_ = 0; } + return bw->buf_; } //------------------------------------------------------------------------------ - diff --git a/src/3rdparty/libwebp/src/utils/bit_writer.h b/src/3rdparty/libwebp/src/utils/bit_writer.h index 89a9ead..c80d22a 100644 --- a/src/3rdparty/libwebp/src/utils/bit_writer.h +++ b/src/3rdparty/libwebp/src/utils/bit_writer.h @@ -68,51 +68,46 @@ static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) { //------------------------------------------------------------------------------ // VP8LBitWriter -// TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope -// of re-using VP8BitWriter. Will evaluate once basic lossless encoder is -// implemented. -typedef struct { - uint8_t* buf_; - size_t bit_pos_; - size_t max_bytes_; +#if defined(__x86_64__) || defined(_M_X64) // 64bit +typedef uint64_t vp8l_atype_t; // accumulator type +typedef uint32_t vp8l_wtype_t; // writing type +#define WSWAP HToLE32 +#else +typedef uint32_t vp8l_atype_t; +typedef uint16_t vp8l_wtype_t; +#define WSWAP HToLE16 +#endif - // After all bits are written, the caller must observe the state of - // error_. A value of 1 indicates that a memory allocation failure - // has happened during bit writing. A value of 0 indicates successful +typedef struct { + vp8l_atype_t bits_; // bit accumulator + int used_; // number of bits used in accumulator + uint8_t* buf_; // start of buffer + uint8_t* cur_; // current write position + uint8_t* end_; // end of buffer + + // After all bits are written (VP8LBitWriterFinish()), the caller must observe + // the state of error_. A value of 1 indicates that a memory allocation + // failure has happened during bit writing. A value of 0 indicates successful // writing of bits. int error_; } VP8LBitWriter; static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) { - return (bw->bit_pos_ + 7) >> 3; + return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3); } -static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) { - return bw->buf_; -} +uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw); // Returns 0 in case of memory allocation error. int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size); void VP8LBitWriterDestroy(VP8LBitWriter* const bw); -// This function writes bits into bytes in increasing addresses, and within -// a byte least-significant-bit first. -// -// The function can write up to 16 bits in one go with WriteBits -// Example: let's assume that 3 bits (Rs below) have been written already: -// -// BYTE-0 BYTE+1 BYTE+2 -// -// 0000 0RRR 0000 0000 0000 0000 -// -// Now, we could write 5 or less bits in MSB by just sifting by 3 -// and OR'ing to BYTE-0. -// -// For n bits, we take the last 5 bytes, OR that with high bits in BYTE-0, -// and locate the rest in BYTE+1 and BYTE+2. -// +// This function writes bits into bytes in increasing addresses (little endian), +// and within a byte least-significant-bit first. +// This function can write up to 32 bits in one go, but VP8LBitReader can only +// read 24 bits max (VP8L_MAX_NUM_BIT_READ). // VP8LBitWriter's error_ flag is set in case of memory allocation error. void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits); diff --git a/src/3rdparty/libwebp/src/utils/color_cache.c b/src/3rdparty/libwebp/src/utils/color_cache.c index 66a4464..8a88f08 100644 --- a/src/3rdparty/libwebp/src/utils/color_cache.c +++ b/src/3rdparty/libwebp/src/utils/color_cache.c @@ -32,7 +32,7 @@ int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { void VP8LColorCacheClear(VP8LColorCache* const cc) { if (cc != NULL) { - free(cc->colors_); + WebPSafeFree(cc->colors_); cc->colors_ = NULL; } } diff --git a/src/3rdparty/libwebp/src/utils/endian_inl.h b/src/3rdparty/libwebp/src/utils/endian_inl.h new file mode 100644 index 0000000..cd56c37 --- /dev/null +++ b/src/3rdparty/libwebp/src/utils/endian_inl.h @@ -0,0 +1,100 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Endian related functions. + +#ifndef WEBP_UTILS_ENDIAN_INL_H_ +#define WEBP_UTILS_ENDIAN_INL_H_ + +#ifdef HAVE_CONFIG_H +#include "../webp/config.h" +#endif + +#include "../dsp/dsp.h" +#include "../webp/types.h" + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +#if defined(WORDS_BIGENDIAN) +#define HToLE32 BSwap32 +#define HToLE16 BSwap16 +#else +#define HToLE32(x) (x) +#define HToLE16(x) (x) +#endif + +#if !defined(HAVE_CONFIG_H) +// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64 +#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3) +#define HAVE_BUILTIN_BSWAP32 +#define HAVE_BUILTIN_BSWAP64 +#endif +// clang-3.3 and gcc-4.8 have a builtin function for swap16 +#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3) +#define HAVE_BUILTIN_BSWAP16 +#endif +#endif // !HAVE_CONFIG_H + +static WEBP_INLINE uint16_t BSwap16(uint16_t x) { +#if defined(HAVE_BUILTIN_BSWAP16) + return __builtin_bswap16(x); +#elif defined(_MSC_VER) + return _byteswap_ushort(x); +#else + // gcc will recognize a 'rorw $8, ...' here: + return (x >> 8) | ((x & 0xff) << 8); +#endif // HAVE_BUILTIN_BSWAP16 +} + +static WEBP_INLINE uint32_t BSwap32(uint32_t x) { +#if defined(WEBP_USE_MIPS32_R2) + uint32_t ret; + __asm__ volatile ( + "wsbh %[ret], %[x] \n\t" + "rotr %[ret], %[ret], 16 \n\t" + : [ret]"=r"(ret) + : [x]"r"(x) + ); + return ret; +#elif defined(HAVE_BUILTIN_BSWAP32) + return __builtin_bswap32(x); +#elif defined(__i386__) || defined(__x86_64__) + uint32_t swapped_bytes; + __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint32_t)_byteswap_ulong(x); +#else + return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); +#endif // HAVE_BUILTIN_BSWAP32 +} + +static WEBP_INLINE uint64_t BSwap64(uint64_t x) { +#if defined(HAVE_BUILTIN_BSWAP64) + return __builtin_bswap64(x); +#elif defined(__x86_64__) + uint64_t swapped_bytes; + __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint64_t)_byteswap_uint64(x); +#else // generic code for swapping 64-bit values (suggested by bdb@) + x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); + x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); + x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); + return x; +#endif // HAVE_BUILTIN_BSWAP64 +} + +#endif // WEBP_UTILS_ENDIAN_INL_H_ diff --git a/src/3rdparty/libwebp/src/utils/huffman.c b/src/3rdparty/libwebp/src/utils/huffman.c index 8c5739f..c4c16d9 100644 --- a/src/3rdparty/libwebp/src/utils/huffman.c +++ b/src/3rdparty/libwebp/src/utils/huffman.c @@ -22,6 +22,9 @@ // (might be faster on some platform) // #define USE_LUT_REVERSE_BITS +// Huffman data read via DecodeImageStream is represented in two (red and green) +// bytes. +#define MAX_HTREE_GROUPS 0x10000 #define NON_EXISTENT_SYMBOL (-1) static void TreeNodeInit(HuffmanTreeNode* const node) { @@ -46,17 +49,25 @@ static void AssignChildren(HuffmanTree* const tree, TreeNodeInit(children + 1); } +// A Huffman tree is a full binary tree; and in a full binary tree with L +// leaves, the total number of nodes N = 2 * L - 1. +static int HuffmanTreeMaxNodes(int num_leaves) { + return (2 * num_leaves - 1); +} + +static int HuffmanTreeAllocate(HuffmanTree* const tree, int num_nodes) { + assert(tree != NULL); + tree->root_ = + (HuffmanTreeNode*)WebPSafeMalloc(num_nodes, sizeof(*tree->root_)); + return (tree->root_ != NULL); +} + static int TreeInit(HuffmanTree* const tree, int num_leaves) { assert(tree != NULL); if (num_leaves == 0) return 0; - // We allocate maximum possible nodes in the tree at once. - // Note that a Huffman tree is a full binary tree; and in a full binary tree - // with L leaves, the total number of nodes N = 2 * L - 1. - tree->max_nodes_ = 2 * num_leaves - 1; + tree->max_nodes_ = HuffmanTreeMaxNodes(num_leaves); assert(tree->max_nodes_ < (1 << 16)); // limit for the lut_jump_ table - tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_, - sizeof(*tree->root_)); - if (tree->root_ == NULL) return 0; + if (!HuffmanTreeAllocate(tree, tree->max_nodes_)) return 0; TreeNodeInit(tree->root_); // Initialize root. tree->num_nodes_ = 1; memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_)); @@ -64,17 +75,41 @@ static int TreeInit(HuffmanTree* const tree, int num_leaves) { return 1; } -void HuffmanTreeRelease(HuffmanTree* const tree) { +void VP8LHuffmanTreeFree(HuffmanTree* const tree) { if (tree != NULL) { - free(tree->root_); + WebPSafeFree(tree->root_); tree->root_ = NULL; tree->max_nodes_ = 0; tree->num_nodes_ = 0; } } -int HuffmanCodeLengthsToCodes(const int* const code_lengths, - int code_lengths_size, int* const huff_codes) { +HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) { + HTreeGroup* const htree_groups = + (HTreeGroup*)WebPSafeCalloc(num_htree_groups, sizeof(*htree_groups)); + assert(num_htree_groups <= MAX_HTREE_GROUPS); + if (htree_groups == NULL) { + return NULL; + } + return htree_groups; +} + +void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups) { + if (htree_groups != NULL) { + int i, j; + for (i = 0; i < num_htree_groups; ++i) { + HuffmanTree* const htrees = htree_groups[i].htrees_; + for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { + VP8LHuffmanTreeFree(&htrees[j]); + } + } + WebPSafeFree(htree_groups); + } +} + +int VP8LHuffmanCodeLengthsToCodes( + const int* const code_lengths, int code_lengths_size, + int* const huff_codes) { int symbol; int code_len; int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 }; @@ -193,9 +228,10 @@ static int TreeAddSymbol(HuffmanTree* const tree, return 1; } -int HuffmanTreeBuildImplicit(HuffmanTree* const tree, - const int* const code_lengths, - int code_lengths_size) { +int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree, + const int* const code_lengths, + int* const codes, + int code_lengths_size) { int symbol; int num_symbols = 0; int root_symbol = 0; @@ -219,47 +255,43 @@ int HuffmanTreeBuildImplicit(HuffmanTree* const tree, if (num_symbols == 1) { // Trivial case. const int max_symbol = code_lengths_size; if (root_symbol < 0 || root_symbol >= max_symbol) { - HuffmanTreeRelease(tree); + VP8LHuffmanTreeFree(tree); return 0; } return TreeAddSymbol(tree, root_symbol, 0, 0); } else { // Normal case. int ok = 0; + memset(codes, 0, code_lengths_size * sizeof(*codes)); - // Get Huffman codes from the code lengths. - int* const codes = - (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes)); - if (codes == NULL) goto End; - - if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) { + if (!VP8LHuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, + codes)) { goto End; } // Add symbols one-by-one. for (symbol = 0; symbol < code_lengths_size; ++symbol) { if (code_lengths[symbol] > 0) { - if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) { + if (!TreeAddSymbol(tree, symbol, codes[symbol], + code_lengths[symbol])) { goto End; } } } ok = 1; End: - free(codes); ok = ok && IsFull(tree); - if (!ok) HuffmanTreeRelease(tree); + if (!ok) VP8LHuffmanTreeFree(tree); return ok; } } -int HuffmanTreeBuildExplicit(HuffmanTree* const tree, - const int* const code_lengths, - const int* const codes, - const int* const symbols, int max_symbol, - int num_symbols) { +int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree, + const int* const code_lengths, + const int* const codes, + const int* const symbols, int max_symbol, + int num_symbols) { int ok = 0; int i; - assert(tree != NULL); assert(code_lengths != NULL); assert(codes != NULL); @@ -282,7 +314,6 @@ int HuffmanTreeBuildExplicit(HuffmanTree* const tree, ok = 1; End: ok = ok && IsFull(tree); - if (!ok) HuffmanTreeRelease(tree); + if (!ok) VP8LHuffmanTreeFree(tree); return ok; } - diff --git a/src/3rdparty/libwebp/src/utils/huffman.h b/src/3rdparty/libwebp/src/utils/huffman.h index e8afd27..624bc17 100644 --- a/src/3rdparty/libwebp/src/utils/huffman.h +++ b/src/3rdparty/libwebp/src/utils/huffman.h @@ -15,6 +15,7 @@ #define WEBP_UTILS_HUFFMAN_H_ #include <assert.h> +#include "../webp/format_constants.h" #include "../webp/types.h" #ifdef __cplusplus @@ -42,6 +43,12 @@ struct HuffmanTree { int num_nodes_; // number of currently occupied nodes }; +// Huffman Tree group. +typedef struct HTreeGroup HTreeGroup; +struct HTreeGroup { + HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE]; +}; + // Returns true if the given node is not a leaf of the Huffman tree. static WEBP_INLINE int HuffmanTreeNodeIsNotLeaf( const HuffmanTreeNode* const node) { @@ -56,29 +63,37 @@ static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode( // Releases the nodes of the Huffman tree. // Note: It does NOT free 'tree' itself. -void HuffmanTreeRelease(HuffmanTree* const tree); +void VP8LHuffmanTreeFree(HuffmanTree* const tree); + +// Creates the instance of HTreeGroup with specified number of tree-groups. +HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups); + +// Releases the memory allocated for HTreeGroup. +void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups); // Builds Huffman tree assuming code lengths are implicitly in symbol order. +// The 'huff_codes' and 'code_lengths' are pre-allocated temporary memory +// buffers, used for creating the huffman tree. // Returns false in case of error (invalid tree or memory error). -int HuffmanTreeBuildImplicit(HuffmanTree* const tree, - const int* const code_lengths, - int code_lengths_size); +int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree, + const int* const code_lengths, + int* const huff_codes, + int code_lengths_size); // Build a Huffman tree with explicitly given lists of code lengths, codes // and symbols. Verifies that all symbols added are smaller than max_symbol. // Returns false in case of an invalid symbol, invalid tree or memory error. -int HuffmanTreeBuildExplicit(HuffmanTree* const tree, - const int* const code_lengths, - const int* const codes, - const int* const symbols, int max_symbol, - int num_symbols); +int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree, + const int* const code_lengths, + const int* const codes, + const int* const symbols, int max_symbol, + int num_symbols); // Utility: converts Huffman code lengths to corresponding Huffman codes. // 'huff_codes' should be pre-allocated. // Returns false in case of error (memory allocation, invalid codes). -int HuffmanCodeLengthsToCodes(const int* const code_lengths, - int code_lengths_size, int* const huff_codes); - +int VP8LHuffmanCodeLengthsToCodes(const int* const code_lengths, + int code_lengths_size, int* const huff_codes); #ifdef __cplusplus } // extern "C" diff --git a/src/3rdparty/libwebp/src/utils/huffman_encode.c b/src/3rdparty/libwebp/src/utils/huffman_encode.c index 9c59867..6421c2b 100644 --- a/src/3rdparty/libwebp/src/utils/huffman_encode.c +++ b/src/3rdparty/libwebp/src/utils/huffman_encode.c @@ -28,13 +28,13 @@ static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) { // Change the population counts in a way that the consequent // Huffman tree compression, especially its RLE-part, give smaller output. -static int OptimizeHuffmanForRle(int length, int* const counts) { - uint8_t* good_for_rle; +static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle, + uint32_t* const counts) { // 1) Let's make the Huffman code more compatible with rle encoding. int i; for (; length >= 0; --length) { if (length == 0) { - return 1; // All zeros. + return; // All zeros. } if (counts[length - 1] != 0) { // Now counts[0..length - 1] does not have trailing zeros. @@ -43,15 +43,11 @@ static int OptimizeHuffmanForRle(int length, int* const counts) { } // 2) Let's mark all population counts that already can be encoded // with an rle code. - good_for_rle = (uint8_t*)calloc(length, 1); - if (good_for_rle == NULL) { - return 0; - } { // Let's not spoil any of the existing good rle codes. // Mark any seq of 0's that is longer as 5 as a good_for_rle. // Mark any seq of non-0's that is longer as 7 as a good_for_rle. - int symbol = counts[0]; + uint32_t symbol = counts[0]; int stride = 0; for (i = 0; i < length + 1; ++i) { if (i == length || counts[i] != symbol) { @@ -73,17 +69,17 @@ static int OptimizeHuffmanForRle(int length, int* const counts) { } // 3) Let's replace those population counts that lead to more rle codes. { - int stride = 0; - int limit = counts[0]; - int sum = 0; + uint32_t stride = 0; + uint32_t limit = counts[0]; + uint32_t sum = 0; for (i = 0; i < length + 1; ++i) { if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) || !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) { if (stride >= 4 || (stride >= 3 && sum == 0)) { - int k; + uint32_t k; // The stride must end, collapse what we have, if we have enough (4). - int count = (sum + stride / 2) / stride; + uint32_t count = (sum + stride / 2) / stride; if (count < 1) { count = 1; } @@ -119,17 +115,8 @@ static int OptimizeHuffmanForRle(int length, int* const counts) { } } } - free(good_for_rle); - return 1; } -typedef struct { - int total_count_; - int value_; - int pool_index_left_; - int pool_index_right_; -} HuffmanTree; - // A comparer function for two Huffman trees: sorts first by 'total count' // (more comes first), and then by 'value' (more comes first). static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) { @@ -175,12 +162,12 @@ static void SetBitDepths(const HuffmanTree* const tree, // we are not planning to use this with extremely long blocks. // // See http://en.wikipedia.org/wiki/Huffman_coding -static int GenerateOptimalTree(const int* const histogram, int histogram_size, - int tree_depth_limit, - uint8_t* const bit_depths) { - int count_min; +static void GenerateOptimalTree(const uint32_t* const histogram, + int histogram_size, + HuffmanTree* tree, int tree_depth_limit, + uint8_t* const bit_depths) { + uint32_t count_min; HuffmanTree* tree_pool; - HuffmanTree* tree; int tree_size_orig = 0; int i; @@ -191,15 +178,9 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, } if (tree_size_orig == 0) { // pretty optimal already! - return 1; + return; } - // 3 * tree_size is enough to cover all the nodes representing a - // population and all the inserted nodes combining two existing nodes. - // The tree pool needs 2 * (tree_size_orig - 1) entities, and the - // tree needs exactly tree_size_orig entities. - tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree)); - if (tree == NULL) return 0; tree_pool = tree + tree_size_orig; // For block sizes with less than 64k symbols we never need to do a @@ -215,7 +196,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, int j; for (j = 0; j < histogram_size; ++j) { if (histogram[j] != 0) { - const int count = + const uint32_t count = (histogram[j] < count_min) ? count_min : histogram[j]; tree[idx].total_count_ = count; tree[idx].value_ = j; @@ -231,7 +212,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, if (tree_size > 1) { // Normal case. int tree_pool_size = 0; while (tree_size > 1) { // Finish when we have only one root. - int count; + uint32_t count; tree_pool[tree_pool_size++] = tree[tree_size - 1]; tree_pool[tree_pool_size++] = tree[tree_size - 2]; count = tree_pool[tree_pool_size - 1].total_count_ + @@ -272,8 +253,6 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, } } } - free(tree); - return 1; } // ----------------------------------------------------------------------------- @@ -424,17 +403,15 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { // ----------------------------------------------------------------------------- // Main entry point -int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit, - HuffmanTreeCode* const tree) { - const int num_symbols = tree->num_symbols; - if (!OptimizeHuffmanForRle(num_symbols, histogram)) { - return 0; - } - if (!GenerateOptimalTree(histogram, num_symbols, - tree_depth_limit, tree->code_lengths)) { - return 0; - } +void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, + uint8_t* const buf_rle, + HuffmanTree* const huff_tree, + HuffmanTreeCode* const huff_code) { + const int num_symbols = huff_code->num_symbols; + memset(buf_rle, 0, num_symbols * sizeof(*buf_rle)); + OptimizeHuffmanForRle(num_symbols, buf_rle, histogram); + GenerateOptimalTree(histogram, num_symbols, huff_tree, tree_depth_limit, + huff_code->code_lengths); // Create the actual bit codes for the bit lengths. - ConvertBitDepthsToSymbols(tree); - return 1; + ConvertBitDepthsToSymbols(huff_code); } diff --git a/src/3rdparty/libwebp/src/utils/huffman_encode.h b/src/3rdparty/libwebp/src/utils/huffman_encode.h index ee51c68..91aa18f 100644 --- a/src/3rdparty/libwebp/src/utils/huffman_encode.h +++ b/src/3rdparty/libwebp/src/utils/huffman_encode.h @@ -33,14 +33,26 @@ typedef struct { uint16_t* codes; // Symbol Codes. } HuffmanTreeCode; +// Struct to represent the Huffman tree. +// TODO(vikasa): Add comment for the fields of the Struct. +typedef struct { + uint32_t total_count_; + int value_; + int pool_index_left_; // Index for the left sub-tree. + int pool_index_right_; // Index for the right sub-tree. +} HuffmanTree; + // Turn the Huffman tree into a token sequence. // Returns the number of tokens used. int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, HuffmanTreeToken* tokens, int max_tokens); // Create an optimized tree, and tokenize it. -int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit, - HuffmanTreeCode* const tree); +// 'buf_rle' and 'huff_tree' are pre-allocated and the 'tree' is the constructed +// huffman code tree. +void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, + uint8_t* const buf_rle, HuffmanTree* const huff_tree, + HuffmanTreeCode* const tree); #ifdef __cplusplus } diff --git a/src/3rdparty/libwebp/src/utils/quant_levels_dec.c b/src/3rdparty/libwebp/src/utils/quant_levels_dec.c index 8489705..5b8b8b4 100644 --- a/src/3rdparty/libwebp/src/utils/quant_levels_dec.c +++ b/src/3rdparty/libwebp/src/utils/quant_levels_dec.c @@ -7,18 +7,273 @@ // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // -// TODO(skal): implement gradient smoothing. +// Implement gradient smoothing: we replace a current alpha value by its +// surrounding average if it's close enough (that is: the change will be less +// than the minimum distance between two quantized level). +// We use sliding window for computing the 2d moving average. // // Author: Skal (pascal.massimino@gmail.com) #include "./quant_levels_dec.h" -int DequantizeLevels(uint8_t* const data, int width, int height, - int row, int num_rows) { - if (data == NULL || width <= 0 || height <= 0 || row < 0 || num_rows < 0 || - row + num_rows > height) { - return 0; +#include <string.h> // for memset + +#include "./utils.h" + +// #define USE_DITHERING // uncomment to enable ordered dithering (not vital) + +#define FIX 16 // fix-point precision for averaging +#define LFIX 2 // extra precision for look-up table +#define LUT_SIZE ((1 << (8 + LFIX)) - 1) // look-up table size + +#if defined(USE_DITHERING) + +#define DFIX 4 // extra precision for ordered dithering +#define DSIZE 4 // dithering size (must be a power of two) +// cf. http://en.wikipedia.org/wiki/Ordered_dithering +static const uint8_t kOrderedDither[DSIZE][DSIZE] = { + { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision + { 12, 4, 14, 6 }, + { 3, 11, 1, 9 }, + { 15, 7, 13, 5 } +}; + +#else +#define DFIX 0 +#endif + +typedef struct { + int width_, height_; // dimension + int row_; // current input row being processed + uint8_t* src_; // input pointer + uint8_t* dst_; // output pointer + + int radius_; // filter radius (=delay) + int scale_; // normalization factor, in FIX bits precision + + void* mem_; // all memory + + // various scratch buffers + uint16_t* start_; + uint16_t* cur_; + uint16_t* end_; + uint16_t* top_; + uint16_t* average_; + + // input levels distribution + int num_levels_; // number of quantized levels + int min_, max_; // min and max level values + int min_level_dist_; // smallest distance between two consecutive levels + + int16_t* correction_; // size = 1 + 2*LUT_SIZE -> ~4k memory +} SmoothParams; + +//------------------------------------------------------------------------------ + +#define CLIP_MASK (int)(~0U << (8 + DFIX)) +static WEBP_INLINE uint8_t clip_8b(int v) { + return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u; +} + +// vertical accumulation +static void VFilter(SmoothParams* const p) { + const uint8_t* src = p->src_; + const int w = p->width_; + uint16_t* const cur = p->cur_; + const uint16_t* const top = p->top_; + uint16_t* const out = p->end_; + uint16_t sum = 0; // all arithmetic is modulo 16bit + int x; + + for (x = 0; x < w; ++x) { + uint16_t new_value; + sum += src[x]; + new_value = top[x] + sum; + out[x] = new_value - cur[x]; // vertical sum of 'r' pixels. + cur[x] = new_value; + } + // move input pointers one row down + p->top_ = p->cur_; + p->cur_ += w; + if (p->cur_ == p->end_) p->cur_ = p->start_; // roll-over + // We replicate edges, as it's somewhat easier as a boundary condition. + // That's why we don't update the 'src' pointer on top/bottom area: + if (p->row_ >= 0 && p->row_ < p->height_ - 1) { + p->src_ += p->width_; + } +} + +// horizontal accumulation. We use mirror replication of missing pixels, as it's +// a little easier to implement (surprisingly). +static void HFilter(SmoothParams* const p) { + const uint16_t* const in = p->end_; + uint16_t* const out = p->average_; + const uint32_t scale = p->scale_; + const int w = p->width_; + const int r = p->radius_; + + int x; + for (x = 0; x <= r; ++x) { // left mirroring + const uint16_t delta = in[x + r - 1] + in[r - x]; + out[x] = (delta * scale) >> FIX; + } + for (; x < w - r; ++x) { // bulk middle run + const uint16_t delta = in[x + r] - in[x - r - 1]; + out[x] = (delta * scale) >> FIX; + } + for (; x < w; ++x) { // right mirroring + const uint16_t delta = + 2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1]; + out[x] = (delta * scale) >> FIX; + } +} + +// emit one filtered output row +static void ApplyFilter(SmoothParams* const p) { + const uint16_t* const average = p->average_; + const int w = p->width_; + const int16_t* const correction = p->correction_; +#if defined(USE_DITHERING) + const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE]; +#endif + uint8_t* const dst = p->dst_; + int x; + for (x = 0; x < w; ++x) { + const int v = dst[x]; + if (v < p->max_ && v > p->min_) { + const int c = (v << DFIX) + correction[average[x] - (v << LFIX)]; +#if defined(USE_DITHERING) + dst[x] = clip_8b(c + dither[x % DSIZE]); +#else + dst[x] = clip_8b(c); +#endif + } } + p->dst_ += w; // advance output pointer +} + +//------------------------------------------------------------------------------ +// Initialize correction table + +static void InitCorrectionLUT(int16_t* const lut, int min_dist) { + // The correction curve is: + // f(x) = x for x <= threshold2 + // f(x) = 0 for x >= threshold1 + // and a linear interpolation for range x=[threshold2, threshold1] + // (along with f(-x) = -f(x) symmetry). + // Note that: threshold2 = 3/4 * threshold1 + const int threshold1 = min_dist << LFIX; + const int threshold2 = (3 * threshold1) >> 2; + const int max_threshold = threshold2 << DFIX; + const int delta = threshold1 - threshold2; + int i; + for (i = 1; i <= LUT_SIZE; ++i) { + int c = (i <= threshold2) ? (i << DFIX) + : (i < threshold1) ? max_threshold * (threshold1 - i) / delta + : 0; + c >>= LFIX; + lut[+i] = +c; + lut[-i] = -c; + } + lut[0] = 0; +} + +static void CountLevels(const uint8_t* const data, int size, + SmoothParams* const p) { + int i, last_level; + uint8_t used_levels[256] = { 0 }; + p->min_ = 255; + p->max_ = 0; + for (i = 0; i < size; ++i) { + const int v = data[i]; + if (v < p->min_) p->min_ = v; + if (v > p->max_) p->max_ = v; + used_levels[v] = 1; + } + // Compute the mininum distance between two non-zero levels. + p->min_level_dist_ = p->max_ - p->min_; + last_level = -1; + for (i = 0; i < 256; ++i) { + if (used_levels[i]) { + ++p->num_levels_; + if (last_level >= 0) { + const int level_dist = i - last_level; + if (level_dist < p->min_level_dist_) { + p->min_level_dist_ = level_dist; + } + } + last_level = i; + } + } +} + +// Initialize all params. +static int InitParams(uint8_t* const data, int width, int height, + int radius, SmoothParams* const p) { + const int R = 2 * radius + 1; // total size of the kernel + + const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_); + const size_t size_m = width * sizeof(*p->average_); + const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_); + const size_t total_size = size_scratch_m + size_m + size_lut; + uint8_t* mem = (uint8_t*)WebPSafeMalloc(1U, total_size); + + if (mem == NULL) return 0; + p->mem_ = (void*)mem; + + p->start_ = (uint16_t*)mem; + p->cur_ = p->start_; + p->end_ = p->start_ + R * width; + p->top_ = p->end_ - width; + memset(p->top_, 0, width * sizeof(*p->top_)); + mem += size_scratch_m; + + p->average_ = (uint16_t*)mem; + mem += size_m; + + p->width_ = width; + p->height_ = height; + p->src_ = data; + p->dst_ = data; + p->radius_ = radius; + p->scale_ = (1 << (FIX + LFIX)) / (R * R); // normalization constant + p->row_ = -radius; + + // analyze the input distribution so we can best-fit the threshold + CountLevels(data, width * height, p); + + // correction table + p->correction_ = ((int16_t*)mem) + LUT_SIZE; + InitCorrectionLUT(p->correction_, p->min_level_dist_); + return 1; } +static void CleanupParams(SmoothParams* const p) { + WebPSafeFree(p->mem_); +} + +int WebPDequantizeLevels(uint8_t* const data, int width, int height, + int strength) { + const int radius = 4 * strength / 100; + if (strength < 0 || strength > 100) return 0; + if (data == NULL || width <= 0 || height <= 0) return 0; // bad params + if (radius > 0) { + SmoothParams p; + memset(&p, 0, sizeof(p)); + if (!InitParams(data, width, height, radius, &p)) return 0; + if (p.num_levels_ > 2) { + for (; p.row_ < p.height_; ++p.row_) { + VFilter(&p); // accumulate average of input + // Need to wait few rows in order to prime the filter, + // before emitting some output. + if (p.row_ >= p.radius_) { + HFilter(&p); + ApplyFilter(&p); + } + } + } + CleanupParams(&p); + } + return 1; +} diff --git a/src/3rdparty/libwebp/src/utils/quant_levels_dec.h b/src/3rdparty/libwebp/src/utils/quant_levels_dec.h index 0288383..9aab068 100644 --- a/src/3rdparty/libwebp/src/utils/quant_levels_dec.h +++ b/src/3rdparty/libwebp/src/utils/quant_levels_dec.h @@ -21,11 +21,12 @@ extern "C" { #endif // Apply post-processing to input 'data' of size 'width'x'height' assuming that -// the source was quantized to a reduced number of levels. The post-processing -// will be applied to 'num_rows' rows of 'data' starting from 'row'. -// Returns false in case of error (data is NULL, invalid parameters, ...). -int DequantizeLevels(uint8_t* const data, int width, int height, - int row, int num_rows); +// the source was quantized to a reduced number of levels. +// Strength is in [0..100] and controls the amount of dithering applied. +// Returns false in case of error (data is NULL, invalid parameters, +// malloc failure, ...). +int WebPDequantizeLevels(uint8_t* const data, int width, int height, + int strength); #ifdef __cplusplus } // extern "C" diff --git a/src/3rdparty/libwebp/src/utils/random.h b/src/3rdparty/libwebp/src/utils/random.h index 08a83e9..c392a61 100644 --- a/src/3rdparty/libwebp/src/utils/random.h +++ b/src/3rdparty/libwebp/src/utils/random.h @@ -45,7 +45,8 @@ static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits, rg->tab_[rg->index1_] = diff; if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0; if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0; - diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center + // sign-extend, 0-center + diff = (int)((uint32_t)diff << 1) >> (32 - num_bits); diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range diff += 1 << (num_bits - 1); // shift back to 0.5-center return diff; diff --git a/src/3rdparty/libwebp/src/utils/rescaler.c b/src/3rdparty/libwebp/src/utils/rescaler.c index 7061246..fad9c6b 100644 --- a/src/3rdparty/libwebp/src/utils/rescaler.c +++ b/src/3rdparty/libwebp/src/utils/rescaler.c @@ -14,41 +14,20 @@ #include <assert.h> #include <stdlib.h> #include "./rescaler.h" +#include "../dsp/dsp.h" //------------------------------------------------------------------------------ +// Implementations of critical functions ImportRow / ExportRow + +void (*WebPRescalerImportRow)(WebPRescaler* const wrk, + const uint8_t* const src, int channel) = NULL; +void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL; #define RFIX 30 #define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) -void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, - uint8_t* const dst, int dst_width, int dst_height, - int dst_stride, int num_channels, int x_add, int x_sub, - int y_add, int y_sub, int32_t* const work) { - wrk->x_expand = (src_width < dst_width); - wrk->src_width = src_width; - wrk->src_height = src_height; - wrk->dst_width = dst_width; - wrk->dst_height = dst_height; - wrk->dst = dst; - wrk->dst_stride = dst_stride; - wrk->num_channels = num_channels; - // for 'x_expand', we use bilinear interpolation - wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; - wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; - wrk->y_accum = y_add; - wrk->y_add = y_add; - wrk->y_sub = y_sub; - wrk->fx_scale = (1 << RFIX) / x_sub; - wrk->fy_scale = (1 << RFIX) / y_sub; - wrk->fxy_scale = wrk->x_expand ? - ((int64_t)dst_height << RFIX) / (x_sub * src_height) : - ((int64_t)dst_height << RFIX) / (x_add * src_height); - wrk->irow = work; - wrk->frow = work + num_channels * dst_width; -} - -void WebPRescalerImportRow(WebPRescaler* const wrk, - const uint8_t* const src, int channel) { +static void ImportRowC(WebPRescaler* const wrk, + const uint8_t* const src, int channel) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; int x_in = channel; @@ -84,22 +63,20 @@ void WebPRescalerImportRow(WebPRescaler* const wrk, accum -= wrk->x_sub; } } - // Accumulate the new row's contribution + // Accumulate the contribution of the new row. for (x_out = channel; x_out < x_out_max; x_out += x_stride) { wrk->irow[x_out] += wrk->frow[x_out]; } } -uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) { +static void ExportRowC(WebPRescaler* const wrk, int x_out) { if (wrk->y_accum <= 0) { - int x_out; uint8_t* const dst = wrk->dst; int32_t* const irow = wrk->irow; const int32_t* const frow = wrk->frow; const int yscale = wrk->fy_scale * (-wrk->y_accum); const int x_out_max = wrk->dst_width * wrk->num_channels; - - for (x_out = 0; x_out < x_out_max; ++x_out) { + for (; x_out < x_out_max; ++x_out) { const int frac = (int)MULT_FIX(frow[x_out], yscale); const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; @@ -107,9 +84,214 @@ uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) { } wrk->y_accum += wrk->y_add; wrk->dst += wrk->dst_stride; - return dst; + } +} + +//------------------------------------------------------------------------------ +// MIPS version + +#if defined(WEBP_USE_MIPS32) + +static void ImportRowMIPS(WebPRescaler* const wrk, + const uint8_t* const src, int channel) { + const int x_stride = wrk->num_channels; + const int x_out_max = wrk->dst_width * wrk->num_channels; + const int fx_scale = wrk->fx_scale; + const int x_add = wrk->x_add; + const int x_sub = wrk->x_sub; + int* frow = wrk->frow + channel; + int* irow = wrk->irow + channel; + const uint8_t* src1 = src + channel; + int temp1, temp2, temp3; + int base, frac, sum; + int accum, accum1; + const int x_stride1 = x_stride << 2; + int loop_c = x_out_max - channel; + + if (!wrk->x_expand) { + __asm__ volatile ( + "li %[temp1], 0x8000 \n\t" + "li %[temp2], 0x10000 \n\t" + "li %[sum], 0 \n\t" + "li %[accum], 0 \n\t" + "1: \n\t" + "addu %[accum], %[accum], %[x_add] \n\t" + "blez %[accum], 3f \n\t" + "2: \n\t" + "lbu %[temp3], 0(%[src1]) \n\t" + "subu %[accum], %[accum], %[x_sub] \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "addu %[sum], %[sum], %[temp3] \n\t" + "bgtz %[accum], 2b \n\t" + "3: \n\t" + "lbu %[base], 0(%[src1]) \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "negu %[accum1], %[accum] \n\t" + "mul %[frac], %[base], %[accum1] \n\t" + "addu %[temp3], %[sum], %[base] \n\t" + "mul %[temp3], %[temp3], %[x_sub] \n\t" + "lw %[base], 0(%[irow]) \n\t" + "subu %[loop_c], %[loop_c], %[x_stride] \n\t" + "sll %[accum1], %[frac], 2 \n\t" + "mult %[temp1], %[temp2] \n\t" + "madd %[accum1], %[fx_scale] \n\t" + "mfhi %[sum] \n\t" + "subu %[temp3], %[temp3], %[frac] \n\t" + "sw %[temp3], 0(%[frow]) \n\t" + "add %[base], %[base], %[temp3] \n\t" + "sw %[base], 0(%[irow]) \n\t" + "addu %[irow], %[irow], %[x_stride1] \n\t" + "addu %[frow], %[frow], %[x_stride1] \n\t" + "bgtz %[loop_c], 1b \n\t" + + : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3), + [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac), + [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1), + [temp2] "=&r" (temp2), [temp1] "=&r" (temp1) + : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale), + [x_sub] "r" (x_sub), [x_add] "r" (x_add), + [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1) + : "memory", "hi", "lo" + ); } else { - return NULL; + __asm__ volatile ( + "lbu %[temp1], 0(%[src1]) \n\t" + "move %[temp2], %[temp1] \n\t" + "li %[accum], 0 \n\t" + "1: \n\t" + "bgez %[accum], 2f \n\t" + "move %[temp2], %[temp1] \n\t" + "addu %[src1], %[x_stride] \n\t" + "lbu %[temp1], 0(%[src1]) \n\t" + "addu %[accum], %[x_add] \n\t" + "2: \n\t" + "subu %[temp3], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[accum] \n\t" + "mul %[base], %[temp1], %[x_add] \n\t" + "subu %[accum], %[accum], %[x_sub] \n\t" + "lw %[frac], 0(%[irow]) \n\t" + "subu %[loop_c], %[loop_c], %[x_stride] \n\t" + "addu %[temp3], %[base], %[temp3] \n\t" + "sw %[temp3], 0(%[frow]) \n\t" + "addu %[frow], %[x_stride1] \n\t" + "addu %[frac], %[temp3] \n\t" + "sw %[frac], 0(%[irow]) \n\t" + "addu %[irow], %[x_stride1] \n\t" + "bgtz %[loop_c], 1b \n\t" + + : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), + [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base), + [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow) + : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), + [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c) + : "memory", "hi", "lo" + ); + } +} + +static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) { + if (wrk->y_accum <= 0) { + uint8_t* const dst = wrk->dst; + int32_t* const irow = wrk->irow; + const int32_t* const frow = wrk->frow; + const int yscale = wrk->fy_scale * (-wrk->y_accum); + const int x_out_max = wrk->dst_width * wrk->num_channels; + // if wrk->fxy_scale can fit into 32 bits use optimized code, + // otherwise use C code + if ((wrk->fxy_scale >> 32) == 0) { + int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end; + const int temp2 = (int)(wrk->fxy_scale); + const int temp8 = x_out_max << 2; + uint8_t* dst_t = (uint8_t*)dst; + int32_t* irow_t = (int32_t*)irow; + const int32_t* frow_t = (const int32_t*)frow; + + __asm__ volatile( + "addiu %[temp6], $zero, -256 \n\t" + "addiu %[temp7], $zero, 255 \n\t" + "li %[temp3], 0x10000 \n\t" + "li %[temp4], 0x8000 \n\t" + "addu %[loop_end], %[frow_t], %[temp8] \n\t" + "1: \n\t" + "lw %[temp0], 0(%[frow_t]) \n\t" + "mult %[temp3], %[temp4] \n\t" + "addiu %[frow_t], %[frow_t], 4 \n\t" + "sll %[temp0], %[temp0], 2 \n\t" + "madd %[temp0], %[yscale] \n\t" + "mfhi %[temp1] \n\t" + "lw %[temp0], 0(%[irow_t]) \n\t" + "addiu %[dst_t], %[dst_t], 1 \n\t" + "addiu %[irow_t], %[irow_t], 4 \n\t" + "subu %[temp0], %[temp0], %[temp1] \n\t" + "mult %[temp3], %[temp4] \n\t" + "sll %[temp0], %[temp0], 2 \n\t" + "madd %[temp0], %[temp2] \n\t" + "mfhi %[temp5] \n\t" + "sw %[temp1], -4(%[irow_t]) \n\t" + "and %[temp0], %[temp5], %[temp6] \n\t" + "slti %[temp1], %[temp5], 0 \n\t" + "beqz %[temp0], 2f \n\t" + "xor %[temp5], %[temp5], %[temp5] \n\t" + "movz %[temp5], %[temp7], %[temp1] \n\t" + "2: \n\t" + "sb %[temp5], -1(%[dst_t]) \n\t" + "bne %[frow_t], %[loop_end], 1b \n\t" + + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), + [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), + [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t), + [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end) + : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8) + : "memory", "hi", "lo" + ); + wrk->y_accum += wrk->y_add; + wrk->dst += wrk->dst_stride; + } else { + ExportRowC(wrk, x_out); + } + } +} +#endif // WEBP_USE_MIPS32 + +//------------------------------------------------------------------------------ + +void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, + uint8_t* const dst, int dst_width, int dst_height, + int dst_stride, int num_channels, int x_add, int x_sub, + int y_add, int y_sub, int32_t* const work) { + wrk->x_expand = (src_width < dst_width); + wrk->src_width = src_width; + wrk->src_height = src_height; + wrk->dst_width = dst_width; + wrk->dst_height = dst_height; + wrk->dst = dst; + wrk->dst_stride = dst_stride; + wrk->num_channels = num_channels; + // for 'x_expand', we use bilinear interpolation + wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; + wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; + wrk->y_accum = y_add; + wrk->y_add = y_add; + wrk->y_sub = y_sub; + wrk->fx_scale = (1 << RFIX) / x_sub; + wrk->fy_scale = (1 << RFIX) / y_sub; + wrk->fxy_scale = wrk->x_expand ? + ((int64_t)dst_height << RFIX) / (x_sub * src_height) : + ((int64_t)dst_height << RFIX) / (x_add * src_height); + wrk->irow = work; + wrk->frow = work + num_channels * dst_width; + + if (WebPRescalerImportRow == NULL) { + WebPRescalerImportRow = ImportRowC; + WebPRescalerExportRow = ExportRowC; + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + WebPRescalerImportRow = ImportRowMIPS; + WebPRescalerExportRow = ExportRowMIPS; + } +#endif + } } } @@ -142,11 +324,10 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, int WebPRescalerExport(WebPRescaler* const rescaler) { int total_exported = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - WebPRescalerExportRow(rescaler); + WebPRescalerExportRow(rescaler, 0); ++total_exported; } return total_exported; } //------------------------------------------------------------------------------ - diff --git a/src/3rdparty/libwebp/src/utils/rescaler.h b/src/3rdparty/libwebp/src/utils/rescaler.h index 68e49ce..a6f3787 100644 --- a/src/3rdparty/libwebp/src/utils/rescaler.h +++ b/src/3rdparty/libwebp/src/utils/rescaler.h @@ -52,26 +52,24 @@ void WebPRescalerInit(WebPRescaler* const rescaler, int WebPRescaleNeededLines(const WebPRescaler* const rescaler, int max_num_lines); -// Import a row of data and save its contribution in the rescaler. -// 'channel' denotes the channel number to be imported. -void WebPRescalerImportRow(WebPRescaler* const rescaler, - const uint8_t* const src, int channel); - // Import multiple rows over all channels, until at least one row is ready to // be exported. Returns the actual number of lines that were imported. int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows, const uint8_t* src, int src_stride); +// Import a row of data and save its contribution in the rescaler. +// 'channel' denotes the channel number to be imported. +extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk, + const uint8_t* const src, int channel); +// Export one row (starting at x_out position) from rescaler. +extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out); + // Return true if there is pending output rows ready. static WEBP_INLINE int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) { return (rescaler->y_accum <= 0); } -// Export one row from rescaler. Returns the pointer where output was written, -// or NULL if no row was pending. -uint8_t* WebPRescalerExportRow(WebPRescaler* const rescaler); - // Export as many rows as possible. Return the numbers of rows written. int WebPRescalerExport(WebPRescaler* const rescaler); diff --git a/src/3rdparty/libwebp/src/utils/thread.c b/src/3rdparty/libwebp/src/utils/thread.c index a9e3fae..264210b 100644 --- a/src/3rdparty/libwebp/src/utils/thread.c +++ b/src/3rdparty/libwebp/src/utils/thread.c @@ -14,11 +14,35 @@ #include <assert.h> #include <string.h> // for memset() #include "./thread.h" +#include "./utils.h" #ifdef WEBP_USE_THREAD #if defined(_WIN32) +#include <windows.h> +typedef HANDLE pthread_t; +typedef CRITICAL_SECTION pthread_mutex_t; +typedef struct { + HANDLE waiting_sem_; + HANDLE received_sem_; + HANDLE signal_event_; +} pthread_cond_t; + +#else // !_WIN32 + +#include <pthread.h> + +#endif // _WIN32 + +struct WebPWorkerImpl { + pthread_mutex_t mutex_; + pthread_cond_t condition_; + pthread_t thread_; +}; + +#if defined(_WIN32) + //------------------------------------------------------------------------------ // simplistic pthread emulation layer @@ -129,23 +153,25 @@ static int pthread_cond_wait(pthread_cond_t* const condition, //------------------------------------------------------------------------------ +static void Execute(WebPWorker* const worker); // Forward declaration. + static THREADFN ThreadLoop(void* ptr) { WebPWorker* const worker = (WebPWorker*)ptr; int done = 0; while (!done) { - pthread_mutex_lock(&worker->mutex_); + pthread_mutex_lock(&worker->impl_->mutex_); while (worker->status_ == OK) { // wait in idling mode - pthread_cond_wait(&worker->condition_, &worker->mutex_); + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); } if (worker->status_ == WORK) { - WebPWorkerExecute(worker); + Execute(worker); worker->status_ = OK; } else if (worker->status_ == NOT_OK) { // finish the worker done = 1; } // signal to the main thread that we're done (for Sync()) - pthread_cond_signal(&worker->condition_); - pthread_mutex_unlock(&worker->mutex_); + pthread_cond_signal(&worker->impl_->condition_); + pthread_mutex_unlock(&worker->impl_->mutex_); } return THREAD_RETURN(NULL); // Thread is finished } @@ -153,32 +179,36 @@ static THREADFN ThreadLoop(void* ptr) { // main thread state control static void ChangeState(WebPWorker* const worker, WebPWorkerStatus new_status) { - // no-op when attempting to change state on a thread that didn't come up - if (worker->status_ < OK) return; + // No-op when attempting to change state on a thread that didn't come up. + // Checking status_ without acquiring the lock first would result in a data + // race. + if (worker->impl_ == NULL) return; - pthread_mutex_lock(&worker->mutex_); - // wait for the worker to finish - while (worker->status_ != OK) { - pthread_cond_wait(&worker->condition_, &worker->mutex_); - } - // assign new status and release the working thread if needed - if (new_status != OK) { - worker->status_ = new_status; - pthread_cond_signal(&worker->condition_); + pthread_mutex_lock(&worker->impl_->mutex_); + if (worker->status_ >= OK) { + // wait for the worker to finish + while (worker->status_ != OK) { + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); + } + // assign new status and release the working thread if needed + if (new_status != OK) { + worker->status_ = new_status; + pthread_cond_signal(&worker->impl_->condition_); + } } - pthread_mutex_unlock(&worker->mutex_); + pthread_mutex_unlock(&worker->impl_->mutex_); } #endif // WEBP_USE_THREAD //------------------------------------------------------------------------------ -void WebPWorkerInit(WebPWorker* const worker) { +static void Init(WebPWorker* const worker) { memset(worker, 0, sizeof(*worker)); worker->status_ = NOT_OK; } -int WebPWorkerSync(WebPWorker* const worker) { +static int Sync(WebPWorker* const worker) { #ifdef WEBP_USE_THREAD ChangeState(worker, OK); #endif @@ -186,56 +216,94 @@ int WebPWorkerSync(WebPWorker* const worker) { return !worker->had_error; } -int WebPWorkerReset(WebPWorker* const worker) { +static int Reset(WebPWorker* const worker) { int ok = 1; worker->had_error = 0; if (worker->status_ < OK) { #ifdef WEBP_USE_THREAD - if (pthread_mutex_init(&worker->mutex_, NULL) || - pthread_cond_init(&worker->condition_, NULL)) { + worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_)); + if (worker->impl_ == NULL) { return 0; } - pthread_mutex_lock(&worker->mutex_); - ok = !pthread_create(&worker->thread_, NULL, ThreadLoop, worker); + if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { + goto Error; + } + if (pthread_cond_init(&worker->impl_->condition_, NULL)) { + pthread_mutex_destroy(&worker->impl_->mutex_); + goto Error; + } + pthread_mutex_lock(&worker->impl_->mutex_); + ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker); if (ok) worker->status_ = OK; - pthread_mutex_unlock(&worker->mutex_); + pthread_mutex_unlock(&worker->impl_->mutex_); + if (!ok) { + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); + Error: + WebPSafeFree(worker->impl_); + worker->impl_ = NULL; + return 0; + } #else worker->status_ = OK; #endif } else if (worker->status_ > OK) { - ok = WebPWorkerSync(worker); + ok = Sync(worker); } assert(!ok || (worker->status_ == OK)); return ok; } -void WebPWorkerExecute(WebPWorker* const worker) { +static void Execute(WebPWorker* const worker) { if (worker->hook != NULL) { worker->had_error |= !worker->hook(worker->data1, worker->data2); } } -void WebPWorkerLaunch(WebPWorker* const worker) { +static void Launch(WebPWorker* const worker) { #ifdef WEBP_USE_THREAD ChangeState(worker, WORK); #else - WebPWorkerExecute(worker); + Execute(worker); #endif } -void WebPWorkerEnd(WebPWorker* const worker) { - if (worker->status_ >= OK) { +static void End(WebPWorker* const worker) { #ifdef WEBP_USE_THREAD + if (worker->impl_ != NULL) { ChangeState(worker, NOT_OK); - pthread_join(worker->thread_, NULL); - pthread_mutex_destroy(&worker->mutex_); - pthread_cond_destroy(&worker->condition_); + pthread_join(worker->impl_->thread_, NULL); + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); + WebPSafeFree(worker->impl_); + worker->impl_ = NULL; + } #else - worker->status_ = NOT_OK; + worker->status_ = NOT_OK; + assert(worker->impl_ == NULL); #endif - } assert(worker->status_ == NOT_OK); } //------------------------------------------------------------------------------ +static WebPWorkerInterface g_worker_interface = { + Init, Reset, Sync, Launch, Execute, End +}; + +int WebPSetWorkerInterface(const WebPWorkerInterface* const winterface) { + if (winterface == NULL || + winterface->Init == NULL || winterface->Reset == NULL || + winterface->Sync == NULL || winterface->Launch == NULL || + winterface->Execute == NULL || winterface->End == NULL) { + return 0; + } + g_worker_interface = *winterface; + return 1; +} + +const WebPWorkerInterface* WebPGetWorkerInterface(void) { + return &g_worker_interface; +} + +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/utils/thread.h b/src/3rdparty/libwebp/src/utils/thread.h index aef33bd..7bd451b 100644 --- a/src/3rdparty/libwebp/src/utils/thread.h +++ b/src/3rdparty/libwebp/src/utils/thread.h @@ -15,33 +15,15 @@ #define WEBP_UTILS_THREAD_H_ #ifdef HAVE_CONFIG_H -#include "config.h" +#include "../webp/config.h" #endif +#include "../webp/types.h" + #ifdef __cplusplus extern "C" { #endif -#ifdef WEBP_USE_THREAD - -#if defined(_WIN32) - -#include <windows.h> -typedef HANDLE pthread_t; -typedef CRITICAL_SECTION pthread_mutex_t; -typedef struct { - HANDLE waiting_sem_; - HANDLE received_sem_; - HANDLE signal_event_; -} pthread_cond_t; - -#else - -#include <pthread.h> - -#endif /* _WIN32 */ -#endif /* WEBP_USE_THREAD */ - // State of the worker thread object typedef enum { NOT_OK = 0, // object is unusable @@ -53,13 +35,12 @@ typedef enum { // arguments (data1 and data2), and should return false in case of error. typedef int (*WebPWorkerHook)(void*, void*); -// Synchronize object used to launch job in the worker thread +// Platform-dependent implementation details for the worker. +typedef struct WebPWorkerImpl WebPWorkerImpl; + +// Synchronization object used to launch job in the worker thread typedef struct { -#ifdef WEBP_USE_THREAD - pthread_mutex_t mutex_; - pthread_cond_t condition_; - pthread_t thread_; -#endif + WebPWorkerImpl* impl_; WebPWorkerStatus status_; WebPWorkerHook hook; // hook to call void* data1; // first argument passed to 'hook' @@ -67,26 +48,41 @@ typedef struct { int had_error; // return value of the last call to 'hook' } WebPWorker; -// Must be called first, before any other method. -void WebPWorkerInit(WebPWorker* const worker); -// Must be called to initialize the object and spawn the thread. Re-entrant. -// Will potentially launch the thread. Returns false in case of error. -int WebPWorkerReset(WebPWorker* const worker); -// Makes sure the previous work is finished. Returns true if worker->had_error -// was not set and no error condition was triggered by the working thread. -int WebPWorkerSync(WebPWorker* const worker); -// Triggers the thread to call hook() with data1 and data2 argument. These -// hook/data1/data2 can be changed at any time before calling this function, -// but not be changed afterward until the next call to WebPWorkerSync(). -void WebPWorkerLaunch(WebPWorker* const worker); -// This function is similar to WebPWorkerLaunch() except that it calls the -// hook directly instead of using a thread. Convenient to bypass the thread -// mechanism while still using the WebPWorker structs. WebPWorkerSync() must -// still be called afterward (for error reporting). -void WebPWorkerExecute(WebPWorker* const worker); -// Kill the thread and terminate the object. To use the object again, one -// must call WebPWorkerReset() again. -void WebPWorkerEnd(WebPWorker* const worker); +// The interface for all thread-worker related functions. All these functions +// must be implemented. +typedef struct { + // Must be called first, before any other method. + void (*Init)(WebPWorker* const worker); + // Must be called to initialize the object and spawn the thread. Re-entrant. + // Will potentially launch the thread. Returns false in case of error. + int (*Reset)(WebPWorker* const worker); + // Makes sure the previous work is finished. Returns true if worker->had_error + // was not set and no error condition was triggered by the working thread. + int (*Sync)(WebPWorker* const worker); + // Triggers the thread to call hook() with data1 and data2 arguments. These + // hook/data1/data2 values can be changed at any time before calling this + // function, but not be changed afterward until the next call to Sync(). + void (*Launch)(WebPWorker* const worker); + // This function is similar to Launch() except that it calls the + // hook directly instead of using a thread. Convenient to bypass the thread + // mechanism while still using the WebPWorker structs. Sync() must + // still be called afterward (for error reporting). + void (*Execute)(WebPWorker* const worker); + // Kill the thread and terminate the object. To use the object again, one + // must call Reset() again. + void (*End)(WebPWorker* const worker); +} WebPWorkerInterface; + +// Install a new set of threading functions, overriding the defaults. This +// should be done before any workers are started, i.e., before any encoding or +// decoding takes place. The contents of the interface struct are copied, it +// is safe to free the corresponding memory after this call. This function is +// not thread-safe. Return false in case of invalid pointer or methods. +WEBP_EXTERN(int) WebPSetWorkerInterface( + const WebPWorkerInterface* const interface); + +// Retrieve the currently set thread worker interface. +WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void); //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/utils/utils.c b/src/3rdparty/libwebp/src/utils/utils.c index 5592538..8ff7f12 100644 --- a/src/3rdparty/libwebp/src/utils/utils.c +++ b/src/3rdparty/libwebp/src/utils/utils.c @@ -14,29 +14,198 @@ #include <stdlib.h> #include "./utils.h" +// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of +// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow, +// and not multi-thread safe!). +// An interesting alternative is valgrind's 'massif' tool: +// http://valgrind.org/docs/manual/ms-manual.html +// Here is an example command line: +/* valgrind --tool=massif --massif-out-file=massif.out \ + --stacks=yes --alloc-fn=WebPSafeAlloc --alloc-fn=WebPSafeCalloc + ms_print massif.out +*/ +// In addition: +// * if PRINT_MEM_TRAFFIC is defined, all the details of the malloc/free cycles +// are printed. +// * if MALLOC_FAIL_AT is defined, the global environment variable +// $MALLOC_FAIL_AT is used to simulate a memory error when calloc or malloc +// is called for the nth time. Example usage: +// export MALLOC_FAIL_AT=50 && ./examples/cwebp input.png +// * if MALLOC_LIMIT is defined, the global environment variable $MALLOC_LIMIT +// sets the maximum amount of memory (in bytes) made available to libwebp. +// This can be used to emulate environment with very limited memory. +// Example: export MALLOC_LIMIT=64000000 && ./examples/dwebp picture.webp + +// #define PRINT_MEM_INFO +// #define PRINT_MEM_TRAFFIC +// #define MALLOC_FAIL_AT +// #define MALLOC_LIMIT + //------------------------------------------------------------------------------ // Checked memory allocation +#if defined(PRINT_MEM_INFO) + +#include <stdio.h> +#include <stdlib.h> // for abort() + +static int num_malloc_calls = 0; +static int num_calloc_calls = 0; +static int num_free_calls = 0; +static int countdown_to_fail = 0; // 0 = off + +typedef struct MemBlock MemBlock; +struct MemBlock { + void* ptr_; + size_t size_; + MemBlock* next_; +}; + +static MemBlock* all_blocks = NULL; +static size_t total_mem = 0; +static size_t total_mem_allocated = 0; +static size_t high_water_mark = 0; +static size_t mem_limit = 0; + +static int exit_registered = 0; + +static void PrintMemInfo(void) { + fprintf(stderr, "\nMEMORY INFO:\n"); + fprintf(stderr, "num calls to: malloc = %4d\n", num_malloc_calls); + fprintf(stderr, " calloc = %4d\n", num_calloc_calls); + fprintf(stderr, " free = %4d\n", num_free_calls); + fprintf(stderr, "total_mem: %u\n", (uint32_t)total_mem); + fprintf(stderr, "total_mem allocated: %u\n", (uint32_t)total_mem_allocated); + fprintf(stderr, "high-water mark: %u\n", (uint32_t)high_water_mark); + while (all_blocks != NULL) { + MemBlock* b = all_blocks; + all_blocks = b->next_; + free(b); + } +} + +static void Increment(int* const v) { + if (!exit_registered) { +#if defined(MALLOC_FAIL_AT) + { + const char* const malloc_fail_at_str = getenv("MALLOC_FAIL_AT"); + if (malloc_fail_at_str != NULL) { + countdown_to_fail = atoi(malloc_fail_at_str); + } + } +#endif +#if defined(MALLOC_LIMIT) + { + const char* const malloc_limit_str = getenv("MALLOC_LIMIT"); + if (malloc_limit_str != NULL) { + mem_limit = atoi(malloc_limit_str); + } + } +#endif + (void)countdown_to_fail; + (void)mem_limit; + atexit(PrintMemInfo); + exit_registered = 1; + } + ++*v; +} + +static void AddMem(void* ptr, size_t size) { + if (ptr != NULL) { + MemBlock* const b = (MemBlock*)malloc(sizeof(*b)); + if (b == NULL) abort(); + b->next_ = all_blocks; + all_blocks = b; + b->ptr_ = ptr; + b->size_ = size; + total_mem += size; + total_mem_allocated += size; +#if defined(PRINT_MEM_TRAFFIC) +#if defined(MALLOC_FAIL_AT) + fprintf(stderr, "fail-count: %5d [mem=%u]\n", + num_malloc_calls + num_calloc_calls, (uint32_t)total_mem); +#else + fprintf(stderr, "Mem: %u (+%u)\n", (uint32_t)total_mem, (uint32_t)size); +#endif +#endif + if (total_mem > high_water_mark) high_water_mark = total_mem; + } +} + +static void SubMem(void* ptr) { + if (ptr != NULL) { + MemBlock** b = &all_blocks; + // Inefficient search, but that's just for debugging. + while (*b != NULL && (*b)->ptr_ != ptr) b = &(*b)->next_; + if (*b == NULL) { + fprintf(stderr, "Invalid pointer free! (%p)\n", ptr); + abort(); + } + { + MemBlock* const block = *b; + *b = block->next_; + total_mem -= block->size_; +#if defined(PRINT_MEM_TRAFFIC) + fprintf(stderr, "Mem: %u (-%u)\n", + (uint32_t)total_mem, (uint32_t)block->size_); +#endif + free(block); + } + } +} + +#else +#define Increment(v) do {} while (0) +#define AddMem(p, s) do {} while (0) +#define SubMem(p) do {} while (0) +#endif + // Returns 0 in case of overflow of nmemb * size. static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) { const uint64_t total_size = nmemb * size; if (nmemb == 0) return 1; if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0; if (total_size != (size_t)total_size) return 0; +#if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT) + if (countdown_to_fail > 0 && --countdown_to_fail == 0) { + return 0; // fake fail! + } +#endif +#if defined(MALLOC_LIMIT) + if (mem_limit > 0 && total_mem + total_size >= mem_limit) { + return 0; // fake fail! + } +#endif + return 1; } void* WebPSafeMalloc(uint64_t nmemb, size_t size) { + void* ptr; + Increment(&num_malloc_calls); if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL; assert(nmemb * size > 0); - return malloc((size_t)(nmemb * size)); + ptr = malloc((size_t)(nmemb * size)); + AddMem(ptr, (size_t)(nmemb * size)); + return ptr; } void* WebPSafeCalloc(uint64_t nmemb, size_t size) { + void* ptr; + Increment(&num_calloc_calls); if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL; assert(nmemb * size > 0); - return calloc((size_t)nmemb, size); + ptr = calloc((size_t)nmemb, size); + AddMem(ptr, (size_t)(nmemb * size)); + return ptr; } -//------------------------------------------------------------------------------ +void WebPSafeFree(void* const ptr) { + if (ptr != NULL) { + Increment(&num_free_calls); + SubMem(ptr); + } + free(ptr); +} +//------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/utils/utils.h b/src/3rdparty/libwebp/src/utils/utils.h index 8bdf0f0..f2c498a 100644 --- a/src/3rdparty/libwebp/src/utils/utils.h +++ b/src/3rdparty/libwebp/src/utils/utils.h @@ -35,10 +35,13 @@ extern "C" { // somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this // safe malloc() borrows the signature from calloc(), pointing at the dangerous // underlying multiply involved. -void* WebPSafeMalloc(uint64_t nmemb, size_t size); +WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size); // Note that WebPSafeCalloc() expects the second argument type to be 'size_t' // in order to favor the "calloc(num_foo, sizeof(foo))" pattern. -void* WebPSafeCalloc(uint64_t nmemb, size_t size); +WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size); + +// Companion deallocation function to the above allocations. +WEBP_EXTERN(void) WebPSafeFree(void* const ptr); //------------------------------------------------------------------------------ // Reading/writing data. @@ -74,6 +77,41 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) { PutLE16(data + 2, (int)(val >> 16)); } +// Returns (int)floor(log2(n)). n must be > 0. +// use GNU builtins where available. +#if defined(__GNUC__) && \ + ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + return 31 ^ __builtin_clz(n); +} +#elif defined(_MSC_VER) && _MSC_VER > 1310 && \ + (defined(_M_X64) || defined(_M_IX86)) +#include <intrin.h> +#pragma intrinsic(_BitScanReverse) + +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + uint32_t first_set_bit; + _BitScanReverse(&first_set_bit, n); + return first_set_bit; +} +#else +static WEBP_INLINE int BitsLog2Floor(uint32_t n) { + int log = 0; + uint32_t value = n; + int i; + + for (i = 4; i >= 0; --i) { + const int shift = (1 << i); + const uint32_t x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + return log; +} +#endif + //------------------------------------------------------------------------------ #ifdef __cplusplus |