summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/libwebp/src/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/libwebp/src/utils')
-rw-r--r--src/3rdparty/libwebp/src/utils/alpha_processing.c196
-rw-r--r--src/3rdparty/libwebp/src/utils/alpha_processing.h46
-rw-r--r--src/3rdparty/libwebp/src/utils/bit_reader.c159
-rw-r--r--src/3rdparty/libwebp/src/utils/bit_reader.h267
-rw-r--r--src/3rdparty/libwebp/src/utils/bit_reader_inl.h172
-rw-r--r--src/3rdparty/libwebp/src/utils/bit_writer.c135
-rw-r--r--src/3rdparty/libwebp/src/utils/bit_writer.h55
-rw-r--r--src/3rdparty/libwebp/src/utils/color_cache.c2
-rw-r--r--src/3rdparty/libwebp/src/utils/endian_inl.h100
-rw-r--r--src/3rdparty/libwebp/src/utils/huffman.c95
-rw-r--r--src/3rdparty/libwebp/src/utils/huffman.h39
-rw-r--r--src/3rdparty/libwebp/src/utils/huffman_encode.c77
-rw-r--r--src/3rdparty/libwebp/src/utils/huffman_encode.h16
-rw-r--r--src/3rdparty/libwebp/src/utils/quant_levels_dec.c267
-rw-r--r--src/3rdparty/libwebp/src/utils/quant_levels_dec.h11
-rw-r--r--src/3rdparty/libwebp/src/utils/random.h3
-rw-r--r--src/3rdparty/libwebp/src/utils/rescaler.c257
-rw-r--r--src/3rdparty/libwebp/src/utils/rescaler.h16
-rw-r--r--src/3rdparty/libwebp/src/utils/thread.c140
-rw-r--r--src/3rdparty/libwebp/src/utils/thread.h90
-rw-r--r--src/3rdparty/libwebp/src/utils/utils.c175
-rw-r--r--src/3rdparty/libwebp/src/utils/utils.h42
22 files changed, 1495 insertions, 865 deletions
diff --git a/src/3rdparty/libwebp/src/utils/alpha_processing.c b/src/3rdparty/libwebp/src/utils/alpha_processing.c
deleted file mode 100644
index 7362ff9..0000000
--- a/src/3rdparty/libwebp/src/utils/alpha_processing.c
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2013 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Utilities for processing transparent channel.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#include <assert.h>
-#include "./alpha_processing.h"
-
-// Tables can be faster on some platform but incur some extra binary size (~2k).
-// #define USE_TABLES_FOR_ALPHA_MULT
-
-// -----------------------------------------------------------------------------
-
-#define MFIX 24 // 24bit fixed-point arithmetic
-#define HALF ((1u << MFIX) >> 1)
-#define KINV_255 ((1u << MFIX) / 255u)
-
-static uint32_t Mult(uint8_t x, uint32_t mult) {
- const uint32_t v = (x * mult + HALF) >> MFIX;
- assert(v <= 255); // <- 24bit precision is enough to ensure that.
- return v;
-}
-
-#ifdef USE_TABLES_FOR_ALPHA_MULT
-
-static const uint32_t kMultTables[2][256] = {
- { // (255u << MFIX) / alpha
- 0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
- 0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
- 0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
- 0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
- 0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
- 0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
- 0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
- 0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
- 0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
- 0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
- 0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
- 0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
- 0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
- 0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
- 0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
- 0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
- 0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
- 0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
- 0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
- 0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
- 0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
- 0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
- 0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
- 0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
- 0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
- 0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
- 0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
- 0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
- 0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
- 0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
- 0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
- 0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
- 0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
- 0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
- 0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
- 0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
- 0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
- 0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
- 0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
- 0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
- 0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
- 0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
- 0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
- { // alpha * KINV_255
- 0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
- 0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
- 0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
- 0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
- 0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
- 0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
- 0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
- 0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
- 0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
- 0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
- 0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
- 0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
- 0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
- 0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
- 0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
- 0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
- 0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
- 0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
- 0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
- 0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
- 0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
- 0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
- 0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
- 0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
- 0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
- 0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
- 0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
- 0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
- 0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
- 0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
- 0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
- 0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
- 0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
- 0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
- 0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
- 0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
- 0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
- 0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
- 0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
- 0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
- 0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
- 0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
- 0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
-};
-
-static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
- return kMultTables[!inverse][a];
-}
-
-#else
-
-static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
- return inverse ? (255u << MFIX) / a : a * KINV_255;
-}
-
-#endif // USE_TABLES_FOR_ALPHA_MULT
-
-void WebPMultARGBRow(uint32_t* const ptr, int width, int inverse) {
- int x;
- for (x = 0; x < width; ++x) {
- const uint32_t argb = ptr[x];
- if (argb < 0xff000000u) { // alpha < 255
- if (argb <= 0x00ffffffu) { // alpha == 0
- ptr[x] = 0;
- } else {
- const uint32_t alpha = (argb >> 24) & 0xff;
- const uint32_t scale = GetScale(alpha, inverse);
- uint32_t out = argb & 0xff000000u;
- out |= Mult(argb >> 0, scale) << 0;
- out |= Mult(argb >> 8, scale) << 8;
- out |= Mult(argb >> 16, scale) << 16;
- ptr[x] = out;
- }
- }
- }
-}
-
-void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
- int inverse) {
- int n;
- for (n = 0; n < num_rows; ++n) {
- WebPMultARGBRow((uint32_t*)ptr, width, inverse);
- ptr += stride;
- }
-}
-
-void WebPMultRow(uint8_t* const ptr, const uint8_t* const alpha,
- int width, int inverse) {
- int x;
- for (x = 0; x < width; ++x) {
- const uint32_t a = alpha[x];
- if (a != 255) {
- if (a == 0) {
- ptr[x] = 0;
- } else {
- const uint32_t scale = GetScale(a, inverse);
- ptr[x] = Mult(ptr[x], scale);
- }
- }
- }
-}
-
-void WebPMultRows(uint8_t* ptr, int stride,
- const uint8_t* alpha, int alpha_stride,
- int width, int num_rows, int inverse) {
- int n;
- for (n = 0; n < num_rows; ++n) {
- WebPMultRow(ptr, alpha, width, inverse);
- ptr += stride;
- alpha += alpha_stride;
- }
-}
-
-#undef KINV_255
-#undef HALF
-#undef MFIX
-
diff --git a/src/3rdparty/libwebp/src/utils/alpha_processing.h b/src/3rdparty/libwebp/src/utils/alpha_processing.h
deleted file mode 100644
index 80e1ae4..0000000
--- a/src/3rdparty/libwebp/src/utils/alpha_processing.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2013 Google Inc. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the COPYING file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-// -----------------------------------------------------------------------------
-//
-// Utilities for processing transparent channel.
-//
-// Author: Skal (pascal.massimino@gmail.com)
-
-#ifndef WEBP_UTILS_ALPHA_PROCESSING_H_
-#define WEBP_UTILS_ALPHA_PROCESSING_H_
-
-#include "../webp/types.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B).
-// Un-Multiply operation transforms x into x * 255 / A.
-
-// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
-void WebPMultARGBRow(uint32_t* const ptr, int width, int inverse);
-
-// Same a WebPMultARGBRow(), but for several rows.
-void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
- int inverse);
-
-// Same for a row of single values, with side alpha values.
-void WebPMultRow(uint8_t* const ptr, const uint8_t* const alpha,
- int width, int inverse);
-
-// Same a WebPMultRow(), but for several 'num_rows' rows.
-void WebPMultRows(uint8_t* ptr, int stride,
- const uint8_t* alpha, int alpha_stride,
- int width, int num_rows, int inverse);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // WEBP_UTILS_ALPHA_PROCESSING_H_
diff --git a/src/3rdparty/libwebp/src/utils/bit_reader.c b/src/3rdparty/libwebp/src/utils/bit_reader.c
index bfa4d7d..64503e6 100644
--- a/src/3rdparty/libwebp/src/utils/bit_reader.c
+++ b/src/3rdparty/libwebp/src/utils/bit_reader.c
@@ -7,18 +7,16 @@
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
-// Boolean decoder
+// Boolean decoder non-inlined methods
//
// Author: Skal (pascal.massimino@gmail.com)
-#include "./bit_reader.h"
-
-#ifndef USE_RIGHT_JUSTIFY
-#define MK(X) (((range_t)(X) << (BITS)) | (MASK))
-#else
-#define MK(X) ((range_t)(X))
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
#endif
+#include "./bit_reader_inl.h"
+
//------------------------------------------------------------------------------
// VP8BitReader
@@ -27,12 +25,20 @@ void VP8InitBitReader(VP8BitReader* const br,
assert(br != NULL);
assert(start != NULL);
assert(start <= end);
- br->range_ = MK(255 - 1);
+ br->range_ = 255 - 1;
br->buf_ = start;
br->buf_end_ = end;
br->value_ = 0;
br->bits_ = -8; // to load the very first 8bits
br->eof_ = 0;
+ VP8LoadNewBytes(br);
+}
+
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+ if (br->buf_ != NULL) {
+ br->buf_ += offset;
+ br->buf_end_ += offset;
+ }
}
const uint8_t kVP8Log2Range[128] = {
@@ -47,45 +53,35 @@ const uint8_t kVP8Log2Range[128] = {
0
};
-// range = (range << kVP8Log2Range[range]) + trailing 1's
+// range = ((range - 1) << kVP8Log2Range[range]) + 1
const range_t kVP8NewRange[128] = {
- MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
- MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
- MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
- MK(199), MK(207), MK(215), MK(223), MK(231), MK(239), MK(247), MK(127),
- MK(131), MK(135), MK(139), MK(143), MK(147), MK(151), MK(155), MK(159),
- MK(163), MK(167), MK(171), MK(175), MK(179), MK(183), MK(187), MK(191),
- MK(195), MK(199), MK(203), MK(207), MK(211), MK(215), MK(219), MK(223),
- MK(227), MK(231), MK(235), MK(239), MK(243), MK(247), MK(251), MK(127),
- MK(129), MK(131), MK(133), MK(135), MK(137), MK(139), MK(141), MK(143),
- MK(145), MK(147), MK(149), MK(151), MK(153), MK(155), MK(157), MK(159),
- MK(161), MK(163), MK(165), MK(167), MK(169), MK(171), MK(173), MK(175),
- MK(177), MK(179), MK(181), MK(183), MK(185), MK(187), MK(189), MK(191),
- MK(193), MK(195), MK(197), MK(199), MK(201), MK(203), MK(205), MK(207),
- MK(209), MK(211), MK(213), MK(215), MK(217), MK(219), MK(221), MK(223),
- MK(225), MK(227), MK(229), MK(231), MK(233), MK(235), MK(237), MK(239),
- MK(241), MK(243), MK(245), MK(247), MK(249), MK(251), MK(253), MK(127)
+ 127, 127, 191, 127, 159, 191, 223, 127,
+ 143, 159, 175, 191, 207, 223, 239, 127,
+ 135, 143, 151, 159, 167, 175, 183, 191,
+ 199, 207, 215, 223, 231, 239, 247, 127,
+ 131, 135, 139, 143, 147, 151, 155, 159,
+ 163, 167, 171, 175, 179, 183, 187, 191,
+ 195, 199, 203, 207, 211, 215, 219, 223,
+ 227, 231, 235, 239, 243, 247, 251, 127,
+ 129, 131, 133, 135, 137, 139, 141, 143,
+ 145, 147, 149, 151, 153, 155, 157, 159,
+ 161, 163, 165, 167, 169, 171, 173, 175,
+ 177, 179, 181, 183, 185, 187, 189, 191,
+ 193, 195, 197, 199, 201, 203, 205, 207,
+ 209, 211, 213, 215, 217, 219, 221, 223,
+ 225, 227, 229, 231, 233, 235, 237, 239,
+ 241, 243, 245, 247, 249, 251, 253, 127
};
-#undef MK
-
void VP8LoadFinalBytes(VP8BitReader* const br) {
assert(br != NULL && br->buf_ != NULL);
// Only read 8bits at a time
if (br->buf_ < br->buf_end_) {
-#ifndef USE_RIGHT_JUSTIFY
- br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 - br->bits_);
-#else
- br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
-#endif
br->bits_ += 8;
+ br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
} else if (!br->eof_) {
-#ifdef USE_RIGHT_JUSTIFY
- // These are not strictly needed, but it makes the behaviour
- // consistent for both USE_RIGHT_JUSTIFY and !USE_RIGHT_JUSTIFY.
br->value_ <<= 8;
br->bits_ += 8;
-#endif
br->eof_ = 1;
}
}
@@ -109,36 +105,48 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
//------------------------------------------------------------------------------
// VP8LBitReader
-#define MAX_NUM_BIT_READ 25
+#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits.
-#define LBITS 64 // Number of bits prefetched.
-#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow.
-#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
+#if !defined(WEBP_FORCE_ALIGNED) && \
+ (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
+ defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64__) || defined(_M_X64))
+#define VP8L_USE_UNALIGNED_LOAD
+#endif
-static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
- 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
- 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
+ 0,
+ 0x000001, 0x000003, 0x000007, 0x00000f,
+ 0x00001f, 0x00003f, 0x00007f, 0x0000ff,
+ 0x0001ff, 0x0003ff, 0x0007ff, 0x000fff,
+ 0x001fff, 0x003fff, 0x007fff, 0x00ffff,
+ 0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff,
+ 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff
};
-void VP8LInitBitReader(VP8LBitReader* const br,
- const uint8_t* const start,
+void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start,
size_t length) {
size_t i;
+ vp8l_val_t value = 0;
assert(br != NULL);
assert(start != NULL);
assert(length < 0xfffffff8u); // can't happen with a RIFF chunk.
- br->buf_ = start;
br->len_ = length;
br->val_ = 0;
- br->pos_ = 0;
br->bit_pos_ = 0;
br->eos_ = 0;
br->error_ = 0;
- for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
- br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i);
- ++br->pos_;
+
+ if (length > sizeof(br->val_)) {
+ length = sizeof(br->val_);
+ }
+ for (i = 0; i < length; ++i) {
+ value |= (vp8l_val_t)start[i] << (8 * i);
}
+ br->val_ = value;
+ br->pos_ = length;
+ br->buf_ = start;
}
void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
@@ -146,55 +154,51 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
assert(br != NULL);
assert(buf != NULL);
assert(len < 0xfffffff8u); // can't happen with a RIFF chunk.
- br->eos_ = (br->pos_ >= len);
br->buf_ = buf;
br->len_ = len;
+ // pos_ > len_ should be considered a param error.
+ br->error_ = (br->pos_ > br->len_);
+ br->eos_ = br->error_ || VP8LIsEndOfStream(br);
}
-// If not at EOS, reload up to LBITS byte-by-byte
+// If not at EOS, reload up to VP8L_LBITS byte-by-byte
static void ShiftBytes(VP8LBitReader* const br) {
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
br->val_ >>= 8;
- br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
+ br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8);
++br->pos_;
br->bit_pos_ -= 8;
}
+ br->eos_ = VP8LIsEndOfStream(br);
}
-void VP8LFillBitWindow(VP8LBitReader* const br) {
- if (br->bit_pos_ >= WBITS) {
-#if (defined(__x86_64__) || defined(_M_X64))
- if (br->pos_ + sizeof(br->val_) < br->len_) {
- br->val_ >>= WBITS;
- br->bit_pos_ -= WBITS;
- // The expression below needs a little-endian arch to work correctly.
- // This gives a large speedup for decoding speed.
- br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
- br->pos_ += LOG8_WBITS;
- return;
- }
-#endif
- ShiftBytes(br); // Slow path.
- if (br->pos_ == br->len_ && br->bit_pos_ >= LBITS) {
- br->eos_ = 1;
- }
+void VP8LDoFillBitWindow(VP8LBitReader* const br) {
+ assert(br->bit_pos_ >= VP8L_WBITS);
+ // TODO(jzern): given the fixed read size it may be possible to force
+ // alignment in this block.
+#if defined(VP8L_USE_UNALIGNED_LOAD)
+ if (br->pos_ + sizeof(br->val_) < br->len_) {
+ br->val_ >>= VP8L_WBITS;
+ br->bit_pos_ -= VP8L_WBITS;
+ // The expression below needs a little-endian arch to work correctly.
+ // This gives a large speedup for decoding speed.
+ br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
+ (VP8L_LBITS - VP8L_WBITS);
+ br->pos_ += VP8L_LOG8_WBITS;
+ return;
}
+#endif
+ ShiftBytes(br); // Slow path.
}
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
assert(n_bits >= 0);
// Flag an error if end_of_stream or n_bits is more than allowed limit.
- if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
+ if (!br->eos_ && n_bits <= VP8L_MAX_NUM_BIT_READ) {
const uint32_t val =
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
const int new_bits = br->bit_pos_ + n_bits;
br->bit_pos_ = new_bits;
- // If this read is going to cross the read buffer, set the eos flag.
- if (br->pos_ == br->len_) {
- if (new_bits >= LBITS) {
- br->eos_ = 1;
- }
- }
ShiftBytes(br);
return val;
} else {
@@ -204,4 +208,3 @@ uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
}
//------------------------------------------------------------------------------
-
diff --git a/src/3rdparty/libwebp/src/utils/bit_reader.h b/src/3rdparty/libwebp/src/utils/bit_reader.h
index 98df98a..f569734 100644
--- a/src/3rdparty/libwebp/src/utils/bit_reader.h
+++ b/src/3rdparty/libwebp/src/utils/bit_reader.h
@@ -29,110 +29,62 @@ extern "C" {
// However, since range_ is only 8bit, we only need an active window of 8 bits
// for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls
// below 128, range_ is updated, and fresh bits read from the bitstream are
-// brought in as LSB.
-// To avoid reading the fresh bits one by one (slow), we cache a few of them
-// ahead (actually, we cache BITS of them ahead. See below). There's two
-// strategies regarding how to shift these looked-ahead fresh bits into the
-// 8bit window of value_: either we shift them in, while keeping the position of
-// the window fixed. Or we slide the window to the right while keeping the cache
-// bits at a fixed, right-justified, position.
+// brought in as LSB. To avoid reading the fresh bits one by one (slow), we
+// cache BITS of them ahead. The total of (BITS + 8) bits must fit into a
+// natural register (with type bit_t). To fetch BITS bits from bitstream we
+// use a type lbit_t.
//
-// Example, for BITS=16: here is the content of value_ for both strategies:
-//
-// !USE_RIGHT_JUSTIFY || USE_RIGHT_JUSTIFY
-// ||
-// <- 8b -><- 8b -><- BITS bits -> || <- 8b+3b -><- 8b -><- 13 bits ->
-// [unused][value_][cached bits][0] || [unused...][value_][cached bits]
-// [........00vvvvvvBBBBBBBBBBBBB000]LSB || [...........00vvvvvvBBBBBBBBBBBBB]
-// ||
-// After calling VP8Shift(), where we need to shift away two zeros:
-// [........vvvvvvvvBBBBBBBBBBB00000]LSB || [.............vvvvvvvvBBBBBBBBBBB]
-// ||
-// Just before we need to call VP8LoadNewBytes(), the situation is:
-// [........vvvvvv000000000000000000]LSB || [..........................vvvvvv]
-// ||
-// And just after calling VP8LoadNewBytes():
-// [........vvvvvvvvBBBBBBBBBBBBBBBB]LSB || [........vvvvvvvvBBBBBBBBBBBBBBBB]
-//
-// -> we're back to eight active 'value_' bits (marked 'v') and BITS cached
-// bits (marked 'B')
-//
-// The right-justify strategy tends to use less shifts and is often faster.
-
-//------------------------------------------------------------------------------
// BITS can be any multiple of 8 from 8 to 56 (inclusive).
// Pick values that fit natural register size.
-#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
-
-#define USE_RIGHT_JUSTIFY
-
#if defined(__i386__) || defined(_M_IX86) // x86 32bit
-#define BITS 16
+#define BITS 24
#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit
#define BITS 56
#elif defined(__arm__) || defined(_M_ARM) // ARM
#define BITS 24
-#else // reasonable default
+#elif defined(__mips__) // MIPS
#define BITS 24
-#endif
-
-#else // reference choices
-
-#define USE_RIGHT_JUSTIFY
-#define BITS 8
-
+#else // reasonable default
+#define BITS 24 // TODO(skal): test aarch64 and find the proper BITS value.
#endif
//------------------------------------------------------------------------------
-// Derived types and constants
+// Derived types and constants:
+// bit_t = natural register type for storing 'value_' (which is BITS+8 bits)
+// range_t = register for 'range_' (which is 8bits only)
-// bit_t = natural register type
-// lbit_t = natural type for memory I/O
-
-#if (BITS > 32)
-typedef uint64_t bit_t;
-typedef uint64_t lbit_t;
-#elif (BITS == 32)
+#if (BITS > 24)
typedef uint64_t bit_t;
-typedef uint32_t lbit_t;
-#elif (BITS == 24)
-typedef uint32_t bit_t;
-typedef uint32_t lbit_t;
-#elif (BITS == 16)
-typedef uint32_t bit_t;
-typedef uint16_t lbit_t;
#else
typedef uint32_t bit_t;
-typedef uint8_t lbit_t;
#endif
-#ifndef USE_RIGHT_JUSTIFY
-typedef bit_t range_t; // type for storing range_
-#define MASK ((((bit_t)1) << (BITS)) - 1)
-#else
-typedef uint32_t range_t; // range_ only uses 8bits here. No need for bit_t.
-#endif
+typedef uint32_t range_t;
//------------------------------------------------------------------------------
// Bitreader
typedef struct VP8BitReader VP8BitReader;
struct VP8BitReader {
+ // boolean decoder (keep the field ordering as is!)
+ bit_t value_; // current value
+ range_t range_; // current range minus 1. In [127, 254] interval.
+ int bits_; // number of valid bits left
+ // read buffer
const uint8_t* buf_; // next byte to be read
const uint8_t* buf_end_; // end of read buffer
int eof_; // true if input is exhausted
-
- // boolean decoder
- range_t range_; // current range minus 1. In [127, 254] interval.
- bit_t value_; // current value
- int bits_; // number of valid bits left
};
// Initialize the bit reader and the boolean decoder.
void VP8InitBitReader(VP8BitReader* const br,
const uint8_t* const start, const uint8_t* const end);
+// Update internal pointers to displace the byte buffer by the
+// relative offset 'offset'.
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset);
+
// return the next value made of 'num_bits' bits
uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
@@ -142,152 +94,22 @@ static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
// return the next value with sign-extension.
int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
-// Read a bit with proba 'prob'. Speed-critical function!
-extern const uint8_t kVP8Log2Range[128];
-extern const range_t kVP8NewRange[128];
-
-void VP8LoadFinalBytes(VP8BitReader* const br); // special case for the tail
-
-static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
- assert(br != NULL && br->buf_ != NULL);
- // Read 'BITS' bits at a time if possible.
- if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
- // convert memory type to register type (with some zero'ing!)
- bit_t bits;
- const lbit_t in_bits = *(const lbit_t*)br->buf_;
- br->buf_ += (BITS) >> 3;
-#if !defined(__BIG_ENDIAN__)
-#if (BITS > 32)
-// gcc 4.3 has builtin functions for swap32/swap64
-#if defined(__GNUC__) && \
- (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
- bits = (bit_t)__builtin_bswap64(in_bits);
-#elif defined(_MSC_VER)
- bits = (bit_t)_byteswap_uint64(in_bits);
-#elif defined(__x86_64__)
- __asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits));
-#else // generic code for swapping 64-bit values (suggested by bdb@)
- bits = (bit_t)in_bits;
- bits = ((bits & 0xffffffff00000000ull) >> 32) |
- ((bits & 0x00000000ffffffffull) << 32);
- bits = ((bits & 0xffff0000ffff0000ull) >> 16) |
- ((bits & 0x0000ffff0000ffffull) << 16);
- bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) |
- ((bits & 0x00ff00ff00ff00ffull) << 8);
-#endif
- bits >>= 64 - BITS;
-#elif (BITS >= 24)
-#if defined(__i386__) || defined(__x86_64__)
- {
- lbit_t swapped_in_bits;
- __asm__ volatile("bswap %k0" : "=r"(swapped_in_bits) : "0"(in_bits));
- bits = (bit_t)swapped_in_bits; // 24b/32b -> 32b/64b zero-extension
- }
-#elif defined(_MSC_VER)
- bits = (bit_t)_byteswap_ulong(in_bits);
-#else
- bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
- | ((in_bits << 8) & 0xff0000) | (in_bits << 24);
-#endif // x86
- bits >>= (32 - BITS);
-#elif (BITS == 16)
- // gcc will recognize a 'rorw $8, ...' here:
- bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
-#else // BITS == 8
- bits = (bit_t)in_bits;
-#endif
-#else // BIG_ENDIAN
- bits = (bit_t)in_bits;
- if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
-#endif
-#ifndef USE_RIGHT_JUSTIFY
- br->value_ |= bits << (-br->bits_);
-#else
- br->value_ = bits | (br->value_ << (BITS));
-#endif
- br->bits_ += (BITS);
- } else {
- VP8LoadFinalBytes(br); // no need to be inlined
- }
-}
-
-static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, range_t split) {
- if (br->bits_ < 0) { // Make sure we have a least BITS bits in 'value_'
- VP8LoadNewBytes(br);
- }
-#ifndef USE_RIGHT_JUSTIFY
- split |= (MASK);
- if (br->value_ > split) {
- br->range_ -= split + 1;
- br->value_ -= split + 1;
- return 1;
- } else {
- br->range_ = split;
- return 0;
- }
-#else
- {
- const int pos = br->bits_;
- const range_t value = (range_t)(br->value_ >> pos);
- if (value > split) {
- br->range_ -= split + 1;
- br->value_ -= (bit_t)(split + 1) << pos;
- return 1;
- } else {
- br->range_ = split;
- return 0;
- }
- }
-#endif
-}
-
-static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
-#ifndef USE_RIGHT_JUSTIFY
- // range_ is in [0..127] interval here.
- const bit_t idx = br->range_ >> (BITS);
- const int shift = kVP8Log2Range[idx];
- br->range_ = kVP8NewRange[idx];
- br->value_ <<= shift;
- br->bits_ -= shift;
-#else
- const int shift = kVP8Log2Range[br->range_];
- assert(br->range_ < (range_t)128);
- br->range_ = kVP8NewRange[br->range_];
- br->bits_ -= shift;
-#endif
-}
-
-static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
-#ifndef USE_RIGHT_JUSTIFY
- // It's important to avoid generating a 64bit x 64bit multiply here.
- // We just need an 8b x 8b after all.
- const range_t split =
- (range_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
- const int bit = VP8BitUpdate(br, split);
- if (br->range_ <= (((range_t)0x7e << (BITS)) | (MASK))) {
- VP8Shift(br);
- }
- return bit;
-#else
- const range_t split = (br->range_ * prob) >> 8;
- const int bit = VP8BitUpdate(br, split);
- if (br->range_ <= (range_t)0x7e) {
- VP8Shift(br);
- }
- return bit;
-#endif
-}
-
-static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
- const range_t split = (br->range_ >> 1);
- const int bit = VP8BitUpdate(br, split);
- VP8Shift(br);
- return bit ? -v : v;
-}
+// bit_reader_inl.h will implement the following methods:
+// static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob)
+// static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v)
+// and should be included by the .c files that actually need them.
+// This is to avoid recompiling the whole library whenever this file is touched,
+// and also allowing platform-specific ad-hoc hacks.
// -----------------------------------------------------------------------------
// Bitreader for lossless format
+// maximum number of bits (inclusive) the bit-reader can handle:
+#define VP8L_MAX_NUM_BIT_READ 24
+
+#define VP8L_LBITS 64 // Number of bits prefetched.
+#define VP8L_WBITS 32 // Minimum number of bytes ready after VP8LFillBitWindow.
+
typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
typedef struct {
@@ -308,9 +130,10 @@ void VP8LInitBitReader(VP8LBitReader* const br,
void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
const uint8_t* const buffer, size_t length);
-// Reads the specified number of bits from Read Buffer.
-// Flags an error in case end_of_stream or n_bits is more than allowed limit.
-// Flags eos if this read attempt is going to cross the read buffer.
+// Reads the specified number of bits from read buffer.
+// Flags an error in case end_of_stream or n_bits is more than the allowed limit
+// of VP8L_MAX_NUM_BIT_READ (inclusive).
+// Flags eos_ if this read attempt is going to cross the read buffer.
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
// Return the prefetched bits, so they can be looked up.
@@ -318,14 +141,26 @@ static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
return (uint32_t)(br->val_ >> br->bit_pos_);
}
+// Returns true if there was an attempt at reading bit past the end of
+// the buffer. Doesn't set br->eos_ flag.
+static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
+ assert(br->pos_ <= br->len_);
+ return (br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS);
+}
+
// For jumping over a number of bits in the bit stream when accessed with
// VP8LPrefetchBits and VP8LFillBitWindow.
static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
br->bit_pos_ = val;
+ br->eos_ = VP8LIsEndOfStream(br);
}
// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
-void VP8LFillBitWindow(VP8LBitReader* const br);
+// Speed critical, but infrequent part of the code can be non-inlined.
+extern void VP8LDoFillBitWindow(VP8LBitReader* const br);
+static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
+ if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br);
+}
#ifdef __cplusplus
} // extern "C"
diff --git a/src/3rdparty/libwebp/src/utils/bit_reader_inl.h b/src/3rdparty/libwebp/src/utils/bit_reader_inl.h
new file mode 100644
index 0000000..81427c6
--- /dev/null
+++ b/src/3rdparty/libwebp/src/utils/bit_reader_inl.h
@@ -0,0 +1,172 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Specific inlined methods for boolean decoder [VP8GetBit() ...]
+// This file should be included by the .c sources that actually need to call
+// these methods.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_INL_H_
+#define WEBP_UTILS_BIT_READER_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#ifdef WEBP_FORCE_ALIGNED
+#include <string.h> // memcpy
+#endif
+
+#include "../dsp/dsp.h"
+#include "./bit_reader.h"
+#include "./endian_inl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Derived type lbit_t = natural type for memory I/O
+
+#if (BITS > 32)
+typedef uint64_t lbit_t;
+#elif (BITS > 16)
+typedef uint32_t lbit_t;
+#elif (BITS > 8)
+typedef uint16_t lbit_t;
+#else
+typedef uint8_t lbit_t;
+#endif
+
+extern const uint8_t kVP8Log2Range[128];
+extern const range_t kVP8NewRange[128];
+
+// special case for the tail byte-reading
+void VP8LoadFinalBytes(VP8BitReader* const br);
+
+//------------------------------------------------------------------------------
+// Inlined critical functions
+
+// makes sure br->value_ has at least BITS bits worth of data
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+ assert(br != NULL && br->buf_ != NULL);
+ // Read 'BITS' bits at a time if possible.
+ if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
+ // convert memory type to register type (with some zero'ing!)
+ bit_t bits;
+#if defined(WEBP_FORCE_ALIGNED)
+ lbit_t in_bits;
+ memcpy(&in_bits, br->buf_, sizeof(in_bits));
+#elif defined(WEBP_USE_MIPS32)
+ // This is needed because of un-aligned read.
+ lbit_t in_bits;
+ lbit_t* p_buf_ = (lbit_t*)br->buf_;
+ __asm__ volatile(
+ ".set push \n\t"
+ ".set at \n\t"
+ ".set macro \n\t"
+ "ulw %[in_bits], 0(%[p_buf_]) \n\t"
+ ".set pop \n\t"
+ : [in_bits]"=r"(in_bits)
+ : [p_buf_]"r"(p_buf_)
+ : "memory", "at"
+ );
+#else
+ const lbit_t in_bits = *(const lbit_t*)br->buf_;
+#endif
+ br->buf_ += BITS >> 3;
+#if !defined(WORDS_BIGENDIAN)
+#if (BITS > 32)
+ bits = BSwap64(in_bits);
+ bits >>= 64 - BITS;
+#elif (BITS >= 24)
+ bits = BSwap32(in_bits);
+ bits >>= (32 - BITS);
+#elif (BITS == 16)
+ bits = BSwap16(in_bits);
+#else // BITS == 8
+ bits = (bit_t)in_bits;
+#endif // BITS > 32
+#else // WORDS_BIGENDIAN
+ bits = (bit_t)in_bits;
+ if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
+#endif
+ br->value_ = bits | (br->value_ << BITS);
+ br->bits_ += BITS;
+ } else {
+ VP8LoadFinalBytes(br); // no need to be inlined
+ }
+}
+
+// Read a bit with proba 'prob'. Speed-critical function!
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+ // Don't move this declaration! It makes a big speed difference to store
+ // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
+ // alter br->range_ value.
+ range_t range = br->range_;
+ if (br->bits_ < 0) {
+ VP8LoadNewBytes(br);
+ }
+ {
+ const int pos = br->bits_;
+ const range_t split = (range * prob) >> 8;
+ const range_t value = (range_t)(br->value_ >> pos);
+#if defined(__arm__) || defined(_M_ARM) // ARM-specific
+ const int bit = ((int)(split - value) >> 31) & 1;
+ if (value > split) {
+ range -= split + 1;
+ br->value_ -= (bit_t)(split + 1) << pos;
+ } else {
+ range = split;
+ }
+#else // faster version on x86
+ int bit; // Don't use 'const int bit = (value > split);", it's slower.
+ if (value > split) {
+ range -= split + 1;
+ br->value_ -= (bit_t)(split + 1) << pos;
+ bit = 1;
+ } else {
+ range = split;
+ bit = 0;
+ }
+#endif
+ if (range <= (range_t)0x7e) {
+ const int shift = kVP8Log2Range[range];
+ range = kVP8NewRange[range];
+ br->bits_ -= shift;
+ }
+ br->range_ = range;
+ return bit;
+ }
+}
+
+// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+ if (br->bits_ < 0) {
+ VP8LoadNewBytes(br);
+ }
+ {
+ const int pos = br->bits_;
+ const range_t split = br->range_ >> 1;
+ const range_t value = (range_t)(br->value_ >> pos);
+ const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0
+ br->bits_ -= 1;
+ br->range_ += mask;
+ br->range_ |= 1;
+ br->value_ -= (bit_t)((split + 1) & mask) << pos;
+ return (v ^ mask) - mask;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBP_UTILS_BIT_READER_INL_H_
diff --git a/src/3rdparty/libwebp/src/utils/bit_writer.c b/src/3rdparty/libwebp/src/utils/bit_writer.c
index 29810a1..9875ca6 100644
--- a/src/3rdparty/libwebp/src/utils/bit_writer.c
+++ b/src/3rdparty/libwebp/src/utils/bit_writer.c
@@ -15,7 +15,10 @@
#include <assert.h>
#include <string.h> // for memcpy()
#include <stdlib.h>
+
#include "./bit_writer.h"
+#include "./endian_inl.h"
+#include "./utils.h"
//------------------------------------------------------------------------------
// VP8BitWriter
@@ -34,7 +37,7 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
new_size = 2 * bw->max_pos_;
if (new_size < needed_size) new_size = needed_size;
if (new_size < 1024) new_size = 1024;
- new_buf = (uint8_t*)malloc(new_size);
+ new_buf = (uint8_t*)WebPSafeMalloc(1ULL, new_size);
if (new_buf == NULL) {
bw->error_ = 1;
return 0;
@@ -43,13 +46,13 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
assert(bw->buf_ != NULL);
memcpy(new_buf, bw->buf_, bw->pos_);
}
- free(bw->buf_);
+ WebPSafeFree(bw->buf_);
bw->buf_ = new_buf;
bw->max_pos_ = new_size;
return 1;
}
-static void kFlush(VP8BitWriter* const bw) {
+static void Flush(VP8BitWriter* const bw) {
const int s = 8 + bw->nb_bits_;
const int32_t bits = bw->value_ >> s;
assert(bw->nb_bits_ >= 0);
@@ -115,7 +118,7 @@ int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) {
bw->range_ = kNewRange[bw->range_];
bw->value_ <<= shift;
bw->nb_bits_ += shift;
- if (bw->nb_bits_ > 0) kFlush(bw);
+ if (bw->nb_bits_ > 0) Flush(bw);
}
return bit;
}
@@ -132,7 +135,7 @@ int VP8PutBitUniform(VP8BitWriter* const bw, int bit) {
bw->range_ = kNewRange[bw->range_];
bw->value_ <<= 1;
bw->nb_bits_ += 1;
- if (bw->nb_bits_ > 0) kFlush(bw);
+ if (bw->nb_bits_ > 0) Flush(bw);
}
return bit;
}
@@ -170,14 +173,14 @@ int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
VP8PutValue(bw, 0, 9 - bw->nb_bits_);
bw->nb_bits_ = 0; // pad with zeroes
- kFlush(bw);
+ Flush(bw);
return bw->buf_;
}
int VP8BitWriterAppend(VP8BitWriter* const bw,
const uint8_t* data, size_t size) {
- assert(data);
- if (bw->nb_bits_ != -8) return 0; // kFlush() must have been called
+ assert(data != NULL);
+ if (bw->nb_bits_ != -8) return 0; // Flush() must have been called
if (!BitWriterResize(bw, size)) return 0;
memcpy(bw->buf_ + bw->pos_, data, size);
bw->pos_ += size;
@@ -185,8 +188,8 @@ int VP8BitWriterAppend(VP8BitWriter* const bw,
}
void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
- if (bw) {
- free(bw->buf_);
+ if (bw != NULL) {
+ WebPSafeFree(bw->buf_);
memset(bw, 0, sizeof(*bw));
}
}
@@ -194,32 +197,43 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
//------------------------------------------------------------------------------
// VP8LBitWriter
+// This is the minimum amount of size the memory buffer is guaranteed to grow
+// when extra space is needed.
+#define MIN_EXTRA_SIZE (32768ULL)
+
+#define VP8L_WRITER_BYTES ((int)sizeof(vp8l_wtype_t))
+#define VP8L_WRITER_BITS (VP8L_WRITER_BYTES * 8)
+#define VP8L_WRITER_MAX_BITS (8 * (int)sizeof(vp8l_atype_t))
+
// Returns 1 on success.
static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
uint8_t* allocated_buf;
size_t allocated_size;
- const size_t current_size = VP8LBitWriterNumBytes(bw);
+ const size_t max_bytes = bw->end_ - bw->buf_;
+ const size_t current_size = bw->cur_ - bw->buf_;
const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
const size_t size_required = (size_t)size_required_64b;
if (size_required != size_required_64b) {
bw->error_ = 1;
return 0;
}
- if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
- allocated_size = (3 * bw->max_bytes_) >> 1;
+ if (max_bytes > 0 && size_required <= max_bytes) return 1;
+ allocated_size = (3 * max_bytes) >> 1;
if (allocated_size < size_required) allocated_size = size_required;
// make allocated size multiple of 1k
allocated_size = (((allocated_size >> 10) + 1) << 10);
- allocated_buf = (uint8_t*)malloc(allocated_size);
+ allocated_buf = (uint8_t*)WebPSafeMalloc(1ULL, allocated_size);
if (allocated_buf == NULL) {
bw->error_ = 1;
return 0;
}
- memcpy(allocated_buf, bw->buf_, current_size);
- free(bw->buf_);
+ if (current_size > 0) {
+ memcpy(allocated_buf, bw->buf_, current_size);
+ }
+ WebPSafeFree(bw->buf_);
bw->buf_ = allocated_buf;
- bw->max_bytes_ = allocated_size;
- memset(allocated_buf + current_size, 0, allocated_size - current_size);
+ bw->cur_ = bw->buf_ + current_size;
+ bw->end_ = bw->buf_ + allocated_size;
return 1;
}
@@ -230,53 +244,64 @@ int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
if (bw != NULL) {
- free(bw->buf_);
+ WebPSafeFree(bw->buf_);
memset(bw, 0, sizeof(*bw));
}
}
void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
- if (n_bits < 1) return;
-#if !defined(__BIG_ENDIAN__)
- // Technically, this branch of the code can write up to 25 bits at a time,
- // but in prefix encoding, the maximum number of bits written is 18 at a time.
- {
- uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];
- uint32_t v = *(const uint32_t*)p;
- v |= bits << (bw->bit_pos_ & 7);
- *(uint32_t*)p = v;
- bw->bit_pos_ += n_bits;
- }
-#else // BIG_ENDIAN
- {
- uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
- const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
- const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
- // implicit & 0xff is assumed for uint8_t arithmetic
- *p++ |= bits << bits_reserved_in_first_byte;
- bits >>= 8 - bits_reserved_in_first_byte;
- if (bits_left_to_write >= 1) {
- *p++ = bits;
- bits >>= 8;
- if (bits_left_to_write >= 9) {
- *p++ = bits;
- bits >>= 8;
+ assert(n_bits <= 32);
+ // That's the max we can handle:
+ assert(bw->used_ + n_bits <= 2 * VP8L_WRITER_MAX_BITS);
+ if (n_bits > 0) {
+ // Local field copy.
+ vp8l_atype_t lbits = bw->bits_;
+ int used = bw->used_;
+ // Special case of overflow handling for 32bit accumulator (2-steps flush).
+ if (VP8L_WRITER_BITS == 16) {
+ if (used + n_bits >= VP8L_WRITER_MAX_BITS) {
+ // Fill up all the VP8L_WRITER_MAX_BITS so it can be flushed out below.
+ const int shift = VP8L_WRITER_MAX_BITS - used;
+ lbits |= (vp8l_atype_t)bits << used;
+ used = VP8L_WRITER_MAX_BITS;
+ n_bits -= shift;
+ bits >>= shift;
+ assert(n_bits <= VP8L_WRITER_MAX_BITS);
}
}
- assert(n_bits <= 25);
- *p = bits;
- bw->bit_pos_ += n_bits;
+ // If needed, make some room by flushing some bits out.
+ while (used >= VP8L_WRITER_BITS) {
+ if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+ const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+ if (extra_size != (size_t)extra_size ||
+ !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+ bw->cur_ = bw->buf_;
+ bw->error_ = 1;
+ return;
+ }
+ }
+ *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)lbits);
+ bw->cur_ += VP8L_WRITER_BYTES;
+ lbits >>= VP8L_WRITER_BITS;
+ used -= VP8L_WRITER_BITS;
+ }
+ // Eventually, insert new bits.
+ bw->bits_ = lbits | ((vp8l_atype_t)bits << used);
+ bw->used_ = used + n_bits;
}
-#endif
- if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {
- const uint64_t extra_size = 32768ULL + bw->max_bytes_;
- if (extra_size != (size_t)extra_size ||
- !VP8LBitWriterResize(bw, (size_t)extra_size)) {
- bw->bit_pos_ = 0;
- bw->error_ = 1;
+}
+
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
+ // flush leftover bits
+ if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) {
+ while (bw->used_ > 0) {
+ *bw->cur_++ = (uint8_t)bw->bits_;
+ bw->bits_ >>= 8;
+ bw->used_ -= 8;
}
+ bw->used_ = 0;
}
+ return bw->buf_;
}
//------------------------------------------------------------------------------
-
diff --git a/src/3rdparty/libwebp/src/utils/bit_writer.h b/src/3rdparty/libwebp/src/utils/bit_writer.h
index 89a9ead..c80d22a 100644
--- a/src/3rdparty/libwebp/src/utils/bit_writer.h
+++ b/src/3rdparty/libwebp/src/utils/bit_writer.h
@@ -68,51 +68,46 @@ static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
//------------------------------------------------------------------------------
// VP8LBitWriter
-// TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope
-// of re-using VP8BitWriter. Will evaluate once basic lossless encoder is
-// implemented.
-typedef struct {
- uint8_t* buf_;
- size_t bit_pos_;
- size_t max_bytes_;
+#if defined(__x86_64__) || defined(_M_X64) // 64bit
+typedef uint64_t vp8l_atype_t; // accumulator type
+typedef uint32_t vp8l_wtype_t; // writing type
+#define WSWAP HToLE32
+#else
+typedef uint32_t vp8l_atype_t;
+typedef uint16_t vp8l_wtype_t;
+#define WSWAP HToLE16
+#endif
- // After all bits are written, the caller must observe the state of
- // error_. A value of 1 indicates that a memory allocation failure
- // has happened during bit writing. A value of 0 indicates successful
+typedef struct {
+ vp8l_atype_t bits_; // bit accumulator
+ int used_; // number of bits used in accumulator
+ uint8_t* buf_; // start of buffer
+ uint8_t* cur_; // current write position
+ uint8_t* end_; // end of buffer
+
+ // After all bits are written (VP8LBitWriterFinish()), the caller must observe
+ // the state of error_. A value of 1 indicates that a memory allocation
+ // failure has happened during bit writing. A value of 0 indicates successful
// writing of bits.
int error_;
} VP8LBitWriter;
static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
- return (bw->bit_pos_ + 7) >> 3;
+ return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
}
-static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
- return bw->buf_;
-}
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
// Returns 0 in case of memory allocation error.
int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
-// This function writes bits into bytes in increasing addresses, and within
-// a byte least-significant-bit first.
-//
-// The function can write up to 16 bits in one go with WriteBits
-// Example: let's assume that 3 bits (Rs below) have been written already:
-//
-// BYTE-0 BYTE+1 BYTE+2
-//
-// 0000 0RRR 0000 0000 0000 0000
-//
-// Now, we could write 5 or less bits in MSB by just sifting by 3
-// and OR'ing to BYTE-0.
-//
-// For n bits, we take the last 5 bytes, OR that with high bits in BYTE-0,
-// and locate the rest in BYTE+1 and BYTE+2.
-//
+// This function writes bits into bytes in increasing addresses (little endian),
+// and within a byte least-significant-bit first.
+// This function can write up to 32 bits in one go, but VP8LBitReader can only
+// read 24 bits max (VP8L_MAX_NUM_BIT_READ).
// VP8LBitWriter's error_ flag is set in case of memory allocation error.
void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
diff --git a/src/3rdparty/libwebp/src/utils/color_cache.c b/src/3rdparty/libwebp/src/utils/color_cache.c
index 66a4464..8a88f08 100644
--- a/src/3rdparty/libwebp/src/utils/color_cache.c
+++ b/src/3rdparty/libwebp/src/utils/color_cache.c
@@ -32,7 +32,7 @@ int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
void VP8LColorCacheClear(VP8LColorCache* const cc) {
if (cc != NULL) {
- free(cc->colors_);
+ WebPSafeFree(cc->colors_);
cc->colors_ = NULL;
}
}
diff --git a/src/3rdparty/libwebp/src/utils/endian_inl.h b/src/3rdparty/libwebp/src/utils/endian_inl.h
new file mode 100644
index 0000000..cd56c37
--- /dev/null
+++ b/src/3rdparty/libwebp/src/utils/endian_inl.h
@@ -0,0 +1,100 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Endian related functions.
+
+#ifndef WEBP_UTILS_ENDIAN_INL_H_
+#define WEBP_UTILS_ENDIAN_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../dsp/dsp.h"
+#include "../webp/types.h"
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && \
+ (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+#if defined(WORDS_BIGENDIAN)
+#define HToLE32 BSwap32
+#define HToLE16 BSwap16
+#else
+#define HToLE32(x) (x)
+#define HToLE16(x) (x)
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
+#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3)
+#define HAVE_BUILTIN_BSWAP32
+#define HAVE_BUILTIN_BSWAP64
+#endif
+// clang-3.3 and gcc-4.8 have a builtin function for swap16
+#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3)
+#define HAVE_BUILTIN_BSWAP16
+#endif
+#endif // !HAVE_CONFIG_H
+
+static WEBP_INLINE uint16_t BSwap16(uint16_t x) {
+#if defined(HAVE_BUILTIN_BSWAP16)
+ return __builtin_bswap16(x);
+#elif defined(_MSC_VER)
+ return _byteswap_ushort(x);
+#else
+ // gcc will recognize a 'rorw $8, ...' here:
+ return (x >> 8) | ((x & 0xff) << 8);
+#endif // HAVE_BUILTIN_BSWAP16
+}
+
+static WEBP_INLINE uint32_t BSwap32(uint32_t x) {
+#if defined(WEBP_USE_MIPS32_R2)
+ uint32_t ret;
+ __asm__ volatile (
+ "wsbh %[ret], %[x] \n\t"
+ "rotr %[ret], %[ret], 16 \n\t"
+ : [ret]"=r"(ret)
+ : [x]"r"(x)
+ );
+ return ret;
+#elif defined(HAVE_BUILTIN_BSWAP32)
+ return __builtin_bswap32(x);
+#elif defined(__i386__) || defined(__x86_64__)
+ uint32_t swapped_bytes;
+ __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x));
+ return swapped_bytes;
+#elif defined(_MSC_VER)
+ return (uint32_t)_byteswap_ulong(x);
+#else
+ return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+#endif // HAVE_BUILTIN_BSWAP32
+}
+
+static WEBP_INLINE uint64_t BSwap64(uint64_t x) {
+#if defined(HAVE_BUILTIN_BSWAP64)
+ return __builtin_bswap64(x);
+#elif defined(__x86_64__)
+ uint64_t swapped_bytes;
+ __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x));
+ return swapped_bytes;
+#elif defined(_MSC_VER)
+ return (uint64_t)_byteswap_uint64(x);
+#else // generic code for swapping 64-bit values (suggested by bdb@)
+ x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32);
+ x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16);
+ x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8);
+ return x;
+#endif // HAVE_BUILTIN_BSWAP64
+}
+
+#endif // WEBP_UTILS_ENDIAN_INL_H_
diff --git a/src/3rdparty/libwebp/src/utils/huffman.c b/src/3rdparty/libwebp/src/utils/huffman.c
index 8c5739f..c4c16d9 100644
--- a/src/3rdparty/libwebp/src/utils/huffman.c
+++ b/src/3rdparty/libwebp/src/utils/huffman.c
@@ -22,6 +22,9 @@
// (might be faster on some platform)
// #define USE_LUT_REVERSE_BITS
+// Huffman data read via DecodeImageStream is represented in two (red and green)
+// bytes.
+#define MAX_HTREE_GROUPS 0x10000
#define NON_EXISTENT_SYMBOL (-1)
static void TreeNodeInit(HuffmanTreeNode* const node) {
@@ -46,17 +49,25 @@ static void AssignChildren(HuffmanTree* const tree,
TreeNodeInit(children + 1);
}
+// A Huffman tree is a full binary tree; and in a full binary tree with L
+// leaves, the total number of nodes N = 2 * L - 1.
+static int HuffmanTreeMaxNodes(int num_leaves) {
+ return (2 * num_leaves - 1);
+}
+
+static int HuffmanTreeAllocate(HuffmanTree* const tree, int num_nodes) {
+ assert(tree != NULL);
+ tree->root_ =
+ (HuffmanTreeNode*)WebPSafeMalloc(num_nodes, sizeof(*tree->root_));
+ return (tree->root_ != NULL);
+}
+
static int TreeInit(HuffmanTree* const tree, int num_leaves) {
assert(tree != NULL);
if (num_leaves == 0) return 0;
- // We allocate maximum possible nodes in the tree at once.
- // Note that a Huffman tree is a full binary tree; and in a full binary tree
- // with L leaves, the total number of nodes N = 2 * L - 1.
- tree->max_nodes_ = 2 * num_leaves - 1;
+ tree->max_nodes_ = HuffmanTreeMaxNodes(num_leaves);
assert(tree->max_nodes_ < (1 << 16)); // limit for the lut_jump_ table
- tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_,
- sizeof(*tree->root_));
- if (tree->root_ == NULL) return 0;
+ if (!HuffmanTreeAllocate(tree, tree->max_nodes_)) return 0;
TreeNodeInit(tree->root_); // Initialize root.
tree->num_nodes_ = 1;
memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_));
@@ -64,17 +75,41 @@ static int TreeInit(HuffmanTree* const tree, int num_leaves) {
return 1;
}
-void HuffmanTreeRelease(HuffmanTree* const tree) {
+void VP8LHuffmanTreeFree(HuffmanTree* const tree) {
if (tree != NULL) {
- free(tree->root_);
+ WebPSafeFree(tree->root_);
tree->root_ = NULL;
tree->max_nodes_ = 0;
tree->num_nodes_ = 0;
}
}
-int HuffmanCodeLengthsToCodes(const int* const code_lengths,
- int code_lengths_size, int* const huff_codes) {
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) {
+ HTreeGroup* const htree_groups =
+ (HTreeGroup*)WebPSafeCalloc(num_htree_groups, sizeof(*htree_groups));
+ assert(num_htree_groups <= MAX_HTREE_GROUPS);
+ if (htree_groups == NULL) {
+ return NULL;
+ }
+ return htree_groups;
+}
+
+void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups) {
+ if (htree_groups != NULL) {
+ int i, j;
+ for (i = 0; i < num_htree_groups; ++i) {
+ HuffmanTree* const htrees = htree_groups[i].htrees_;
+ for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+ VP8LHuffmanTreeFree(&htrees[j]);
+ }
+ }
+ WebPSafeFree(htree_groups);
+ }
+}
+
+int VP8LHuffmanCodeLengthsToCodes(
+ const int* const code_lengths, int code_lengths_size,
+ int* const huff_codes) {
int symbol;
int code_len;
int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
@@ -193,9 +228,10 @@ static int TreeAddSymbol(HuffmanTree* const tree,
return 1;
}
-int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
- const int* const code_lengths,
- int code_lengths_size) {
+int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ int* const codes,
+ int code_lengths_size) {
int symbol;
int num_symbols = 0;
int root_symbol = 0;
@@ -219,47 +255,43 @@ int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
if (num_symbols == 1) { // Trivial case.
const int max_symbol = code_lengths_size;
if (root_symbol < 0 || root_symbol >= max_symbol) {
- HuffmanTreeRelease(tree);
+ VP8LHuffmanTreeFree(tree);
return 0;
}
return TreeAddSymbol(tree, root_symbol, 0, 0);
} else { // Normal case.
int ok = 0;
+ memset(codes, 0, code_lengths_size * sizeof(*codes));
- // Get Huffman codes from the code lengths.
- int* const codes =
- (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
- if (codes == NULL) goto End;
-
- if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
+ if (!VP8LHuffmanCodeLengthsToCodes(code_lengths, code_lengths_size,
+ codes)) {
goto End;
}
// Add symbols one-by-one.
for (symbol = 0; symbol < code_lengths_size; ++symbol) {
if (code_lengths[symbol] > 0) {
- if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
+ if (!TreeAddSymbol(tree, symbol, codes[symbol],
+ code_lengths[symbol])) {
goto End;
}
}
}
ok = 1;
End:
- free(codes);
ok = ok && IsFull(tree);
- if (!ok) HuffmanTreeRelease(tree);
+ if (!ok) VP8LHuffmanTreeFree(tree);
return ok;
}
}
-int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
- const int* const code_lengths,
- const int* const codes,
- const int* const symbols, int max_symbol,
- int num_symbols) {
+int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ const int* const codes,
+ const int* const symbols, int max_symbol,
+ int num_symbols) {
int ok = 0;
int i;
-
assert(tree != NULL);
assert(code_lengths != NULL);
assert(codes != NULL);
@@ -282,7 +314,6 @@ int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
ok = 1;
End:
ok = ok && IsFull(tree);
- if (!ok) HuffmanTreeRelease(tree);
+ if (!ok) VP8LHuffmanTreeFree(tree);
return ok;
}
-
diff --git a/src/3rdparty/libwebp/src/utils/huffman.h b/src/3rdparty/libwebp/src/utils/huffman.h
index e8afd27..624bc17 100644
--- a/src/3rdparty/libwebp/src/utils/huffman.h
+++ b/src/3rdparty/libwebp/src/utils/huffman.h
@@ -15,6 +15,7 @@
#define WEBP_UTILS_HUFFMAN_H_
#include <assert.h>
+#include "../webp/format_constants.h"
#include "../webp/types.h"
#ifdef __cplusplus
@@ -42,6 +43,12 @@ struct HuffmanTree {
int num_nodes_; // number of currently occupied nodes
};
+// Huffman Tree group.
+typedef struct HTreeGroup HTreeGroup;
+struct HTreeGroup {
+ HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
+};
+
// Returns true if the given node is not a leaf of the Huffman tree.
static WEBP_INLINE int HuffmanTreeNodeIsNotLeaf(
const HuffmanTreeNode* const node) {
@@ -56,29 +63,37 @@ static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
// Releases the nodes of the Huffman tree.
// Note: It does NOT free 'tree' itself.
-void HuffmanTreeRelease(HuffmanTree* const tree);
+void VP8LHuffmanTreeFree(HuffmanTree* const tree);
+
+// Creates the instance of HTreeGroup with specified number of tree-groups.
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups);
+
+// Releases the memory allocated for HTreeGroup.
+void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups);
// Builds Huffman tree assuming code lengths are implicitly in symbol order.
+// The 'huff_codes' and 'code_lengths' are pre-allocated temporary memory
+// buffers, used for creating the huffman tree.
// Returns false in case of error (invalid tree or memory error).
-int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
- const int* const code_lengths,
- int code_lengths_size);
+int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ int* const huff_codes,
+ int code_lengths_size);
// Build a Huffman tree with explicitly given lists of code lengths, codes
// and symbols. Verifies that all symbols added are smaller than max_symbol.
// Returns false in case of an invalid symbol, invalid tree or memory error.
-int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
- const int* const code_lengths,
- const int* const codes,
- const int* const symbols, int max_symbol,
- int num_symbols);
+int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree,
+ const int* const code_lengths,
+ const int* const codes,
+ const int* const symbols, int max_symbol,
+ int num_symbols);
// Utility: converts Huffman code lengths to corresponding Huffman codes.
// 'huff_codes' should be pre-allocated.
// Returns false in case of error (memory allocation, invalid codes).
-int HuffmanCodeLengthsToCodes(const int* const code_lengths,
- int code_lengths_size, int* const huff_codes);
-
+int VP8LHuffmanCodeLengthsToCodes(const int* const code_lengths,
+ int code_lengths_size, int* const huff_codes);
#ifdef __cplusplus
} // extern "C"
diff --git a/src/3rdparty/libwebp/src/utils/huffman_encode.c b/src/3rdparty/libwebp/src/utils/huffman_encode.c
index 9c59867..6421c2b 100644
--- a/src/3rdparty/libwebp/src/utils/huffman_encode.c
+++ b/src/3rdparty/libwebp/src/utils/huffman_encode.c
@@ -28,13 +28,13 @@ static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
// Change the population counts in a way that the consequent
// Huffman tree compression, especially its RLE-part, give smaller output.
-static int OptimizeHuffmanForRle(int length, int* const counts) {
- uint8_t* good_for_rle;
+static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
+ uint32_t* const counts) {
// 1) Let's make the Huffman code more compatible with rle encoding.
int i;
for (; length >= 0; --length) {
if (length == 0) {
- return 1; // All zeros.
+ return; // All zeros.
}
if (counts[length - 1] != 0) {
// Now counts[0..length - 1] does not have trailing zeros.
@@ -43,15 +43,11 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
- good_for_rle = (uint8_t*)calloc(length, 1);
- if (good_for_rle == NULL) {
- return 0;
- }
{
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer as 5 as a good_for_rle.
// Mark any seq of non-0's that is longer as 7 as a good_for_rle.
- int symbol = counts[0];
+ uint32_t symbol = counts[0];
int stride = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || counts[i] != symbol) {
@@ -73,17 +69,17 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
}
// 3) Let's replace those population counts that lead to more rle codes.
{
- int stride = 0;
- int limit = counts[0];
- int sum = 0;
+ uint32_t stride = 0;
+ uint32_t limit = counts[0];
+ uint32_t sum = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
!ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
- int k;
+ uint32_t k;
// The stride must end, collapse what we have, if we have enough (4).
- int count = (sum + stride / 2) / stride;
+ uint32_t count = (sum + stride / 2) / stride;
if (count < 1) {
count = 1;
}
@@ -119,17 +115,8 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
}
}
}
- free(good_for_rle);
- return 1;
}
-typedef struct {
- int total_count_;
- int value_;
- int pool_index_left_;
- int pool_index_right_;
-} HuffmanTree;
-
// A comparer function for two Huffman trees: sorts first by 'total count'
// (more comes first), and then by 'value' (more comes first).
static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
@@ -175,12 +162,12 @@ static void SetBitDepths(const HuffmanTree* const tree,
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
-static int GenerateOptimalTree(const int* const histogram, int histogram_size,
- int tree_depth_limit,
- uint8_t* const bit_depths) {
- int count_min;
+static void GenerateOptimalTree(const uint32_t* const histogram,
+ int histogram_size,
+ HuffmanTree* tree, int tree_depth_limit,
+ uint8_t* const bit_depths) {
+ uint32_t count_min;
HuffmanTree* tree_pool;
- HuffmanTree* tree;
int tree_size_orig = 0;
int i;
@@ -191,15 +178,9 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
}
if (tree_size_orig == 0) { // pretty optimal already!
- return 1;
+ return;
}
- // 3 * tree_size is enough to cover all the nodes representing a
- // population and all the inserted nodes combining two existing nodes.
- // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
- // tree needs exactly tree_size_orig entities.
- tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree));
- if (tree == NULL) return 0;
tree_pool = tree + tree_size_orig;
// For block sizes with less than 64k symbols we never need to do a
@@ -215,7 +196,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
int j;
for (j = 0; j < histogram_size; ++j) {
if (histogram[j] != 0) {
- const int count =
+ const uint32_t count =
(histogram[j] < count_min) ? count_min : histogram[j];
tree[idx].total_count_ = count;
tree[idx].value_ = j;
@@ -231,7 +212,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
if (tree_size > 1) { // Normal case.
int tree_pool_size = 0;
while (tree_size > 1) { // Finish when we have only one root.
- int count;
+ uint32_t count;
tree_pool[tree_pool_size++] = tree[tree_size - 1];
tree_pool[tree_pool_size++] = tree[tree_size - 2];
count = tree_pool[tree_pool_size - 1].total_count_ +
@@ -272,8 +253,6 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
}
}
}
- free(tree);
- return 1;
}
// -----------------------------------------------------------------------------
@@ -424,17 +403,15 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
// -----------------------------------------------------------------------------
// Main entry point
-int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
- HuffmanTreeCode* const tree) {
- const int num_symbols = tree->num_symbols;
- if (!OptimizeHuffmanForRle(num_symbols, histogram)) {
- return 0;
- }
- if (!GenerateOptimalTree(histogram, num_symbols,
- tree_depth_limit, tree->code_lengths)) {
- return 0;
- }
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+ uint8_t* const buf_rle,
+ HuffmanTree* const huff_tree,
+ HuffmanTreeCode* const huff_code) {
+ const int num_symbols = huff_code->num_symbols;
+ memset(buf_rle, 0, num_symbols * sizeof(*buf_rle));
+ OptimizeHuffmanForRle(num_symbols, buf_rle, histogram);
+ GenerateOptimalTree(histogram, num_symbols, huff_tree, tree_depth_limit,
+ huff_code->code_lengths);
// Create the actual bit codes for the bit lengths.
- ConvertBitDepthsToSymbols(tree);
- return 1;
+ ConvertBitDepthsToSymbols(huff_code);
}
diff --git a/src/3rdparty/libwebp/src/utils/huffman_encode.h b/src/3rdparty/libwebp/src/utils/huffman_encode.h
index ee51c68..91aa18f 100644
--- a/src/3rdparty/libwebp/src/utils/huffman_encode.h
+++ b/src/3rdparty/libwebp/src/utils/huffman_encode.h
@@ -33,14 +33,26 @@ typedef struct {
uint16_t* codes; // Symbol Codes.
} HuffmanTreeCode;
+// Struct to represent the Huffman tree.
+// TODO(vikasa): Add comment for the fields of the Struct.
+typedef struct {
+ uint32_t total_count_;
+ int value_;
+ int pool_index_left_; // Index for the left sub-tree.
+ int pool_index_right_; // Index for the right sub-tree.
+} HuffmanTree;
+
// Turn the Huffman tree into a token sequence.
// Returns the number of tokens used.
int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
HuffmanTreeToken* tokens, int max_tokens);
// Create an optimized tree, and tokenize it.
-int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
- HuffmanTreeCode* const tree);
+// 'buf_rle' and 'huff_tree' are pre-allocated and the 'tree' is the constructed
+// huffman code tree.
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+ uint8_t* const buf_rle, HuffmanTree* const huff_tree,
+ HuffmanTreeCode* const tree);
#ifdef __cplusplus
}
diff --git a/src/3rdparty/libwebp/src/utils/quant_levels_dec.c b/src/3rdparty/libwebp/src/utils/quant_levels_dec.c
index 8489705..5b8b8b4 100644
--- a/src/3rdparty/libwebp/src/utils/quant_levels_dec.c
+++ b/src/3rdparty/libwebp/src/utils/quant_levels_dec.c
@@ -7,18 +7,273 @@
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
-// TODO(skal): implement gradient smoothing.
+// Implement gradient smoothing: we replace a current alpha value by its
+// surrounding average if it's close enough (that is: the change will be less
+// than the minimum distance between two quantized level).
+// We use sliding window for computing the 2d moving average.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./quant_levels_dec.h"
-int DequantizeLevels(uint8_t* const data, int width, int height,
- int row, int num_rows) {
- if (data == NULL || width <= 0 || height <= 0 || row < 0 || num_rows < 0 ||
- row + num_rows > height) {
- return 0;
+#include <string.h> // for memset
+
+#include "./utils.h"
+
+// #define USE_DITHERING // uncomment to enable ordered dithering (not vital)
+
+#define FIX 16 // fix-point precision for averaging
+#define LFIX 2 // extra precision for look-up table
+#define LUT_SIZE ((1 << (8 + LFIX)) - 1) // look-up table size
+
+#if defined(USE_DITHERING)
+
+#define DFIX 4 // extra precision for ordered dithering
+#define DSIZE 4 // dithering size (must be a power of two)
+// cf. http://en.wikipedia.org/wiki/Ordered_dithering
+static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
+ { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision
+ { 12, 4, 14, 6 },
+ { 3, 11, 1, 9 },
+ { 15, 7, 13, 5 }
+};
+
+#else
+#define DFIX 0
+#endif
+
+typedef struct {
+ int width_, height_; // dimension
+ int row_; // current input row being processed
+ uint8_t* src_; // input pointer
+ uint8_t* dst_; // output pointer
+
+ int radius_; // filter radius (=delay)
+ int scale_; // normalization factor, in FIX bits precision
+
+ void* mem_; // all memory
+
+ // various scratch buffers
+ uint16_t* start_;
+ uint16_t* cur_;
+ uint16_t* end_;
+ uint16_t* top_;
+ uint16_t* average_;
+
+ // input levels distribution
+ int num_levels_; // number of quantized levels
+ int min_, max_; // min and max level values
+ int min_level_dist_; // smallest distance between two consecutive levels
+
+ int16_t* correction_; // size = 1 + 2*LUT_SIZE -> ~4k memory
+} SmoothParams;
+
+//------------------------------------------------------------------------------
+
+#define CLIP_MASK (int)(~0U << (8 + DFIX))
+static WEBP_INLINE uint8_t clip_8b(int v) {
+ return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
+}
+
+// vertical accumulation
+static void VFilter(SmoothParams* const p) {
+ const uint8_t* src = p->src_;
+ const int w = p->width_;
+ uint16_t* const cur = p->cur_;
+ const uint16_t* const top = p->top_;
+ uint16_t* const out = p->end_;
+ uint16_t sum = 0; // all arithmetic is modulo 16bit
+ int x;
+
+ for (x = 0; x < w; ++x) {
+ uint16_t new_value;
+ sum += src[x];
+ new_value = top[x] + sum;
+ out[x] = new_value - cur[x]; // vertical sum of 'r' pixels.
+ cur[x] = new_value;
+ }
+ // move input pointers one row down
+ p->top_ = p->cur_;
+ p->cur_ += w;
+ if (p->cur_ == p->end_) p->cur_ = p->start_; // roll-over
+ // We replicate edges, as it's somewhat easier as a boundary condition.
+ // That's why we don't update the 'src' pointer on top/bottom area:
+ if (p->row_ >= 0 && p->row_ < p->height_ - 1) {
+ p->src_ += p->width_;
+ }
+}
+
+// horizontal accumulation. We use mirror replication of missing pixels, as it's
+// a little easier to implement (surprisingly).
+static void HFilter(SmoothParams* const p) {
+ const uint16_t* const in = p->end_;
+ uint16_t* const out = p->average_;
+ const uint32_t scale = p->scale_;
+ const int w = p->width_;
+ const int r = p->radius_;
+
+ int x;
+ for (x = 0; x <= r; ++x) { // left mirroring
+ const uint16_t delta = in[x + r - 1] + in[r - x];
+ out[x] = (delta * scale) >> FIX;
+ }
+ for (; x < w - r; ++x) { // bulk middle run
+ const uint16_t delta = in[x + r] - in[x - r - 1];
+ out[x] = (delta * scale) >> FIX;
+ }
+ for (; x < w; ++x) { // right mirroring
+ const uint16_t delta =
+ 2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1];
+ out[x] = (delta * scale) >> FIX;
+ }
+}
+
+// emit one filtered output row
+static void ApplyFilter(SmoothParams* const p) {
+ const uint16_t* const average = p->average_;
+ const int w = p->width_;
+ const int16_t* const correction = p->correction_;
+#if defined(USE_DITHERING)
+ const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE];
+#endif
+ uint8_t* const dst = p->dst_;
+ int x;
+ for (x = 0; x < w; ++x) {
+ const int v = dst[x];
+ if (v < p->max_ && v > p->min_) {
+ const int c = (v << DFIX) + correction[average[x] - (v << LFIX)];
+#if defined(USE_DITHERING)
+ dst[x] = clip_8b(c + dither[x % DSIZE]);
+#else
+ dst[x] = clip_8b(c);
+#endif
+ }
}
+ p->dst_ += w; // advance output pointer
+}
+
+//------------------------------------------------------------------------------
+// Initialize correction table
+
+static void InitCorrectionLUT(int16_t* const lut, int min_dist) {
+ // The correction curve is:
+ // f(x) = x for x <= threshold2
+ // f(x) = 0 for x >= threshold1
+ // and a linear interpolation for range x=[threshold2, threshold1]
+ // (along with f(-x) = -f(x) symmetry).
+ // Note that: threshold2 = 3/4 * threshold1
+ const int threshold1 = min_dist << LFIX;
+ const int threshold2 = (3 * threshold1) >> 2;
+ const int max_threshold = threshold2 << DFIX;
+ const int delta = threshold1 - threshold2;
+ int i;
+ for (i = 1; i <= LUT_SIZE; ++i) {
+ int c = (i <= threshold2) ? (i << DFIX)
+ : (i < threshold1) ? max_threshold * (threshold1 - i) / delta
+ : 0;
+ c >>= LFIX;
+ lut[+i] = +c;
+ lut[-i] = -c;
+ }
+ lut[0] = 0;
+}
+
+static void CountLevels(const uint8_t* const data, int size,
+ SmoothParams* const p) {
+ int i, last_level;
+ uint8_t used_levels[256] = { 0 };
+ p->min_ = 255;
+ p->max_ = 0;
+ for (i = 0; i < size; ++i) {
+ const int v = data[i];
+ if (v < p->min_) p->min_ = v;
+ if (v > p->max_) p->max_ = v;
+ used_levels[v] = 1;
+ }
+ // Compute the mininum distance between two non-zero levels.
+ p->min_level_dist_ = p->max_ - p->min_;
+ last_level = -1;
+ for (i = 0; i < 256; ++i) {
+ if (used_levels[i]) {
+ ++p->num_levels_;
+ if (last_level >= 0) {
+ const int level_dist = i - last_level;
+ if (level_dist < p->min_level_dist_) {
+ p->min_level_dist_ = level_dist;
+ }
+ }
+ last_level = i;
+ }
+ }
+}
+
+// Initialize all params.
+static int InitParams(uint8_t* const data, int width, int height,
+ int radius, SmoothParams* const p) {
+ const int R = 2 * radius + 1; // total size of the kernel
+
+ const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_);
+ const size_t size_m = width * sizeof(*p->average_);
+ const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_);
+ const size_t total_size = size_scratch_m + size_m + size_lut;
+ uint8_t* mem = (uint8_t*)WebPSafeMalloc(1U, total_size);
+
+ if (mem == NULL) return 0;
+ p->mem_ = (void*)mem;
+
+ p->start_ = (uint16_t*)mem;
+ p->cur_ = p->start_;
+ p->end_ = p->start_ + R * width;
+ p->top_ = p->end_ - width;
+ memset(p->top_, 0, width * sizeof(*p->top_));
+ mem += size_scratch_m;
+
+ p->average_ = (uint16_t*)mem;
+ mem += size_m;
+
+ p->width_ = width;
+ p->height_ = height;
+ p->src_ = data;
+ p->dst_ = data;
+ p->radius_ = radius;
+ p->scale_ = (1 << (FIX + LFIX)) / (R * R); // normalization constant
+ p->row_ = -radius;
+
+ // analyze the input distribution so we can best-fit the threshold
+ CountLevels(data, width * height, p);
+
+ // correction table
+ p->correction_ = ((int16_t*)mem) + LUT_SIZE;
+ InitCorrectionLUT(p->correction_, p->min_level_dist_);
+
return 1;
}
+static void CleanupParams(SmoothParams* const p) {
+ WebPSafeFree(p->mem_);
+}
+
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+ int strength) {
+ const int radius = 4 * strength / 100;
+ if (strength < 0 || strength > 100) return 0;
+ if (data == NULL || width <= 0 || height <= 0) return 0; // bad params
+ if (radius > 0) {
+ SmoothParams p;
+ memset(&p, 0, sizeof(p));
+ if (!InitParams(data, width, height, radius, &p)) return 0;
+ if (p.num_levels_ > 2) {
+ for (; p.row_ < p.height_; ++p.row_) {
+ VFilter(&p); // accumulate average of input
+ // Need to wait few rows in order to prime the filter,
+ // before emitting some output.
+ if (p.row_ >= p.radius_) {
+ HFilter(&p);
+ ApplyFilter(&p);
+ }
+ }
+ }
+ CleanupParams(&p);
+ }
+ return 1;
+}
diff --git a/src/3rdparty/libwebp/src/utils/quant_levels_dec.h b/src/3rdparty/libwebp/src/utils/quant_levels_dec.h
index 0288383..9aab068 100644
--- a/src/3rdparty/libwebp/src/utils/quant_levels_dec.h
+++ b/src/3rdparty/libwebp/src/utils/quant_levels_dec.h
@@ -21,11 +21,12 @@ extern "C" {
#endif
// Apply post-processing to input 'data' of size 'width'x'height' assuming that
-// the source was quantized to a reduced number of levels. The post-processing
-// will be applied to 'num_rows' rows of 'data' starting from 'row'.
-// Returns false in case of error (data is NULL, invalid parameters, ...).
-int DequantizeLevels(uint8_t* const data, int width, int height,
- int row, int num_rows);
+// the source was quantized to a reduced number of levels.
+// Strength is in [0..100] and controls the amount of dithering applied.
+// Returns false in case of error (data is NULL, invalid parameters,
+// malloc failure, ...).
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+ int strength);
#ifdef __cplusplus
} // extern "C"
diff --git a/src/3rdparty/libwebp/src/utils/random.h b/src/3rdparty/libwebp/src/utils/random.h
index 08a83e9..c392a61 100644
--- a/src/3rdparty/libwebp/src/utils/random.h
+++ b/src/3rdparty/libwebp/src/utils/random.h
@@ -45,7 +45,8 @@ static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits,
rg->tab_[rg->index1_] = diff;
if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;
if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;
- diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center
+ // sign-extend, 0-center
+ diff = (int)((uint32_t)diff << 1) >> (32 - num_bits);
diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range
diff += 1 << (num_bits - 1); // shift back to 0.5-center
return diff;
diff --git a/src/3rdparty/libwebp/src/utils/rescaler.c b/src/3rdparty/libwebp/src/utils/rescaler.c
index 7061246..fad9c6b 100644
--- a/src/3rdparty/libwebp/src/utils/rescaler.c
+++ b/src/3rdparty/libwebp/src/utils/rescaler.c
@@ -14,41 +14,20 @@
#include <assert.h>
#include <stdlib.h>
#include "./rescaler.h"
+#include "../dsp/dsp.h"
//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
+ const uint8_t* const src, int channel) = NULL;
+void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;
#define RFIX 30
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
-void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
- uint8_t* const dst, int dst_width, int dst_height,
- int dst_stride, int num_channels, int x_add, int x_sub,
- int y_add, int y_sub, int32_t* const work) {
- wrk->x_expand = (src_width < dst_width);
- wrk->src_width = src_width;
- wrk->src_height = src_height;
- wrk->dst_width = dst_width;
- wrk->dst_height = dst_height;
- wrk->dst = dst;
- wrk->dst_stride = dst_stride;
- wrk->num_channels = num_channels;
- // for 'x_expand', we use bilinear interpolation
- wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
- wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
- wrk->y_accum = y_add;
- wrk->y_add = y_add;
- wrk->y_sub = y_sub;
- wrk->fx_scale = (1 << RFIX) / x_sub;
- wrk->fy_scale = (1 << RFIX) / y_sub;
- wrk->fxy_scale = wrk->x_expand ?
- ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
- ((int64_t)dst_height << RFIX) / (x_add * src_height);
- wrk->irow = work;
- wrk->frow = work + num_channels * dst_width;
-}
-
-void WebPRescalerImportRow(WebPRescaler* const wrk,
- const uint8_t* const src, int channel) {
+static void ImportRowC(WebPRescaler* const wrk,
+ const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
int x_in = channel;
@@ -84,22 +63,20 @@ void WebPRescalerImportRow(WebPRescaler* const wrk,
accum -= wrk->x_sub;
}
}
- // Accumulate the new row's contribution
+ // Accumulate the contribution of the new row.
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
wrk->irow[x_out] += wrk->frow[x_out];
}
}
-uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {
+static void ExportRowC(WebPRescaler* const wrk, int x_out) {
if (wrk->y_accum <= 0) {
- int x_out;
uint8_t* const dst = wrk->dst;
int32_t* const irow = wrk->irow;
const int32_t* const frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
const int x_out_max = wrk->dst_width * wrk->num_channels;
-
- for (x_out = 0; x_out < x_out_max; ++x_out) {
+ for (; x_out < x_out_max; ++x_out) {
const int frac = (int)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
@@ -107,9 +84,214 @@ uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {
}
wrk->y_accum += wrk->y_add;
wrk->dst += wrk->dst_stride;
- return dst;
+ }
+}
+
+//------------------------------------------------------------------------------
+// MIPS version
+
+#if defined(WEBP_USE_MIPS32)
+
+static void ImportRowMIPS(WebPRescaler* const wrk,
+ const uint8_t* const src, int channel) {
+ const int x_stride = wrk->num_channels;
+ const int x_out_max = wrk->dst_width * wrk->num_channels;
+ const int fx_scale = wrk->fx_scale;
+ const int x_add = wrk->x_add;
+ const int x_sub = wrk->x_sub;
+ int* frow = wrk->frow + channel;
+ int* irow = wrk->irow + channel;
+ const uint8_t* src1 = src + channel;
+ int temp1, temp2, temp3;
+ int base, frac, sum;
+ int accum, accum1;
+ const int x_stride1 = x_stride << 2;
+ int loop_c = x_out_max - channel;
+
+ if (!wrk->x_expand) {
+ __asm__ volatile (
+ "li %[temp1], 0x8000 \n\t"
+ "li %[temp2], 0x10000 \n\t"
+ "li %[sum], 0 \n\t"
+ "li %[accum], 0 \n\t"
+ "1: \n\t"
+ "addu %[accum], %[accum], %[x_add] \n\t"
+ "blez %[accum], 3f \n\t"
+ "2: \n\t"
+ "lbu %[temp3], 0(%[src1]) \n\t"
+ "subu %[accum], %[accum], %[x_sub] \n\t"
+ "addu %[src1], %[src1], %[x_stride] \n\t"
+ "addu %[sum], %[sum], %[temp3] \n\t"
+ "bgtz %[accum], 2b \n\t"
+ "3: \n\t"
+ "lbu %[base], 0(%[src1]) \n\t"
+ "addu %[src1], %[src1], %[x_stride] \n\t"
+ "negu %[accum1], %[accum] \n\t"
+ "mul %[frac], %[base], %[accum1] \n\t"
+ "addu %[temp3], %[sum], %[base] \n\t"
+ "mul %[temp3], %[temp3], %[x_sub] \n\t"
+ "lw %[base], 0(%[irow]) \n\t"
+ "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
+ "sll %[accum1], %[frac], 2 \n\t"
+ "mult %[temp1], %[temp2] \n\t"
+ "madd %[accum1], %[fx_scale] \n\t"
+ "mfhi %[sum] \n\t"
+ "subu %[temp3], %[temp3], %[frac] \n\t"
+ "sw %[temp3], 0(%[frow]) \n\t"
+ "add %[base], %[base], %[temp3] \n\t"
+ "sw %[base], 0(%[irow]) \n\t"
+ "addu %[irow], %[irow], %[x_stride1] \n\t"
+ "addu %[frow], %[frow], %[x_stride1] \n\t"
+ "bgtz %[loop_c], 1b \n\t"
+
+ : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
+ [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
+ [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),
+ [temp2] "=&r" (temp2), [temp1] "=&r" (temp1)
+ : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
+ [x_sub] "r" (x_sub), [x_add] "r" (x_add),
+ [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)
+ : "memory", "hi", "lo"
+ );
} else {
- return NULL;
+ __asm__ volatile (
+ "lbu %[temp1], 0(%[src1]) \n\t"
+ "move %[temp2], %[temp1] \n\t"
+ "li %[accum], 0 \n\t"
+ "1: \n\t"
+ "bgez %[accum], 2f \n\t"
+ "move %[temp2], %[temp1] \n\t"
+ "addu %[src1], %[x_stride] \n\t"
+ "lbu %[temp1], 0(%[src1]) \n\t"
+ "addu %[accum], %[x_add] \n\t"
+ "2: \n\t"
+ "subu %[temp3], %[temp2], %[temp1] \n\t"
+ "mul %[temp3], %[temp3], %[accum] \n\t"
+ "mul %[base], %[temp1], %[x_add] \n\t"
+ "subu %[accum], %[accum], %[x_sub] \n\t"
+ "lw %[frac], 0(%[irow]) \n\t"
+ "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
+ "addu %[temp3], %[base], %[temp3] \n\t"
+ "sw %[temp3], 0(%[frow]) \n\t"
+ "addu %[frow], %[x_stride1] \n\t"
+ "addu %[frac], %[temp3] \n\t"
+ "sw %[frac], 0(%[irow]) \n\t"
+ "addu %[irow], %[x_stride1] \n\t"
+ "bgtz %[loop_c], 1b \n\t"
+
+ : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),
+ [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow)
+ : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
+ [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c)
+ : "memory", "hi", "lo"
+ );
+ }
+}
+
+static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {
+ if (wrk->y_accum <= 0) {
+ uint8_t* const dst = wrk->dst;
+ int32_t* const irow = wrk->irow;
+ const int32_t* const frow = wrk->frow;
+ const int yscale = wrk->fy_scale * (-wrk->y_accum);
+ const int x_out_max = wrk->dst_width * wrk->num_channels;
+ // if wrk->fxy_scale can fit into 32 bits use optimized code,
+ // otherwise use C code
+ if ((wrk->fxy_scale >> 32) == 0) {
+ int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
+ const int temp2 = (int)(wrk->fxy_scale);
+ const int temp8 = x_out_max << 2;
+ uint8_t* dst_t = (uint8_t*)dst;
+ int32_t* irow_t = (int32_t*)irow;
+ const int32_t* frow_t = (const int32_t*)frow;
+
+ __asm__ volatile(
+ "addiu %[temp6], $zero, -256 \n\t"
+ "addiu %[temp7], $zero, 255 \n\t"
+ "li %[temp3], 0x10000 \n\t"
+ "li %[temp4], 0x8000 \n\t"
+ "addu %[loop_end], %[frow_t], %[temp8] \n\t"
+ "1: \n\t"
+ "lw %[temp0], 0(%[frow_t]) \n\t"
+ "mult %[temp3], %[temp4] \n\t"
+ "addiu %[frow_t], %[frow_t], 4 \n\t"
+ "sll %[temp0], %[temp0], 2 \n\t"
+ "madd %[temp0], %[yscale] \n\t"
+ "mfhi %[temp1] \n\t"
+ "lw %[temp0], 0(%[irow_t]) \n\t"
+ "addiu %[dst_t], %[dst_t], 1 \n\t"
+ "addiu %[irow_t], %[irow_t], 4 \n\t"
+ "subu %[temp0], %[temp0], %[temp1] \n\t"
+ "mult %[temp3], %[temp4] \n\t"
+ "sll %[temp0], %[temp0], 2 \n\t"
+ "madd %[temp0], %[temp2] \n\t"
+ "mfhi %[temp5] \n\t"
+ "sw %[temp1], -4(%[irow_t]) \n\t"
+ "and %[temp0], %[temp5], %[temp6] \n\t"
+ "slti %[temp1], %[temp5], 0 \n\t"
+ "beqz %[temp0], 2f \n\t"
+ "xor %[temp5], %[temp5], %[temp5] \n\t"
+ "movz %[temp5], %[temp7], %[temp1] \n\t"
+ "2: \n\t"
+ "sb %[temp5], -1(%[dst_t]) \n\t"
+ "bne %[frow_t], %[loop_end], 1b \n\t"
+
+ : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+ [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+ [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),
+ [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)
+ : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
+ : "memory", "hi", "lo"
+ );
+ wrk->y_accum += wrk->y_add;
+ wrk->dst += wrk->dst_stride;
+ } else {
+ ExportRowC(wrk, x_out);
+ }
+ }
+}
+#endif // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+ uint8_t* const dst, int dst_width, int dst_height,
+ int dst_stride, int num_channels, int x_add, int x_sub,
+ int y_add, int y_sub, int32_t* const work) {
+ wrk->x_expand = (src_width < dst_width);
+ wrk->src_width = src_width;
+ wrk->src_height = src_height;
+ wrk->dst_width = dst_width;
+ wrk->dst_height = dst_height;
+ wrk->dst = dst;
+ wrk->dst_stride = dst_stride;
+ wrk->num_channels = num_channels;
+ // for 'x_expand', we use bilinear interpolation
+ wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
+ wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
+ wrk->y_accum = y_add;
+ wrk->y_add = y_add;
+ wrk->y_sub = y_sub;
+ wrk->fx_scale = (1 << RFIX) / x_sub;
+ wrk->fy_scale = (1 << RFIX) / y_sub;
+ wrk->fxy_scale = wrk->x_expand ?
+ ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
+ ((int64_t)dst_height << RFIX) / (x_add * src_height);
+ wrk->irow = work;
+ wrk->frow = work + num_channels * dst_width;
+
+ if (WebPRescalerImportRow == NULL) {
+ WebPRescalerImportRow = ImportRowC;
+ WebPRescalerExportRow = ExportRowC;
+ if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_MIPS32)
+ if (VP8GetCPUInfo(kMIPS32)) {
+ WebPRescalerImportRow = ImportRowMIPS;
+ WebPRescalerExportRow = ExportRowMIPS;
+ }
+#endif
+ }
}
}
@@ -142,11 +324,10 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
int WebPRescalerExport(WebPRescaler* const rescaler) {
int total_exported = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
- WebPRescalerExportRow(rescaler);
+ WebPRescalerExportRow(rescaler, 0);
++total_exported;
}
return total_exported;
}
//------------------------------------------------------------------------------
-
diff --git a/src/3rdparty/libwebp/src/utils/rescaler.h b/src/3rdparty/libwebp/src/utils/rescaler.h
index 68e49ce..a6f3787 100644
--- a/src/3rdparty/libwebp/src/utils/rescaler.h
+++ b/src/3rdparty/libwebp/src/utils/rescaler.h
@@ -52,26 +52,24 @@ void WebPRescalerInit(WebPRescaler* const rescaler,
int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
int max_num_lines);
-// Import a row of data and save its contribution in the rescaler.
-// 'channel' denotes the channel number to be imported.
-void WebPRescalerImportRow(WebPRescaler* const rescaler,
- const uint8_t* const src, int channel);
-
// Import multiple rows over all channels, until at least one row is ready to
// be exported. Returns the actual number of lines that were imported.
int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
const uint8_t* src, int src_stride);
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported.
+extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
+ const uint8_t* const src, int channel);
+// Export one row (starting at x_out position) from rescaler.
+extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out);
+
// Return true if there is pending output rows ready.
static WEBP_INLINE
int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
return (rescaler->y_accum <= 0);
}
-// Export one row from rescaler. Returns the pointer where output was written,
-// or NULL if no row was pending.
-uint8_t* WebPRescalerExportRow(WebPRescaler* const rescaler);
-
// Export as many rows as possible. Return the numbers of rows written.
int WebPRescalerExport(WebPRescaler* const rescaler);
diff --git a/src/3rdparty/libwebp/src/utils/thread.c b/src/3rdparty/libwebp/src/utils/thread.c
index a9e3fae..264210b 100644
--- a/src/3rdparty/libwebp/src/utils/thread.c
+++ b/src/3rdparty/libwebp/src/utils/thread.c
@@ -14,11 +14,35 @@
#include <assert.h>
#include <string.h> // for memset()
#include "./thread.h"
+#include "./utils.h"
#ifdef WEBP_USE_THREAD
#if defined(_WIN32)
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+ HANDLE waiting_sem_;
+ HANDLE received_sem_;
+ HANDLE signal_event_;
+} pthread_cond_t;
+
+#else // !_WIN32
+
+#include <pthread.h>
+
+#endif // _WIN32
+
+struct WebPWorkerImpl {
+ pthread_mutex_t mutex_;
+ pthread_cond_t condition_;
+ pthread_t thread_;
+};
+
+#if defined(_WIN32)
+
//------------------------------------------------------------------------------
// simplistic pthread emulation layer
@@ -129,23 +153,25 @@ static int pthread_cond_wait(pthread_cond_t* const condition,
//------------------------------------------------------------------------------
+static void Execute(WebPWorker* const worker); // Forward declaration.
+
static THREADFN ThreadLoop(void* ptr) {
WebPWorker* const worker = (WebPWorker*)ptr;
int done = 0;
while (!done) {
- pthread_mutex_lock(&worker->mutex_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
while (worker->status_ == OK) { // wait in idling mode
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
}
if (worker->status_ == WORK) {
- WebPWorkerExecute(worker);
+ Execute(worker);
worker->status_ = OK;
} else if (worker->status_ == NOT_OK) { // finish the worker
done = 1;
}
// signal to the main thread that we're done (for Sync())
- pthread_cond_signal(&worker->condition_);
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_cond_signal(&worker->impl_->condition_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
return THREAD_RETURN(NULL); // Thread is finished
}
@@ -153,32 +179,36 @@ static THREADFN ThreadLoop(void* ptr) {
// main thread state control
static void ChangeState(WebPWorker* const worker,
WebPWorkerStatus new_status) {
- // no-op when attempting to change state on a thread that didn't come up
- if (worker->status_ < OK) return;
+ // No-op when attempting to change state on a thread that didn't come up.
+ // Checking status_ without acquiring the lock first would result in a data
+ // race.
+ if (worker->impl_ == NULL) return;
- pthread_mutex_lock(&worker->mutex_);
- // wait for the worker to finish
- while (worker->status_ != OK) {
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
- }
- // assign new status and release the working thread if needed
- if (new_status != OK) {
- worker->status_ = new_status;
- pthread_cond_signal(&worker->condition_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ if (worker->status_ >= OK) {
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->impl_->condition_);
+ }
}
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
#endif // WEBP_USE_THREAD
//------------------------------------------------------------------------------
-void WebPWorkerInit(WebPWorker* const worker) {
+static void Init(WebPWorker* const worker) {
memset(worker, 0, sizeof(*worker));
worker->status_ = NOT_OK;
}
-int WebPWorkerSync(WebPWorker* const worker) {
+static int Sync(WebPWorker* const worker) {
#ifdef WEBP_USE_THREAD
ChangeState(worker, OK);
#endif
@@ -186,56 +216,94 @@ int WebPWorkerSync(WebPWorker* const worker) {
return !worker->had_error;
}
-int WebPWorkerReset(WebPWorker* const worker) {
+static int Reset(WebPWorker* const worker) {
int ok = 1;
worker->had_error = 0;
if (worker->status_ < OK) {
#ifdef WEBP_USE_THREAD
- if (pthread_mutex_init(&worker->mutex_, NULL) ||
- pthread_cond_init(&worker->condition_, NULL)) {
+ worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
+ if (worker->impl_ == NULL) {
return 0;
}
- pthread_mutex_lock(&worker->mutex_);
- ok = !pthread_create(&worker->thread_, NULL, ThreadLoop, worker);
+ if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+ goto Error;
+ }
+ if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ goto Error;
+ }
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
if (ok) worker->status_ = OK;
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
+ if (!ok) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
+ Error:
+ WebPSafeFree(worker->impl_);
+ worker->impl_ = NULL;
+ return 0;
+ }
#else
worker->status_ = OK;
#endif
} else if (worker->status_ > OK) {
- ok = WebPWorkerSync(worker);
+ ok = Sync(worker);
}
assert(!ok || (worker->status_ == OK));
return ok;
}
-void WebPWorkerExecute(WebPWorker* const worker) {
+static void Execute(WebPWorker* const worker) {
if (worker->hook != NULL) {
worker->had_error |= !worker->hook(worker->data1, worker->data2);
}
}
-void WebPWorkerLaunch(WebPWorker* const worker) {
+static void Launch(WebPWorker* const worker) {
#ifdef WEBP_USE_THREAD
ChangeState(worker, WORK);
#else
- WebPWorkerExecute(worker);
+ Execute(worker);
#endif
}
-void WebPWorkerEnd(WebPWorker* const worker) {
- if (worker->status_ >= OK) {
+static void End(WebPWorker* const worker) {
#ifdef WEBP_USE_THREAD
+ if (worker->impl_ != NULL) {
ChangeState(worker, NOT_OK);
- pthread_join(worker->thread_, NULL);
- pthread_mutex_destroy(&worker->mutex_);
- pthread_cond_destroy(&worker->condition_);
+ pthread_join(worker->impl_->thread_, NULL);
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
+ WebPSafeFree(worker->impl_);
+ worker->impl_ = NULL;
+ }
#else
- worker->status_ = NOT_OK;
+ worker->status_ = NOT_OK;
+ assert(worker->impl_ == NULL);
#endif
- }
assert(worker->status_ == NOT_OK);
}
//------------------------------------------------------------------------------
+static WebPWorkerInterface g_worker_interface = {
+ Init, Reset, Sync, Launch, Execute, End
+};
+
+int WebPSetWorkerInterface(const WebPWorkerInterface* const winterface) {
+ if (winterface == NULL ||
+ winterface->Init == NULL || winterface->Reset == NULL ||
+ winterface->Sync == NULL || winterface->Launch == NULL ||
+ winterface->Execute == NULL || winterface->End == NULL) {
+ return 0;
+ }
+ g_worker_interface = *winterface;
+ return 1;
+}
+
+const WebPWorkerInterface* WebPGetWorkerInterface(void) {
+ return &g_worker_interface;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/utils/thread.h b/src/3rdparty/libwebp/src/utils/thread.h
index aef33bd..7bd451b 100644
--- a/src/3rdparty/libwebp/src/utils/thread.h
+++ b/src/3rdparty/libwebp/src/utils/thread.h
@@ -15,33 +15,15 @@
#define WEBP_UTILS_THREAD_H_
#ifdef HAVE_CONFIG_H
-#include "config.h"
+#include "../webp/config.h"
#endif
+#include "../webp/types.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifdef WEBP_USE_THREAD
-
-#if defined(_WIN32)
-
-#include <windows.h>
-typedef HANDLE pthread_t;
-typedef CRITICAL_SECTION pthread_mutex_t;
-typedef struct {
- HANDLE waiting_sem_;
- HANDLE received_sem_;
- HANDLE signal_event_;
-} pthread_cond_t;
-
-#else
-
-#include <pthread.h>
-
-#endif /* _WIN32 */
-#endif /* WEBP_USE_THREAD */
-
// State of the worker thread object
typedef enum {
NOT_OK = 0, // object is unusable
@@ -53,13 +35,12 @@ typedef enum {
// arguments (data1 and data2), and should return false in case of error.
typedef int (*WebPWorkerHook)(void*, void*);
-// Synchronize object used to launch job in the worker thread
+// Platform-dependent implementation details for the worker.
+typedef struct WebPWorkerImpl WebPWorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
typedef struct {
-#ifdef WEBP_USE_THREAD
- pthread_mutex_t mutex_;
- pthread_cond_t condition_;
- pthread_t thread_;
-#endif
+ WebPWorkerImpl* impl_;
WebPWorkerStatus status_;
WebPWorkerHook hook; // hook to call
void* data1; // first argument passed to 'hook'
@@ -67,26 +48,41 @@ typedef struct {
int had_error; // return value of the last call to 'hook'
} WebPWorker;
-// Must be called first, before any other method.
-void WebPWorkerInit(WebPWorker* const worker);
-// Must be called to initialize the object and spawn the thread. Re-entrant.
-// Will potentially launch the thread. Returns false in case of error.
-int WebPWorkerReset(WebPWorker* const worker);
-// Makes sure the previous work is finished. Returns true if worker->had_error
-// was not set and no error condition was triggered by the working thread.
-int WebPWorkerSync(WebPWorker* const worker);
-// Triggers the thread to call hook() with data1 and data2 argument. These
-// hook/data1/data2 can be changed at any time before calling this function,
-// but not be changed afterward until the next call to WebPWorkerSync().
-void WebPWorkerLaunch(WebPWorker* const worker);
-// This function is similar to WebPWorkerLaunch() except that it calls the
-// hook directly instead of using a thread. Convenient to bypass the thread
-// mechanism while still using the WebPWorker structs. WebPWorkerSync() must
-// still be called afterward (for error reporting).
-void WebPWorkerExecute(WebPWorker* const worker);
-// Kill the thread and terminate the object. To use the object again, one
-// must call WebPWorkerReset() again.
-void WebPWorkerEnd(WebPWorker* const worker);
+// The interface for all thread-worker related functions. All these functions
+// must be implemented.
+typedef struct {
+ // Must be called first, before any other method.
+ void (*Init)(WebPWorker* const worker);
+ // Must be called to initialize the object and spawn the thread. Re-entrant.
+ // Will potentially launch the thread. Returns false in case of error.
+ int (*Reset)(WebPWorker* const worker);
+ // Makes sure the previous work is finished. Returns true if worker->had_error
+ // was not set and no error condition was triggered by the working thread.
+ int (*Sync)(WebPWorker* const worker);
+ // Triggers the thread to call hook() with data1 and data2 arguments. These
+ // hook/data1/data2 values can be changed at any time before calling this
+ // function, but not be changed afterward until the next call to Sync().
+ void (*Launch)(WebPWorker* const worker);
+ // This function is similar to Launch() except that it calls the
+ // hook directly instead of using a thread. Convenient to bypass the thread
+ // mechanism while still using the WebPWorker structs. Sync() must
+ // still be called afterward (for error reporting).
+ void (*Execute)(WebPWorker* const worker);
+ // Kill the thread and terminate the object. To use the object again, one
+ // must call Reset() again.
+ void (*End)(WebPWorker* const worker);
+} WebPWorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied, it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+WEBP_EXTERN(int) WebPSetWorkerInterface(
+ const WebPWorkerInterface* const interface);
+
+// Retrieve the currently set thread worker interface.
+WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/utils/utils.c b/src/3rdparty/libwebp/src/utils/utils.c
index 5592538..8ff7f12 100644
--- a/src/3rdparty/libwebp/src/utils/utils.c
+++ b/src/3rdparty/libwebp/src/utils/utils.c
@@ -14,29 +14,198 @@
#include <stdlib.h>
#include "./utils.h"
+// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
+// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
+// and not multi-thread safe!).
+// An interesting alternative is valgrind's 'massif' tool:
+// http://valgrind.org/docs/manual/ms-manual.html
+// Here is an example command line:
+/* valgrind --tool=massif --massif-out-file=massif.out \
+ --stacks=yes --alloc-fn=WebPSafeAlloc --alloc-fn=WebPSafeCalloc
+ ms_print massif.out
+*/
+// In addition:
+// * if PRINT_MEM_TRAFFIC is defined, all the details of the malloc/free cycles
+// are printed.
+// * if MALLOC_FAIL_AT is defined, the global environment variable
+// $MALLOC_FAIL_AT is used to simulate a memory error when calloc or malloc
+// is called for the nth time. Example usage:
+// export MALLOC_FAIL_AT=50 && ./examples/cwebp input.png
+// * if MALLOC_LIMIT is defined, the global environment variable $MALLOC_LIMIT
+// sets the maximum amount of memory (in bytes) made available to libwebp.
+// This can be used to emulate environment with very limited memory.
+// Example: export MALLOC_LIMIT=64000000 && ./examples/dwebp picture.webp
+
+// #define PRINT_MEM_INFO
+// #define PRINT_MEM_TRAFFIC
+// #define MALLOC_FAIL_AT
+// #define MALLOC_LIMIT
+
//------------------------------------------------------------------------------
// Checked memory allocation
+#if defined(PRINT_MEM_INFO)
+
+#include <stdio.h>
+#include <stdlib.h> // for abort()
+
+static int num_malloc_calls = 0;
+static int num_calloc_calls = 0;
+static int num_free_calls = 0;
+static int countdown_to_fail = 0; // 0 = off
+
+typedef struct MemBlock MemBlock;
+struct MemBlock {
+ void* ptr_;
+ size_t size_;
+ MemBlock* next_;
+};
+
+static MemBlock* all_blocks = NULL;
+static size_t total_mem = 0;
+static size_t total_mem_allocated = 0;
+static size_t high_water_mark = 0;
+static size_t mem_limit = 0;
+
+static int exit_registered = 0;
+
+static void PrintMemInfo(void) {
+ fprintf(stderr, "\nMEMORY INFO:\n");
+ fprintf(stderr, "num calls to: malloc = %4d\n", num_malloc_calls);
+ fprintf(stderr, " calloc = %4d\n", num_calloc_calls);
+ fprintf(stderr, " free = %4d\n", num_free_calls);
+ fprintf(stderr, "total_mem: %u\n", (uint32_t)total_mem);
+ fprintf(stderr, "total_mem allocated: %u\n", (uint32_t)total_mem_allocated);
+ fprintf(stderr, "high-water mark: %u\n", (uint32_t)high_water_mark);
+ while (all_blocks != NULL) {
+ MemBlock* b = all_blocks;
+ all_blocks = b->next_;
+ free(b);
+ }
+}
+
+static void Increment(int* const v) {
+ if (!exit_registered) {
+#if defined(MALLOC_FAIL_AT)
+ {
+ const char* const malloc_fail_at_str = getenv("MALLOC_FAIL_AT");
+ if (malloc_fail_at_str != NULL) {
+ countdown_to_fail = atoi(malloc_fail_at_str);
+ }
+ }
+#endif
+#if defined(MALLOC_LIMIT)
+ {
+ const char* const malloc_limit_str = getenv("MALLOC_LIMIT");
+ if (malloc_limit_str != NULL) {
+ mem_limit = atoi(malloc_limit_str);
+ }
+ }
+#endif
+ (void)countdown_to_fail;
+ (void)mem_limit;
+ atexit(PrintMemInfo);
+ exit_registered = 1;
+ }
+ ++*v;
+}
+
+static void AddMem(void* ptr, size_t size) {
+ if (ptr != NULL) {
+ MemBlock* const b = (MemBlock*)malloc(sizeof(*b));
+ if (b == NULL) abort();
+ b->next_ = all_blocks;
+ all_blocks = b;
+ b->ptr_ = ptr;
+ b->size_ = size;
+ total_mem += size;
+ total_mem_allocated += size;
+#if defined(PRINT_MEM_TRAFFIC)
+#if defined(MALLOC_FAIL_AT)
+ fprintf(stderr, "fail-count: %5d [mem=%u]\n",
+ num_malloc_calls + num_calloc_calls, (uint32_t)total_mem);
+#else
+ fprintf(stderr, "Mem: %u (+%u)\n", (uint32_t)total_mem, (uint32_t)size);
+#endif
+#endif
+ if (total_mem > high_water_mark) high_water_mark = total_mem;
+ }
+}
+
+static void SubMem(void* ptr) {
+ if (ptr != NULL) {
+ MemBlock** b = &all_blocks;
+ // Inefficient search, but that's just for debugging.
+ while (*b != NULL && (*b)->ptr_ != ptr) b = &(*b)->next_;
+ if (*b == NULL) {
+ fprintf(stderr, "Invalid pointer free! (%p)\n", ptr);
+ abort();
+ }
+ {
+ MemBlock* const block = *b;
+ *b = block->next_;
+ total_mem -= block->size_;
+#if defined(PRINT_MEM_TRAFFIC)
+ fprintf(stderr, "Mem: %u (-%u)\n",
+ (uint32_t)total_mem, (uint32_t)block->size_);
+#endif
+ free(block);
+ }
+ }
+}
+
+#else
+#define Increment(v) do {} while (0)
+#define AddMem(p, s) do {} while (0)
+#define SubMem(p) do {} while (0)
+#endif
+
// Returns 0 in case of overflow of nmemb * size.
static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
const uint64_t total_size = nmemb * size;
if (nmemb == 0) return 1;
if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
if (total_size != (size_t)total_size) return 0;
+#if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT)
+ if (countdown_to_fail > 0 && --countdown_to_fail == 0) {
+ return 0; // fake fail!
+ }
+#endif
+#if defined(MALLOC_LIMIT)
+ if (mem_limit > 0 && total_mem + total_size >= mem_limit) {
+ return 0; // fake fail!
+ }
+#endif
+
return 1;
}
void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
+ void* ptr;
+ Increment(&num_malloc_calls);
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
assert(nmemb * size > 0);
- return malloc((size_t)(nmemb * size));
+ ptr = malloc((size_t)(nmemb * size));
+ AddMem(ptr, (size_t)(nmemb * size));
+ return ptr;
}
void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
+ void* ptr;
+ Increment(&num_calloc_calls);
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
assert(nmemb * size > 0);
- return calloc((size_t)nmemb, size);
+ ptr = calloc((size_t)nmemb, size);
+ AddMem(ptr, (size_t)(nmemb * size));
+ return ptr;
}
-//------------------------------------------------------------------------------
+void WebPSafeFree(void* const ptr) {
+ if (ptr != NULL) {
+ Increment(&num_free_calls);
+ SubMem(ptr);
+ }
+ free(ptr);
+}
+//------------------------------------------------------------------------------
diff --git a/src/3rdparty/libwebp/src/utils/utils.h b/src/3rdparty/libwebp/src/utils/utils.h
index 8bdf0f0..f2c498a 100644
--- a/src/3rdparty/libwebp/src/utils/utils.h
+++ b/src/3rdparty/libwebp/src/utils/utils.h
@@ -35,10 +35,13 @@ extern "C" {
// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
// safe malloc() borrows the signature from calloc(), pointing at the dangerous
// underlying multiply involved.
-void* WebPSafeMalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
-void* WebPSafeCalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+// Companion deallocation function to the above allocations.
+WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
//------------------------------------------------------------------------------
// Reading/writing data.
@@ -74,6 +77,41 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
PutLE16(data + 2, (int)(val >> 16));
}
+// Returns (int)floor(log2(n)). n must be > 0.
+// use GNU builtins where available.
+#if defined(__GNUC__) && \
+ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ return 31 ^ __builtin_clz(n);
+}
+#elif defined(_MSC_VER) && _MSC_VER > 1310 && \
+ (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ uint32_t first_set_bit;
+ _BitScanReverse(&first_set_bit, n);
+ return first_set_bit;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+ int log = 0;
+ uint32_t value = n;
+ int i;
+
+ for (i = 4; i >= 0; --i) {
+ const int shift = (1 << i);
+ const uint32_t x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ return log;
+}
+#endif
+
//------------------------------------------------------------------------------
#ifdef __cplusplus