summary refs log tree commit diff stats
path: root/src/3rdparty/libwebp/src/dsp/lossless.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/lossless.c')
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless.c 981
1 files changed, 547 insertions, 434 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/lossless.c b/src/3rdparty/libwebp/src/dsp/lossless.c
index bab76d2..ee334bc 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless.c
@@ -15,21 +15,16 @@
#include "./dsp.h"
-#if defined(WEBP_USE_SSE2)
-#include <emmintrin.h>
-#endif
-
#include <math.h>
#include <stdlib.h>
-#include "./lossless.h"
#include "../dec/vp8li.h"
+#include "../utils/endian_inl.h"
+#include "./lossless.h"
#include "./yuv.h"
#define MAX_DIFF_COST (1e30f)
// lookup table for small values of log2(int)
-#define APPROX_LOG_MAX 4096
-#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f,
@@ -331,30 +326,59 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
};
-float VP8LFastSLog2Slow(int v) {
+// The threshold below which the approximate version of log_2 can be used.
+// Practically, we could get rid of the call to log() entirely, as the two
+// values match to a very high degree (the ratio of these two is 0.99999x).
+// Keeping a high threshold for now.
+#define APPROX_LOG_WITH_CORRECTION_MAX 65536
+#define APPROX_LOG_MAX 4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+static float FastSLog2Slow(uint32_t v) {
assert(v >= LOG_LOOKUP_IDX_MAX);
- if (v < APPROX_LOG_MAX) {
+ if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
int log_cnt = 0;
+ uint32_t y = 1;
+ int correction = 0;
const float v_f = (float)v;
- while (v >= LOG_LOOKUP_IDX_MAX) {
+ const uint32_t orig_v = v;
+ do {
++log_cnt;
v = v >> 1;
- }
- return v_f * (kLog2Table[v] + log_cnt);
+ y = y << 1;
+ } while (v >= LOG_LOOKUP_IDX_MAX);
+ // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+ // Xf = floor(Xf) * (1 + (v % y) / v)
+ // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+ // The correction factor: log(1 + d) ~ d; for very small d values, so
+ // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+ // LOG_2_RECIPROCAL ~ 23/16
+ correction = (23 * (orig_v & (y - 1))) >> 4;
+ return v_f * (kLog2Table[v] + log_cnt) + correction;
} else {
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
}
}
-float VP8LFastLog2Slow(int v) {
+static float FastLog2Slow(uint32_t v) {
assert(v >= LOG_LOOKUP_IDX_MAX);
- if (v < APPROX_LOG_MAX) {
+ if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
int log_cnt = 0;
- while (v >= LOG_LOOKUP_IDX_MAX) {
+ uint32_t y = 1;
+ const uint32_t orig_v = v;
+ double log_2;
+ do {
++log_cnt;
v = v >> 1;
+ y = y << 1;
+ } while (v >= LOG_LOOKUP_IDX_MAX);
+ log_2 = kLog2Table[v] + log_cnt;
+ if (orig_v >= APPROX_LOG_MAX) {
+ // Since the division is still expensive, add this correction factor only
+ // for large values of 'v'.
+ const int correction = (23 * (orig_v & (y - 1))) >> 4;
+ log_2 += (double)correction / orig_v;
}
- return kLog2Table[v] + log_cnt;
+ return (float)log_2;
} else {
return (float)(LOG_2_RECIPROCAL * log((double)v));
}
@@ -363,6 +387,9 @@ float VP8LFastLog2Slow(int v) {
//------------------------------------------------------------------------------
// Image transforms.
+// Mostly used to reduce code size and improve readability.
+static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
+
// In-place sum of each component with mod 256.
static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
@@ -406,7 +433,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
(c1 >> 8) & 0xff,
(c2 >> 8) & 0xff);
const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
- return (a << 24) | (r << 16) | (g << 8) | b;
+ return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
@@ -420,15 +447,24 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
- return (a << 24) | (r << 16) | (g << 8) | b;
+ return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
-static WEBP_INLINE int Sub3(int a, int b, int c) {
+// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
+#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
+# define LOCAL_INLINE __attribute__ ((noinline))
+#else
+# define LOCAL_INLINE WEBP_INLINE
+#endif
+
+static LOCAL_INLINE int Sub3(int a, int b, int c) {
const int pb = b - c;
const int pa = a - c;
return abs(pb) - abs(pa);
}
+#undef LOCAL_INLINE
+
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
const int pa_minus_pb =
Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
@@ -489,21 +525,19 @@ static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
return pred;
}
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
- const uint32_t pred = VP8LSelect(top[0], left, top[-1]);
+ const uint32_t pred = Select(top[0], left, top[-1]);
return pred;
}
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
- const uint32_t pred = VP8LClampedAddSubtractFull(left, top[0], top[-1]);
+ const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
return pred;
}
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
- const uint32_t pred = VP8LClampedAddSubtractHalf(left, top[0], top[-1]);
+ const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
return pred;
}
-// TODO(vikasa): Export the predictor array, to allow SSE2 variants.
-typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
-static const PredictorFunc kPredictors[16] = {
+static const VP8LPredictorFunc kPredictorsC[16] = {
Predictor0, Predictor1, Predictor2, Predictor3,
Predictor4, Predictor5, Predictor6, Predictor7,
Predictor8, Predictor9, Predictor10, Predictor11,
@@ -511,10 +545,9 @@ static const PredictorFunc kPredictors[16] = {
Predictor0, Predictor0 // <- padding security sentinels
};
-// TODO(vikasa): Replace 256 etc with defines.
-static float PredictionCostSpatial(const int* counts,
- int weight_0, double exp_val) {
- const int significant_symbols = 16;
+static float PredictionCostSpatial(const int counts[256], int weight_0,
+ double exp_val) {
+ const int significant_symbols = 256 >> 4;
const double exp_decay_factor = 0.6;
double bits = weight_0 * counts[0];
int i;
@@ -526,19 +559,19 @@ static float PredictionCostSpatial(const int* counts,
}
// Compute the combined Shannon entropy for the distributions {X} and {X+Y}
-static float CombinedShannonEntropy(const int* const X,
- const int* const Y, int n) {
+static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
int i;
double retval = 0.;
int sumX = 0, sumXY = 0;
- for (i = 0; i < n; ++i) {
+ for (i = 0; i < 256; ++i) {
const int x = X[i];
- const int xy = X[i] + Y[i];
+ const int xy = x + Y[i];
if (x != 0) {
sumX += x;
retval -= VP8LFastSLog2(x);
- }
- if (xy != 0) {
+ sumXY += xy;
+ retval -= VP8LFastSLog2(xy);
+ } else if (xy != 0) {
sumXY += xy;
retval -= VP8LFastSLog2(xy);
}
@@ -547,50 +580,53 @@ static float CombinedShannonEntropy(const int* const X,
return (float)retval;
}
-static float PredictionCostSpatialHistogram(int accumulated[4][256],
- int tile[4][256]) {
+static float PredictionCostSpatialHistogram(const int accumulated[4][256],
+ const int tile[4][256]) {
int i;
double retval = 0;
for (i = 0; i < 4; ++i) {
const double kExpValue = 0.94;
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
- retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
+ retval += CombinedShannonEntropy(tile[i], accumulated[i]);
}
return (float)retval;
}
+static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
+ ++histo_argb[0][argb >> 24];
+ ++histo_argb[1][(argb >> 16) & 0xff];
+ ++histo_argb[2][(argb >> 8) & 0xff];
+ ++histo_argb[3][argb & 0xff];
+}
+
static int GetBestPredictorForTile(int width, int height,
int tile_x, int tile_y, int bits,
- int accumulated[4][256],
+ const int accumulated[4][256],
const uint32_t* const argb_scratch) {
const int kNumPredModes = 14;
const int col_start = tile_x << bits;
const int row_start = tile_y << bits;
const int tile_size = 1 << bits;
- const int ymax = (tile_size <= height - row_start) ?
- tile_size : height - row_start;
- const int xmax = (tile_size <= width - col_start) ?
- tile_size : width - col_start;
- int histo[4][256];
+ const int max_y = GetMin(tile_size, height - row_start);
+ const int max_x = GetMin(tile_size, width - col_start);
float best_diff = MAX_DIFF_COST;
int best_mode = 0;
-
int mode;
for (mode = 0; mode < kNumPredModes; ++mode) {
const uint32_t* current_row = argb_scratch;
- const PredictorFunc pred_func = kPredictors[mode];
+ const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
float cur_diff;
int y;
- memset(&histo[0][0], 0, sizeof(histo));
- for (y = 0; y < ymax; ++y) {
+ int histo_argb[4][256];
+ memset(histo_argb, 0, sizeof(histo_argb));
+ for (y = 0; y < max_y; ++y) {
int x;
const int row = row_start + y;
const uint32_t* const upper_row = current_row;
current_row = upper_row + width;
- for (x = 0; x < xmax; ++x) {
+ for (x = 0; x < max_x; ++x) {
const int col = col_start + x;
uint32_t predict;
- uint32_t predict_diff;
if (row == 0) {
predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
} else if (col == 0) {
@@ -598,14 +634,11 @@ static int GetBestPredictorForTile(int width, int height,
} else {
predict = pred_func(current_row[col - 1], upper_row + col);
}
- predict_diff = VP8LSubPixels(current_row[col], predict);
- ++histo[0][predict_diff >> 24];
- ++histo[1][((predict_diff >> 16) & 0xff)];
- ++histo[2][((predict_diff >> 8) & 0xff)];
- ++histo[3][(predict_diff & 0xff)];
+ UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
}
}
- cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
+ cur_diff = PredictionCostSpatialHistogram(
+ accumulated, (const int (*)[256])histo_argb);
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_mode = mode;
@@ -622,20 +655,18 @@ static void CopyTileWithPrediction(int width, int height,
const int col_start = tile_x << bits;
const int row_start = tile_y << bits;
const int tile_size = 1 << bits;
- const int ymax = (tile_size <= height - row_start) ?
- tile_size : height - row_start;
- const int xmax = (tile_size <= width - col_start) ?
- tile_size : width - col_start;
- const PredictorFunc pred_func = kPredictors[mode];
+ const int max_y = GetMin(tile_size, height - row_start);
+ const int max_x = GetMin(tile_size, width - col_start);
+ const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
const uint32_t* current_row = argb_scratch;
int y;
- for (y = 0; y < ymax; ++y) {
+ for (y = 0; y < max_y; ++y) {
int x;
const int row = row_start + y;
const uint32_t* const upper_row = current_row;
current_row = upper_row + width;
- for (x = 0; x < xmax; ++x) {
+ for (x = 0; x < max_x; ++x) {
const int col = col_start + x;
const int pix = row * width + col;
uint32_t predict;
@@ -681,7 +712,8 @@ void VP8LResidualImage(int width, int height, int bits,
if (all_x_max > width) {
all_x_max = width;
}
- pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
+ pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
+ (const int (*)[256])histo,
argb_scratch);
image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
@@ -695,11 +727,7 @@ void VP8LResidualImage(int width, int height, int bits,
}
ix = all_y * width + tile_x_offset;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
- const uint32_t a = argb[ix];
- ++histo[0][a >> 24];
- ++histo[1][((a >> 16) & 0xff)];
- ++histo[2][((a >> 8) & 0xff)];
- ++histo[3][(a & 0xff)];
+ UpdateHisto(histo, argb[ix]);
}
}
}
@@ -724,29 +752,36 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
{
int y = y_start;
- const int mask = (1 << transform->bits_) - 1;
+ const int tile_width = 1 << transform->bits_;
+ const int mask = tile_width - 1;
+ const int safe_width = width & ~mask;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
const uint32_t* pred_mode_base =
transform->data_ + (y >> transform->bits_) * tiles_per_row;
while (y < y_end) {
- int x;
const uint32_t pred2 = Predictor2(data[-1], data - width);
const uint32_t* pred_mode_src = pred_mode_base;
- PredictorFunc pred_func;
-
+ VP8LPredictorFunc pred_func;
+ int x = 1;
+ int t = 1;
// First pixel follows the T (mode=2) mode.
AddPixelsEq(data, pred2);
-
// .. the rest:
- pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
- for (x = 1; x < width; ++x) {
- uint32_t pred;
- if ((x & mask) == 0) { // start of tile. Read predictor function.
- pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
+ while (x < safe_width) {
+ pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+ for (; t < tile_width; ++t, ++x) {
+ const uint32_t pred = pred_func(data[x - 1], data + x - width);
+ AddPixelsEq(data + x, pred);
+ }
+ t = 0;
+ }
+ if (x < width) {
+ pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+ for (; x < width; ++x) {
+ const uint32_t pred = pred_func(data[x - 1], data + x - width);
+ AddPixelsEq(data + x, pred);
}
- pred = pred_func(data[x - 1], data + x - width);
- AddPixelsEq(data + x, pred);
}
data += width;
++y;
@@ -757,9 +792,9 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
}
}
-static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
- int i = 0;
- for (; i < num_pixs; ++i) {
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
+ int i;
+ for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = argb_data[i];
const uint32_t green = (argb >> 8) & 0xff;
const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
@@ -770,26 +805,19 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
-static void AddGreenToBlueAndRed(uint32_t* data, const uint32_t* data_end) {
- while (data < data_end) {
- const uint32_t argb = *data;
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
+ int i;
+ for (i = 0; i < num_pixels; ++i) {
+ const uint32_t argb = data[i];
const uint32_t green = ((argb >> 8) & 0xff);
uint32_t red_blue = (argb & 0x00ff00ffu);
red_blue += (green << 16) | green;
red_blue &= 0x00ff00ffu;
- *data++ = (argb & 0xff00ff00u) | red_blue;
+ data[i] = (argb & 0xff00ff00u) | red_blue;
}
}
-typedef struct {
- // Note: the members are uint8_t, so that any negative values are
- // automatically converted to "mod 256" values.
- uint8_t green_to_red_;
- uint8_t green_to_blue_;
- uint8_t red_to_blue_;
-} Multipliers;
-
-static WEBP_INLINE void MultipliersClear(Multipliers* m) {
+static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
m->green_to_red_ = 0;
m->green_to_blue_ = 0;
m->red_to_blue_ = 0;
@@ -801,40 +829,54 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
}
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
- Multipliers* const m) {
+ VP8LMultipliers* const m) {
m->green_to_red_ = (color_code >> 0) & 0xff;
m->green_to_blue_ = (color_code >> 8) & 0xff;
m->red_to_blue_ = (color_code >> 16) & 0xff;
}
-static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
+static WEBP_INLINE uint32_t MultipliersToColorCode(
+ const VP8LMultipliers* const m) {
return 0xff000000u |
((uint32_t)(m->red_to_blue_) << 16) |
((uint32_t)(m->green_to_blue_) << 8) |
m->green_to_red_;
}
-static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
- uint32_t argb, int inverse) {
- const uint32_t green = argb >> 8;
- const uint32_t red = argb >> 16;
- uint32_t new_red = red;
- uint32_t new_blue = argb;
+void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
+ int num_pixels) {
+ int i;
+ for (i = 0; i < num_pixels; ++i) {
+ const uint32_t argb = data[i];
+ const uint32_t green = argb >> 8;
+ const uint32_t red = argb >> 16;
+ uint32_t new_red = red;
+ uint32_t new_blue = argb;
+ new_red -= ColorTransformDelta(m->green_to_red_, green);
+ new_red &= 0xff;
+ new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+ new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+ new_blue &= 0xff;
+ data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
+ }
+}
- if (inverse) {
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
+ int num_pixels) {
+ int i;
+ for (i = 0; i < num_pixels; ++i) {
+ const uint32_t argb = data[i];
+ const uint32_t green = argb >> 8;
+ const uint32_t red = argb >> 16;
+ uint32_t new_red = red;
+ uint32_t new_blue = argb;
new_red += ColorTransformDelta(m->green_to_red_, green);
new_red &= 0xff;
new_blue += ColorTransformDelta(m->green_to_blue_, green);
new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
new_blue &= 0xff;
- } else {
- new_red -= ColorTransformDelta(m->green_to_red_, green);
- new_red &= 0xff;
- new_blue -= ColorTransformDelta(m->green_to_blue_, green);
- new_blue -= ColorTransformDelta(m->red_to_blue_, red);
- new_blue &= 0xff;
+ data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
- return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
@@ -856,225 +898,251 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
return (new_blue & 0xff);
}
-static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
- int ix, int xsize) {
- const uint32_t v = argb[ix];
- if (ix >= xsize + 3) {
- if (v == argb[ix - xsize] &&
- argb[ix - 1] == argb[ix - xsize - 1] &&
- argb[ix - 2] == argb[ix - xsize - 2] &&
- argb[ix - 3] == argb[ix - xsize - 3]) {
- return 1;
- }
- return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
- } else if (ix >= 3) {
- return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
- }
- return 0;
-}
-
static float PredictionCostCrossColor(const int accumulated[256],
const int counts[256]) {
// Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial
static const double kExpValue = 2.4;
- return CombinedShannonEntropy(counts, accumulated, 256) +
+ return CombinedShannonEntropy(counts, accumulated) +
PredictionCostSpatial(counts, 3, kExpValue);
}
-static Multipliers GetBestColorTransformForTile(
- int tile_x, int tile_y, int bits,
- Multipliers prevX,
- Multipliers prevY,
- int step, int xsize, int ysize,
- int* accumulated_red_histo,
- int* accumulated_blue_histo,
- const uint32_t* const argb) {
- float best_diff = MAX_DIFF_COST;
+static float GetPredictionCostCrossColorRed(
+ int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+ int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
+ const int accumulated_red_histo[256], const uint32_t* const argb) {
+ int all_y;
+ int histo[256] = { 0 };
float cur_diff;
- const int halfstep = step / 2;
- const int max_tile_size = 1 << bits;
- const int tile_y_offset = tile_y * max_tile_size;
- const int tile_x_offset = tile_x * max_tile_size;
- int green_to_red;
- int green_to_blue;
- int red_to_blue;
- int all_x_max = tile_x_offset + max_tile_size;
- int all_y_max = tile_y_offset + max_tile_size;
- Multipliers best_tx;
- MultipliersClear(&best_tx);
- if (all_x_max > xsize) {
- all_x_max = xsize;
+ for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+ int ix = all_y * xsize + tile_x_offset;
+ int all_x;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
+ }
}
- if (all_y_max > ysize) {
- all_y_max = ysize;
+ cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
+ if ((uint8_t)green_to_red == prev_x.green_to_red_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
}
-
- for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
- int histo[256] = { 0 };
- int all_y;
-
- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
- int ix = all_y * xsize + tile_x_offset;
- int all_x;
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
- if (SkipRepeatedPixels(argb, ix, xsize)) {
- continue;
- }
- ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
- }
- }
- cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
- if ((uint8_t)green_to_red == prevX.green_to_red_) {
- cur_diff -= 3; // favor keeping the areas locally similar
+ if ((uint8_t)green_to_red == prev_y.green_to_red_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (green_to_red == 0) {
+ cur_diff -= 3;
+ }
+ return cur_diff;
+}
+
+static void GetBestGreenToRed(
+ int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+ int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+ const int accumulated_red_histo[256], const uint32_t* const argb,
+ VP8LMultipliers* const best_tx) {
+ int min_green_to_red = -64;
+ int max_green_to_red = 64;
+ int green_to_red = 0;
+ int eval_min = 1;
+ int eval_max = 1;
+ float cur_diff_min = MAX_DIFF_COST;
+ float cur_diff_max = MAX_DIFF_COST;
+ // Do a binary search to find the optimal green_to_red color transform.
+ while (max_green_to_red - min_green_to_red > 2) {
+ if (eval_min) {
+ cur_diff_min = GetPredictionCostCrossColorRed(
+ tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+ prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
+ eval_min = 0;
}
- if ((uint8_t)green_to_red == prevY.green_to_red_) {
- cur_diff -= 3; // favor keeping the areas locally similar
+ if (eval_max) {
+ cur_diff_max = GetPredictionCostCrossColorRed(
+ tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+ prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
+ eval_max = 0;
}
- if (green_to_red == 0) {
- cur_diff -= 3;
+ if (cur_diff_min < cur_diff_max) {
+ green_to_red = min_green_to_red;
+ max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
+ eval_max = 1;
+ } else {
+ green_to_red = max_green_to_red;
+ min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
+ eval_min = 1;
}
- if (cur_diff < best_diff) {
- best_diff = cur_diff;
- best_tx.green_to_red_ = green_to_red;
+ }
+ best_tx->green_to_red_ = green_to_red;
+}
+
+static float GetPredictionCostCrossColorBlue(
+ int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+ int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+ int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
+ const uint32_t* const argb) {
+ int all_y;
+ int histo[256] = { 0 };
+ float cur_diff;
+ for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+ int all_x;
+ int ix = all_y * xsize + tile_x_offset;
+ for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
- best_diff = MAX_DIFF_COST;
- for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
- for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
- int all_y;
- int histo[256] = { 0 };
- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
- int all_x;
- int ix = all_y * xsize + tile_x_offset;
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
- if (SkipRepeatedPixels(argb, ix, xsize)) {
- continue;
- }
- ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
- }
- }
- cur_diff =
- PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
- if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
- cur_diff -= 3; // favor keeping the areas locally similar
- }
- if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
- cur_diff -= 3; // favor keeping the areas locally similar
- }
- if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
- cur_diff -= 3; // favor keeping the areas locally similar
- }
- if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
- cur_diff -= 3; // favor keeping the areas locally similar
- }
- if (green_to_blue == 0) {
- cur_diff -= 3;
- }
- if (red_to_blue == 0) {
- cur_diff -= 3;
- }
+ cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
+ if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
+ cur_diff -= 3; // favor keeping the areas locally similar
+ }
+ if (green_to_blue == 0) {
+ cur_diff -= 3;
+ }
+ if (red_to_blue == 0) {
+ cur_diff -= 3;
+ }
+ return cur_diff;
+}
+
+static void GetBestGreenRedToBlue(
+ int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+ int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+ const int accumulated_blue_histo[256], const uint32_t* const argb,
+ VP8LMultipliers* const best_tx) {
+ float best_diff = MAX_DIFF_COST;
+ float cur_diff;
+ const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
+ const int min_green_to_blue = -32;
+ const int max_green_to_blue = 32;
+ const int min_red_to_blue = -32;
+ const int max_red_to_blue = 32;
+ const int num_iters =
+ (1 + (max_green_to_blue - min_green_to_blue) / step) *
+ (1 + (max_red_to_blue - min_red_to_blue) / step);
+ // Number of further tries allowed to find better green_to_blue & red_to_blue
+ // color transforms after finding a local minimum.
+ const int max_tries_after_min = 4 + (num_iters >> 2);
+ int num_tries_after_min = 0;
+ int green_to_blue;
+ for (green_to_blue = min_green_to_blue;
+ green_to_blue <= max_green_to_blue &&
+ num_tries_after_min < max_tries_after_min;
+ green_to_blue += step) {
+ int red_to_blue;
+ for (red_to_blue = min_red_to_blue;
+ red_to_blue <= max_red_to_blue &&
+ num_tries_after_min < max_tries_after_min;
+ red_to_blue += step) {
+ cur_diff = GetPredictionCostCrossColorBlue(
+ tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
+ prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
if (cur_diff < best_diff) {
best_diff = cur_diff;
- best_tx.green_to_blue_ = green_to_blue;
- best_tx.red_to_blue_ = red_to_blue;
+ best_tx->green_to_blue_ = green_to_blue;
+ best_tx->red_to_blue_ = red_to_blue;
+ num_tries_after_min = 0;
+ } else {
+ ++num_tries_after_min;
}
}
}
+}
+
+static VP8LMultipliers GetBestColorTransformForTile(
+ int tile_x, int tile_y, int bits,
+ VP8LMultipliers prev_x,
+ VP8LMultipliers prev_y,
+ int quality, int xsize, int ysize,
+ const int accumulated_red_histo[256],
+ const int accumulated_blue_histo[256],
+ const uint32_t* const argb) {
+ const int max_tile_size = 1 << bits;
+ const int tile_y_offset = tile_y * max_tile_size;
+ const int tile_x_offset = tile_x * max_tile_size;
+ const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
+ const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
+ VP8LMultipliers best_tx;
+ MultipliersClear(&best_tx);
+
+ GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+ prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
+ GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
+ xsize, prev_x, prev_y, quality, accumulated_blue_histo,
+ argb, &best_tx);
return best_tx;
}
static void CopyTileWithColorTransform(int xsize, int ysize,
- int tile_x, int tile_y, int bits,
- Multipliers color_transform,
- uint32_t* const argb) {
- int y;
- int xscan = 1 << bits;
- int yscan = 1 << bits;
- tile_x <<= bits;
- tile_y <<= bits;
- if (xscan > xsize - tile_x) {
- xscan = xsize - tile_x;
- }
- if (yscan > ysize - tile_y) {
- yscan = ysize - tile_y;
- }
- yscan += tile_y;
- for (y = tile_y; y < yscan; ++y) {
- int ix = y * xsize + tile_x;
- const int end_ix = ix + xscan;
- for (; ix < end_ix; ++ix) {
- argb[ix] = TransformColor(&color_transform, argb[ix], 0);
- }
+ int tile_x, int tile_y,
+ int max_tile_size,
+ VP8LMultipliers color_transform,
+ uint32_t* argb) {
+ const int xscan = GetMin(max_tile_size, xsize - tile_x);
+ int yscan = GetMin(max_tile_size, ysize - tile_y);
+ argb += tile_y * xsize + tile_x;
+ while (yscan-- > 0) {
+ VP8LTransformColor(&color_transform, argb, xscan);
+ argb += xsize;
}
}
-void VP8LColorSpaceTransform(int width, int height, int bits, int step,
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
uint32_t* const argb, uint32_t* image) {
const int max_tile_size = 1 << bits;
- int tile_xsize = VP8LSubSampleSize(width, bits);
- int tile_ysize = VP8LSubSampleSize(height, bits);
+ const int tile_xsize = VP8LSubSampleSize(width, bits);
+ const int tile_ysize = VP8LSubSampleSize(height, bits);
int accumulated_red_histo[256] = { 0 };
int accumulated_blue_histo[256] = { 0 };
- int tile_y;
- int tile_x;
- Multipliers prevX;
- Multipliers prevY;
- MultipliersClear(&prevY);
- MultipliersClear(&prevX);
+ int tile_x, tile_y;
+ VP8LMultipliers prev_x, prev_y;
+ MultipliersClear(&prev_y);
+ MultipliersClear(&prev_x);
for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
- Multipliers color_transform;
- int all_x_max;
int y;
- const int tile_y_offset = tile_y * max_tile_size;
const int tile_x_offset = tile_x * max_tile_size;
+ const int tile_y_offset = tile_y * max_tile_size;
+ const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
+ const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
+ const int offset = tile_y * tile_xsize + tile_x;
if (tile_y != 0) {
- ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
- ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
- &prevY);
- } else if (tile_x != 0) {
- ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
+ ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
}
- color_transform =
- GetBestColorTransformForTile(tile_x, tile_y, bits,
- prevX, prevY,
- step, width, height,
- &accumulated_red_histo[0],
- &accumulated_blue_histo[0],
- argb);
- image[tile_y * tile_xsize + tile_x] =
- MultipliersToColorCode(&color_transform);
- CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
- color_transform, argb);
+ prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
+ prev_x, prev_y,
+ quality, width, height,
+ accumulated_red_histo,
+ accumulated_blue_histo,
+ argb);
+ image[offset] = MultipliersToColorCode(&prev_x);
+ CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
+ max_tile_size, prev_x, argb);
// Gather accumulated histogram data.
- all_x_max = tile_x_offset + max_tile_size;
- if (all_x_max > width) {
- all_x_max = width;
- }
- for (y = 0; y < max_tile_size; ++y) {
- int ix;
- int all_x;
- int all_y = tile_y_offset + y;
- if (all_y >= height) {
- break;
- }
- ix = all_y * width + tile_x_offset;
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+ for (y = tile_y_offset; y < all_y_max; ++y) {
+ int ix = y * width + tile_x_offset;
+ const int ix_end = ix + all_x_max - tile_x_offset;
+ for (; ix < ix_end; ++ix) {
+ const uint32_t pix = argb[ix];
if (ix >= 2 &&
- argb[ix] == argb[ix - 2] &&
- argb[ix] == argb[ix - 1]) {
+ pix == argb[ix - 2] &&
+ pix == argb[ix - 1]) {
continue; // repeated pixels are handled by backward references
}
if (ix >= width + 2 &&
argb[ix - 2] == argb[ix - width - 2] &&
argb[ix - 1] == argb[ix - width - 1] &&
- argb[ix] == argb[ix - width]) {
+ pix == argb[ix - width]) {
continue; // repeated pixels are handled by backward references
}
- ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
- ++accumulated_blue_histo[argb[ix] & 0xff];
+ ++accumulated_red_histo[(pix >> 16) & 0xff];
+ ++accumulated_blue_histo[(pix >> 0) & 0xff];
}
}
}
@@ -1085,7 +1153,10 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int step,
static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
int y_start, int y_end, uint32_t* data) {
const int width = transform->xsize_;
- const int mask = (1 << transform->bits_) - 1;
+ const int tile_width = 1 << transform->bits_;
+ const int mask = tile_width - 1;
+ const int safe_width = width & ~mask;
+ const int remaining_width = width - safe_width;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
int y = y_start;
const uint32_t* pred_row =
@@ -1093,16 +1164,21 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
while (y < y_end) {
const uint32_t* pred = pred_row;
- Multipliers m = { 0, 0, 0 };
- int x;
-
- for (x = 0; x < width; ++x) {
- if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
- data[x] = TransformColor(&m, data[x], 1);
+ VP8LMultipliers m = { 0, 0, 0 };
+ const uint32_t* const data_safe_end = data + safe_width;
+ const uint32_t* const data_end = data + width;
+ while (data < data_safe_end) {
+ ColorCodeToMultipliers(*pred++, &m);
+ VP8LTransformColorInverse(&m, data, tile_width);
+ data += tile_width;
+ }
+ if (data < data_end) { // Left-overs using C-version.
+ ColorCodeToMultipliers(*pred++, &m);
+ VP8LTransformColorInverse(&m, data, remaining_width);
+ data += remaining_width;
}
- data += width;
++y;
- if ((y & mask) == 0) pred_row += tiles_per_row;;
+ if ((y & mask) == 0) pred_row += tiles_per_row;
}
}
@@ -1173,7 +1249,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
assert(row_end <= transform->ysize_);
switch (transform->type_) {
case SUBTRACT_GREEN:
- VP8LAddGreenToBlueAndRed(out, out + (row_end - row_start) * width);
+ VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
break;
case PREDICTOR_TRANSFORM:
PredictorInverseTransform(transform, row_start, row_end, out);
@@ -1218,8 +1294,8 @@ static int is_big_endian(void) {
return (tmp.b[0] != 1);
}
-static void ConvertBGRAToRGB(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGB_C(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@@ -1229,8 +1305,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
}
}
-static void ConvertBGRAToRGBA(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@@ -1241,8 +1317,8 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
}
}
-static void ConvertBGRAToRGBA4444(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@@ -1258,8 +1334,8 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
}
}
-static void ConvertBGRAToRGB565(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@@ -1275,8 +1351,8 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
}
}
-static void ConvertBGRAToBGR(const uint32_t* src,
- int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToBGR_C(const uint32_t* src,
+ int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@@ -1291,29 +1367,18 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
if (is_big_endian() == swap_on_big_endian) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
- uint32_t argb = *src++;
+ const uint32_t argb = *src++;
-#if !defined(__BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN)
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
-#if defined(__i386__) || defined(__x86_64__)
- __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
- *(uint32_t*)dst = argb;
-#elif defined(_MSC_VER)
- argb = _byteswap_ulong(argb);
- *(uint32_t*)dst = argb;
-#else
- dst[0] = (argb >> 24) & 0xff;
- dst[1] = (argb >> 16) & 0xff;
- dst[2] = (argb >> 8) & 0xff;
- dst[3] = (argb >> 0) & 0xff;
-#endif
+ *(uint32_t*)dst = BSwap32(argb);
#else // WEBP_REFERENCE_IMPLEMENTATION
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
-#else // __BIG_ENDIAN__
+#else // WORDS_BIGENDIAN
dst[0] = (argb >> 0) & 0xff;
dst[1] = (argb >> 8) & 0xff;
dst[2] = (argb >> 16) & 0xff;
@@ -1330,17 +1395,17 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
switch (out_colorspace) {
case MODE_RGB:
- ConvertBGRAToRGB(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
break;
case MODE_RGBA:
- ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
break;
case MODE_rgbA:
- ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
break;
case MODE_BGR:
- ConvertBGRAToBGR(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
break;
case MODE_BGRA:
CopyOrSwap(in_data, num_pixels, rgba, 1);
@@ -1357,20 +1422,21 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
break;
case MODE_RGBA_4444:
- ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
break;
case MODE_rgbA_4444:
- ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
break;
case MODE_RGB_565:
- ConvertBGRAToRGB565(in_data, num_pixels, rgba);
+ VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
break;
default:
assert(0); // Code flow should not reach here.
}
}
+//------------------------------------------------------------------------------
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
void VP8LBundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst) {
@@ -1394,129 +1460,166 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
//------------------------------------------------------------------------------
-// TODO(vikasa): Move the SSE2 functions to lossless_dsp.c (new file), once
-// color-space conversion methods (ConvertFromBGRA) are also updated for SSE2.
-#if defined(WEBP_USE_SSE2)
-static WEBP_INLINE uint32_t ClampedAddSubtractFullSSE2(uint32_t c0, uint32_t c1,
- uint32_t c2) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
- const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
- const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
- const __m128i V1 = _mm_add_epi16(C0, C1);
- const __m128i V2 = _mm_sub_epi16(V1, C2);
- const __m128i b = _mm_packus_epi16(V2, V2);
- const uint32_t output = _mm_cvtsi128_si32(b);
- return output;
-}
-
-static WEBP_INLINE uint32_t ClampedAddSubtractHalfSSE2(uint32_t c0, uint32_t c1,
- uint32_t c2) {
- const uint32_t ave = Average2(c0, c1);
- const __m128i zero = _mm_setzero_si128();
- const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
- const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
- const __m128i A1 = _mm_sub_epi16(A0, B0);
- const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
- const __m128i A2 = _mm_sub_epi16(A1, BgtA);
- const __m128i A3 = _mm_srai_epi16(A2, 1);
- const __m128i A4 = _mm_add_epi16(A0, A3);
- const __m128i A5 = _mm_packus_epi16(A4, A4);
- const uint32_t output = _mm_cvtsi128_si32(A5);
- return output;
-}
-
-static WEBP_INLINE uint32_t SelectSSE2(uint32_t a, uint32_t b, uint32_t c) {
- int pa_minus_pb;
- const __m128i zero = _mm_setzero_si128();
- const __m128i A0 = _mm_cvtsi32_si128(a);
- const __m128i B0 = _mm_cvtsi32_si128(b);
- const __m128i C0 = _mm_cvtsi32_si128(c);
- const __m128i AC0 = _mm_subs_epu8(A0, C0);
- const __m128i CA0 = _mm_subs_epu8(C0, A0);
- const __m128i BC0 = _mm_subs_epu8(B0, C0);
- const __m128i CB0 = _mm_subs_epu8(C0, B0);
- const __m128i AC = _mm_or_si128(AC0, CA0);
- const __m128i BC = _mm_or_si128(BC0, CB0);
- const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
- const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
- const __m128i diff = _mm_sub_epi16(pb, pa);
- {
- int16_t out[8];
- _mm_storeu_si128((__m128i*)out, diff);
- pa_minus_pb = out[0] + out[1] + out[2] + out[3];
- }
- return (pa_minus_pb <= 0) ? a : b;
+static double ExtraCost(const uint32_t* population, int length) {
+ int i;  // Computes a weighted sum over population[4 .. length - 1]:
+ double cost = 0.;  // entry i + 2 contributes (i >> 1) * population[i + 2].
+ for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
+ return cost;  // Stays 0. when length <= 4 (the loop never runs).
}
-static void SubtractGreenFromBlueAndRedSSE2(uint32_t* argb_data, int num_pixs) {
- int i = 0;
- const __m128i mask = _mm_set1_epi32(0x0000ff00);
- for (; i + 4 < num_pixs; i += 4) {
- const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
- const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
- const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
- const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
- const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
- const __m128i out = _mm_sub_epi8(in, in_0g0g);
- _mm_storeu_si128((__m128i*)&argb_data[i], out);
+static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
+ int length) {  // Weighted sum over the element-wise sum of X and Y.
+ int i;
+ double cost = 0.;
+ for (i = 2; i < length - 2; ++i) {
+ const int xy = X[i + 2] + Y[i + 2];  // Sum of both inputs, held as int.
+ cost += (i >> 1) * xy;  // Entry i + 2 is weighted by i >> 1.
}
- // fallthrough and finish off with plain-C
- for (; i < num_pixs; ++i) {
- const uint32_t argb = argb_data[i];
- const uint32_t green = (argb >> 8) & 0xff;
- const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
- const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
- argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
+ return cost;  // Stays 0. when length <= 4 (the loop never runs).
+}
+
+// Returns run-length stats of 'population' (runs of equal adjacent entries).
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
+ int i;
+ int streak = 0;  // Length of the run currently being scanned.
+ VP8LStreaks stats;
+ memset(&stats, 0, sizeof(stats));
+ for (i = 0; i < length - 1; ++i) {
+ ++streak;
+ if (population[i] == population[i + 1]) {
+ continue;  // Next entry has the same value: the run goes on.
+ }
+ stats.counts[population[i] != 0] += (streak > 3);  // Runs longer than 3.
+ stats.streaks[population[i] != 0][(streak > 3)] += streak;  // Run lengths.
+ streak = 0;
}
+ ++streak;  // Close the final run; i == length - 1 here when length >= 1.
+ stats.counts[population[i] != 0] += (streak > 3);
+ stats.streaks[population[i] != 0][(streak > 3)] += streak;
+ return stats;
}
-static void AddGreenToBlueAndRedSSE2(uint32_t* data, const uint32_t* data_end) {
- const __m128i mask = _mm_set1_epi32(0x0000ff00);
- for (; data + 4 < data_end; data += 4) {
- const __m128i in = _mm_loadu_si128((__m128i*)data);
- const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
- const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
- const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
- const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
- const __m128i out = _mm_add_epi8(in, in_0g0g);
- _mm_storeu_si128((__m128i*)data, out);
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+ const uint32_t* Y, int length) {  // Run stats of X[i] + Y[i].
+ int i;
+ int streak = 0;  // Length of the run currently being scanned.
+ VP8LStreaks stats;
+ memset(&stats, 0, sizeof(stats));
+ for (i = 0; i < length - 1; ++i) {
+ const int xy = X[i] + Y[i];
+ const int xy_next = X[i + 1] + Y[i + 1];
+ ++streak;
+ if (xy == xy_next) {
+ continue;  // Next combined value is equal: the run goes on.
+ }
+ stats.counts[xy != 0] += (streak > 3);  // Runs longer than 3.
+ stats.streaks[xy != 0][(streak > 3)] += streak;  // Run lengths.
+ streak = 0;
}
- // fallthrough and finish off with plain-C
- while (data < data_end) {
- const uint32_t argb = *data;
- const uint32_t green = ((argb >> 8) & 0xff);
- uint32_t red_blue = (argb & 0x00ff00ffu);
- red_blue += (green << 16) | green;
- red_blue &= 0x00ff00ffu;
- *data++ = (argb & 0xff00ff00u) | red_blue;
+ {  // Close the final run; i == length - 1 here when length >= 1.
+ const int xy = X[i] + Y[i];
+ ++streak;
+ stats.counts[xy != 0] += (streak > 3);
+ stats.streaks[xy != 0][(streak > 3)] += streak;
}
+ return stats;
}
-extern void VP8LDspInitSSE2(void);
+//------------------------------------------------------------------------------
-void VP8LDspInitSSE2(void) {
- VP8LClampedAddSubtractFull = ClampedAddSubtractFullSSE2;
- VP8LClampedAddSubtractHalf = ClampedAddSubtractHalfSSE2;
- VP8LSelect = SelectSSE2;
- VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRedSSE2;
- VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRedSSE2;
+static void HistogramAdd(const VP8LHistogram* const a,
+ const VP8LHistogram* const b,
+ VP8LHistogram* const out) {  // out = a + b, bin by bin.
+ int i;
+ const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+ assert(a->palette_code_bits_ == b->palette_code_bits_);
+ if (b != out) {  // 'out' is distinct from 'b': store a + b into it.
+ for (i = 0; i < literal_size; ++i) {
+ out->literal_[i] = a->literal_[i] + b->literal_[i];
+ }
+ for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+ out->distance_[i] = a->distance_[i] + b->distance_[i];
+ }
+ for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+ out->red_[i] = a->red_[i] + b->red_[i];
+ out->blue_[i] = a->blue_[i] + b->blue_[i];
+ out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
+ }
+ } else {  // 'out' is 'b' itself: accumulate 'a' into it in place.
+ for (i = 0; i < literal_size; ++i) {
+ out->literal_[i] += a->literal_[i];
+ }
+ for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+ out->distance_[i] += a->distance_[i];
+ }
+ for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+ out->red_[i] += a->red_[i];
+ out->blue_[i] += a->blue_[i];
+ out->alpha_[i] += a->alpha_[i];
+ }
+ }
}
-#endif
+
//------------------------------------------------------------------------------
-VP8LPredClampedAddSubFunc VP8LClampedAddSubtractFull;
-VP8LPredClampedAddSubFunc VP8LClampedAddSubtractHalf;
-VP8LPredSelectFunc VP8LSelect;
-VP8LSubtractGreenFromBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
-VP8LAddGreenToBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LPredictorFunc VP8LPredictors[16];  // Copied from kPredictorsC at init.
+
+VP8LTransformColorFunc VP8LTransformColor;
+VP8LTransformColorFunc VP8LTransformColorInverse;
+
+VP8LConvertFunc VP8LConvertBGRAToRGB;
+VP8LConvertFunc VP8LConvertBGRAToRGBA;
+VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+VP8LConvertFunc VP8LConvertBGRAToRGB565;
+VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+VP8LCostFunc VP8LExtraCost;
+VP8LCostCombinedFunc VP8LExtraCostCombined;
+
+VP8LCostCountFunc VP8LHuffmanCostCount;
+VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
+
+VP8LHistogramAddFunc VP8LHistogramAdd;  // All of the above: set in VP8LDspInit().
+
+extern void VP8LDspInitSSE2(void);  // Called from VP8LDspInit().
+extern void VP8LDspInitNEON(void);  // Called if VP8GetCPUInfo(kNEON).
+extern void VP8LDspInitMIPS32(void);  // Called if VP8GetCPUInfo(kMIPS32).
+
+static volatile VP8CPUInfo lossless_last_cpuinfo_used =
+ (VP8CPUInfo)&lossless_last_cpuinfo_used;  // Starts as its own address.
void VP8LDspInit(void) {
- VP8LClampedAddSubtractFull = ClampedAddSubtractFull;
- VP8LClampedAddSubtractHalf = ClampedAddSubtractHalf;
- VP8LSelect = Select;
- VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
- VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+ if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+ memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
+
+ VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
+ VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
+
+ VP8LTransformColor = VP8LTransformColor_C;
+ VP8LTransformColorInverse = VP8LTransformColorInverse_C;
+
+ VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+ VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+ VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
+ VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
+ VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+
+ VP8LFastLog2Slow = FastLog2Slow;
+ VP8LFastSLog2Slow = FastSLog2Slow;
+
+ VP8LExtraCost = ExtraCost;
+ VP8LExtraCostCombined = ExtraCostCombined;
+
+ VP8LHuffmanCostCount = HuffmanCostCount;
+ VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
+
+ VP8LHistogramAdd = HistogramAdd;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@@ -1525,8 +1628,18 @@ void VP8LDspInit(void) {
VP8LDspInitSSE2();
}
#endif
+#if defined(WEBP_USE_NEON)
+ if (VP8GetCPUInfo(kNEON)) {
+ VP8LDspInitNEON();
+ }
+#endif
+#if defined(WEBP_USE_MIPS32)
+ if (VP8GetCPUInfo(kMIPS32)) {
+ VP8LDspInitMIPS32();
+ }
+#endif
}
+ lossless_last_cpuinfo_used = VP8GetCPUInfo;
}
//------------------------------------------------------------------------------
-