diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/lossless_sse2.c')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/lossless_sse2.c | 389 |
1 files changed, 347 insertions, 42 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c index 2d016c2..15aae93 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c @@ -14,9 +14,12 @@ #include "./dsp.h" #if defined(WEBP_USE_SSE2) + +#include "./common_sse2.h" +#include "./lossless.h" +#include "./lossless_common.h" #include <assert.h> #include <emmintrin.h> -#include "./lossless.h" //------------------------------------------------------------------------------ // Predictor Transform @@ -75,25 +78,44 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { return (pa_minus_pb <= 0) ? a : b; } -static WEBP_INLINE __m128i Average2_128i(uint32_t a0, uint32_t a1) { +static WEBP_INLINE void Average2_m128i(const __m128i* const a0, + const __m128i* const a1, + __m128i* const avg) { + // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1) + const __m128i ones = _mm_set1_epi8(1); + const __m128i avg1 = _mm_avg_epu8(*a0, *a1); + const __m128i one = _mm_and_si128(_mm_xor_si128(*a0, *a1), ones); + *avg = _mm_sub_epi8(avg1, one); +} + +static WEBP_INLINE void Average2_uint32(const uint32_t a0, const uint32_t a1, + __m128i* const avg) { + // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1) + const __m128i ones = _mm_set1_epi8(1); + const __m128i A0 = _mm_cvtsi32_si128(a0); + const __m128i A1 = _mm_cvtsi32_si128(a1); + const __m128i avg1 = _mm_avg_epu8(A0, A1); + const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones); + *avg = _mm_sub_epi8(avg1, one); +} + +static WEBP_INLINE __m128i Average2_uint32_16(uint32_t a0, uint32_t a1) { const __m128i zero = _mm_setzero_si128(); const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero); const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); const __m128i sum = _mm_add_epi16(A1, A0); - const __m128i avg = _mm_srli_epi16(sum, 1); - return avg; + return _mm_srli_epi16(sum, 1); } static WEBP_INLINE uint32_t Average2(uint32_t 
a0, uint32_t a1) { - const __m128i avg = Average2_128i(a0, a1); - const __m128i A2 = _mm_packus_epi16(avg, avg); - const uint32_t output = _mm_cvtsi128_si32(A2); - return output; + __m128i output; + Average2_uint32(a0, a1, &output); + return _mm_cvtsi128_si32(output); } static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { const __m128i zero = _mm_setzero_si128(); - const __m128i avg1 = Average2_128i(a0, a2); + const __m128i avg1 = Average2_uint32_16(a0, a2); const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); const __m128i sum = _mm_add_epi16(avg1, A1); const __m128i avg2 = _mm_srli_epi16(sum, 1); @@ -104,8 +126,8 @@ static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3) { - const __m128i avg1 = Average2_128i(a0, a1); - const __m128i avg2 = Average2_128i(a2, a3); + const __m128i avg1 = Average2_uint32_16(a0, a1); + const __m128i avg2 = Average2_uint32_16(a2, a3); const __m128i sum = _mm_add_epi16(avg2, avg1); const __m128i avg3 = _mm_srli_epi16(sum, 1); const __m128i A0 = _mm_packus_epi16(avg3, avg3); @@ -113,68 +135,289 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, return output; } -static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average3(left, top[0], top[1]); return pred; } -static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[-1]); return pred; } -static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[0]); return pred; } -static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { +static uint32_t 
Predictor8_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average4(left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = Select(top[0], left, top[-1]); return pred; } -static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); return pred; } -static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); return pred; } +// Batch versions of those functions. + +// Predictor0: ARGB_BLACK. +static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int i; + const __m128i black = _mm_set1_epi32(ARGB_BLACK); + for (i = 0; i + 4 <= num_pixels; i += 4) { + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); + const __m128i res = _mm_add_epi8(src, black); + _mm_storeu_si128((__m128i*)&out[i], res); + } + if (i != num_pixels) { + VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i); + } +} + +// Predictor1: left. 
+static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int i; + __m128i prev = _mm_set1_epi32(out[-1]); + for (i = 0; i + 4 <= num_pixels; i += 4) { + // a | b | c | d + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); + // 0 | a | b | c + const __m128i shift0 = _mm_slli_si128(src, 4); + // a | a + b | b + c | c + d + const __m128i sum0 = _mm_add_epi8(src, shift0); + // 0 | 0 | a | a + b + const __m128i shift1 = _mm_slli_si128(sum0, 8); + // a | a + b | a + b + c | a + b + c + d + const __m128i sum1 = _mm_add_epi8(sum0, shift1); + const __m128i res = _mm_add_epi8(sum1, prev); + _mm_storeu_si128((__m128i*)&out[i], res); + // replicate prev output on the four lanes + prev = _mm_shuffle_epi32(res, (3 << 0) | (3 << 2) | (3 << 4) | (3 << 6)); + } + if (i != num_pixels) { + VP8LPredictorsAdd_C[1](in + i, upper + i, num_pixels - i, out + i); + } +} + +// Macro that adds 32-bit integers from IN using mod 256 arithmetic +// per 8 bit channel. +#define GENERATE_PREDICTOR_1(X, IN) \ +static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ + int num_pixels, uint32_t* out) { \ + int i; \ + for (i = 0; i + 4 <= num_pixels; i += 4) { \ + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ + const __m128i other = _mm_loadu_si128((const __m128i*)&(IN)); \ + const __m128i res = _mm_add_epi8(src, other); \ + _mm_storeu_si128((__m128i*)&out[i], res); \ + } \ + if (i != num_pixels) { \ + VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \ + } \ +} + +// Predictor2: Top. +GENERATE_PREDICTOR_1(2, upper[i]) +// Predictor3: Top-right. +GENERATE_PREDICTOR_1(3, upper[i + 1]) +// Predictor4: Top-left. +GENERATE_PREDICTOR_1(4, upper[i - 1]) +#undef GENERATE_PREDICTOR_1 + +// Due to averages with integers, values cannot be accumulated in parallel for +// predictors 5 to 7. 
+GENERATE_PREDICTOR_ADD(Predictor5_SSE2, PredictorAdd5_SSE2) +GENERATE_PREDICTOR_ADD(Predictor6_SSE2, PredictorAdd6_SSE2) +GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2) + +#define GENERATE_PREDICTOR_2(X, IN) \ +static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ + int num_pixels, uint32_t* out) { \ + int i; \ + for (i = 0; i + 4 <= num_pixels; i += 4) { \ + const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \ + const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); \ + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ + __m128i avg, res; \ + Average2_m128i(&T, &Tother, &avg); \ + res = _mm_add_epi8(avg, src); \ + _mm_storeu_si128((__m128i*)&out[i], res); \ + } \ + if (i != num_pixels) { \ + VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \ + } \ +} +// Predictor8: average TL T. +GENERATE_PREDICTOR_2(8, upper[i - 1]) +// Predictor9: average T TR. +GENERATE_PREDICTOR_2(9, upper[i + 1]) +#undef GENERATE_PREDICTOR_2 + +// Predictor10: average of (average of (L,TL), average of (T, TR)). +static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int i, j; + __m128i L = _mm_cvtsi32_si128(out[-1]); + for (i = 0; i + 4 <= num_pixels; i += 4) { + __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); + __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); + const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); + const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]); + __m128i avgTTR; + Average2_m128i(&T, &TR, &avgTTR); + for (j = 0; j < 4; ++j) { + __m128i avgLTL, avg; + Average2_m128i(&L, &TL, &avgLTL); + Average2_m128i(&avgTTR, &avgLTL, &avg); + L = _mm_add_epi8(avg, src); + out[i + j] = _mm_cvtsi128_si32(L); + // Rotate the pre-computed values for the next iteration. 
+ avgTTR = _mm_srli_si128(avgTTR, 4); + TL = _mm_srli_si128(TL, 4); + src = _mm_srli_si128(src, 4); + } + } + if (i != num_pixels) { + VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i); + } +} + +// Predictor11: select. +static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B, + __m128i* const out) { + // We can unpack with any value on the upper 32 bits, provided it's the same + // on both operands (so that their sum of abs diff is zero). Here we use *A. + const __m128i A_lo = _mm_unpacklo_epi32(*A, *A); + const __m128i B_lo = _mm_unpacklo_epi32(*B, *A); + const __m128i A_hi = _mm_unpackhi_epi32(*A, *A); + const __m128i B_hi = _mm_unpackhi_epi32(*B, *A); + const __m128i s_lo = _mm_sad_epu8(A_lo, B_lo); + const __m128i s_hi = _mm_sad_epu8(A_hi, B_hi); + *out = _mm_packs_epi32(s_lo, s_hi); +} + +static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int i, j; + __m128i L = _mm_cvtsi32_si128(out[-1]); + for (i = 0; i + 4 <= num_pixels; i += 4) { + __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); + __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); + __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); + __m128i pa; + GetSumAbsDiff32(&T, &TL, &pa); // pa = sum |T-TL| + for (j = 0; j < 4; ++j) { + const __m128i L_lo = _mm_unpacklo_epi32(L, L); + const __m128i TL_lo = _mm_unpacklo_epi32(TL, L); + const __m128i pb = _mm_sad_epu8(L_lo, TL_lo); // pb = sum |L-TL| + const __m128i mask = _mm_cmpgt_epi32(pb, pa); + const __m128i A = _mm_and_si128(mask, L); + const __m128i B = _mm_andnot_si128(mask, T); + const __m128i pred = _mm_or_si128(A, B); // pred = (pb > pa)? L : T + L = _mm_add_epi8(src, pred); + out[i + j] = _mm_cvtsi128_si32(L); + // Shift the pre-computed value for the next iteration. 
+ T = _mm_srli_si128(T, 4); + TL = _mm_srli_si128(TL, 4); + src = _mm_srli_si128(src, 4); + pa = _mm_srli_si128(pa, 4); + } + } + if (i != num_pixels) { + VP8LPredictorsAdd_C[11](in + i, upper + i, num_pixels - i, out + i); + } +} + +// Predictor12: ClampedAddSubtractFull. +#define DO_PRED12(DIFF, LANE, OUT) \ +do { \ + const __m128i all = _mm_add_epi16(L, (DIFF)); \ + const __m128i alls = _mm_packus_epi16(all, all); \ + const __m128i res = _mm_add_epi8(src, alls); \ + out[i + (OUT)] = _mm_cvtsi128_si32(res); \ + L = _mm_unpacklo_epi8(res, zero); \ + /* Shift the pre-computed value for the next iteration.*/ \ + if (LANE == 0) (DIFF) = _mm_srli_si128((DIFF), 8); \ + src = _mm_srli_si128(src, 4); \ +} while (0) + +static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper, + int num_pixels, uint32_t* out) { + int i; + const __m128i zero = _mm_setzero_si128(); + const __m128i L8 = _mm_cvtsi32_si128(out[-1]); + __m128i L = _mm_unpacklo_epi8(L8, zero); + for (i = 0; i + 4 <= num_pixels; i += 4) { + // Load 4 pixels at a time. + __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); + const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); + const __m128i T_lo = _mm_unpacklo_epi8(T, zero); + const __m128i T_hi = _mm_unpackhi_epi8(T, zero); + const __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); + const __m128i TL_lo = _mm_unpacklo_epi8(TL, zero); + const __m128i TL_hi = _mm_unpackhi_epi8(TL, zero); + __m128i diff_lo = _mm_sub_epi16(T_lo, TL_lo); + __m128i diff_hi = _mm_sub_epi16(T_hi, TL_hi); + DO_PRED12(diff_lo, 0, 0); + DO_PRED12(diff_lo, 1, 1); + DO_PRED12(diff_hi, 0, 2); + DO_PRED12(diff_hi, 1, 3); + } + if (i != num_pixels) { + VP8LPredictorsAdd_C[12](in + i, upper + i, num_pixels - i, out + i); + } +} +#undef DO_PRED12 + +// Due to averages with integers, values cannot be accumulated in parallel for +// predictor 13. 
+GENERATE_PREDICTOR_ADD(Predictor13_SSE2, PredictorAdd13_SSE2) + //------------------------------------------------------------------------------ // Subtract-Green Transform -static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) { +static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels, + uint32_t* dst) { int i; for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb + const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb const __m128i A = _mm_srli_epi16(in, 8); // 0 a 0 g const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0)); const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // 0g0g const __m128i out = _mm_add_epi8(in, C); - _mm_storeu_si128((__m128i*)&argb_data[i], out); + _mm_storeu_si128((__m128i*)&dst[i], out); } // fallthrough and finish off with plain-C - VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i); + if (i != num_pixels) { + VP8LAddGreenToBlueAndRed_C(src + i, num_pixels - i, dst + i); + } } //------------------------------------------------------------------------------ // Color Transform static void TransformColorInverse(const VP8LMultipliers* const m, - uint32_t* argb_data, int num_pixels) { - // sign-extended multiplying constants, pre-shifted by 5. + const uint32_t* const src, int num_pixels, + uint32_t* dst) { +// sign-extended multiplying constants, pre-shifted by 5. 
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend const __m128i mults_rb = _mm_set_epi16( CST(green_to_red_), CST(green_to_blue_), @@ -188,7 +431,7 @@ static void TransformColorInverse(const VP8LMultipliers* const m, const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks int i; for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb + const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb const __m128i A = _mm_and_si128(in, mask_ag); // a 0 g 0 const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0)); const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // g0g0 @@ -200,15 +443,53 @@ static void TransformColorInverse(const VP8LMultipliers* const m, const __m128i I = _mm_add_epi8(H, F); // r' x b'' 0 const __m128i J = _mm_srli_epi16(I, 8); // 0 r' 0 b'' const __m128i out = _mm_or_si128(J, A); - _mm_storeu_si128((__m128i*)&argb_data[i], out); + _mm_storeu_si128((__m128i*)&dst[i], out); } // Fall-back to C-version for left-overs. - VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i); + if (i != num_pixels) { + VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i); + } } //------------------------------------------------------------------------------ // Color-space conversion functions +static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels, + uint8_t* dst) { + const __m128i* in = (const __m128i*)src; + __m128i* out = (__m128i*)dst; + + while (num_pixels >= 32) { + // Load the BGRA buffers. 
+ __m128i in0 = _mm_loadu_si128(in + 0); + __m128i in1 = _mm_loadu_si128(in + 1); + __m128i in2 = _mm_loadu_si128(in + 2); + __m128i in3 = _mm_loadu_si128(in + 3); + __m128i in4 = _mm_loadu_si128(in + 4); + __m128i in5 = _mm_loadu_si128(in + 5); + __m128i in6 = _mm_loadu_si128(in + 6); + __m128i in7 = _mm_loadu_si128(in + 7); + VP8L32bToPlanar(&in0, &in1, &in2, &in3); + VP8L32bToPlanar(&in4, &in5, &in6, &in7); + // At this point, in1/in5 contain red only, in2/in6 green only ... + // Pack the colors in 24b RGB. + VP8PlanarTo24b(&in1, &in5, &in2, &in6, &in3, &in7); + _mm_storeu_si128(out + 0, in1); + _mm_storeu_si128(out + 1, in5); + _mm_storeu_si128(out + 2, in2); + _mm_storeu_si128(out + 3, in6); + _mm_storeu_si128(out + 4, in3); + _mm_storeu_si128(out + 5, in7); + in += 8; + out += 6; + num_pixels -= 32; + } + // left-overs + if (num_pixels > 0) { + VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } +} + static void ConvertBGRAToRGBA(const uint32_t* src, int num_pixels, uint8_t* dst) { const __m128i* in = (const __m128i*)src; @@ -233,7 +514,9 @@ static void ConvertBGRAToRGBA(const uint32_t* src, num_pixels -= 8; } // left-overs - VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + if (num_pixels > 0) { + VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } } static void ConvertBGRAToRGBA4444(const uint32_t* src, @@ -267,7 +550,9 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src, num_pixels -= 8; } // left-overs - VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + if (num_pixels > 0) { + VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + } } static void ConvertBGRAToRGB565(const uint32_t* src, @@ -306,7 +591,9 @@ static void ConvertBGRAToRGB565(const uint32_t* src, num_pixels -= 8; } // left-overs - VP8LConvertBGRAToRGB565_C((const uint32_t*)in, num_pixels, (uint8_t*)out); + if (num_pixels > 0) { + VP8LConvertBGRAToRGB565_C((const 
uint32_t*)in, num_pixels, (uint8_t*)out); + } } static void ConvertBGRAToBGR(const uint32_t* src, @@ -337,7 +624,9 @@ static void ConvertBGRAToBGR(const uint32_t* src, num_pixels -= 8; } // left-overs - VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst); + if (num_pixels > 0) { + VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst); + } } //------------------------------------------------------------------------------ @@ -346,19 +635,35 @@ static void ConvertBGRAToBGR(const uint32_t* src, extern void VP8LDspInitSSE2(void); WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) { - VP8LPredictors[5] = Predictor5; - VP8LPredictors[6] = Predictor6; - VP8LPredictors[7] = Predictor7; - VP8LPredictors[8] = Predictor8; - VP8LPredictors[9] = Predictor9; - VP8LPredictors[10] = Predictor10; - VP8LPredictors[11] = Predictor11; - VP8LPredictors[12] = Predictor12; - VP8LPredictors[13] = Predictor13; + VP8LPredictors[5] = Predictor5_SSE2; + VP8LPredictors[6] = Predictor6_SSE2; + VP8LPredictors[7] = Predictor7_SSE2; + VP8LPredictors[8] = Predictor8_SSE2; + VP8LPredictors[9] = Predictor9_SSE2; + VP8LPredictors[10] = Predictor10_SSE2; + VP8LPredictors[11] = Predictor11_SSE2; + VP8LPredictors[12] = Predictor12_SSE2; + VP8LPredictors[13] = Predictor13_SSE2; + + VP8LPredictorsAdd[0] = PredictorAdd0_SSE2; + VP8LPredictorsAdd[1] = PredictorAdd1_SSE2; + VP8LPredictorsAdd[2] = PredictorAdd2_SSE2; + VP8LPredictorsAdd[3] = PredictorAdd3_SSE2; + VP8LPredictorsAdd[4] = PredictorAdd4_SSE2; + VP8LPredictorsAdd[5] = PredictorAdd5_SSE2; + VP8LPredictorsAdd[6] = PredictorAdd6_SSE2; + VP8LPredictorsAdd[7] = PredictorAdd7_SSE2; + VP8LPredictorsAdd[8] = PredictorAdd8_SSE2; + VP8LPredictorsAdd[9] = PredictorAdd9_SSE2; + VP8LPredictorsAdd[10] = PredictorAdd10_SSE2; + VP8LPredictorsAdd[11] = PredictorAdd11_SSE2; + VP8LPredictorsAdd[12] = PredictorAdd12_SSE2; + VP8LPredictorsAdd[13] = PredictorAdd13_SSE2; VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; VP8LTransformColorInverse = 
TransformColorInverse; + VP8LConvertBGRAToRGB = ConvertBGRAToRGB; VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA; VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444; VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565; |