diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/rescaler_sse2.c')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/rescaler_sse2.c | 92 |
1 files changed, 46 insertions, 46 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c b/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c index 8271c22..f93b204 100644 --- a/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c @@ -11,14 +11,14 @@ // // Author: Skal (pascal.massimino@gmail.com) -#include "./dsp.h" +#include "src/dsp/dsp.h" -#if defined(WEBP_USE_SSE2) +#if defined(WEBP_USE_SSE2) && !defined(WEBP_REDUCE_SIZE) #include <emmintrin.h> #include <assert.h> -#include "../utils/rescaler_utils.h" -#include "../utils/utils.h" +#include "src/utils/rescaler_utils.h" +#include "src/utils/utils.h" //------------------------------------------------------------------------------ // Implementations of critical functions ImportRow / ExportRow @@ -27,7 +27,7 @@ #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX) // input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0 -static void LoadTwoPixels(const uint8_t* const src, __m128i* out) { +static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) { const __m128i zero = _mm_setzero_si128(); const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH const __m128i B = _mm_unpacklo_epi8(A, zero); // A0B0C0D0E0F0G0H0 @@ -36,14 +36,14 @@ static void LoadTwoPixels(const uint8_t* const src, __m128i* out) { } // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0 -static void LoadHeightPixels(const uint8_t* const src, __m128i* out) { +static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) { const __m128i zero = _mm_setzero_si128(); const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH *out = _mm_unpacklo_epi8(A, zero); } -static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, - const uint8_t* src) { +static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, + const uint8_t* src) { rescaler_t* frow = wrk->frow; const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels; const int x_add = wrk->x_add; @@ -54,10 +54,10 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, assert(wrk->x_expand); if (wrk->num_channels == 4) { if (wrk->src_width < 2) { - WebPRescalerImportRowExpandC(wrk, src); + WebPRescalerImportRowExpand_C(wrk, src); return; } - LoadTwoPixels(src, &cur_pixels); + LoadTwoPixels_SSE2(src, &cur_pixels); src += 4; while (1) { const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum); @@ -67,7 +67,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, if (frow >= frow_end) break; accum -= wrk->x_sub; if (accum < 0) { - LoadTwoPixels(src, &cur_pixels); + LoadTwoPixels_SSE2(src, &cur_pixels); src += 4; accum += x_add; } @@ -76,10 +76,10 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, int left; const uint8_t* const src_limit = src + wrk->src_width - 8; if (wrk->src_width < 8) { - WebPRescalerImportRowExpandC(wrk, src); + WebPRescalerImportRowExpand_C(wrk, src); return; } - LoadHeightPixels(src, &cur_pixels); + LoadHeightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; while (1) { @@ -94,7 +94,7 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, if (--left) { cur_pixels = _mm_srli_si128(cur_pixels, 2); } else if (src <= src_limit) { - LoadHeightPixels(src, &cur_pixels); + LoadHeightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; } else { // tail @@ -110,8 +110,8 @@ static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk, assert(accum == 0); } -static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk, - const uint8_t* src) { +static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk, + const uint8_t* src) { const int x_sub = wrk->x_sub; int accum = 0; const __m128i zero = _mm_setzero_si128(); @@ -123,7 +123,7 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk, const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width; if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) { - WebPRescalerImportRowShrinkC(wrk, src); + WebPRescalerImportRowShrink_C(wrk, src); return; } assert(!WebPRescalerInputDone(wrk)); @@ -169,12 +169,12 @@ static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk, // Row export // load *src as epi64, multiply by mult and store result in [out0 ... out3] -static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src, - const __m128i* const mult, - __m128i* const out0, - __m128i* const out1, - __m128i* const out2, - __m128i* const out3) { +static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src, + const __m128i* const mult, + __m128i* const out0, + __m128i* const out1, + __m128i* const out2, + __m128i* const out3) { const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0)); const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4)); const __m128i A2 = _mm_srli_epi64(A0, 32); @@ -192,12 +192,12 @@ static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src, } } -static WEBP_INLINE void ProcessRow(const __m128i* const A0, - const __m128i* const A1, - const __m128i* const A2, - const __m128i* const A3, - const __m128i* const mult, - uint8_t* const dst) { +static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0, + const __m128i* const A1, + const __m128i* const A2, + const __m128i* const A3, + const __m128i* const mult, + uint8_t* const dst) { const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER); const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0); const __m128i B0 = _mm_mul_epu32(*A0, *mult); @@ -210,7 +210,7 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0, const __m128i C3 = _mm_add_epi64(B3, rounder); const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX); const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX); -#if (WEBP_RESCALER_FIX < 32) +#if (WEBP_RESCALER_RFIX < 32) const __m128i D2 = _mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask); const __m128i D3 = @@ -226,7 +226,7 @@ static WEBP_INLINE void ProcessRow(const __m128i* const A0, _mm_storel_epi64((__m128i*)dst, G); } -static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { +static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) { int x_out; uint8_t* const dst = wrk->dst; rescaler_t* const irow = wrk->irow; @@ -240,8 +240,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { if (wrk->y_accum == 0) { for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { __m128i A0, A1, A2, A3; - LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3); - ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out); + LoadDispatchAndMult_SSE2(frow + x_out, NULL, &A0, &A1, &A2, &A3); + ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out); } for (; x_out < x_out_max; ++x_out) { const uint32_t J = frow[x_out]; @@ -257,8 +257,8 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER); for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { __m128i A0, A1, A2, A3, B0, B1, B2, B3; - LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3); - LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3); + LoadDispatchAndMult_SSE2(frow + x_out, &mA, &A0, &A1, &A2, &A3); + LoadDispatchAndMult_SSE2(irow + x_out, &mB, &B0, &B1, &B2, &B3); { const __m128i C0 = _mm_add_epi64(A0, B0); const __m128i C1 = _mm_add_epi64(A1, B1); @@ -272,7 +272,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX); const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX); const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX); - ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out); + ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult, dst + x_out); } } for (; x_out < x_out_max; ++x_out) { @@ -286,7 +286,7 @@ static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) { } } -static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { +static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) { int x_out; uint8_t* const dst = wrk->dst; rescaler_t* const irow = wrk->irow; @@ -303,8 +303,8 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER); for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { __m128i A0, A1, A2, A3, B0, B1, B2, B3; - LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3); - LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3); + LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3); + LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3); { const __m128i C0 = _mm_add_epi64(B0, rounder); const __m128i C1 = _mm_add_epi64(B1, rounder); @@ -324,7 +324,7 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { const __m128i G1 = _mm_or_si128(D1, F3); _mm_storeu_si128((__m128i*)(irow + x_out + 0), G0); _mm_storeu_si128((__m128i*)(irow + x_out + 4), G1); - ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out); + ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out); } } for (; x_out < x_out_max; ++x_out) { @@ -340,10 +340,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { const __m128i zero = _mm_setzero_si128(); for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { __m128i A0, A1, A2, A3; - LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3); + LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3); _mm_storeu_si128((__m128i*)(irow + x_out + 0), zero); _mm_storeu_si128((__m128i*)(irow + x_out + 4), zero); - ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out); + ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out); } for (; x_out < x_out_max; ++x_out) { const int v = (int)MULT_FIX(irow[x_out], scale); @@ -362,10 +362,10 @@ static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) { extern void WebPRescalerDspInitSSE2(void); WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitSSE2(void) { - WebPRescalerImportRowExpand = RescalerImportRowExpandSSE2; - WebPRescalerImportRowShrink = RescalerImportRowShrinkSSE2; - WebPRescalerExportRowExpand = RescalerExportRowExpandSSE2; - WebPRescalerExportRowShrink = RescalerExportRowShrinkSSE2; + WebPRescalerImportRowExpand = RescalerImportRowExpand_SSE2; + WebPRescalerImportRowShrink = RescalerImportRowShrink_SSE2; + WebPRescalerExportRowExpand = RescalerExportRowExpand_SSE2; + WebPRescalerExportRowShrink = RescalerExportRowShrink_SSE2; } #else // !WEBP_USE_SSE2 |