summary | refs | log | tree | commit | diff | stats
path: root/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
diff options
context:
space:
mode:
author: Liang Qi <liang.qi@qt.io>, 2018-05-14 13:07:15 +0200
committer: Liang Qi <liang.qi@qt.io>, 2018-05-22 08:03:36 +0000
commit: 62082a63e112e9991b33c2045896ced78ffcb62e (patch)
tree: 04a66f057499c90be0a8abfe8b0375886c6f25df /src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
parent: 92398950d9cfe5a88cb685ec166eb413aa8613ec (diff)
Update bundled libwebp to version 1.0.0
This commit imports libwebp 1.0.0, including AUTHORS, COPYING, ChangeLog, NEWS, PATENTS, README and src directories. In src, only includes header and source files. Upstream changes since 0.6.1 have been merged in. Also updated version in qt_attribution.json. [ChangeLog][Third-Party Code] Update bundled libwebp to version 1.0.0. Change-Id: Ia30ccc90286d5dd3e48e091f101f1cae84785150 Reviewed-by: Kai Koehne <kai.koehne@qt.io> Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c')
-rw-r--r-- src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c | 94
1 file changed, 94 insertions, 0 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
index 3526a34..2e12a71 100644
--- a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
+++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
@@ -18,6 +18,9 @@
#include <smmintrin.h>
#include "src/dsp/lossless.h"
+// For sign-extended multiplying constants, pre-shifted by 5: the low 8 bits of X are moved into the high byte of an int16_t (which sign-extends them) and then shifted right by 5.
+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)  // i.e. 8 * (signed 8-bit X), assuming >> on a negative value is an arithmetic shift; fed to _mm_mulhi_epi16 in this file
+
//------------------------------------------------------------------------------
// Subtract-Green Transform
@@ -39,12 +42,103 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
}
//------------------------------------------------------------------------------
+// Color Transform
+// CollectColorBlueTransforms_SSE41: for every pixel of the tile, computes b'' = b - dbg - dbr with byte-wise wrapping arithmetic (deltas derived from the green and red bytes) and increments histo[b''].
+#define SPAN 8  // pixels handled per SIMD iteration (two 128-bit loads of SPAN / 2 pixels each)
+static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,  // argb: first pixel of the tile; stride: distance in pixels from one row start to the next
+ int tile_width, int tile_height,  // tile dimensions in pixels
+ int green_to_blue, int red_to_blue,  // multipliers; the SIMD path keeps only their low 8 bits (see CST_5b)
+ int histo[]) {  // counters indexed by the resulting byte value (0..255 after masking)
+ const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue));
+ const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue));
+ const __m128i mask_g = _mm_set1_epi16(0xff00); // green mask: high byte of each 16-bit lane
+ const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask: low 16 bits of each 32-bit pixel
+ const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask: low byte of each 16-bit lane
+ const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1,
+ -1, -1, -1, -1, -1, -1, -1);  // byte 2 (red) of pixels 0..3 -> high byte of 16-bit lanes 0..3; -1 zeroes the byte
+ const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 2, -1, 6, -1, 10, -1, 14);  // same red bytes, but placed in the high byte of 16-bit lanes 4..7
+ int y;
+ for (y = 0; y < tile_height; ++y) {
+ const uint32_t* const src = argb + y * stride;  // start of row y
+ int i, x;
+ for (x = 0; x + SPAN <= tile_width; x += SPAN) {  // only complete groups of SPAN pixels
+ uint16_t values[SPAN];  // one 16-bit result per pixel, spilled for the scalar histogram update
+ const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);  // pixels x .. x+3
+ const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);  // pixels x+4 .. x+7
+ const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo);
+ const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi);
+ const __m128i r = _mm_or_si128(r0, r1); // r 0 (lane notation: high byte, low byte) for all 8 pixels
+ const __m128i gb0 = _mm_and_si128(in0, mask_gb);
+ const __m128i gb1 = _mm_and_si128(in1, mask_gb);
+ const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b (inputs are <= 0xffff, so the pack does not saturate)
+ const __m128i g = _mm_and_si128(gb, mask_g); // g 0
+ const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr: high half of (r << 8) * (8 * m), i.e. (r * m) >> 5 on signed 8-bit r and m
+ const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg: same computation for green
+ const __m128i C = _mm_sub_epi8(gb, B); // x b' (byte-wise wrapping subtract)
+ const __m128i D = _mm_sub_epi8(C, A); // x b''
+ const __m128i E = _mm_and_si128(D, mask_b); // 0 b''
+ _mm_storeu_si128((__m128i*)values, E);
+ for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+ }
+ }
+ {
+ const int left_over = tile_width & (SPAN - 1);  // columns not covered by the SIMD loop above
+ if (left_over > 0) {
+ VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride,  // starts at the first unprocessed column, covers all rows
+ left_over, tile_height,
+ green_to_blue, red_to_blue, histo);
+ }
+ }
+}
+// CollectColorRedTransforms_SSE41: for every pixel of the tile, computes r' = r - dr with byte-wise wrapping arithmetic (dr derived from the green byte and green_to_red) and increments histo[r'].
+static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,  // argb: first pixel of the tile; stride: distance in pixels from one row start to the next
+ int tile_width, int tile_height,  // tile dimensions in pixels
+ int green_to_red, int histo[]) {  // multiplier (SIMD path keeps its low 8 bits via CST_5b); counters indexed by the resulting byte
+ const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red));
+ const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask: bits 8..15 of each 32-bit pixel
+ const __m128i mask = _mm_set1_epi16(0xff);  // keeps the low byte of each 16-bit lane
+
+ int y;
+ for (y = 0; y < tile_height; ++y) {
+ const uint32_t* const src = argb + y * stride;  // start of row y
+ int i, x;
+ for (x = 0; x + SPAN <= tile_width; x += SPAN) {  // only complete groups of SPAN pixels
+ uint16_t values[SPAN];  // one 16-bit result per pixel, spilled for the scalar histogram update
+ const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);  // pixels x .. x+3
+ const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);  // pixels x+4 .. x+7
+ const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0
+ const __m128i g1 = _mm_and_si128(in1, mask_g);
+ const __m128i g = _mm_packus_epi32(g0, g1); // g 0 (lane notation: high byte, low byte) for all 8 pixels
+ const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r (x is the pixel's top byte)
+ const __m128i A1 = _mm_srli_epi32(in1, 16);
+ const __m128i A = _mm_packus_epi32(A0, A1); // x r (inputs are <= 0xffff, so the pack does not saturate)
+ const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr: high half of (g << 8) * (8 * m), i.e. (g * m) >> 5 on signed 8-bit g and m
+ const __m128i C = _mm_sub_epi8(A, B); // x r' (byte-wise wrapping subtract)
+ const __m128i D = _mm_and_si128(C, mask); // 0 r'
+ _mm_storeu_si128((__m128i*)values, D);
+ for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+ }
+ }
+ {
+ const int left_over = tile_width & (SPAN - 1);  // columns not covered by the SIMD loop above
+ if (left_over > 0) {
+ VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride,  // starts at the first unprocessed column, covers all rows
+ left_over, tile_height, green_to_red,
+ histo);
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
// Entry point: assigns this file's SSE4.1 implementations to the corresponding VP8L* dispatch variables.
extern void VP8LEncDspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
+ VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;  // added by this commit
+ VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;  // added by this commit
}
#else // !WEBP_USE_SSE41