// path: root/src/3rdparty/libwebp/src/dsp/dec_sse41.c
// blob: 4e81ec4d804df39f3e478bbf6b9c81d94a4c344d (plain)
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)

#include "./dsp.h"

#if defined(WEBP_USE_SSE41)

#include <smmintrin.h>
#include "../dec/vp8i_dec.h"
#include "../utils/utils.h"

// Horizontal prediction for a 16x16 block: each of the 16 rows (BPS bytes
// apart) is filled with 16 copies of a single byte taken from just before
// the row start.
// NOTE(review): assumes WebPMemToUint32() loads the four bytes at its argument
// in memory order, so lane 3 of the register is dst[-1] -- confirm in utils.h.
static void HE16(uint8_t* dst) {
  // Shuffle control with every lane set to 3: all 16 output bytes select
  // byte 3 of the source register.
  const __m128i kBroadcast3 = _mm_set1_epi8(3);
  int row;
  for (row = 0; row < 16; ++row) {
    uint8_t* const line = dst + row * BPS;
    // Fetch the 4 bytes preceding the row; only the last one is kept.
    const __m128i left4 = _mm_cvtsi32_si128(WebPMemToUint32(line - 4));
    const __m128i filled = _mm_shuffle_epi8(left4, kBroadcast3);
    // Rows are not assumed to be 16-byte aligned, hence the unaligned store.
    _mm_storeu_si128((__m128i*)line, filled);
  }
}

//------------------------------------------------------------------------------
// Entry point

// Prototype for the entry point defined just below.
extern void VP8DspInitSSE41(void);

// Registers this file's SSE4.1 implementation in the VP8PredLuma16 function
// table (declared outside this file): entry 3 is pointed at HE16, the
// horizontal predictor. No other table entry is touched here.
// NOTE(review): WEBP_TSAN_IGNORE_FUNCTION is defined elsewhere; presumably it
// excludes this function from ThreadSanitizer instrumentation -- confirm.
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
  VP8PredLuma16[3] = HE16;
}

#else  // !WEBP_USE_SSE41

// Fallback used when WEBP_USE_SSE41 is not defined.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably
// it emits an empty VP8DspInitSSE41() so the symbol still exists -- confirm.
WEBP_DSP_INIT_STUB(VP8DspInitSSE41)

#endif  // WEBP_USE_SSE41