diff options
Diffstat (limited to 'src/3rdparty/libwebp/src/dsp/enc_neon.c')
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/enc_neon.c | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/src/3rdparty/libwebp/src/dsp/enc_neon.c b/src/3rdparty/libwebp/src/dsp/enc_neon.c index 43bf124..3a04111 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_neon.c +++ b/src/3rdparty/libwebp/src/dsp/enc_neon.c @@ -9,7 +9,7 @@ // // ARM NEON version of speed-critical encoding functions. // -// adapted from libvpx (http://www.webmproject.org/code/) +// adapted from libvpx (https://www.webmproject.org/code/) #include "src/dsp/dsp.h" @@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a, // Horizontal sum of all four uint32_t values in 'sum'. static int SumToInt_NEON(uint32x4_t sum) { +#if defined(__aarch64__) + return (int)vaddvq_u32(sum); +#else const uint64x2_t sum2 = vpaddlq_u32(sum); - const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1); - return (int)sum3; + const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)), + vreinterpret_u32_u64(vget_high_u64(sum2))); + return (int)vget_lane_u32(sum3, 0); +#endif } static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) { |