diff options
author | Yael Tsafrir <yael.tsafrir@intel.com> | 2017-09-12 07:46:32 +0000 |
---|---|---|
committer | Yael Tsafrir <yael.tsafrir@intel.com> | 2017-09-12 07:46:32 +0000 |
commit | 2d3f7520fc0be56b082353cd044edbaff0e8cd25 (patch) | |
tree | 70d89d266497616de0706f00aca1b9f228d62928 /lib/Headers | |
parent | 1ec89b70e886861634087e0b30cf3c90daaf74e6 (diff) |
[X86] Lower _mm[256|512]_[mask[z]]_avg_epu[8|16] intrinsics to native llvm IR
Differential Revision: https://reviews.llvm.org/D37562
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@313011 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers')
-rw-r--r-- | lib/Headers/avx2intrin.h | 12 | ||||
-rw-r--r-- | lib/Headers/avx512bwintrin.h | 46 | ||||
-rw-r--r-- | lib/Headers/emmintrin.h | 12 |
3 files changed, 42 insertions, 28 deletions
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h index 576f761b25..caf4ced920 100644 --- a/lib/Headers/avx2intrin.h +++ b/lib/Headers/avx2intrin.h @@ -145,13 +145,21 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); + typedef unsigned short __v32hu __attribute__((__vector_size__(64))); + return (__m256i)__builtin_convertvector( + ((__builtin_convertvector((__v32qu)__a, __v32hu) + + __builtin_convertvector((__v32qu)__b, __v32hu)) + 1) + >> 1, __v32qu); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); + typedef unsigned int __v16su __attribute__((__vector_size__(64))); + return (__m256i)__builtin_convertvector( + ((__builtin_convertvector((__v16hu)__a, __v16su) + + __builtin_convertvector((__v16hu)__b, __v16su)) + 1) + >> 1, __v16hu); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index 41958b7214..fa4fb20432 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -706,57 +706,55 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_avg_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + typedef unsigned short __v64hu __attribute__((__vector_size__(128))); + return (__m512i)__builtin_convertvector( + ((__builtin_convertvector((__v64qu) __A, __v64hu) + + __builtin_convertvector((__v64qu) __B, __v64hu)) + 1) + >> 1, __v64qu); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)_mm512_setzero_qi()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_avg_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + typedef unsigned int __v32su __attribute__((__vector_size__(128))); + return (__m512i)__builtin_convertvector( + ((__builtin_convertvector((__v32hu) __A, __v32su) + + __builtin_convertvector((__v32hu) __B, __v32su)) + 1) + >> 1, __v32hu); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi) _mm512_setzero_hi()); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h index 709815cbb4..3372508a7f 100644 --- a/lib/Headers/emmintrin.h +++ b/lib/Headers/emmintrin.h @@ -2258,7 +2258,11 @@ _mm_adds_epu16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); + typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); + return (__m128i)__builtin_convertvector( + ((__builtin_convertvector((__v16qu)__a, __v16hu) + + __builtin_convertvector((__v16qu)__b, __v16hu)) + 1) + >> 1, __v16qu); } /// \brief Computes the rounded avarages of corresponding elements of two @@ -2278,7 +2282,11 @@ _mm_avg_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); + typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); + return (__m128i)__builtin_convertvector( + ((__builtin_convertvector((__v8hu)__a, __v8su) + + __builtin_convertvector((__v8hu)__b, __v8su)) + 1) + >> 1, __v8hu); } /// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16] |