summary | refs | log | tree | commit | diff | stats
path: root/lib/Headers
diff options
context:
space:
mode:
author Yael Tsafrir <yael.tsafrir@intel.com> 2017-09-12 07:46:32 +0000
committer Yael Tsafrir <yael.tsafrir@intel.com> 2017-09-12 07:46:32 +0000
commit 2d3f7520fc0be56b082353cd044edbaff0e8cd25 (patch)
tree 70d89d266497616de0706f00aca1b9f228d62928 /lib/Headers
parent 1ec89b70e886861634087e0b30cf3c90daaf74e6 (diff)
[X86] Lower _mm[256|512]_[mask[z]]_avg_epu[8|16] intrinsics to native llvm IR
Differential Revision: https://reviews.llvm.org/D37562
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@313011 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers')
-rw-r--r-- lib/Headers/avx2intrin.h 12
-rw-r--r-- lib/Headers/avx512bwintrin.h 46
-rw-r--r-- lib/Headers/emmintrin.h 12
3 files changed, 42 insertions, 28 deletions
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index 576f761b25..caf4ced920 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -145,13 +145,21 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_avg_epu8(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
+ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
+ return (__m256i)__builtin_convertvector(
+ ((__builtin_convertvector((__v32qu)__a, __v32hu) +
+ __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)
+ >> 1, __v32qu);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_avg_epu16(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
+ typedef unsigned int __v16su __attribute__((__vector_size__(64)));
+ return (__m256i)__builtin_convertvector(
+ ((__builtin_convertvector((__v16hu)__a, __v16su) +
+ __builtin_convertvector((__v16hu)__b, __v16su)) + 1)
+ >> 1, __v16hu);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 41958b7214..fa4fb20432 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -706,57 +706,55 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_avg_epu8 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) -1);
+ typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
+ return (__m512i)__builtin_convertvector(
+ ((__builtin_convertvector((__v64qu) __A, __v64hu) +
+ __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
+ >> 1, __v64qu);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
__m512i __B)
{
- return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) __W,
- (__mmask64) __U);
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_avg_epu8(__A, __B),
+ (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) __U);
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_avg_epu8(__A, __B),
+ (__v64qi)_mm512_setzero_qi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_avg_epu16 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) -1);
+ typedef unsigned int __v32su __attribute__((__vector_size__(128)));
+ return (__m512i)__builtin_convertvector(
+ ((__builtin_convertvector((__v32hu) __A, __v32su) +
+ __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
+ >> 1, __v32hu);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
__m512i __B)
{
- return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) __W,
- (__mmask32) __U);
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_avg_epu16(__A, __B),
+ (__v32hi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) __U);
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_avg_epu16(__A, __B),
+ (__v32hi) _mm512_setzero_hi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 709815cbb4..3372508a7f 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -2258,7 +2258,11 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu8(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
+ typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
+ return (__m128i)__builtin_convertvector(
+ ((__builtin_convertvector((__v16qu)__a, __v16hu) +
+ __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)
+ >> 1, __v16qu);
}
/// \brief Computes the rounded averages of corresponding elements of two
@@ -2278,7 +2282,11 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_avg_epu16(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
+ typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
+ return (__m128i)__builtin_convertvector(
+ ((__builtin_convertvector((__v8hu)__a, __v8su) +
+ __builtin_convertvector((__v8hu)__b, __v8su)) + 1)
+ >> 1, __v8hu);
}
/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]