summary | refs | log | tree | commit | diff | stats
path: root/lib/Headers/avx512vldqintrin.h
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2017-01-18 02:17:10 +0000
committer: Craig Topper <craig.topper@gmail.com> 2017-01-18 02:17:10 +0000
commit: b410108ca3b40ebba4a551a95b9a1159fac595f5 (patch)
tree: 542333191e70863f63bc8f2beac074e24444de26 /lib/Headers/avx512vldqintrin.h
parent: a0babd8efc1facb05ac8c1c9627cfbe45bdb350d (diff)
[AVX-512] Replace subvector broadcast builtins with shufflevectors and selects.
Verified that the backend codegens this equally well.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@292329 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512vldqintrin.h')
-rw-r--r-- lib/Headers/avx512vldqintrin.h 42
1 files changed, 20 insertions, 22 deletions
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
index cd9da43705..aecd7df34d 100644
--- a/lib/Headers/avx512vldqintrin.h
+++ b/lib/Headers/avx512vldqintrin.h
@@ -1000,27 +1000,26 @@ _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_broadcast_f64x2 (__m128d __A)
+_mm256_broadcast_f64x2(__m128d __A)
{
- return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
- (__v4df)_mm256_undefined_pd(),
- (__mmask8) -1);
+ return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
+ 0, 1, 0, 1);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
{
- return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
- (__v4df) __O,
- __M);
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
+ (__v4df)_mm256_broadcast_f64x2(__A),
+ (__v4df)__O);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
{
- return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
- (__v4df) _mm256_setzero_ps (),
- __M);
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
+ (__v4df)_mm256_broadcast_f64x2(__A),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -1072,27 +1071,26 @@ _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_broadcast_i64x2 (__m128i __A)
+_mm256_broadcast_i64x2(__m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
- (__v4di)_mm256_undefined_si256(),
- (__mmask8) -1);
+ return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
+ 0, 1, 0, 1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
+_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
- (__v4di) __O,
- __M);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
+ (__v4di)_mm256_broadcast_i64x2(__A),
+ (__v4di)__O);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
{
- return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
- (__v4di) _mm256_setzero_si256 (),
- __M);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
+ (__v4di)_mm256_broadcast_i64x2(__A),
+ (__v4di)_mm256_setzero_si256());
}
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \