diff options
author | Craig Topper <craig.topper@gmail.com> | 2017-01-18 02:17:10 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2017-01-18 02:17:10 +0000 |
commit | b410108ca3b40ebba4a551a95b9a1159fac595f5 (patch) | |
tree | 542333191e70863f63bc8f2beac074e24444de26 /lib/Headers/avx512fintrin.h | |
parent | a0babd8efc1facb05ac8c1c9627cfbe45bdb350d (diff) |
[AVX-512] Replace subvector broadcast builtins with shufflevectors and selects.
Verified that the backend codegens this equally well.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@292329 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r-- | lib/Headers/avx512fintrin.h | 102 |
1 file changed, 46 insertions, 56 deletions
```diff
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index e6a7217c89..7d9a52fdf2 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -7278,107 +7278,97 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                           (__mmask8)(U), (int)(R)); })
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_broadcast_f32x4 (__m128 __A)
+_mm512_broadcast_f32x4(__m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf)
-                 _mm512_undefined_ps (),
-                 (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
+                                         0, 1, 2, 3, 0, 1, 2, 3,
+                                         0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
+_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf) __O,
-                 __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                           (__v16sf)_mm512_broadcast_f32x4(__A),
+                                           (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
+_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-                 (__v16sf)
-                 _mm512_setzero_ps (),
-                 __M);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
+                                           (__v16sf)_mm512_broadcast_f32x4(__A),
+                                           (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_broadcast_f64x4 (__m256d __A)
+_mm512_broadcast_f64x4(__m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df)
-                  _mm512_undefined_pd (),
-                  (__mmask8) -1);
+  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
+_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df) __O,
-                  __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x4(__A),
+                                            (__v8df)__O);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
+_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-                  (__v8df)
-                  _mm512_setzero_pd (),
-                  __M);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
+                                            (__v8df)_mm512_broadcast_f64x4(__A),
+                                            (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i32x4 (__m128i __A)
+_mm512_broadcast_i32x4(__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si)
-                  _mm512_undefined_epi32 (),
-                  (__mmask16) -1);
+  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
+_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si) __O,
-                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                           (__v16si)_mm512_broadcast_i32x4(__A),
+                                           (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
+_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-                  (__v16si)
-                  _mm512_setzero_si512 (),
-                  __M);
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
+                                           (__v16si)_mm512_broadcast_i32x4(__A),
+                                           (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcast_i64x4 (__m256i __A)
+_mm512_broadcast_i64x4(__m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di)
-                  _mm512_undefined_epi32 (),
-                  (__mmask8) -1);
+  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
+                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
+_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di) __O,
-                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                           (__v8di)_mm512_broadcast_i64x4(__A),
+                                           (__v8di)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
+_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-                  (__v8di)
-                  _mm512_setzero_si512 (),
-                  __M);
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
+                                           (__v8di)_mm512_broadcast_i64x4(__A),
+                                           (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
```