diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-11-13 07:26:34 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-11-13 07:26:34 +0000 |
commit | ecc2fbac01f6e7b11612d8b7299e73c7102abcaf (patch) | |
tree | a46d9cf3656a4e40f7473df76db5b1ca6cc487cd /lib/Headers/avx512fintrin.h | |
parent | d8afcb0b6e8b33d051581748b6071adcea596e51 (diff) |
[AVX-512] Replace masked dword and qword variable shift builtins with unmasked builtins and a select.
This is part of a set of changes to allow InstCombine in the backend to optimize variable shifts without having to know about masking.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@286757 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r-- | lib/Headers/avx512fintrin.h | 160 |
1 file changed, 59 insertions, 101 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index c7a41d5499..dd20937062 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -6060,61 +6060,47 @@ _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sllv_epi32 (__m512i __X, __m512i __Y) +_mm512_sllv_epi32(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) +_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_sllv_epi32(__X, __Y), + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_sllv_epi32(__X, __Y), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sllv_epi64 (__m512i __X, __m512i __Y) +_mm512_sllv_epi64(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 
+_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_sllv_epi64(__X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_sllv_epi64(__X, __Y), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -6162,61 +6148,47 @@ _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srav_epi32 (__m512i __X, __m512i __Y) +_mm512_srav_epi32(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) +_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srav_epi32(__X, __Y), + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - 
(__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srav_epi32(__X, __Y), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srav_epi64 (__m512i __X, __m512i __Y) +_mm512_srav_epi64(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) +_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srav_epi64(__X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srav_epi64(__X, __Y), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -6264,61 +6236,47 @@ _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srlv_epi32 (__m512i __X, __m512i __Y) +_mm512_srlv_epi32(__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 
+_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srlv_epi32(__X, __Y), + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) +_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, - (__v16si) __Y, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_srlv_epi32(__X, __Y), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi64 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) +_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srlv_epi64(__X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) +_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_srlv_epi64(__X, __Y), + (__v8di)_mm512_setzero_si512()); } #define _mm512_ternarylogic_epi32(A, B, 
C, imm) __extension__ ({ \ |