diff options
author | Craig Topper <craig.topper@intel.com> | 2018-05-29 03:26:38 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-05-29 03:26:38 +0000 |
commit | 229a8c34f0725dc9217ded5bd8e023522fe009e0 (patch) | |
tree | 7536b536c950ec642c991d2aa399f73cede5d892 /lib/Headers/avx512vlbwintrin.h | |
parent | 2c6477ebd3c8da4a29b0e6399b80b29c8919e1f6 (diff) |
[X86] Merge the 3 different flavors of masked vpermi2var/vpermt2var builtins to a single version without masking. Use select builtins with appropriate operand instead.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@333387 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512vlbwintrin.h')
-rw-r--r-- | lib/Headers/avx512vlbwintrin.h | 86 |
1 files changed, 38 insertions, 48 deletions
```diff
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 6c2d3eadc9..30a12b6072 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -1290,81 +1290,71 @@ _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
-            __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
-                   (__v8hi) __I /* idx */ ,
-                   (__v8hi) __B,
-                   (__mmask8) __U);
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
-         __mmask16 __U, __m256i __B)
+_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
 {
-  return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
-                   (__v16hi) __I /* idx */ ,
-                   (__v16hi) __B,
-                   (__mmask16) __U);
+  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
+                                                 (__v8hi) __B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
+_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
+                            __m128i __B)
 {
-  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */,
-                   (__v8hi) __A,
-                   (__v8hi) __B,
-                   (__mmask8) -1);
+  return (__m128i)__builtin_ia32_selectw_128(__U,
+                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
+                                  (__v8hi)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
-           __m128i __B)
+_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
+                             __m128i __B)
 {
-  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I/* idx */,
-                   (__v8hi) __A,
-                   (__v8hi) __B,
-                   (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectw_128(__U,
+                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
+                                  (__v8hi)__I);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
             __m128i __B)
 {
-  return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I/* idx */,
-                   (__v8hi) __A,
-                   (__v8hi) __B,
-                   (__mmask8) __U);
+  return (__m128i)__builtin_ia32_selectw_128(__U,
+                                  (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
+                                  (__v8hi)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
+_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
 {
-  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */,
-                   (__v16hi) __A,
-                   (__v16hi) __B,
-                   (__mmask16) -1);
+  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
+                                                 (__v16hi)__B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
+                               __m256i __B)
+{
+  return (__m256i)__builtin_ia32_selectw_256(__U,
+                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
+                              (__v16hi)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
-        __m256i __I, __m256i __B)
+_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
+                                __m256i __B)
 {
-  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I/* idx */,
-                   (__v16hi) __A,
-                   (__v16hi) __B,
-                   (__mmask16) __U);
+  return (__m256i)__builtin_ia32_selectw_256(__U,
+                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
+                              (__v16hi)__I);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
-         __m256i __I, __m256i __B)
+_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
+                                 __m256i __B)
 {
-  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I/* idx */,
-                   (__v16hi) __A,
-                   (__v16hi) __B,
-                   (__mmask16) __U);
+  return (__m256i)__builtin_ia32_selectw_256(__U,
+                              (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
+                              (__v16hi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
```