diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-06-11 03:31:13 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-06-11 03:31:13 +0000 |
commit | 353a6ccfbf42d12d3eb89075efbf96e7371690d1 (patch) | |
tree | 84528ec0cfd06ebd379a4e087382640a90024115 /lib/Headers/avx512vlbwintrin.h | |
parent | 677dadf5654c4bfd998c89d3d29ab4a6a4e44352 (diff) |
[AVX512] Implement 512-bit and masked shufflelo and shufflehi intrinsics directly with __builtin_shufflevector and __builtin_ia32_select. Also improve the formatting of the AVX2 version.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@272452 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512vlbwintrin.h')
-rw-r--r-- | lib/Headers/avx512vlbwintrin.h | 53 |
1 files changed, 24 insertions, 29 deletions
```diff
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 43cdadbf4d..52499fe842 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -2407,49 +2407,44 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
                                           (__mmask16)(m)); })
 
 #define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_pshufhw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
-                                          (__v8hi)(__m128i)(W), \
-                                          (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+                                      _mm_shufflehi_epi16((A), (imm)), \
+                                      (__v8hi)(__m128i)(W)); })
 
 #define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_pshufhw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
-                                          (__v8hi)_mm_setzero_hi(), \
-                                          (__mmask8)(U)); })
-
+  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+                                      _mm_shufflehi_epi16((A), (imm)), \
+                                      (__v8hi)_mm_setzero_hi()); })
 
 #define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_pshufhw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
-                                          (__v16hi)(__m256i)(W), \
-                                          (__mmask16)(U)); })
-
+  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+                                      _mm256_shufflehi_epi16((A), (imm)), \
+                                      (__v16hi)(__m256i)(W)); })
 
 #define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_pshufhw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
-                                          (__v16hi)_mm256_setzero_si256(), \
-                                          (__mmask16)(U)); })
-
+  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+                                      _mm256_shufflehi_epi16((A), (imm)), \
+                                      (__v16hi)_mm256_setzero_si256()); })
 
 #define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_pshuflw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
-                                          (__v8hi)(__m128i)(W), \
-                                          (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+                                      _mm_shufflelo_epi16((A), (imm)), \
+                                      (__v8hi)(__m128i)(W)); })
 
 #define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_pshuflw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
-                                          (__v8hi)_mm_setzero_hi(), \
-                                          (__mmask8)(U)); })
-
+  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+                                      _mm_shufflelo_epi16((A), (imm)), \
+                                      (__v8hi)_mm_setzero_hi()); })
 
 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_pshuflw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
-                                          (__v16hi)(__m256i)(W), \
-                                          (__mmask16)(U)); })
-
+  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+                                      _mm256_shufflelo_epi16((A), (imm)), \
+                                      (__v16hi)(__m256i)(W)); })
 
 #define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_pshuflw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
-                                          (__v16hi)_mm256_setzero_si256(), \
-                                          (__mmask16)(U)); })
+  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+                                      _mm256_shufflelo_epi16((A), (imm)), \
+                                      (__v16hi)_mm256_setzero_si256()); })
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sllv_epi16 (__m256i __A, __m256i __B)
```