summary | refs | log | tree | commit | diff | stats
path: root/lib/Headers/avx512vlbwintrin.h
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2016-06-11 03:31:13 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-06-11 03:31:13 +0000
commit: 353a6ccfbf42d12d3eb89075efbf96e7371690d1 (patch)
tree: 84528ec0cfd06ebd379a4e087382640a90024115 /lib/Headers/avx512vlbwintrin.h
parent: 677dadf5654c4bfd998c89d3d29ab4a6a4e44352 (diff)
[AVX512] Implement 512-bit and masked shufflelo and shufflehi intrinsics directly with __builtin_shufflevector and __builtin_ia32_select. Also improve the formatting of the AVX2 version.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@272452 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512vlbwintrin.h')
-rw-r--r-- lib/Headers/avx512vlbwintrin.h 53
1 files changed, 24 insertions, 29 deletions
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 43cdadbf4d..52499fe842 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -2407,49 +2407,44 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
(__mmask16)(m)); })
#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_pshufhw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
- (__v8hi)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ _mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)); })
#define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_pshufhw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
- (__v8hi)_mm_setzero_hi(), \
- (__mmask8)(U)); })
-
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ _mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_hi()); })
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_pshufhw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
- (__v16hi)(__m256i)(W), \
- (__mmask16)(U)); })
-
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ _mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)(__m256i)(W)); })
#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_pshufhw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
- (__v16hi)_mm256_setzero_si256(), \
- (__mmask16)(U)); })
-
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ _mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)_mm256_setzero_si256()); })
#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_pshuflw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
- (__v8hi)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ _mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)); })
#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_pshuflw128_mask((__v8hi)(__m128i)(A), (int)(imm), \
- (__v8hi)_mm_setzero_hi(), \
- (__mmask8)(U)); })
-
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ _mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_hi()); })
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_pshuflw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
- (__v16hi)(__m256i)(W), \
- (__mmask16)(U)); })
-
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ _mm256_shufflelo_epi16((A), (imm)), \
+ (__v16hi)(__m256i)(W)); })
#define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_pshuflw256_mask((__v16hi)(__m256i)(A), (int)(imm), \
- (__v16hi)_mm256_setzero_si256(), \
- (__mmask16)(U)); })
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ _mm256_shufflelo_epi16((A), (imm)), \
+ (__v16hi)_mm256_setzero_si256()); })
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sllv_epi16 (__m256i __A, __m256i __B)