summaryrefslogtreecommitdiffstats
path: root/lib/Headers/avx512vldqintrin.h
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-10-31 04:30:56 +0000
committerCraig Topper <craig.topper@gmail.com>2016-10-31 04:30:56 +0000
commitcbebc3b2e70a27da2134841da0f72e404b699ecf (patch)
treec4429b37185450ac45c46def1bf9cddf527352da /lib/Headers/avx512vldqintrin.h
parent14849cb2ec67885d527b129037904a143327129e (diff)
[AVX-512] Remove masked vector extract builtins and replace with native shufflevectors and selects.
Unfortunately, the backend currently doesn't fold masks into the instructions correctly when they come from these shufflevectors. I'll work on that in a future commit. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@285540 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512vldqintrin.h')
-rw-r--r--lib/Headers/avx512vldqintrin.h44
1 file changed, 20 insertions, 24 deletions
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
index ffe59595e8..0f617c1a49 100644
--- a/lib/Headers/avx512vldqintrin.h
+++ b/lib/Headers/avx512vldqintrin.h
@@ -1096,40 +1096,36 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
}
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1); })
+ (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)_mm256_undefined_pd(), \
+ ((imm) & 1) ? 2 : 0, \
+ ((imm) & 1) ? 3 : 1); })
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+ (__v2df)(W)); })
#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+ (__v2df)_mm_setzero_pd()); })
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)-1); })
+ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+ (__v4di)_mm256_undefined_si256(), \
+ ((imm) & 1) ? 2 : 0, \
+ ((imm) & 1) ? 3 : 1); })
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+ (__v2di)(W)); })
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+ (__v2di)_mm_setzero_di()); })
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
(__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \