summaryrefslogtreecommitdiffstats
path: root/lib/Headers/avx512fintrin.h
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-11 01:26:52 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-11 01:26:52 +0000
commit5074e7e38b126b06700c3aa396c58daa9c99652f (patch)
tree1634286e5b1ecbd8f7309a4bc2b1ff7b3e46f8d3 /lib/Headers/avx512fintrin.h
parentf2ba17e2d09858e3be53511c94d970cfd166729c (diff)
[AVX-512] Remove masking from 512-bit vpermil builtins. The backend now has versions without masking so wrap it with select.
This will allow the backend to constant fold these to generic shuffle vectors like 128-bit and 256-bit without having to working about handling masking. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@289351 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/avx512fintrin.h')
-rw-r--r--lib/Headers/avx512fintrin.h54
1 files changed, 20 insertions, 34 deletions
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 86999574c8..71b645230d 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -6588,61 +6588,47 @@ _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
(__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_permutevar_pd (__m512d __A, __m512i __C)
+_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
- return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
- (__v8di) __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1);
+ return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
- return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
- (__v8di) __C,
- (__v8df) __W,
- (__mmask8) __U);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_permutevar_pd(__A, __C),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
- return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
- (__v8di) __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_permutevar_pd(__A, __C),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_permutevar_ps (__m512 __A, __m512i __C)
+_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
- return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
- (__v16si) __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1);
+ return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
- return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
- (__v16si) __C,
- (__v16sf) __W,
- (__mmask16) __U);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_permutevar_ps(__A, __C),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
- return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
- (__v16si) __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_permutevar_ps(__A, __C),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline __m512d __DEFAULT_FN_ATTRS