diff options
author | Craig Topper <craig.topper@intel.com> | 2017-11-10 05:20:32 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-11-10 05:20:32 +0000 |
commit | 41ea3d5b6cc3bdb5a44b1e9bc292f635471d44c3 (patch) | |
tree | fa48f4e8964e8bab034494b9724f055373758fed /lib/Headers/fmaintrin.h | |
parent | 23cd700298fc43ba058cc55fcb5a9c7752efa618 (diff) |
[X86] Reduce the number of FMA builtins needed by the frontend by adding negates to operands of the fmadd and fmaddsub builtins.
The backend should be able to combine the negates to create fmsub, fnmadd, and fnmsub. faddsub converting to fsubadd still needs work I think, but should be very doable.
This matches what we already do for the masked builtins.
This only covers the packed builtins. Scalar builtins will be done after FMA4 is fixed.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@317873 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Headers/fmaintrin.h')
-rw-r--r-- | lib/Headers/fmaintrin.h | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/lib/Headers/fmaintrin.h b/lib/Headers/fmaintrin.h index 235931ccb6..86a1198879 100644 --- a/lib/Headers/fmaintrin.h +++ b/lib/Headers/fmaintrin.h @@ -58,13 +58,13 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -82,13 +82,13 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -106,13 +106,13 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -142,13 +142,13 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS @@ -166,37 +166,37 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } static __inline__ __m256 __DEFAULT_FN_ATTRS @@ -214,13 +214,13 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); + return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); + return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } #undef __DEFAULT_FN_ATTRS |