[ARM NEON] Define vfms_f32 on ARM, and all vfms using vfma.

r259537 added vfma/vfms to armv7, but the builtin was only lowered on the AArch64 side. Instead of supporting it on ARM, get rid of it. The vfms builtin lowered to: %nb = fsub float -0.0, %b %r = @llvm.fma.f32(%a, %nb, %c) Instead, define the operation in terms of vfma, and swap the multiplicands. It now lowers to: %na = fsub float -0.0, %a %r = @llvm.fma.f32(%na, %b, %c) This matches the instruction more closely, and lets current LLVM generate the "natural" operand ordering: fmls.2s v0, v1, v2 instead of the crooked (but equivalent): fmls.2s v0, v2, v1 Except for theses changes, assembly is identical. LLVM accepts both commutations, and the LLVM tests in: test/CodeGen/AArch64/arm64-fmadd.ll test/CodeGen/AArch64/fp-dp3.ll test/CodeGen/AArch64/neon-fma.ll test/CodeGen/ARM/fusedMAC.ll already check either the new one only, or both. Also verified against the test-suite unittests. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@266807 91177308-0d34-0410-b5e6-96231b3b80d8
author: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-04-19 19:44:45 +0000
committer: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-04-19 19:44:45 +0000
commit: fa8ab2562a582a60fb7dff9d873b65d84ab864f4 (patch)
tree: bb68c1371ede9d2ab838de6fbbd7c627a5834f0e /include/clang/Basic/arm_neon.td
parent: a426f58b698e1e08ca72684a569e6718ccb8ceb7 (diff)
1 files changed, 6 insertions, 5 deletions
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 4863566653..6641ed2ede 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -339,6 +339,7 @@ def OP_MLALHi   : Op<(call "vmlal", $p0, (call "vget_high", $p1),
                                          (call "vget_high", $p2))>;
 def OP_MLALHi_N : Op<(call "vmlal_n", $p0, (call "vget_high", $p1), $p2)>;
 def OP_MLS      : Op<(op "-", $p0, (op "*", $p1, $p2))>;
+def OP_FMLS     : Op<(call "vfma", $p0, (op "-", $p1), $p2)>;
 def OP_MLSL     : Op<(op "-", $p0, (call "vmull", $p1, $p2))>;
 def OP_MLSLHi   : Op<(call "vmlsl", $p0, (call "vget_high", $p1),
                                          (call "vget_high", $p2))>;
@@ -347,7 +348,7 @@ def OP_MUL_N    : Op<(op "*", $p0, (dup $p1))>;
 def OP_MLA_N    : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>;
 def OP_MLS_N    : Op<(op "-", $p0, (op "*", $p1, (dup $p2)))>;
 def OP_FMLA_N   : Op<(call "vfma", $p0, $p1, (dup $p2))>;
-def OP_FMLS_N   : Op<(call "vfms", $p0, $p1, (dup $p2))>;
+def OP_FMLS_N   : Op<(call "vfma", $p0, (op "-", $p1), (dup $p2))>;
 def OP_MLAL_N   : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
 def OP_MLSL_N   : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
 def OP_MUL_LN   : Op<(op "*", $p0, (splat $p1, $p2))>;
@@ -377,8 +378,8 @@ def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
 def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
 def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
 def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
-def OP_FMS_LN   : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
-def OP_FMS_LNQ  : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
+def OP_FMS_LN   : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
+def OP_FMS_LNQ  : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
 def OP_TRN1     : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
                                                     (decimate mask1, 2)))>;
 def OP_ZIP1     : Op<(shuffle $p0, $p1, (lowhalf (interleave mask0, mask1)))>;
@@ -826,7 +827,7 @@ def VREINTERPRET
 
 let ArchGuard = "defined(__ARM_FEATURE_FMA)" in {
   def VFMA : SInst<"vfma", "dddd", "fQf">;
-  def VFMS : SInst<"vfms", "dddd", "fQf">;
+  def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -911,7 +912,7 @@ def FDIV : IOpInst<"vdiv", "ddd",  "fdQfQd", OP_DIV>;
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations
 def FMLA : SInst<"vfma", "dddd", "dQd">;
-def FMLS : SInst<"vfms", "dddd", "dQd">;
+def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2016-04-19 19:44:45 +0000
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2016-04-19 19:44:45 +0000
commit	fa8ab2562a582a60fb7dff9d873b65d84ab864f4 (patch)
tree	bb68c1371ede9d2ab838de6fbbd7c627a5834f0e /include/clang/Basic/arm_neon.td
parent	a426f58b698e1e08ca72684a569e6718ccb8ceb7 (diff)