From 28647f65ca9348a7b245c0ca4684c3ee2fa4d56b Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Thu, 21 Mar 2019 22:31:37 +0000
Subject: [AArch64] Split the neon.addp intrinsic into integer and fp variants.

This is the result of discussions on the list about how to deal with intrinsics
which require codegen to disambiguate them via only the integer/fp overloads.
It causes problems for GlobalISel as some of that information is lost during
translation, while with other operations like IR instructions the information is
encoded into the instruction opcode.

This patch changes clang to emit the new faddp intrinsic if the vector operands
to the builtin have FP element types. LLVM IR AutoUpgrade has been taught to
upgrade existing calls to aarch64.neon.addp with fp vector arguments, and
we remove the workarounds introduced for GlobalISel in r355865.

This is a more permanent solution to PR40968.

Differential Revision: https://reviews.llvm.org/D59655

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356722 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'test/CodeGen/aarch64-v8.2a-neon-intrinsics.c')

diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index e1a2e3fb92..a4bf875336 100644
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
 }
 
 // CHECK-LABEL: test_vpadd_f16
-// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b)
 // CHECK:  ret <4 x half> [[ADD]]
 float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
   return vpadd_f16(a, b);
 }
 
 // CHECK-LABEL: test_vpaddq_f16
-// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b)
 // CHECK:  ret <8 x half> [[ADD]]
 float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
   return vpaddq_f16(a, b);
-- 
cgit v1.2.3


From 6df76a341e7e5a407a05c0c03bbff5dd77e2960e Mon Sep 17 00:00:00 2001
From: "Diogo N. Sampaio" <diogo.sampaio@arm.com>
Date: Fri, 12 Apr 2019 10:43:48 +0000
Subject: [AArch64] Add v8.2-a half precision element extract intrinsics

Summary:
Implements the intrinsics defined in the ACLE to extract half-precision fp scalar elements from float16x4_t and float16x8_t vector types,
namely:
vduph_lane_f16
vduph_laneq_f16

Reviewers: pablooliveira, olista01, LukeGeeson, DavidSpickett

Reviewed By: DavidSpickett

Subscribers: DavidSpickett, javed.absar, kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D60272


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@358276 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'test/CodeGen/aarch64-v8.2a-neon-intrinsics.c')

diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index a4bf875336..a84445b62a 100644
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1618,3 +1618,16 @@ float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
   return vtrn2q_f16(a, b);
 }
 
+// CHECK-LABEL: @test_vduph_laneq_f16(
+// CHECK:        [[V:%.*]] = extractelement <8 x half> [[V2:%.*]], i32 7
+// CHECK-NEXT:   ret half [[V]]
+float16_t test_vduph_laneq_f16(float16x8_t vec) {
+  return vduph_laneq_f16(vec, 7);
+}
+
+// CHECK-LABEL: @test_vduph_lane_f16(
+// CHECK:        [[V:%.*]] = extractelement <4 x half> [[V2:%.*]], i32 3
+// CHECK-NEXT:   ret half [[V]]
+float16_t test_vduph_lane_f16(float16x4_t vec) {
+  return vduph_lane_f16(vec, 3);
+}
-- 
cgit v1.2.3