summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2024-04-23 17:37:07 +0100
committerGitHub <noreply@github.com>2024-04-23 17:37:07 +0100
commitcebc9609d8cdc6f488693cd8e4a616b935b38d2c (patch)
treef4fee0ac694bdefec2849ab1edeec70c36115198
parentc45fbfdb8e5a01cb4473c179dc390e9c039f3f39 (diff)
[AArch64] Match ZIP and UZP starting from undef elements. (#89578)
In case the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", meaning they don't match a zip/uzp where they could. This patch alters the matching code to first check for the first non-undef element, to try and get WhichResult correct.
-rw-r--r--llvm/lib/Target/AArch64/AArch64PerfectShuffle.h43
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-uzp.ll30
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-zip.ll18
3 files changed, 49 insertions, 42 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 7abaead694d1..a143243a8d3b 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
return (PFEntry >> 30) + 1;
}
-inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for zip1 or zip2 masks of the form:
+/// <0, 8, 1, 9, 2, 10, 3, 11> or
+/// <4, 12, 5, 13, 6, 14, 7, 15>
+inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
- WhichResult = (M[0] == 0 ? 0 : 1);
+ // Check the first non-undef element for which half to use.
+ unsigned WhichResult = 2;
+ for (unsigned i = 0; i != NumElts / 2; i++) {
+ if (M[i * 2] >= 0) {
+ WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
+ break;
+ } else if (M[i * 2 + 1] >= 0) {
+ WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
+ break;
+ }
+ }
+ if (WhichResult == 2)
+ return false;
+
+ // Check all elements match.
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
@@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
Idx += 1;
}
-
+ WhichResultOut = WhichResult;
return true;
}
-inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for uzp1 or uzp2 masks of the form:
+/// <0, 2, 4, 6, 8, 10, 12, 14> or
+/// <1, 3, 5, 7, 9, 11, 13, 15>
+inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
+ // Check the first non-undef element for which half to use.
+ unsigned WhichResult = 2;
+ for (unsigned i = 0; i != NumElts; i++) {
+ if (M[i] >= 0) {
+ WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
+ break;
+ }
+ }
+ if (WhichResult == 2)
+ return false;
+
+ // Check all elements match.
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
-
+ WhichResultOut = WhichResult;
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index 6e01ebc95a1c..49a51d96fbc8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef0:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef01:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef012:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 349751dda461..4c771cbd2966 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_01:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s
@@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s
@@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_012:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s