diff options
author | David Green <david.green@arm.com> | 2024-04-23 17:37:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-23 17:37:07 +0100 |
commit | cebc9609d8cdc6f488693cd8e4a616b935b38d2c (patch) | |
tree | f4fee0ac694bdefec2849ab1edeec70c36115198 | |
parent | c45fbfdb8e5a01cb4473c179dc390e9c039f3f39 (diff) |
[AArch64] Match ZIP and UZP starting from undef elements. (#89578)
In case the first element of a zip/uzp mask is undef, the isZIPMask and
isUZPMask functions derive "WhichResult" from M[0] alone, so they pick the
wrong value for roughly half of such masks (an undef M[0] always selects
result 1, so zip1/uzp1 masks with a leading undef are missed), meaning they
don't match a zip/uzp where they could. This patch alters the matching code
to determine WhichResult from the first non-undef element instead.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64PerfectShuffle.h | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-uzp.ll | 30 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-zip.ll | 18 |
3 files changed, 49 insertions, 42 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h index 7abaead694d1..a143243a8d3b 100644 --- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h +++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) { return (PFEntry >> 30) + 1; } -inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { +/// Return true for zip1 or zip2 masks of the form: +/// <0, 8, 1, 9, 2, 10, 3, 11> or +/// <4, 12, 5, 13, 6, 14, 7, 15> +inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) { unsigned NumElts = VT.getVectorNumElements(); if (NumElts % 2 != 0) return false; - WhichResult = (M[0] == 0 ? 0 : 1); + // Check the first non-undef element for which half to use. + unsigned WhichResult = 2; + for (unsigned i = 0; i != NumElts / 2; i++) { + if (M[i * 2] >= 0) { + WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1); + break; + } else if (M[i * 2 + 1] >= 0) { + WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1); + break; + } + } + if (WhichResult == 2) + return false; + + // Check all elements match. unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != Idx) || @@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { return false; Idx += 1; } - + WhichResultOut = WhichResult; return true; } -inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { +/// Return true for uzp1 or uzp2 masks of the form: +/// <0, 2, 4, 6, 8, 10, 12, 14> or +/// <1, 3, 5, 7, 9, 11, 13, 15> +inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) { unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); + // Check the first non-undef element for which half to use. 
+ unsigned WhichResult = 2; + for (unsigned i = 0; i != NumElts; i++) { + if (M[i] >= 0) { + WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1); + break; + } + } + if (WhichResult == 2) + return false; + + // Check all elements match. for (unsigned i = 0; i != NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned)M[i] != 2 * i + WhichResult) return false; } - + WhichResultOut = WhichResult; return true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll index 6e01ebc95a1c..49a51d96fbc8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll @@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef0: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> @@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef01: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; 
CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> @@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef012: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15> diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 349751dda461..4c771cbd2966 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind { define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_01: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // 
kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ret <8 x i16> %s @@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_0: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ret <8 x i16> %s @@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_012: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11> ret <8 x i16> %s |