summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/AArch64/AArch64FalkorHWPFFix.cpp 101
-rw-r--r-- test/CodeGen/AArch64/falkor-hwpf-fix.mir 277
2 files changed, 318 insertions, 60 deletions
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 0627976c9146..49f5c51d4120 100644
--- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -220,27 +220,27 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
default:
return None;
+ case AArch64::LD1i64:
+ case AArch64::LD2i64:
+ DestRegIdx = 0;
+ BaseRegIdx = 3;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
case AArch64::LD1i8:
case AArch64::LD1i16:
case AArch64::LD1i32:
- case AArch64::LD1i64:
case AArch64::LD2i8:
case AArch64::LD2i16:
case AArch64::LD2i32:
- case AArch64::LD2i64:
case AArch64::LD3i8:
case AArch64::LD3i16:
case AArch64::LD3i32:
+ case AArch64::LD3i64:
case AArch64::LD4i8:
case AArch64::LD4i16:
case AArch64::LD4i32:
- DestRegIdx = 0;
- BaseRegIdx = 3;
- OffsetIdx = -1;
- IsPrePost = false;
- break;
-
- case AArch64::LD3i64:
case AArch64::LD4i64:
DestRegIdx = -1;
BaseRegIdx = 3;
@@ -264,23 +264,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Rv4s:
case AArch64::LD1Rv8h:
case AArch64::LD1Rv16b:
- case AArch64::LD1Twov1d:
- case AArch64::LD1Twov2s:
- case AArch64::LD1Twov4h:
- case AArch64::LD1Twov8b:
- case AArch64::LD2Twov2s:
- case AArch64::LD2Twov4s:
- case AArch64::LD2Twov8b:
- case AArch64::LD2Rv1d:
- case AArch64::LD2Rv2s:
- case AArch64::LD2Rv4s:
- case AArch64::LD2Rv8b:
DestRegIdx = 0;
BaseRegIdx = 1;
OffsetIdx = -1;
IsPrePost = false;
break;
+ case AArch64::LD1Twov1d:
+ case AArch64::LD1Twov2s:
+ case AArch64::LD1Twov4h:
+ case AArch64::LD1Twov8b:
case AArch64::LD1Twov2d:
case AArch64::LD1Twov4s:
case AArch64::LD1Twov8h:
@@ -301,10 +294,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Fourv4s:
case AArch64::LD1Fourv8h:
case AArch64::LD1Fourv16b:
+ case AArch64::LD2Twov2s:
+ case AArch64::LD2Twov4s:
+ case AArch64::LD2Twov8b:
case AArch64::LD2Twov2d:
case AArch64::LD2Twov4h:
case AArch64::LD2Twov8h:
case AArch64::LD2Twov16b:
+ case AArch64::LD2Rv1d:
+ case AArch64::LD2Rv2s:
+ case AArch64::LD2Rv4s:
+ case AArch64::LD2Rv8b:
case AArch64::LD2Rv2d:
case AArch64::LD2Rv4h:
case AArch64::LD2Rv8h:
@@ -345,32 +345,32 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
IsPrePost = false;
break;
+ case AArch64::LD1i64_POST:
+ case AArch64::LD2i64_POST:
+ DestRegIdx = 1;
+ BaseRegIdx = 4;
+ OffsetIdx = 5;
+ IsPrePost = true;
+ break;
+
case AArch64::LD1i8_POST:
case AArch64::LD1i16_POST:
case AArch64::LD1i32_POST:
- case AArch64::LD1i64_POST:
case AArch64::LD2i8_POST:
case AArch64::LD2i16_POST:
case AArch64::LD2i32_POST:
- case AArch64::LD2i64_POST:
case AArch64::LD3i8_POST:
case AArch64::LD3i16_POST:
case AArch64::LD3i32_POST:
+ case AArch64::LD3i64_POST:
case AArch64::LD4i8_POST:
case AArch64::LD4i16_POST:
case AArch64::LD4i32_POST:
- DestRegIdx = 1;
- BaseRegIdx = 4;
- OffsetIdx = 5;
- IsPrePost = false;
- break;
-
- case AArch64::LD3i64_POST:
case AArch64::LD4i64_POST:
DestRegIdx = -1;
BaseRegIdx = 4;
OffsetIdx = 5;
- IsPrePost = false;
+ IsPrePost = true;
break;
case AArch64::LD1Onev1d_POST:
@@ -389,23 +389,16 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Rv4s_POST:
case AArch64::LD1Rv8h_POST:
case AArch64::LD1Rv16b_POST:
- case AArch64::LD1Twov1d_POST:
- case AArch64::LD1Twov2s_POST:
- case AArch64::LD1Twov4h_POST:
- case AArch64::LD1Twov8b_POST:
- case AArch64::LD2Twov2s_POST:
- case AArch64::LD2Twov4s_POST:
- case AArch64::LD2Twov8b_POST:
- case AArch64::LD2Rv1d_POST:
- case AArch64::LD2Rv2s_POST:
- case AArch64::LD2Rv4s_POST:
- case AArch64::LD2Rv8b_POST:
DestRegIdx = 1;
BaseRegIdx = 2;
OffsetIdx = 3;
- IsPrePost = false;
+ IsPrePost = true;
break;
+ case AArch64::LD1Twov1d_POST:
+ case AArch64::LD1Twov2s_POST:
+ case AArch64::LD1Twov4h_POST:
+ case AArch64::LD1Twov8b_POST:
case AArch64::LD1Twov2d_POST:
case AArch64::LD1Twov4s_POST:
case AArch64::LD1Twov8h_POST:
@@ -426,10 +419,17 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LD1Fourv4s_POST:
case AArch64::LD1Fourv8h_POST:
case AArch64::LD1Fourv16b_POST:
+ case AArch64::LD2Twov2s_POST:
+ case AArch64::LD2Twov4s_POST:
+ case AArch64::LD2Twov8b_POST:
case AArch64::LD2Twov2d_POST:
case AArch64::LD2Twov4h_POST:
case AArch64::LD2Twov8h_POST:
case AArch64::LD2Twov16b_POST:
+ case AArch64::LD2Rv1d_POST:
+ case AArch64::LD2Rv2s_POST:
+ case AArch64::LD2Rv4s_POST:
+ case AArch64::LD2Rv8b_POST:
case AArch64::LD2Rv2d_POST:
case AArch64::LD2Rv4h_POST:
case AArch64::LD2Rv8h_POST:
@@ -467,7 +467,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
DestRegIdx = -1;
BaseRegIdx = 2;
OffsetIdx = 3;
- IsPrePost = false;
+ IsPrePost = true;
break;
case AArch64::LDRBBroW:
@@ -572,16 +572,19 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
IsPrePost = true;
break;
+ case AArch64::LDNPDi:
+ case AArch64::LDNPQi:
+ case AArch64::LDNPSi:
case AArch64::LDPQi:
+ case AArch64::LDPDi:
+ case AArch64::LDPSi:
DestRegIdx = -1;
BaseRegIdx = 2;
OffsetIdx = 3;
IsPrePost = false;
break;
- case AArch64::LDPDi:
case AArch64::LDPSWi:
- case AArch64::LDPSi:
case AArch64::LDPWi:
case AArch64::LDPXi:
DestRegIdx = 0;
@@ -592,18 +595,18 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
case AArch64::LDPQpost:
case AArch64::LDPQpre:
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
DestRegIdx = -1;
BaseRegIdx = 3;
OffsetIdx = 4;
IsPrePost = true;
break;
- case AArch64::LDPDpost:
- case AArch64::LDPDpre:
case AArch64::LDPSWpost:
case AArch64::LDPSWpre:
- case AArch64::LDPSpost:
- case AArch64::LDPSpre:
case AArch64::LDPWpost:
case AArch64::LDPWpre:
case AArch64::LDPXpost:
diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
index 54c8b16a9b43..298e8a0c6d7b 100644
--- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir
+++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir
@@ -1,12 +1,7 @@
# RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s
---- |
- @g = external global i32
-
- define void @hwpf1() { ret void }
- define void @hwpf2() { ret void }
-...
---
-# Verify that the tag collision between the loads is resolved.
+# Verify that the tag collision between the loads is resolved for various load opcodes.
+
# CHECK-LABEL: name: hwpf1
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWui %[[BASE]], 0
@@ -17,7 +12,7 @@ body: |
bb.0:
liveins: %w0, %x1
- %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -28,19 +23,147 @@ body: |
RET_ReallyLR
...
---
-# Verify that the tag collision between the loads is resolved and written back for post increment addressing.
# CHECK-LABEL: name: hwpf2
# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8 %q2, 0, %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Onev1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Twov1d %[[BASE]]
+# CHECK: LDRWui %x1, 0
+name: hwpf5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPQi %[[BASE]]
+# CHECK: LDRWui %x1, 3
+name: hwpf6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 3
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpf7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXi %[[BASE]]
+# CHECK: LDRWui %x1, 2
+name: hwpf7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1
+
+ %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# Verify that the tag collision between the loads is resolved and written back
+# for post increment addressing for various load opcodes.
+
+# CHECK-LABEL: name: hwpfinc1
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
# CHECK: LDRWpost %[[BASE]], 0
# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
# CHECK: LDRWui %x1, 1
-name: hwpf2
+name: hwpfinc1
tracksRegLiveness: true
body: |
bb.0:
liveins: %w0, %x1
- %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
+ %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4)
%w2 = LDRWui %x1, 1
%w0 = SUBWri %w0, 1, 0
@@ -50,3 +173,135 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+# CHECK-LABEL: name: hwpfinc2
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i64_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 132
+name: hwpfinc2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc3
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1i8_POST %q2, 0, %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 132
+name: hwpfinc3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWui %x1, 132
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc4
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD1Rv1d_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x1, 252
+name: hwpfinc4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %q2
+
+ %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4)
+ %w2 = LDRWui %x1, 252
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc5
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LD3Threev2s_POST %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWroX %x17, %x0
+name: hwpfinc5
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4)
+ %w0 = LDRWroX %x17, %x0, 0, 0
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc6
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPDpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc6
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w16 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...
+---
+# CHECK-LABEL: name: hwpfinc7
+# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
+# CHECK: LDPXpost %[[BASE]]
+# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
+# CHECK: LDRWui %x17, 2
+name: hwpfinc7
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: %w0, %x1, %x17, %q2
+
+ %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4)
+ %w18 = LDRWui %x17, 2
+
+ %w0 = SUBWri %w0, 1, 0
+ %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
+ Bcc 9, %bb.0, implicit %nzcv
+
+ bb.1:
+ RET_ReallyLR
+...