summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Koakuma <koachan@protonmail.com> 2024-01-15 04:28:51 +0700
committer: GitHub <noreply@github.com> 2024-01-14 16:28:51 -0500
commit: 63f98290d09e1da05fb1217d1b760cbe24b76db9 (patch)
tree: ddb7d53229c8c157070315db9882dd746128aefe
parent: 72990df072a56996612169f07c5752a6924288bb (diff)
[SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (#77196) [branch: upstream/users/koachan/main.sparc-prefer-rdpc-over-call-to-implement-getpcx-for-64-bit-target]
On 64-bit targets, prefer using RDPC over CALL to get the value of %pc. This is faster on modern processors (Niagara T1 and newer) and avoids polluting the processor's predictor state. The old behavior of using a fake CALL is still done when tuning for classic UltraSPARC processors, since RDPC is much slower there. A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads, and about 7% speedup on INSERT/UPDATE loads.
-rw-r--r-- llvm/lib/Target/Sparc/Sparc.td 18
-rw-r--r-- llvm/lib/Target/Sparc/SparcAsmPrinter.cpp 25
-rw-r--r-- llvm/test/CodeGen/SPARC/getpcx-call.ll 51
-rw-r--r-- llvm/test/CodeGen/SPARC/getpcx-rdpc.ll 51
4 files changed, 138 insertions, 7 deletions
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 1a71cfed3128..7b1033956524 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software emulation for floating point">;
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget tuning features.
+//
+
+def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
+ "rd %pc, %XX is slow", [FeatureV9]>;
+
//==== Features added predmoninantly for LEON subtarget support
include "LeonFeatures.td"
@@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant {
// SPARC processors supported.
//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+class Proc<string Name, list<SubtargetFeature> Features,
+ list<SubtargetFeature> TuneFeatures = []>
+ : Processor<Name, NoItineraries, Features, TuneFeatures>;
def : Proc<"generic", []>;
def : Proc<"v7", [FeatureSoftMulDiv, FeatureNoFSMULD]>;
@@ -118,9 +126,11 @@ def : Proc<"ma2480", [FeatureLeon, LeonCASA]>;
def : Proc<"ma2485", [FeatureLeon, LeonCASA]>;
def : Proc<"ma2x8x", [FeatureLeon, LeonCASA]>;
def : Proc<"v9", [FeatureV9]>;
-def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS],
+ [TuneSlowRDPC]>;
def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
- FeatureVIS2]>;
+ FeatureVIS2],
+ [TuneSlowRDPC]>;
def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
FeatureVIS2]>;
def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc,
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index cca624e09267..215a8ea83190 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "MCTargetDesc/SparcTargetStreamer.h"
#include "Sparc.h"
#include "SparcInstrInfo.h"
@@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer,
OutStreamer.emitInstruction(CallInst, STI);
}
+static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst RDPCInst;
+ RDPCInst.setOpcode(SP::RDASR);
+ RDPCInst.addOperand(RD);
+ RDPCInst.addOperand(MCOperand::createReg(SP::ASR5));
+ OutStreamer.emitInstruction(RDPCInst, STI);
+}
+
static void EmitSETHI(MCStreamer &OutStreamer,
MCOperand &Imm, MCOperand &RD,
const MCSubtargetInfo &STI)
@@ -226,7 +236,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
MCOperand RegO7 = MCOperand::createReg(SP::O7);
// <StartLabel>:
- // call <EndLabel>
+ // <GET-PC> // This will be either `call <EndLabel>` or `rd %pc, %o7`.
// <SethiLabel>:
// sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO>
// <EndLabel>:
@@ -234,8 +244,17 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
// add <MO>, %o7, <MO>
OutStreamer->emitLabel(StartLabel);
- MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
- EmitCall(*OutStreamer, Callee, STI);
+ if (!STI.getTargetTriple().isSPARC64() ||
+ STI.hasFeature(Sparc::TuneSlowRDPC)) {
+ MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
+ EmitCall(*OutStreamer, Callee, STI);
+ } else {
+ // TODO find out whether it is possible to store PC
+ // in other registers, to enable leaf function optimization.
+ // (On the other hand, approx. over 97.8% of GETPCXes happen
+ // in non-leaf functions, so would this be worth the effort?)
+ EmitRDPC(*OutStreamer, RegO7, STI);
+ }
OutStreamer->emitLabel(SethiLabel);
MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
GOTLabel, StartLabel, SethiLabel,
diff --git a/llvm/test/CodeGen/SPARC/getpcx-call.ll b/llvm/test/CodeGen/SPARC/getpcx-call.ll
new file mode 100644
index 000000000000..72d7b5a0bc2f
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/getpcx-call.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
+; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
+;; with a fake `call`.
+;; All other SPARC64 targets implement it with `rd %pc, %o7`.
+;; Need to do the tests in separate files because apparently `tune-cpu`
+;; attribute applies to the entire file at once.
+
+@value = external global i32
+
+define i32 @testCall() nounwind #0 {
+; SPARC-LABEL: testCall:
+; SPARC: ! %bb.0:
+; SPARC-NEXT: save %sp, -96, %sp
+; SPARC-NEXT: .Ltmp0:
+; SPARC-NEXT: call .Ltmp1
+; SPARC-NEXT: .Ltmp2:
+; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT: .Ltmp1:
+; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT: add %i0, %o7, %i0
+; SPARC-NEXT: sethi %hi(value), %i1
+; SPARC-NEXT: add %i1, %lo(value), %i1
+; SPARC-NEXT: ld [%i0+%i1], %i0
+; SPARC-NEXT: ld [%i0], %i0
+; SPARC-NEXT: ret
+; SPARC-NEXT: restore
+;
+; SPARC64-LABEL: testCall:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -128, %sp
+; SPARC64-NEXT: .Ltmp0:
+; SPARC64-NEXT: call .Ltmp1
+; SPARC64-NEXT: .Ltmp2:
+; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT: .Ltmp1:
+; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT: add %i0, %o7, %i0
+; SPARC64-NEXT: sethi %hi(value), %i1
+; SPARC64-NEXT: add %i1, %lo(value), %i1
+; SPARC64-NEXT: ldx [%i0+%i1], %i0
+; SPARC64-NEXT: ld [%i0], %i0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %1 = load i32, ptr @value
+ ret i32 %1
+}
+
+attributes #0 = { "tune-cpu"="ultrasparc" }
diff --git a/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll
new file mode 100644
index 000000000000..286750a014e8
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
+; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
+
+;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
+;; with a fake `call`.
+;; All other SPARC64 targets implement it with `rd %pc, %o7`.
+;; Need to do the tests in separate files because apparently `tune-cpu`
+;; attribute applies to the entire file at once.
+
+@value = external global i32
+
+define i32 @testRdpc() nounwind #0 {
+; SPARC-LABEL: testRdpc:
+; SPARC: ! %bb.0:
+; SPARC-NEXT: save %sp, -96, %sp
+; SPARC-NEXT: .Ltmp0:
+; SPARC-NEXT: call .Ltmp1
+; SPARC-NEXT: .Ltmp2:
+; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT: .Ltmp1:
+; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT: add %i0, %o7, %i0
+; SPARC-NEXT: sethi %hi(value), %i1
+; SPARC-NEXT: add %i1, %lo(value), %i1
+; SPARC-NEXT: ld [%i0+%i1], %i0
+; SPARC-NEXT: ld [%i0], %i0
+; SPARC-NEXT: ret
+; SPARC-NEXT: restore
+;
+; SPARC64-LABEL: testRdpc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -128, %sp
+; SPARC64-NEXT: .Ltmp0:
+; SPARC64-NEXT: rd %pc, %o7
+; SPARC64-NEXT: .Ltmp2:
+; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT: .Ltmp1:
+; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT: add %i0, %o7, %i0
+; SPARC64-NEXT: sethi %hi(value), %i1
+; SPARC64-NEXT: add %i1, %lo(value), %i1
+; SPARC64-NEXT: ldx [%i0+%i1], %i0
+; SPARC64-NEXT: ld [%i0], %i0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %1 = load i32, ptr @value
+ ret i32 %1
+}
+
+attributes #0 = { "tune-cpu"="niagara" }