diff options
author | Koakuma <koachan@protonmail.com> | 2024-01-15 04:28:51 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-14 16:28:51 -0500 |
commit | 63f98290d09e1da05fb1217d1b760cbe24b76db9 (patch) | |
tree | ddb7d53229c8c157070315db9882dd746128aefe | |
parent | 72990df072a56996612169f07c5752a6924288bb (diff) |
[SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (#77196)
upstream/users/koachan/main.sparc-prefer-rdpc-over-call-to-implement-getpcx-for-64-bit-target
On 64-bit targets, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids polluting
the processor's predictor state.
The old behavior of using a fake CALL is retained when tuning for classic
UltraSPARC processors, since RDPC is much slower there.
A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads,
and about 7% speedup on INSERT/UPDATE loads.
-rw-r--r-- | llvm/lib/Target/Sparc/Sparc.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/SPARC/getpcx-call.ll | 51 | ||||
-rw-r--r-- | llvm/test/CodeGen/SPARC/getpcx-rdpc.ll | 51 |
4 files changed, 138 insertions, 7 deletions
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 1a71cfed3128..7b1033956524 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true", def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software emulation for floating point">; +//===----------------------------------------------------------------------===// +// SPARC Subtarget tuning features. +// + +def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", + "rd %pc, %XX is slow", [FeatureV9]>; + //==== Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" @@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant { // SPARC processors supported. //===----------------------------------------------------------------------===// -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; +class Proc<string Name, list<SubtargetFeature> Features, + list<SubtargetFeature> TuneFeatures = []> + : Processor<Name, NoItineraries, Features, TuneFeatures>; def : Proc<"generic", []>; def : Proc<"v7", [FeatureSoftMulDiv, FeatureNoFSMULD]>; @@ -118,9 +126,11 @@ def : Proc<"ma2480", [FeatureLeon, LeonCASA]>; def : Proc<"ma2485", [FeatureLeon, LeonCASA]>; def : Proc<"ma2x8x", [FeatureLeon, LeonCASA]>; def : Proc<"v9", [FeatureV9]>; -def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; +def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS], + [TuneSlowRDPC]>; def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2]>; + FeatureVIS2], + [TuneSlowRDPC]>; def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2]>; def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc, diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 
cca624e09267..215a8ea83190 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "MCTargetDesc/SparcTargetStreamer.h" #include "Sparc.h" #include "SparcInstrInfo.h" @@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer, OutStreamer.emitInstruction(CallInst, STI); } +static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst RDPCInst; + RDPCInst.setOpcode(SP::RDASR); + RDPCInst.addOperand(RD); + RDPCInst.addOperand(MCOperand::createReg(SP::ASR5)); + OutStreamer.emitInstruction(RDPCInst, STI); +} + static void EmitSETHI(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, const MCSubtargetInfo &STI) @@ -226,7 +236,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, MCOperand RegO7 = MCOperand::createReg(SP::O7); // <StartLabel>: - // call <EndLabel> + // <GET-PC> // This will be either `call <EndLabel>` or `rd %pc, %o7`. // <SethiLabel>: // sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO> // <EndLabel>: @@ -234,8 +244,17 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, // add <MO>, %o7, <MO> OutStreamer->emitLabel(StartLabel); - MCOperand Callee = createPCXCallOP(EndLabel, OutContext); - EmitCall(*OutStreamer, Callee, STI); + if (!STI.getTargetTriple().isSPARC64() || + STI.hasFeature(Sparc::TuneSlowRDPC)) { + MCOperand Callee = createPCXCallOP(EndLabel, OutContext); + EmitCall(*OutStreamer, Callee, STI); + } else { + // TODO find out whether it is possible to store PC + // in other registers, to enable leaf function optimization. + // (On the other hand, approx. over 97.8% of GETPCXes happen + // in non-leaf functions, so would this be worth the effort?) 
+ EmitRDPC(*OutStreamer, RegO7, STI); + } OutStreamer->emitLabel(SethiLabel); MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22, GOTLabel, StartLabel, SethiLabel, diff --git a/llvm/test/CodeGen/SPARC/getpcx-call.ll b/llvm/test/CodeGen/SPARC/getpcx-call.ll new file mode 100644 index 000000000000..72d7b5a0bc2f --- /dev/null +++ b/llvm/test/CodeGen/SPARC/getpcx-call.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s +; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s + +;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX +;; with a fake `call`. +;; All other SPARC64 targets implement it with `rd %pc, %o7`. +;; Need to do the tests in separate files because apparently `tune-cpu` +;; attribute applies to the entire file at once. + +@value = external global i32 + +define i32 @testCall() nounwind #0 { +; SPARC-LABEL: testCall: +; SPARC: ! %bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .Ltmp0: +; SPARC-NEXT: call .Ltmp1 +; SPARC-NEXT: .Ltmp2: +; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC-NEXT: .Ltmp1: +; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC-NEXT: add %i0, %o7, %i0 +; SPARC-NEXT: sethi %hi(value), %i1 +; SPARC-NEXT: add %i1, %lo(value), %i1 +; SPARC-NEXT: ld [%i0+%i1], %i0 +; SPARC-NEXT: ld [%i0], %i0 +; SPARC-NEXT: ret +; SPARC-NEXT: restore +; +; SPARC64-LABEL: testCall: +; SPARC64: ! 
%bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .Ltmp0: +; SPARC64-NEXT: call .Ltmp1 +; SPARC64-NEXT: .Ltmp2: +; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC64-NEXT: .Ltmp1: +; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC64-NEXT: add %i0, %o7, %i0 +; SPARC64-NEXT: sethi %hi(value), %i1 +; SPARC64-NEXT: add %i1, %lo(value), %i1 +; SPARC64-NEXT: ldx [%i0+%i1], %i0 +; SPARC64-NEXT: ld [%i0], %i0 +; SPARC64-NEXT: ret +; SPARC64-NEXT: restore + %1 = load i32, ptr @value + ret i32 %1 +} + +attributes #0 = { "tune-cpu"="ultrasparc" } diff --git a/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll new file mode 100644 index 000000000000..286750a014e8 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/getpcx-rdpc.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s +; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s + +;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX +;; with a fake `call`. +;; All other SPARC64 targets implement it with `rd %pc, %o7`. +;; Need to do the tests in separate files because apparently `tune-cpu` +;; attribute applies to the entire file at once. + +@value = external global i32 + +define i32 @testRdpc() nounwind #0 { +; SPARC-LABEL: testRdpc: +; SPARC: ! 
%bb.0: +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .Ltmp0: +; SPARC-NEXT: call .Ltmp1 +; SPARC-NEXT: .Ltmp2: +; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC-NEXT: .Ltmp1: +; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC-NEXT: add %i0, %o7, %i0 +; SPARC-NEXT: sethi %hi(value), %i1 +; SPARC-NEXT: add %i1, %lo(value), %i1 +; SPARC-NEXT: ld [%i0+%i1], %i0 +; SPARC-NEXT: ld [%i0], %i0 +; SPARC-NEXT: ret +; SPARC-NEXT: restore +; +; SPARC64-LABEL: testRdpc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .Ltmp0: +; SPARC64-NEXT: rd %pc, %o7 +; SPARC64-NEXT: .Ltmp2: +; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC64-NEXT: .Ltmp1: +; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC64-NEXT: add %i0, %o7, %i0 +; SPARC64-NEXT: sethi %hi(value), %i1 +; SPARC64-NEXT: add %i1, %lo(value), %i1 +; SPARC64-NEXT: ldx [%i0+%i1], %i0 +; SPARC64-NEXT: ld [%i0], %i0 +; SPARC64-NEXT: ret +; SPARC64-NEXT: restore + %1 = load i32, ptr @value + ret i32 %1 +} + +attributes #0 = { "tune-cpu"="niagara" } |