From 369081c54e43d1e954de5668914ec8580f48904a Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Tue, 5 Mar 2024 22:45:21 +0000 Subject: =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20changes=20to?= =?UTF-8?q?=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 12 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 6 + llvm/lib/Target/X86/X86ISelLowering.cpp | 205 +++++++++++++++++++------ llvm/lib/Target/X86/X86InstrInfo.cpp | 4 + llvm/lib/Target/X86/X86MCInstLower.cpp | 6 + llvm/test/CodeGen/X86/tlsdesc-dynamic.ll | 74 +++++++++ llvm/test/CodeGen/X86/tlsdesc.ll | 18 +++ 7 files changed, 276 insertions(+), 49 deletions(-) create mode 100644 llvm/test/CodeGen/X86/tlsdesc-dynamic.ll create mode 100644 llvm/test/CodeGen/X86/tlsdesc.ll diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 28a067d525e0..cdb0d87088c3 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -431,6 +431,18 @@ enum TOF { /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSLDM MO_TLSLDM, + /// MO_TLSDESC - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry holding the TLS descriptor for the + /// symbol. When the address of this descriptor is passed to a call to + /// the resolver function, it will return the offset from the thread pointer. + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSDESC + MO_TLSDESC, + /// MO_TLSCALL - On a symbol operand this marks the indirect call to + /// the resolver function, which returns the offset from the thread pointer. + /// See 'ELF Handling for Thread-Local Storage' for more details. 
+ /// SYMBOL_LABEL @TLSCALL + MO_TLSCALL, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the thread-pointer offset for the /// symbol. Used in the x86-64 initial exec TLS access model. diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 3395a13545e4..a69c4bcb71a7 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -271,6 +271,12 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; case X86II::MO_TLSLDM: O << "@TLSLDM"; break; + case X86II::MO_TLSDESC: + O << "@TLSDESC"; + break; + case X86II::MO_TLSCALL: + O << "@TLSCALL"; + break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; case X86II::MO_TPOFF: O << "@TPOFF"; break; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6eaaec407dbb..b527e6e27871 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "X86ISelLowering.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86ShuffleDecode.h" #include "X86.h" #include "X86CallingConv.h" @@ -18622,6 +18624,47 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } +static SDValue LowerToTLSDESC(SDValue Sym, SelectionDAG &DAG, const EVT PtrVT, + bool Is64Bit) { + + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = DAG.getEntryNode(); + SDValue Target; + SDLoc DL(Sym); + + if (const auto *GA = dyn_cast(Sym)) { + Target = + DAG.getTargetGlobalAddress(GA->getGlobal(), 
DL, GA->getValueType(0), + GA->getOffset(), X86II::MO_TLSDESC); + } else { + const auto *ES = cast(Sym); + Target = + DAG.getTargetExternalSymbol(ES->getSymbol(), PtrVT, X86II::MO_TLSDESC); + } + + SDValue Offset = DAG.getNode(X86ISD::Wrapper, DL, PtrVT, Target); + SDValue Ops[] = {Chain, Offset}; + Chain = DAG.getNode(X86ISD::TLSCALL, Target, NodeTys, Ops); + + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. + MFI.setAdjustsStack(true); + MFI.setHasCalls(true); + unsigned ReturnReg = Is64Bit ? X86::RAX : X86::EAX; + + SDValue Glue = Chain.getValue(1); + Offset = DAG.getCopyFromReg(Chain, DL, ReturnReg, PtrVT, Glue); + // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). + Value *Ptr = Constant::getNullValue( + PointerType::get(*DAG.getContext(), Is64Bit ? 257 : 256)); + + SDValue ThreadPointer = + DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), DAG.getIntPtrConstant(0, DL), + MachinePointerInfo(Ptr)); + + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32 static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, @@ -18641,7 +18684,12 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; - if (Is64Bit) { + + if (DAG.getTarget().useTLSDESC()) { + SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + X86II::MO_TLSDESC); + Base = LowerToTLSDESC(SymAddr, DAG, PtrVT, Is64Bit); + } else if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? 
X86::RAX : X86::EAX; Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, X86II::MO_TLSLD, /*LocalDynamic=*/true); @@ -18740,6 +18788,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: + if (DAG.getTarget().useTLSDESC()) + return LowerToTLSDESC(Op, DAG, PtrVT, Subtarget.is64Bit()); if (Subtarget.is64Bit()) { if (Subtarget.isTarget64BitLP64()) return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); @@ -35203,60 +35253,117 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const { - // This is pretty easy. We're taking the value that we received from - // our load from the relocation, sticking it in either RDI (x86-64) - // or EAX and doing an indirect call. The return value will then - // be in the normal return register. MachineFunction *F = BB->getParent(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); const MIMetadata MIMD(MI); - assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?"); - assert(MI.getOperand(3).isGlobal() && "This should be a global"); + assert((Subtarget.isTargetDarwin() || Subtarget.isTargetELF()) && + "Incompatible Target for TLSCALL"); + assert((MI.getOperand(3).isGlobal() || MI.getOperand(3).isSymbol()) && + "This should be a global or symbol"); - // Get a register mask for the lowered call. - // FIXME: The 32-bit calls have non-standard calling conventions. Use a - // proper register mask. - const uint32_t *RegMask = - Subtarget.is64Bit() ? 
- Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : - Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); - if (Subtarget.is64Bit()) { - MachineInstrBuilder MIB = - BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI) - .addReg(X86::RIP) - .addImm(0) - .addReg(0) - .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, - MI.getOperand(3).getTargetFlags()) - .addReg(0); - MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m)); - addDirectMem(MIB, X86::RDI); - MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); - } else if (!isPositionIndependent()) { - MachineInstrBuilder MIB = - BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) - .addReg(0) - .addImm(0) - .addReg(0) - .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, - MI.getOperand(3).getTargetFlags()) - .addReg(0); - MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); - addDirectMem(MIB, X86::EAX); - MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); + if (Subtarget.isTargetDarwin()) { + // This is pretty easy. We're taking the value that we received from + // our load from the relocation, sticking it in either RDI (x86-64) + // or EAX and doing an indirect call. The return value will then + // be in the normal return register. + + // Get a register mask for the lowered call. + // FIXME: The 32-bit calls have non-standard calling conventions. Use a + // proper register mask. + const uint32_t *RegMask = + Subtarget.is64Bit() + ? 
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() + : Subtarget.getRegisterInfo()->getCallPreservedMask(*F, + CallingConv::C); + if (Subtarget.is64Bit()) { + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m)); + addDirectMem(MIB, X86::RDI); + MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); + } else if (!isPositionIndependent()) { + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) + .addReg(0) + .addImm(0) + .addReg(0) + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); + } else { + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) + .addReg(TII->getGlobalBaseReg(F)) + .addImm(0) + .addReg(0) + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); + } } else { - MachineInstrBuilder MIB = - BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) - .addReg(TII->getGlobalBaseReg(F)) - .addImm(0) - .addReg(0) - .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, - MI.getOperand(3).getTargetFlags()) - .addReg(0); - MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); - addDirectMem(MIB, X86::EAX); - MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); + // For ELF TLS Descriptors, we create the correct ELF relocations so that + // we generate: + // lea a@tlsdesc(%rip), %rax + // call *a@tlscall(%rax) + // The 
TLSCALL will call a resolver function with a custom ABI that + // preserves all registers, and places the offset from the thread pointer + // in %rax/%eax. + + // Get a register mask for the lowered call. + const uint32_t *RegMask = Subtarget.getRegisterInfo()->getCallPreservedMask( + *F, CallingConv::PreserveAll); + const unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; + const unsigned IP = Subtarget.is64Bit() ? X86::RIP : X86::IP; + const auto LoadOp = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r; + const auto CallOp = Subtarget.is64Bit() ? X86::CALL64m : X86::CALL32m; + + MachineOperand Sym = MI.getOperand(3); + + MachineInstrBuilder MIB; + if (Sym.isGlobal()) { + MIB = BuildMI(*BB, MI, MIMD, TII->get(LoadOp), Reg) + .addReg(IP) + .addImm(0) + .addReg(0) + .addGlobalAddress(Sym.getGlobal(), 0, Sym.getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, MIMD, TII->get(CallOp)) + .addReg(Reg) + .addImm(1) + .addReg(0) + .addGlobalAddress(Sym.getGlobal(), 0, X86II::MO_TLSCALL) + .addReg(0); + + } else { + MIB = BuildMI(*BB, MI, MIMD, TII->get(LoadOp), Reg) + .addReg(IP) + .addImm(0) + .addReg(0) + .add({Sym}) + .addReg(0); + Sym.setTargetFlags(X86II::MO_TLSCALL); + MIB = BuildMI(*BB, MI, MIMD, TII->get(CallOp)) + .addReg(Reg) + .addImm(1) + .addReg(0) + .add({Sym}) + .addReg(0); + } + + MIB.addReg(Reg, RegState::ImplicitDefine).addRegMask(RegMask); } MI.eraseFromParent(); // The pseudo instruction is gone now. 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 25be1e2f6833..629e3edd2cf9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10088,6 +10088,8 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { {MO_TLSGD, "x86-tlsgd"}, {MO_TLSLD, "x86-tlsld"}, {MO_TLSLDM, "x86-tlsldm"}, + {MO_TLSDESC, "x86-tlsdesc"}, + {MO_TLSCALL, "x86-tlscall"}, {MO_GOTTPOFF, "x86-gottpoff"}, {MO_INDNTPOFF, "x86-indntpoff"}, {MO_TPOFF, "x86-tpoff"}, @@ -10241,6 +10243,8 @@ struct LDTLSCleanup : public MachineFunctionPass { switch (I->getOpcode()) { case X86::TLS_base_addr32: case X86::TLS_base_addr64: + case X86::TLSCall_32: + case X86::TLSCall_64: if (TLSBaseAddrReg) I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg); else diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index d3b7d97a83ca..de5dedde8cad 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; + case X86II::MO_TLSDESC: + RefKind = MCSymbolRefExpr::VK_TLSDESC; + break; + case X86II::MO_TLSCALL: + RefKind = MCSymbolRefExpr::VK_TLSCALL; + break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; diff --git a/llvm/test/CodeGen/X86/tlsdesc-dynamic.ll b/llvm/test/CodeGen/X86/tlsdesc-dynamic.ll new file mode 100644 index 000000000000..8f855a5efc54 --- /dev/null +++ b/llvm/test/CodeGen/X86/tlsdesc-dynamic.ll @@ -0,0 +1,74 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-tlsdesc %s -o - | FileCheck %s --check-prefixes=GD +; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-tlsdesc -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefixes=GD-RELOC %s + +@general_dynamic_var = external thread_local global i32 + +define i32 
@test_generaldynamic() { + %val = load i32, ptr @general_dynamic_var + ret i32 %val +; GD: test_generaldynamic: +; GD: leaq general_dynamic_var@tlsdesc(%rip), [[REG:%.*]] +; GD-NEXT: callq *general_dynamic_var@tlscall([[REG]]) +; GD-NEXT: movl %fs:([[REG]]), + +; GD-RELOC: R_X86_64_GOTPC32_TLSDESC general_dynamic_var +; GD-RELOC: R_X86_64_TLSDESC_CALL general_dynamic_var +} + +define ptr @test_generaldynamic_addr() { + ret ptr @general_dynamic_var +; GD: test_generaldynamic_addr: +; GD: leaq general_dynamic_var@tlsdesc(%rip), [[REG:%.*]] +; GD-NEXT: callq *general_dynamic_var@tlscall([[REG]]) +; GD-NEXT: addq %fs:0, %rax + +; GD-RELOC: R_X86_64_GOTPC32_TLSDESC general_dynamic_var +; GD-RELOC: R_X86_64_TLSDESC_CALL general_dynamic_var +} + +@local_dynamic_var = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic() { + %val = load i32, ptr @local_dynamic_var + ret i32 %val +; GD: test_localdynamic: +; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]] +; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]]) +; GD-NEXT: movl %fs:local_dynamic_var@DTPOFF(%rax), %eax + +; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var +} + +define ptr @test_localdynamic_addr() { + ret ptr @local_dynamic_var +; GD: test_localdynamic_addr: +; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]] +; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]]) +; GD-NEXT: movq %fs:0, %rcx +; GD-NEXT: leaq local_dynamic_var@DTPOFF(%rcx,[[REG]]) + +; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var +} + +@local_dynamic_var2 = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic_deduplicate() { + %val = load i32, ptr @local_dynamic_var + %val2 = load i32, ptr @local_dynamic_var2 + %sum = add i32 %val, %val2 + ret i32 %sum +; GD: 
test_localdynamic_deduplicate: +; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]] +; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]]) +; GD-NEXT: movl %fs:local_dynamic_var@DTPOFF(%rax) +; GD-NEXT: addl %fs:local_dynamic_var2@DTPOFF(%rax) + +; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_ +; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var2 +} + diff --git a/llvm/test/CodeGen/X86/tlsdesc.ll b/llvm/test/CodeGen/X86/tlsdesc.ll new file mode 100644 index 000000000000..fc89f414bdc3 --- /dev/null +++ b/llvm/test/CodeGen/X86/tlsdesc.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-tlsdesc %s -o - | FileCheck %s --check-prefixes=INST +; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -filetype=obj -enable-tlsdesc < %s | llvm-objdump -r - | FileCheck --check-prefixes=RELOC %s + +@var = thread_local global i32 zeroinitializer + +define i32 @test_thread_local() nounwind { + + %val = load i32, ptr @var + ret i32 %val + +; INST: test_thread_local: +; INST: leaq var@tlsdesc(%rip), [[REG:%.*]] +; INST-NEXT: callq *var@tlscall([[REG]]) +; INST-NEXT: movl %fs:([[REG]]), + +; RELOC: R_X86_64_GOTPC32_TLSDESC var +; RELOC: R_X86_64_TLSDESC_CALL var +} -- cgit v1.2.3