diff options
Diffstat (limited to 'llvm/lib')
27 files changed, 750 insertions, 308 deletions
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 0694c2995dfc..1ab856ac8830 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -350,10 +350,7 @@ void Lint::visitCallBase(CallBase &I) { } case Intrinsic::vastart: - Check(I.getParent()->getParent()->isVarArg(), - "Undefined behavior: va_start called in a non-varargs function", - &I); - + // vastart in non-varargs function is rejected by the verifier visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), std::nullopt, nullptr, MemRef::Read | MemRef::Write); break; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index c3bc3203b636..ae43e9ccf611 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) { } } } + +bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_FABS: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FCOPYSIGN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_FPTRUNC: + case TargetOpcode::G_FREM: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: + return true; + default: + return false; + } +} diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0fa0bf2609bb..c36b1cc9039c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24467,6 +24467,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) return DAG.getSplatVector(NVT, DL, V.getOperand(0)); + // extract_subvector(insert_subvector(x,y,c1),c2) + // --> extract_subvector(y,c2-c1) + // iff we're just extracting from the inserted subvector. + if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { + SDValue InsSub = V.getOperand(1); + EVT InsSubVT = InsSub.getValueType(); + unsigned NumInsElts = InsSubVT.getVectorMinNumElements(); + unsigned InsIdx = V.getConstantOperandVal(2); + unsigned NumSubElts = NVT.getVectorMinNumElements(); + if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) && + TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) && + InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub, + DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL)); + } + // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') if (V.getOpcode() == ISD::BITCAST && diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 57a3f6a65e00..7a9cfdf5c3fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { } SDValue Unrolled = DAG.UnrollVectorOp(Node); - for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) - Results.push_back(Unrolled.getValue(I)); + if (Node->getNumValues() == 1) { + Results.push_back(Unrolled); + } else { + assert(Node->getNumValues() == Unrolled->getNumValues() && + "VectorLegalizer Expand returned wrong number of results!"); + for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) + Results.push_back(Unrolled.getValue(I)); + } } SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index f64ded4f2cf9..6e7b67ded23c 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1809,8 +1809,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, else if (attr.hasRetAttr(Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0)); + for (unsigned i = 0; i < NumParts; ++i) { + ISD::ArgFlagsTy OutFlags = Flags; + if (NumParts > 1 && i == 0) + OutFlags.setSplit(); + else if (i == NumParts - 1 && i != 0) + OutFlags.setSplitEnd(); + + Outs.push_back( + ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0)); + } } } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 2c480fb76ee4..634b2dd5119e 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5341,10 +5341,11 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { Triple T(TT); - // The only data layout upgrades needed for pre-GCN are setting the address - // space of globals to 1. - if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") && - !DL.starts_with("G")) { + // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting + // the address space of globals to 1. This does not apply to SPIRV Logical. + if (((T.isAMDGPU() && !T.isAMDGCN()) || + (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) && + !DL.contains("-G") && !DL.starts_with("G")) { return DL.empty() ? std::string("G1") : (DL + "-G1").str(); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 516d4a051556..4cd61e6e531b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5798,6 +5798,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; } + case Intrinsic::vastart: { + Check(Call.getFunction()->isVarArg(), + "va_start called in a non-varargs function"); + break; + } case Intrinsic::vector_reduce_and: case Intrinsic::vector_reduce_or: case Intrinsic::vector_reduce_xor: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index d39de770eaf1..d5c4ce1888e7 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl( } } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getSameKindOfOperandsMapping( const MachineInstr &MI) const { diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp index 05e10a95b157..1dda1b89b2d3 100644 --- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp +++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -101,6 +101,7 @@ public: } } + SII->fixImplicitOperands(*VOPDInst); for (auto CompIdx : VOPD::COMPONENTS) VOPDInst.copyImplicitOps(*MI[CompIdx]); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 245731ad5fc7..acb54fd10b90 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -612,13 +612,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Reserve null register - it shall never be allocated reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64); - // Disallow vcc_hi allocation in wave32. It may be allocated but most likely - // will result in bugs. - if (isWave32) { - Reserved.set(AMDGPU::VCC); - Reserved.set(AMDGPU::VCC_HI); - } - // Reserve SGPRs. // unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index de492f2b1f0a..98f5014a34b1 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -226,11 +226,8 @@ bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign( MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align)); const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec]; if (MCSym == nullptr) { - // Create a symbol and make the value of symbol is zero. - MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align"); - Sym->setFragment(&*Sec->getBeginSymbol()->getFragment()); - Asm.registerSymbol(*Sym); - MCSym = MCSymbolRefExpr::create(Sym, Ctx); + // Use section symbol directly. + MCSym = MCSymbolRefExpr::create(Sec->getBeginSymbol(), Ctx); getSecToAlignSym()[Sec] = MCSym; } diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 6af1fd8c88e5..62b58cba9f24 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, } } -// Instructions where all register operands are floating point. -static bool isFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - return true; - default: - return false; - } -} - // Instructions where use operands are floating point registers. // Def operands are general purpose. static bool isFloatingPointOpcodeUse(unsigned Opc) { @@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) { case TargetOpcode::G_FCMP: return true; default: - return isFloatingPointOpcode(Opc); + return isPreISelGenericFloatingPointOpcode(Opc); } } @@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) { case TargetOpcode::G_UITOFP: return true; default: - return isFloatingPointOpcode(Opc); + return isPreISelGenericFloatingPointOpcode(Opc); } } diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 6aeef145e307..125a49de7b27 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -13,6 +13,7 @@ #include "PPCRegisterBankInfo.h" #include "PPCRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands); } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -/// FIXME: this is copied from target AArch64. Needs some code refactor here to -/// put this function in GlobalISel/Utils.cpp. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - /// \returns true if a given intrinsic \p ID only uses and defines FPRs. static bool isFPIntrinsic(unsigned ID) { // TODO: Add more intrinsics. diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index 45e19cdea300..c18892ac62f2 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -34,14 +34,15 @@ private: // Whether this is assigning args for a return. bool IsRet; - // true if assignArg has been called for a mask argument, false otherwise. - bool AssignedFirstMaskArg = false; + RVVArgDispatcher &RVVDispatcher; public: RISCVOutgoingValueAssigner( - RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet) + RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet, + RVVArgDispatcher &RVVDispatcher) : CallLowering::OutgoingValueAssigner(nullptr), - RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {} + RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet), + RVVDispatcher(RVVDispatcher) {} bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -51,16 +52,9 @@ public: const DataLayout &DL = MF.getDataLayout(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg && - ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) { - FirstMaskArgument = ValNo; - AssignedFirstMaskArg = true; - } - if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty, - *Subtarget.getTargetLowering(), FirstMaskArgument)) + *Subtarget.getTargetLowering(), RVVDispatcher)) return true; StackSize = State.getStackSize(); @@ -181,14 +175,15 @@ private: // Whether this is assigning args from a return. bool IsRet; - // true if assignArg has been called for a mask argument, false otherwise. - bool AssignedFirstMaskArg = false; + RVVArgDispatcher &RVVDispatcher; public: RISCVIncomingValueAssigner( - RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet) + RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet, + RVVArgDispatcher &RVVDispatcher) : CallLowering::IncomingValueAssigner(nullptr), - RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {} + RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet), + RVVDispatcher(RVVDispatcher) {} bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -201,16 +196,9 @@ public: if (LocVT.isScalableVector()) MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg && - ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) { - FirstMaskArgument = ValNo; - AssignedFirstMaskArg = true; - } - if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT, LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty, - *Subtarget.getTargetLowering(), FirstMaskArgument)) + *Subtarget.getTargetLowering(), RVVDispatcher)) return true; StackSize = State.getStackSize(); @@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 4> SplitRetInfos; splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC); + RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(F.getReturnType())}; RISCVOutgoingValueAssigner Assigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/true); + /*IsRet=*/true, Dispatcher); RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret); return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos, MIRBuilder, CC, F.isVarArg()); @@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CallingConv::ID CC = F.getCallingConv(); SmallVector<ArgInfo, 32> SplitArgInfos; + SmallVector<Type *, 4> TypeList; unsigned Index = 0; for (auto &Arg : F.args()) { // Construct the ArgInfo object from destination register and argument type. @@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // correspondingly and appended to SplitArgInfos. splitToValueTypes(AInfo, SplitArgInfos, DL, CC); + TypeList.push_back(Arg.getType()); + ++Index; } + RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(TypeList)}; RISCVIncomingValueAssigner Assigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/false); + /*IsRet=*/false, Dispatcher); RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo()); SmallVector<CCValAssign, 16> ArgLocs; @@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 32> SplitArgInfos; SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<Type *, 4> TypeList; for (auto &AInfo : Info.OrigArgs) { // Handle any required unmerging of split value types from a given VReg into // physical registers. ArgInfo objects are constructed correspondingly and // appended to SplitArgInfos. splitToValueTypes(AInfo, SplitArgInfos, DL, CC); + TypeList.push_back(AInfo.Ty); } // TODO: Support tail calls. @@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); + RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(TypeList)}; RISCVOutgoingValueAssigner ArgAssigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/false); + /*IsRet=*/false, ArgDispatcher); RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call); if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos, MIRBuilder, CC, Info.IsVarArg)) @@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 4> SplitRetInfos; splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC); + RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(), + ArrayRef(F.getReturnType())}; RISCVIncomingValueAssigner RetAssigner( CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV, - /*IsRet=*/true); + /*IsRet=*/true, RetDispatcher); RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call); if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos, MIRBuilder, CC, Info.IsVarArg)) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index ca77a9729e03..c1fde738c000 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -142,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) { return &RISCV::ValueMappings[Idx]; } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -/// FIXME: this is copied from target AArch64. Needs some code refactor here to -/// put this function in GlobalISel/Utils.cpp. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOPYSIGN: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - // TODO: Make this more like AArch64? bool RISCVRegisterBankInfo::hasFPConstraints( const MachineInstr &MI, const MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 794455aa7304..59962216e0c0 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1226,9 +1226,9 @@ def TuneNoSinkSplatOperands "false", "Disable sink splat operands to enable .vx, .vf," ".wx, and .wf instructions">; -def TuneNoStripWSuffix - : SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false", - "Disable strip W suffix">; +def TunePreferWInst + : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", + "Prefer instructions with W suffix">; def TuneConditionalCompressedMoveFusion : SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1d1ea6bae6c1..765838aafb58 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -18223,33 +18224,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, return false; } -static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, - std::optional<unsigned> FirstMaskArgument, - CCState &State, const RISCVTargetLowering &TLI) { - const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); - if (RC == &RISCV::VRRegClass) { - // Assign the first mask argument to V0. - // This is an interim calling convention and it may be changed in the - // future. - if (FirstMaskArgument && ValNo == *FirstMaskArgument) - return State.AllocateReg(RISCV::V0); - return State.AllocateReg(ArgVRs); - } - if (RC == &RISCV::VRM2RegClass) - return State.AllocateReg(ArgVRM2s); - if (RC == &RISCV::VRM4RegClass) - return State.AllocateReg(ArgVRM4s); - if (RC == &RISCV::VRM8RegClass) - return State.AllocateReg(ArgVRM8s); - llvm_unreachable("Unhandled register class for ValueType"); -} - // Implements the RISC-V calling convention. Returns true upon failure. bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument) { + RVVArgDispatcher &RVVDispatcher) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; @@ -18418,7 +18398,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, else if (ValVT == MVT::f64 && !UseGPRForF64) Reg = State.AllocateReg(ArgFPR64s); else if (ValVT.isVector()) { - Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); + Reg = RVVDispatcher.getNextPhysReg(); if (!Reg) { // For return values, the vector must be passed fully via registers or // via the stack. @@ -18504,9 +18484,15 @@ void RISCVTargetLowering::analyzeInputArgs( unsigned NumArgs = Ins.size(); FunctionType *FType = MF.getFunction().getFunctionType(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Ins); + RVVArgDispatcher Dispatcher; + if (IsRet) { + Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)}; + } else { + SmallVector<Type *, 4> TypeList; + for (const Argument &Arg : MF.getFunction().args()) + TypeList.push_back(Arg.getType()); + Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)}; + } for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; @@ -18521,7 +18507,7 @@ void RISCVTargetLowering::analyzeInputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, - FirstMaskArgument)) { + Dispatcher)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT << '\n'); llvm_unreachable(nullptr); @@ -18535,9 +18521,13 @@ void RISCVTargetLowering::analyzeOutputArgs( CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { unsigned NumArgs = Outs.size(); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Outs); + SmallVector<Type *, 4> TypeList; + if (IsRet) + TypeList.push_back(MF.getFunction().getReturnType()); + else if (CLI) + for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs()) + TypeList.push_back(Arg.Ty); + RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)}; for (unsigned i = 0; i != NumArgs; i++) { MVT ArgVT = Outs[i].VT; @@ -18547,7 +18537,7 @@ void RISCVTargetLowering::analyzeOutputArgs( RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, - FirstMaskArgument)) { + Dispatcher)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT << "\n"); llvm_unreachable(nullptr); @@ -18728,7 +18718,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument) { + RVVArgDispatcher &RVVDispatcher) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -18806,13 +18796,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, } if (LocVT.isVector()) { - if (unsigned Reg = - allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { + MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg(); + if (AllocatedVReg) { // Fixed-length vectors are located in the corresponding scalable-vector // container types. if (ValVT.isFixedLengthVector()) LocVT = TLI.getContainerForFixedLengthVector(LocVT); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc( + CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { @@ -19440,17 +19431,15 @@ bool RISCVTargetLowering::CanLowerReturn( SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); - std::optional<unsigned> FirstMaskArgument; - if (Subtarget.hasVInstructions()) - FirstMaskArgument = preAssignMask(Outs); + RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)}; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, - ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, - *this, FirstMaskArgument)) + ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, + nullptr, *this, Dispatcher)) return false; } return true; @@ -21247,6 +21236,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { return Subtarget.getMinimumJumpTableEntries(); } +// Handle single arg such as return value. +template <typename Arg> +void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) { + // This lambda determines whether an array of types are constructed by + // homogeneous vector types. + auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) { + // First, extract the first element in the argument type. + auto It = ArgList.begin(); + MVT FirstArgRegType = It->VT; + + // Return if there is no return or the type needs split. + if (It == ArgList.end() || It->Flags.isSplit()) + return false; + + ++It; + + // Return if this argument type contains only 1 element, or it's not a + // vector type. + if (It == ArgList.end() || !FirstArgRegType.isScalableVector()) + return false; + + // Second, check if the following elements in this argument type are all the + // same. + for (; It != ArgList.end(); ++It) + if (It->Flags.isSplit() || It->VT != FirstArgRegType) + return false; + + return true; + }; + + if (isHomogeneousScalableVectorType(ArgList)) { + // Handle as tuple type + RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false}); + } else { + // Handle as normal vector type + bool FirstVMaskAssigned = false; + for (const auto &OutArg : ArgList) { + MVT RegisterVT = OutArg.VT; + + // Skip non-RVV register type + if (!RegisterVT.isVector()) + continue; + + if (RegisterVT.isFixedLengthVector()) + RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT); + + if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) { + RVVArgInfos.push_back({1, RegisterVT, true}); + FirstVMaskAssigned = true; + continue; + } + + RVVArgInfos.push_back({1, RegisterVT, false}); + } + } +} + +// Handle multiple args. +template <> +void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) { + const DataLayout &DL = MF->getDataLayout(); + const Function &F = MF->getFunction(); + LLVMContext &Context = F.getContext(); + + bool FirstVMaskAssigned = false; + for (Type *Ty : TypeList) { + StructType *STy = dyn_cast<StructType>(Ty); + if (STy && STy->containsHomogeneousScalableVectorTypes()) { + Type *ElemTy = STy->getTypeAtIndex(0U); + EVT VT = TLI->getValueType(DL, ElemTy); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT); + + RVVArgInfos.push_back( + {NumRegs * STy->getNumElements(), RegisterVT, false}); + } else { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, DL, Ty, ValueVTs); + + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { + EVT VT = ValueVTs[Value]; + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT); + + // Skip non-RVV register type + if (!RegisterVT.isVector()) + continue; + + if (RegisterVT.isFixedLengthVector()) + RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT); + + if (!FirstVMaskAssigned && + RegisterVT.getVectorElementType() == MVT::i1) { + RVVArgInfos.push_back({1, RegisterVT, true}); + FirstVMaskAssigned = true; + --NumRegs; + } + + RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false}); + } + } + } +} + +void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul, + unsigned StartReg) { + assert((StartReg % LMul) == 0 && + "Start register number should be multiple of lmul"); + const MCPhysReg *VRArrays; + switch (LMul) { + default: + report_fatal_error("Invalid lmul"); + case 1: + VRArrays = ArgVRs; + break; + case 2: + VRArrays = ArgVRM2s; + break; + case 4: + VRArrays = ArgVRM4s; + break; + case 8: + VRArrays = ArgVRM8s; + break; + } + + for (unsigned i = 0; i < NF; ++i) + if (StartReg) + AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]); + else + AllocatedPhysRegs.push_back(MCPhysReg()); +} + +/// This function determines if each RVV argument is passed by register, if the +/// argument can be assigned to a VR, then give it a specific register. +/// Otherwise, assign the argument to 0 which is a invalid MCPhysReg. +void RVVArgDispatcher::compute() { + uint32_t AssignedMap = 0; + auto allocate = [&](const RVVArgInfo &ArgInfo) { + // Allocate first vector mask argument to V0. + if (ArgInfo.FirstVMask) { + AllocatedPhysRegs.push_back(RISCV::V0); + return; + } + + unsigned RegsNeeded = divideCeil( + ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock); + unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded; + for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs; + StartReg += RegsNeeded) { + uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg; + if ((AssignedMap & Map) == 0) { + allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8); + AssignedMap |= Map; + return; + } + } + + allocatePhysReg(ArgInfo.NF, RegsNeeded, 0); + }; + + for (unsigned i = 0; i < RVVArgInfos.size(); ++i) + allocate(RVVArgInfos[i]); +} + +MCPhysReg RVVArgDispatcher::getNextPhysReg() { + assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range"); + return AllocatedPhysRegs[CurIdx++]; +} + namespace llvm::RISCVVIntrinsicsTable { #define GET_RISCVVIntrinsicsTable_IMPL diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index e2633733c31b..b10da3d40bef 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -24,6 +24,7 @@ namespace llvm { class InstructionCost; class RISCVSubtarget; struct RISCVRegisterInfo; +class RVVArgDispatcher; namespace RISCVISD { // clang-format off @@ -875,7 +876,7 @@ public: ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, @@ -1017,19 +1018,71 @@ private: unsigned getMinimumJumpTableEntries() const override; }; +/// As per the spec, the rules for passing vector arguments are as follows: +/// +/// 1. For the first vector mask argument, use v0 to pass it. +/// 2. For vector data arguments or rest vector mask arguments, starting from +/// the v8 register, if a vector register group between v8-v23 that has not been +/// allocated can be found and the first register number is a multiple of LMUL, +/// then allocate this vector register group to the argument and mark these +/// registers as allocated. Otherwise, pass it by reference and are replaced in +/// the argument list with the address. +/// 3. For tuple vector data arguments, starting from the v8 register, if +/// NFIELDS consecutive vector register groups between v8-v23 that have not been +/// allocated can be found and the first register number is a multiple of LMUL, +/// then allocate these vector register groups to the argument and mark these +/// registers as allocated. Otherwise, pass it by reference and are replaced in +/// the argument list with the address. +class RVVArgDispatcher { +public: + static constexpr unsigned NumArgVRs = 16; + + struct RVVArgInfo { + unsigned NF; + MVT VT; + bool FirstVMask = false; + }; + + template <typename Arg> + RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI, + ArrayRef<Arg> ArgList) + : MF(MF), TLI(TLI) { + constructArgInfos(ArgList); + compute(); + } + + RVVArgDispatcher() = default; + + MCPhysReg getNextPhysReg(); + +private: + SmallVector<RVVArgInfo, 4> RVVArgInfos; + SmallVector<MCPhysReg, 4> AllocatedPhysRegs; + + const MachineFunction *MF = nullptr; + const RISCVTargetLowering *TLI = nullptr; + + unsigned CurIdx = 0; + + template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret); + void compute(); + void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1, + unsigned StartReg = 0); +}; + namespace RISCV { bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, - std::optional<unsigned> FirstMaskArgument); + RVVArgDispatcher &RVVDispatcher); bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b0fda040519a..668062c8d33f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2719,6 +2719,50 @@ std::string RISCVInstrInfo::createMIROperandComment( } // clang-format off +#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \ + RISCV::Pseudo##OP##_##LMUL + +#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \ + RISCV::Pseudo##OP##_##LMUL##_MASK + +#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \ + CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) + +#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \ + CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4) + +#define CASE_RVV_OPCODE_UNMASK(OP) \ + CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8) + +#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \ + CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M4) + +#define CASE_RVV_OPCODE_MASK(OP) \ + CASE_RVV_OPCODE_MASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_MASK_LMUL(OP, M8) + +#define CASE_RVV_OPCODE_WIDEN(OP) \ + CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ + case CASE_RVV_OPCODE_MASK_WIDEN(OP) + +#define CASE_RVV_OPCODE(OP) \ + CASE_RVV_OPCODE_UNMASK(OP): \ + case CASE_RVV_OPCODE_MASK(OP) +// clang-format on + +// clang-format off #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \ RISCV::PseudoV##OP##_##TYPE##_##LMUL @@ -2798,6 +2842,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, case RISCV::PseudoCCMOVGPR: // Operands 4 and 5 are commutable. return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); + case CASE_RVV_OPCODE(VADD_VV): + case CASE_RVV_OPCODE(VAND_VV): + case CASE_RVV_OPCODE(VOR_VV): + case CASE_RVV_OPCODE(VXOR_VV): + case CASE_RVV_OPCODE_MASK(VMSEQ_VV): + case CASE_RVV_OPCODE_MASK(VMSNE_VV): + case CASE_RVV_OPCODE(VMIN_VV): + case CASE_RVV_OPCODE(VMINU_VV): + case CASE_RVV_OPCODE(VMAX_VV): + case CASE_RVV_OPCODE(VMAXU_VV): + case CASE_RVV_OPCODE(VMUL_VV): + case CASE_RVV_OPCODE(VMULH_VV): + case CASE_RVV_OPCODE(VMULHU_VV): + case CASE_RVV_OPCODE_WIDEN(VWADD_VV): + case CASE_RVV_OPCODE_WIDEN(VWADDU_VV): + case CASE_RVV_OPCODE_WIDEN(VWMUL_VV): + case CASE_RVV_OPCODE_WIDEN(VWMULU_VV): + case CASE_RVV_OPCODE_WIDEN(VWMACC_VV): + case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV): + case CASE_RVV_OPCODE_UNMASK(VADC_VVM): + // Operands 2 and 3 are commutable. + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); case CASE_VFMA_SPLATS(FMADD): case CASE_VFMA_SPLATS(FMSUB): case CASE_VFMA_SPLATS(FMACC): diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 686bfd1af0d0..0b8317925097 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2129,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass, LMULInfo MInfo, string Constraint = "", int sew = 0, - int TargetConstraintType = 1> { - let VLMul = MInfo.value, SEW=sew in { + int TargetConstraintType = 1, + bit Commutable = 0> { + let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>; @@ -2169,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, string Constraint = "", - int TargetConstraintType = 1> { - let VLMul = MInfo.value in { + int TargetConstraintType = 1, + bit Commutable = 0> { + let VLMul = MInfo.value, isCommutable = Commutable in { def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class, Constraint, TargetConstraintType>; let ForceTailAgnostic = true in @@ -2228,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass, } -multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> { - defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>; +multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> { + defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>; } multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> { @@ -2333,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> { // * The destination EEW is greater than the source EEW, the source EMUL is // at least 1, and the overlap is in the highest-numbered part of the // destination register group is legal. Otherwise, it is illegal. -multiclass VPseudoBinaryW_VV<LMULInfo m> { +multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> { defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m, - "@earlyclobber $rd", TargetConstraintType=3>; + "@earlyclobber $rd", TargetConstraintType=3, + Commutable=Commutable>; } multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> { @@ -2455,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> { +multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1, + bit Commutable = 0> { + let isCommutable = Commutable in def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, m.vrclass, m.vrclass, m, 1, "", @@ -2669,8 +2674,10 @@ multiclass PseudoVEXT_VF8 { // lowest-numbered part of the source register group". // With LMUL<=1 the source and dest occupy a single register so any overlap // is in the lowest-numbered part. -multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> { - defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", TargetConstraintType>; +multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1, + bit Commutable = 0> { + defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", + TargetConstraintType, Commutable=Commutable>; } multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> { @@ -2749,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = } } -multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { +multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "", + bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m, Constraint>, + defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>, SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryV_VX<m, Constraint>, @@ -2802,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM { multiclass VPseudoVMINMAX_VV_VX { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m>, + defm "" : VPseudoBinaryV_VV<m, Commutable=1>, SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>; defm "" : VPseudoBinaryV_VX<m>, SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>; } } -multiclass VPseudoVMUL_VV_VX { +multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryV_VV<m>, + defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>; defm "" : VPseudoBinaryV_VX<m>, SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>; @@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> { } } -multiclass VPseudoVWALU_VV_VX { +multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoBinaryW_VV<m>, + defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VX<m>, @@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX { } } -multiclass VPseudoVWMUL_VV_VX { +multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoBinaryW_VV<m>, + defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>, SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx, forceMergeOpRead=true>; defm "" : VPseudoBinaryW_VX<m>, @@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM { multiclass VPseudoVCALU_VM_XM_IM { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoTiedBinaryV_VM<m>, + defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>, SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMergeOpRead=true>; defm "" : VPseudoTiedBinaryV_XM<m>, @@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, sew, Commutable=1>; } -multiclass VPseudoTernaryW_VV<LMULInfo m> { +multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> { defvar constraint = "@earlyclobber $rd"; defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m, - constraint, /*Commutable*/ 0, TargetConstraintType=3>; + constraint, Commutable=Commutable, TargetConstraintType=3>; } multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> { @@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> { } } -multiclass VPseudoVWMAC_VV_VX { +multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> { foreach m = MxListW in { defvar mx = m.MX; - defm "" : VPseudoTernaryW_VV<m>, + defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>, SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", mx>; defm "" : VPseudoTernaryW_VX<m>, @@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM { } } -multiclass VPseudoVCMPM_VV_VX_VI { +multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> { foreach m = MxList in { defvar mx = m.MX; - defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>, + defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>, SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>; defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>, SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>; @@ -6246,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF; //===----------------------------------------------------------------------===// // 11.1. Vector Single-Width Integer Add and Subtract //===----------------------------------------------------------------------===// -defm PseudoVADD : VPseudoVALU_VV_VX_VI; +defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>; defm PseudoVSUB : VPseudoVALU_VV_VX; defm PseudoVRSUB : VPseudoVALU_VX_VI; @@ -6311,9 +6319,9 @@ foreach vti = AllIntegerVectors in { //===----------------------------------------------------------------------===// // 11.2. Vector Widening Integer Add/Subtract //===----------------------------------------------------------------------===// -defm PseudoVWADDU : VPseudoVWALU_VV_VX; +defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>; defm PseudoVWSUBU : VPseudoVWALU_VV_VX; -defm PseudoVWADD : VPseudoVWALU_VV_VX; +defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>; defm PseudoVWSUB : VPseudoVWALU_VV_VX; defm PseudoVWADDU : VPseudoVWALU_WV_WX; defm PseudoVWSUBU : VPseudoVWALU_WV_WX; @@ -6344,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">; //===----------------------------------------------------------------------===// // 11.5. Vector Bitwise Logical Instructions //===----------------------------------------------------------------------===// -defm PseudoVAND : VPseudoVALU_VV_VX_VI; -defm PseudoVOR : VPseudoVALU_VV_VX_VI; -defm PseudoVXOR : VPseudoVALU_VV_VX_VI; +defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>; +defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>; +defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>; //===----------------------------------------------------------------------===// // 11.6. Vector Single-Width Bit Shift Instructions @@ -6364,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI; //===----------------------------------------------------------------------===// // 11.8. Vector Integer Comparison Instructions //===----------------------------------------------------------------------===// -defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI; -defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI; +defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>; +defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>; defm PseudoVMSLTU : VPseudoVCMPM_VV_VX; defm PseudoVMSLT : VPseudoVCMPM_VV_VX; defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI; @@ -6384,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX; //===----------------------------------------------------------------------===// // 11.10. Vector Single-Width Integer Multiply Instructions //===----------------------------------------------------------------------===// -defm PseudoVMUL : VPseudoVMUL_VV_VX; -defm PseudoVMULH : VPseudoVMUL_VV_VX; -defm PseudoVMULHU : VPseudoVMUL_VV_VX; +defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>; +defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>; +defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>; defm PseudoVMULHSU : VPseudoVMUL_VV_VX; //===----------------------------------------------------------------------===// @@ -6400,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX; //===----------------------------------------------------------------------===// // 11.12. Vector Widening Integer Multiply Instructions //===----------------------------------------------------------------------===// -defm PseudoVWMUL : VPseudoVWMUL_VV_VX; -defm PseudoVWMULU : VPseudoVWMUL_VV_VX; +defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>; +defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>; defm PseudoVWMULSU : VPseudoVWMUL_VV_VX; //===----------------------------------------------------------------------===// @@ -6415,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA; //===----------------------------------------------------------------------===// // 11.14. Vector Widening Integer Multiply-Add Instructions //===----------------------------------------------------------------------===// -defm PseudoVWMACCU : VPseudoVWMAC_VV_VX; -defm PseudoVWMACC : VPseudoVWMAC_VV_VX; +defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>; +defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>; defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX; defm PseudoVWMACCUS : VPseudoVWMAC_VX; diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 39d420c2fbf0..ead91c5656be 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -12,15 +12,24 @@ // extended bits aren't consumed or because the input was already sign extended // by an earlier instruction. // -// Then it removes the -w suffix from opw instructions whenever all users are -// dependent only on the lower word of the result of the instruction. -// The cases handled are: -// * addw because c.add has a larger register encoding than c.addw. -// * addiw because it helps reduce test differences between RV32 and RV64 -// w/o being a pessimization. -// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) -// * slliw because c.slliw doesn't exist and c.slli does +// Then: +// 1. Unless explicit disabled or the target prefers instructions with W suffix, +// it removes the -w suffix from opw instructions whenever all users are +// dependent only on the lower word of the result of the instruction. +// The cases handled are: +// * addw because c.add has a larger register encoding than c.addw. +// * addiw because it helps reduce test differences between RV32 and RV64 +// w/o being a pessimization. +// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb) +// * slliw because c.slliw doesn't exist and c.slli does // +// 2. Or if explicit enabled or the target prefers instructions with W suffix, +// it adds the W suffix to the instruction whenever all users are dependent +// only on the lower word of the result of the instruction. +// The cases handled are: +// * add/addi/sub/mul. +// * slli with imm < 32. +// * ld/lwu. //===---------------------------------------------------------------------===// #include "RISCV.h" @@ -60,6 +69,8 @@ public: const RISCVSubtarget &ST, MachineRegisterInfo &MRI); bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI); + bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, MachineRegisterInfo &MRI); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI) { - if (DisableStripWSuffix || !ST.enableStripWSuffix()) - return false; - bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { @@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, return MadeChange; } +bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, + const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI) { + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned WOpc; + // TODO: Add more? + switch (MI.getOpcode()) { + default: + continue; + case RISCV::ADD: + WOpc = RISCV::ADDW; + break; + case RISCV::ADDI: + WOpc = RISCV::ADDIW; + break; + case RISCV::SUB: + WOpc = RISCV::SUBW; + break; + case RISCV::MUL: + WOpc = RISCV::MULW; + break; + case RISCV::SLLI: + // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits + if (MI.getOperand(2).getImm() >= 32) + continue; + WOpc = RISCV::SLLIW; + break; + case RISCV::LD: + case RISCV::LWU: + WOpc = RISCV::LW; + break; + } + + if (hasAllWUsers(MI, ST, MRI)) { + LLVM_DEBUG(dbgs() << "Replacing " << MI); + MI.setDesc(TII.get(WOpc)); + MI.clearFlag(MachineInstr::MIFlag::NoSWrap); + MI.clearFlag(MachineInstr::MIFlag::NoUWrap); + MI.clearFlag(MachineInstr::MIFlag::IsExact); + LLVM_DEBUG(dbgs() << " with " << MI); + ++NumTransformedToWInstrs; + MadeChange = true; + } + } + } + + return MadeChange; +} + bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); - MadeChange |= stripWSuffixes(MF, TII, ST, MRI); + + if (!(DisableStripWSuffix || ST.preferWInst())) + MadeChange |= stripWSuffixes(MF, TII, ST, MRI); + + if (ST.preferWInst()) + MadeChange |= appendWSuffixes(MF, TII, ST, MRI); return MadeChange; } diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index fbf64f2b1dfb..ae8baa3f1191 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -55,9 +55,9 @@ static std::string computeDataLayout(const Triple &TT) { // mean anything. if (Arch == Triple::spirv32) return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"; + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"; return "e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"; + "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"; } static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp index e7c9e60ba95f..9e85424e76e6 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp @@ -13,10 +13,13 @@ #include "X86RegisterBankInfo.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/IntrinsicsX86.h" #define GET_TARGET_REGBANK_IMPL #include "X86GenRegisterBank.inc" @@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, llvm_unreachable("Unsupported register kind yet."); } +// \returns true if a given intrinsic only uses and defines FPRs. +static bool isFPIntrinsic(const MachineRegisterInfo &MRI, + const MachineInstr &MI) { + // TODO: Add more intrinsics. + switch (cast<GIntrinsic>(MI).getIntrinsicID()) { + default: + return false; + // SSE1 + case Intrinsic::x86_sse_rcp_ss: + case Intrinsic::x86_sse_rcp_ps: + case Intrinsic::x86_sse_rsqrt_ss: + case Intrinsic::x86_sse_rsqrt_ps: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse_max_ps: + return true; + } + return false; +} + +bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + unsigned Op = MI.getOpcode(); + if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI)) + return true; + + // Do we have an explicit floating point instruction? + if (isPreISelGenericFloatingPointOpcode(Op)) + return true; + + // No. Check if we have a copy-like instruction. If we do, then we could + // still be fed by floating point instructions. + if (Op != TargetOpcode::COPY && !MI.isPHI() && + !isPreISelGenericOptimizationHint(Op)) + return false; + + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &getRegBank(X86::PSRRegBankID)) + return true; + if (RB == &getRegBank(X86::GPRRegBankID)) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned a fp + // type based off of its inputs. + if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); +} + +bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FCMP: + case TargetOpcode::G_LROUND: + case TargetOpcode::G_LLROUND: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI, Depth); +} + +bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI, Depth); +} + X86GenRegisterBankInfo::PartialMappingIdx X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI, const LLT &Ty, bool isFP) { @@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, const RegisterBankInfo::InstructionMapping & X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Opc = MI.getOpcode(); - // Try the default logic for non-generic instructions that are either copies - // or already have some operands assigned to banks. + // Try the default logic for non-generic instructions that are either + // copies or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) { const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) @@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FPEXT: case TargetOpcode::G_FPTRUNC: case TargetOpcode::G_FCONSTANT: - // Instruction having only floating-point operands (all scalars in VECRReg) + // Instruction having only floating-point operands (all scalars in + // VECRReg) getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx); break; case TargetOpcode::G_SITOFP: case TargetOpcode::G_FPTOSI: { - // Some of the floating-point instructions have mixed GPR and FP operands: - // fine-tune the computed mapping. + // Some of the floating-point instructions have mixed GPR and FP + // operands: fine-tune the computed mapping. auto &Op0 = MI.getOperand(0); auto &Op1 = MI.getOperand(1); const LLT Ty0 = MRI.getType(Op0.getReg()); @@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt, OpRegBankIdx); - } break; + break; + } + case TargetOpcode::G_LOAD: { + // Check if that load feeds fp instructions. + // In that case, we want the default mapping to be on FPR + // instead of blind map every scalar to GPR. + bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()), + [&](const MachineInstr &UseMI) { + // If we have at least one direct use in a FP + // instruction, assume this was a floating point load + // in the IR. If it was not, we would have had a + // bitcast before reaching that instruction. + return onlyUsesFP(UseMI, MRI, TRI); + }); + getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx); + break; + } + case TargetOpcode::G_STORE: { + // Check if that store is fed by fp instructions. + Register VReg = cast<GStore>(MI).getValueReg(); + if (!VReg) + break; + MachineInstr *DefMI = MRI.getVRegDef(VReg); + bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI); + getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx); + break; + } default: - // Track the bank of each register, use NotFP mapping (all scalars in GPRs) + // Track the bank of each register, use NotFP mapping (all scalars in + // GPRs) getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx); break; } diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h index 989c5956ad59..8f38e717e36b 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h @@ -62,6 +62,22 @@ private: const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx, SmallVectorImpl<const ValueMapping *> &OpdsMapping); + // Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. + bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth = 0) const; + + /// \returns true if \p MI only uses FPRs. + bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + + /// \returns true if \p MI only defines FPRs. + bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + public: X86RegisterBankInfo(const TargetRegisterInfo &TRI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bae8579fc365..ba5db854647a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1774,6 +1774,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Instruction *I = moveAddAfterMinMax(II, Builder)) return I; + // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C + const APInt *RHSC; + if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) && + match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC))))) + return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y), + ConstantInt::get(II->getType(), *RHSC)); + // smax(X, -X) --> abs(X) // smin(X, -X) --> -abs(X) // umax(X, -X) --> -abs(X) @@ -1815,7 +1822,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return NewMinMax; // Try to fold minmax with constant RHS based on range information - const APInt *RHSC; if (match(I1, m_APIntAllowUndef(RHSC))) { ICmpInst::Predicate Pred = ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID)); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 380bac9c6180..baec51a07fcb 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1019,12 +1019,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, const SmallPtrSetImpl<BasicBlock *> &SuccPreds, BasicBlock *&CommonPred) { - // There must be phis in BB, otherwise BB will be merged into Succ directly - if (BB->phis().empty() || Succ->phis().empty()) + // When Succ has no phis, BB may be merged into Succ directly. We don't need + // to redirect the predecessors of BB in this case. + if (Succ->phis().empty()) return false; - // BB must have predecessors not shared that can be redirected to Succ - if (!BB->hasNPredecessorsOrMore(2)) + // BB must have multiple different predecessors, so that at least one of + // predecessors can be redirected to Succ, except the common predecessor. + if (BB->getUniquePredecessor() || pred_empty(BB)) return false; // Get single common predecessors of both BB and Succ @@ -3627,10 +3629,12 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, return createIntegerExpression(C); auto *FP = dyn_cast<ConstantFP>(&C); - if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) { + if (FP && Ty.isFloatingPointTy() && Ty.getScalarSizeInBits() <= 64) { const APFloat &APF = FP->getValueAPF(); - return DIB.createConstantValueExpression( - APF.bitcastToAPInt().getZExtValue()); + APInt const &API = APF.bitcastToAPInt(); + if (auto Temp = API.getZExtValue()) + return DIB.createConstantValueExpression(static_cast<uint64_t>(Temp)); + return DIB.createConstantValueExpression(*API.getRawData()); } if (!Ty.isPointerTy()) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c63b500f546f..d0bcdceae392 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15155,8 +15155,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, Type *ValueTy = StoreTy; if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand())) ValueTy = Trunc->getSrcTy(); - unsigned MinVF = TTI->getStoreMinimumVF( - R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy); + unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF( + R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy)); if (MaxVF < MinVF) { LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF |