Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/Lint.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 44
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 12
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 9
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 5
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 37
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 7
-rw-r--r--  llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 24
-rw-r--r--  llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp | 39
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 57
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 40
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 244
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 59
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 66
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 92
-rw-r--r--  llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 89
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp | 139
-rw-r--r--  llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h | 16
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 18
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4
27 files changed, 750 insertions, 308 deletions
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 0694c2995dfc..1ab856ac8830 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -350,10 +350,7 @@ void Lint::visitCallBase(CallBase &I) {
}
case Intrinsic::vastart:
- Check(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
-
+ // vastart in a non-varargs function is rejected by the verifier
visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
std::nullopt, nullptr, MemRef::Read | MemRef::Write);
break;
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index c3bc3203b636..ae43e9ccf611 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
}
}
}
+
+bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FCOPYSIGN:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return true;
+ default:
+ return false;
+ }
+}
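This new helper centralizes the floating-point opcode list that several targets had been duplicating (see the AArch64, Mips, PowerPC and RISC-V hunks below). A minimal sketch of how a target register-bank file can now defer to it, mirroring the Mips change in this patch and assuming the matching declaration is added to llvm/include/llvm/CodeGen/GlobalISel/Utils.h (the header lies outside this llvm/lib-limited diffstat):

    // Hypothetical target code, not part of this patch: defer to the shared
    // helper instead of keeping a private floating-point opcode list.
    #include "llvm/CodeGen/GlobalISel/Utils.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    static bool isFloatingPointOpcodeDef(unsigned Opc) {
      switch (Opc) {
      case TargetOpcode::G_SITOFP:
      case TargetOpcode::G_UITOFP:
        return true;
      default:
        return llvm::isPreISelGenericFloatingPointOpcode(Opc);
      }
    }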
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0fa0bf2609bb..c36b1cc9039c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24467,6 +24467,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
return DAG.getSplatVector(NVT, DL, V.getOperand(0));
+ // extract_subvector(insert_subvector(x,y,c1),c2)
+ // --> extract_subvector(y,c2-c1)
+ // iff we're just extracting from the inserted subvector.
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue InsSub = V.getOperand(1);
+ EVT InsSubVT = InsSub.getValueType();
+ unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
+ unsigned InsIdx = V.getConstantOperandVal(2);
+ unsigned NumSubElts = NVT.getVectorMinNumElements();
+ if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
+ TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
+ InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
+ DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
+ }
+
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
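The new fold only fires when the extracted range lies wholly inside the inserted subvector. A self-contained sketch of that containment test with concrete indices (illustrative only, not LLVM code):

    #include <cassert>

    // extract_subvector(insert_subvector(x, y, InsIdx), ExtIdx) reads only from y
    // when [ExtIdx, ExtIdx + NumSubElts) lies within [InsIdx, InsIdx + NumInsElts);
    // the rewritten extract then uses index ExtIdx - InsIdx into y.
    static bool extractReadsOnlyInsertedSubvector(unsigned InsIdx, unsigned NumInsElts,
                                                  unsigned ExtIdx, unsigned NumSubElts) {
      return InsIdx <= ExtIdx && ExtIdx + NumSubElts <= InsIdx + NumInsElts;
    }

    int main() {
      // y (4 elements) inserted at index 4 of x; extracting 2 elements at index 6
      // reads only from y, at offset 6 - 4 = 2.
      assert(extractReadsOnlyInsertedSubvector(4, 4, 6, 2));
      // Extracting 4 elements at index 2 straddles x and y, so the fold does not apply.
      assert(!extractReadsOnlyInsertedSubvector(4, 4, 2, 4));
      return 0;
    }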
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 57a3f6a65e00..7a9cfdf5c3fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
- for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
- Results.push_back(Unrolled.getValue(I));
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Unrolled);
+ } else {
+ assert(Node->getNumValues() == Unrolled->getNumValues() &&
+ "VectorLegalizer Expand returned wrong number of results!");
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
+ }
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f64ded4f2cf9..6e7b67ded23c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1809,8 +1809,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ for (unsigned i = 0; i < NumParts; ++i) {
+ ISD::ArgFlagsTy OutFlags = Flags;
+ if (NumParts > 1 && i == 0)
+ OutFlags.setSplit();
+ else if (i == NumParts - 1 && i != 0)
+ OutFlags.setSplitEnd();
+
+ Outs.push_back(
+ ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0));
+ }
}
}
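With this change, a return value lowered into several parts carries split markers: the first part is flagged Split and the last part SplitEnd, while single-part returns carry neither. A trivial standalone illustration of the flag pattern (not LLVM code):

    #include <cstdio>

    int main() {
      const unsigned NumParts = 3; // a return value split across three registers
      for (unsigned i = 0; i < NumParts; ++i) {
        bool Split    = NumParts > 1 && i == 0;      // first part of a multi-part value
        bool SplitEnd = i == NumParts - 1 && i != 0; // last part of a multi-part value
        std::printf("part %u: split=%d splitend=%d\n", i, Split, SplitEnd);
      }
      return 0;
    }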
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2c480fb76ee4..634b2dd5119e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5341,10 +5341,11 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
- // The only data layout upgrades needed for pre-GCN are setting the address
- // space of globals to 1.
- if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
- !DL.starts_with("G")) {
+ // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
+ // the address space of globals to 1. This does not apply to SPIRV Logical.
+ if (((T.isAMDGPU() && !T.isAMDGCN()) ||
+ (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
+ !DL.contains("-G") && !DL.starts_with("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
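A minimal usage sketch of the upgraded behavior, assuming the declaration from llvm/IR/AutoUpgrade.h; with this change, SPIR and non-Logical SPIR-V triples are treated like pre-GCN AMDGPU and get the default globals address space appended:

    #include "llvm/IR/AutoUpgrade.h"
    #include <cassert>

    void example() {
      // Empty layout string: upgraded to just the globals address space.
      assert(llvm::UpgradeDataLayoutString("", "spirv64-unknown-unknown") == "G1");
      // Existing layout without a G component: "-G1" is appended.
      assert(llvm::UpgradeDataLayoutString("e-i64:64", "spir64-unknown-unknown") ==
             "e-i64:64-G1");
    }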
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 516d4a051556..4cd61e6e531b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5798,6 +5798,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
+ case Intrinsic::vastart: {
+ Check(Call.getFunction()->isVarArg(),
+ "va_start called in a non-varargs function");
+ break;
+ }
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d39de770eaf1..d5c4ce1888e7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
}
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const MachineInstr &MI) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 05e10a95b157..1dda1b89b2d3 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -101,6 +101,7 @@ public:
}
}
+ SII->fixImplicitOperands(*VOPDInst);
for (auto CompIdx : VOPD::COMPONENTS)
VOPDInst.copyImplicitOps(*MI[CompIdx]);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 245731ad5fc7..acb54fd10b90 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -612,13 +612,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve null register - it shall never be allocated
reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
- // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
- // will result in bugs.
- if (isWave32) {
- Reserved.set(AMDGPU::VCC);
- Reserved.set(AMDGPU::VCC_HI);
- }
-
// Reserve SGPRs.
//
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index de492f2b1f0a..98f5014a34b1 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -226,11 +226,8 @@ bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign(
MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align));
const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec];
if (MCSym == nullptr) {
- // Create a symbol and make the value of symbol is zero.
- MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align");
- Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
- Asm.registerSymbol(*Sym);
- MCSym = MCSymbolRefExpr::create(Sym, Ctx);
+ // Use section symbol directly.
+ MCSym = MCSymbolRefExpr::create(Sec->getBeginSymbol(), Ctx);
getSecToAlignSym()[Sec] = MCSym;
}
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1fd8c88e5..62b58cba9f24 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
}
}
-// Instructions where all register operands are floating point.
-static bool isFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- return true;
- default:
- return false;
- }
-}
-
// Instructions where use operands are floating point registers.
// Def operands are general purpose.
static bool isFloatingPointOpcodeUse(unsigned Opc) {
@@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
case TargetOpcode::G_FCMP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
@@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
case TargetOpcode::G_UITOFP:
return true;
default:
- return isFloatingPointOpcode(Opc);
+ return isPreISelGenericFloatingPointOpcode(Opc);
}
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 6aeef145e307..125a49de7b27 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "PPCRegisterBankInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
// TODO: Add more intrinsics.
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 45e19cdea300..c18892ac62f2 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -34,14 +34,15 @@ private:
// Whether this is assigning args for a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVOutgoingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::OutgoingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -51,16 +52,9 @@ public:
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -181,14 +175,15 @@ private:
// Whether this is assigning args from a return.
bool IsRet;
- // true if assignArg has been called for a mask argument, false otherwise.
- bool AssignedFirstMaskArg = false;
+ RVVArgDispatcher &RVVDispatcher;
public:
RISCVIncomingValueAssigner(
- RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
+ RVVArgDispatcher &RVVDispatcher)
: CallLowering::IncomingValueAssigner(nullptr),
- RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
+ RVVDispatcher(RVVDispatcher) {}
bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
@@ -201,16 +196,9 @@ public:
if (LocVT.isScalableVector())
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
- ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
- FirstMaskArgument = ValNo;
- AssignedFirstMaskArg = true;
- }
-
if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
- *Subtarget.getTargetLowering(), FirstMaskArgument))
+ *Subtarget.getTargetLowering(), RVVDispatcher))
return true;
StackSize = State.getStackSize();
@@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC);
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVOutgoingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, Dispatcher);
RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, CC, F.isVarArg());
@@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CallingConv::ID CC = F.getCallingConv();
SmallVector<ArgInfo, 32> SplitArgInfos;
+ SmallVector<Type *, 4> TypeList;
unsigned Index = 0;
for (auto &Arg : F.args()) {
// Construct the ArgInfo object from destination register and argument type.
@@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// correspondingly and appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(Arg.getType());
+
++Index;
}
+ RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, Dispatcher);
RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());
SmallVector<CCValAssign, 16> ArgLocs;
@@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<Type *, 4> TypeList;
for (auto &AInfo : Info.OrigArgs) {
// Handle any required unmerging of split value types from a given VReg into
// physical registers. ArgInfo objects are constructed correspondingly and
// appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ TypeList.push_back(AInfo.Ty);
}
// TODO: Support tail calls.
@@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
+ RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(TypeList)};
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/false);
+ /*IsRet=*/false, ArgDispatcher);
RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
MIRBuilder, CC, Info.IsVarArg))
@@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
+ RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
+ ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
- /*IsRet=*/true);
+ /*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index ca77a9729e03..c1fde738c000 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -142,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
return &RISCV::ValueMappings[Idx];
}
-/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
-/// having only floating-point operands.
-/// FIXME: this is copied from target AArch64. Needs some code refactor here to
-/// put this function in GlobalISel/Utils.cpp.
-static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FCOPYSIGN:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FMINIMUM:
- return true;
- }
- return false;
-}
-
// TODO: Make this more like AArch64?
bool RISCVRegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 794455aa7304..59962216e0c0 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1226,9 +1226,9 @@ def TuneNoSinkSplatOperands
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;
-def TuneNoStripWSuffix
- : SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false",
- "Disable strip W suffix">;
+def TunePreferWInst
+ : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
+ "Prefer instructions with W suffix">;
def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1d1ea6bae6c1..765838aafb58 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -18223,33 +18224,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
return false;
}
-static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
- std::optional<unsigned> FirstMaskArgument,
- CCState &State, const RISCVTargetLowering &TLI) {
- const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
- if (RC == &RISCV::VRRegClass) {
- // Assign the first mask argument to V0.
- // This is an interim calling convention and it may be changed in the
- // future.
- if (FirstMaskArgument && ValNo == *FirstMaskArgument)
- return State.AllocateReg(RISCV::V0);
- return State.AllocateReg(ArgVRs);
- }
- if (RC == &RISCV::VRM2RegClass)
- return State.AllocateReg(ArgVRM2s);
- if (RC == &RISCV::VRM4RegClass)
- return State.AllocateReg(ArgVRM4s);
- if (RC == &RISCV::VRM8RegClass)
- return State.AllocateReg(ArgVRM8s);
- llvm_unreachable("Unhandled register class for ValueType");
-}
-
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
@@ -18418,7 +18398,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isVector()) {
- Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
+ Reg = RVVDispatcher.getNextPhysReg();
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
@@ -18504,9 +18484,15 @@ void RISCVTargetLowering::analyzeInputArgs(
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Ins);
+ RVVArgDispatcher Dispatcher;
+ if (IsRet) {
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
+ } else {
+ SmallVector<Type *, 4> TypeList;
+ for (const Argument &Arg : MF.getFunction().args())
+ TypeList.push_back(Arg.getType());
+ Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
+ }
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
@@ -18521,7 +18507,7 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< ArgVT << '\n');
llvm_unreachable(nullptr);
@@ -18535,9 +18521,13 @@ void RISCVTargetLowering::analyzeOutputArgs(
CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
unsigned NumArgs = Outs.size();
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ SmallVector<Type *, 4> TypeList;
+ if (IsRet)
+ TypeList.push_back(MF.getFunction().getReturnType());
+ else if (CLI)
+ for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
+ TypeList.push_back(Arg.Ty);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
@@ -18547,7 +18537,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
- FirstMaskArgument)) {
+ Dispatcher)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< ArgVT << "\n");
llvm_unreachable(nullptr);
@@ -18728,7 +18718,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument) {
+ RVVArgDispatcher &RVVDispatcher) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -18806,13 +18796,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
if (LocVT.isVector()) {
- if (unsigned Reg =
- allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
+ MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
+ if (AllocatedVReg) {
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
@@ -19440,17 +19431,15 @@ bool RISCVTargetLowering::CanLowerReturn(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
- std::optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasVInstructions())
- FirstMaskArgument = preAssignMask(Outs);
+ RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
- *this, FirstMaskArgument))
+ ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
+ nullptr, *this, Dispatcher))
return false;
}
return true;
@@ -21247,6 +21236,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
return Subtarget.getMinimumJumpTableEntries();
}
+// Handle single arg such as return value.
+template <typename Arg>
+void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
+ // This lambda determines whether an array of types are constructed by
+ // homogeneous vector types.
+ auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
+ // First, extract the first element in the argument type.
+ auto It = ArgList.begin();
+ MVT FirstArgRegType = It->VT;
+
+ // Return if there is no return or the type needs split.
+ if (It == ArgList.end() || It->Flags.isSplit())
+ return false;
+
+ ++It;
+
+ // Return if this argument type contains only 1 element, or it's not a
+ // vector type.
+ if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
+ return false;
+
+ // Second, check if the following elements in this argument type are all the
+ // same.
+ for (; It != ArgList.end(); ++It)
+ if (It->Flags.isSplit() || It->VT != FirstArgRegType)
+ return false;
+
+ return true;
+ };
+
+ if (isHomogeneousScalableVectorType(ArgList)) {
+ // Handle as tuple type
+ RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
+ } else {
+ // Handle as normal vector type
+ bool FirstVMaskAssigned = false;
+ for (const auto &OutArg : ArgList) {
+ MVT RegisterVT = OutArg.VT;
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ continue;
+ }
+
+ RVVArgInfos.push_back({1, RegisterVT, false});
+ }
+ }
+}
+
+// Handle multiple args.
+template <>
+void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
+ const DataLayout &DL = MF->getDataLayout();
+ const Function &F = MF->getFunction();
+ LLVMContext &Context = F.getContext();
+
+ bool FirstVMaskAssigned = false;
+ for (Type *Ty : TypeList) {
+ StructType *STy = dyn_cast<StructType>(Ty);
+ if (STy && STy->containsHomogeneousScalableVectorTypes()) {
+ Type *ElemTy = STy->getTypeAtIndex(0U);
+ EVT VT = TLI->getValueType(DL, ElemTy);
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ RVVArgInfos.push_back(
+ {NumRegs * STy->getNumElements(), RegisterVT, false});
+ } else {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
+
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
+
+ // Skip non-RVV register type
+ if (!RegisterVT.isVector())
+ continue;
+
+ if (RegisterVT.isFixedLengthVector())
+ RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
+
+ if (!FirstVMaskAssigned &&
+ RegisterVT.getVectorElementType() == MVT::i1) {
+ RVVArgInfos.push_back({1, RegisterVT, true});
+ FirstVMaskAssigned = true;
+ --NumRegs;
+ }
+
+ RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
+ }
+ }
+ }
+}
+
+void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
+ unsigned StartReg) {
+ assert((StartReg % LMul) == 0 &&
+ "Start register number should be multiple of lmul");
+ const MCPhysReg *VRArrays;
+ switch (LMul) {
+ default:
+ report_fatal_error("Invalid lmul");
+ case 1:
+ VRArrays = ArgVRs;
+ break;
+ case 2:
+ VRArrays = ArgVRM2s;
+ break;
+ case 4:
+ VRArrays = ArgVRM4s;
+ break;
+ case 8:
+ VRArrays = ArgVRM8s;
+ break;
+ }
+
+ for (unsigned i = 0; i < NF; ++i)
+ if (StartReg)
+ AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
+ else
+ AllocatedPhysRegs.push_back(MCPhysReg());
+}
+
+/// This function determines whether each RVV argument is passed by register:
+/// if the argument can be assigned to a VR, give it a specific register;
+/// otherwise, assign it 0, which is an invalid MCPhysReg.
+void RVVArgDispatcher::compute() {
+ uint32_t AssignedMap = 0;
+ auto allocate = [&](const RVVArgInfo &ArgInfo) {
+ // Allocate first vector mask argument to V0.
+ if (ArgInfo.FirstVMask) {
+ AllocatedPhysRegs.push_back(RISCV::V0);
+ return;
+ }
+
+ unsigned RegsNeeded = divideCeil(
+ ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
+ unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
+ for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
+ StartReg += RegsNeeded) {
+ uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
+ if ((AssignedMap & Map) == 0) {
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
+ AssignedMap |= Map;
+ return;
+ }
+ }
+
+ allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
+ };
+
+ for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
+ allocate(RVVArgInfos[i]);
+}
+
+MCPhysReg RVVArgDispatcher::getNextPhysReg() {
+ assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
+ return AllocatedPhysRegs[CurIdx++];
+}
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
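RVVArgDispatcher::compute() above performs a first-fit scan over the 16 argument vector registers (v8-v23), tracking occupancy in a bitmask and stepping candidate start positions by the group size so starts stay LMUL-aligned. A standalone sketch of just that allocation logic (illustrative, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    // Returns the offset from v8 for a group of TotalRegsNeeded registers whose
    // start must be a multiple of RegsNeeded, or -1 if the value must be passed
    // indirectly instead.
    static int allocateGroup(uint32_t &AssignedMap, unsigned TotalRegsNeeded,
                             unsigned RegsNeeded) {
      const unsigned NumArgVRs = 16; // v8..v23
      for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
           StartReg += RegsNeeded) {
        uint32_t Map = ((1u << TotalRegsNeeded) - 1) << StartReg;
        if ((AssignedMap & Map) == 0) {
          AssignedMap |= Map;
          return (int)StartReg;
        }
      }
      return -1;
    }

    int main() {
      uint32_t Assigned = 0;
      // An LMUL=2 argument takes v8-v9; the next LMUL=4 argument must start at a
      // multiple of 4 registers, so it lands in v12-v15 and v10-v11 stay free.
      std::printf("first:  v%d\n", 8 + allocateGroup(Assigned, 2, 2));
      std::printf("second: v%d\n", 8 + allocateGroup(Assigned, 4, 4));
      return 0;
    }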
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index e2633733c31b..b10da3d40bef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -24,6 +24,7 @@ namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
+class RVVArgDispatcher;
namespace RISCVISD {
// clang-format off
@@ -875,7 +876,7 @@ public:
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
@@ -1017,19 +1018,71 @@ private:
unsigned getMinimumJumpTableEntries() const override;
};
+/// As per the spec, the rules for passing vector arguments are as follows:
+///
+/// 1. For the first vector mask argument, use v0 to pass it.
+/// 2. For vector data arguments or the remaining vector mask arguments, starting
+/// from the v8 register, if a vector register group between v8-v23 that has not
+/// been allocated can be found and the first register number is a multiple of
+/// LMUL, then allocate this vector register group to the argument and mark these
+/// registers as allocated. Otherwise, the argument is passed by reference and is
+/// replaced in the argument list with the address.
+/// 3. For tuple vector data arguments, starting from the v8 register, if
+/// NFIELDS consecutive vector register groups between v8-v23 that have not been
+/// allocated can be found and the first register number is a multiple of LMUL,
+/// then allocate these vector register groups to the argument and mark these
+/// registers as allocated. Otherwise, the argument is passed by reference and is
+/// replaced in the argument list with the address.
+class RVVArgDispatcher {
+public:
+ static constexpr unsigned NumArgVRs = 16;
+
+ struct RVVArgInfo {
+ unsigned NF;
+ MVT VT;
+ bool FirstVMask = false;
+ };
+
+ template <typename Arg>
+ RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
+ ArrayRef<Arg> ArgList)
+ : MF(MF), TLI(TLI) {
+ constructArgInfos(ArgList);
+ compute();
+ }
+
+ RVVArgDispatcher() = default;
+
+ MCPhysReg getNextPhysReg();
+
+private:
+ SmallVector<RVVArgInfo, 4> RVVArgInfos;
+ SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
+
+ const MachineFunction *MF = nullptr;
+ const RISCVTargetLowering *TLI = nullptr;
+
+ unsigned CurIdx = 0;
+
+ template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
+ void compute();
+ void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
+ unsigned StartReg = 0);
+};
+
namespace RISCV {
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
- std::optional<unsigned> FirstMaskArgument);
+ RVVArgDispatcher &RVVDispatcher);
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
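As a worked illustration of the rules documented above (an example constructed here, not taken from the patch's tests): for a function whose vector arguments are a mask, an LMUL=2 value, and a 2-field tuple of LMUL=2 values, the first mask argument goes to v0 (rule 1), the LMUL=2 value takes v8-v9 (rule 2), and the tuple needs two consecutive LMUL=2 groups whose first register number is a multiple of 2, so it takes v10-v13 (rule 3).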
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b0fda040519a..668062c8d33f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2719,6 +2719,50 @@ std::string RISCVInstrInfo::createMIROperandComment(
}
// clang-format off
+#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL
+
+#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
+ RISCV::Pseudo##OP##_##LMUL##_MASK
+
+#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
+
+#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_UNMASK(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
+ CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
+
+#define CASE_RVV_OPCODE_MASK(OP) \
+ CASE_RVV_OPCODE_MASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
+
+#define CASE_RVV_OPCODE_WIDEN(OP) \
+ CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
+ case CASE_RVV_OPCODE_MASK_WIDEN(OP)
+
+#define CASE_RVV_OPCODE(OP) \
+ CASE_RVV_OPCODE_UNMASK(OP): \
+ case CASE_RVV_OPCODE_MASK(OP)
+// clang-format on
+
+// clang-format off
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
@@ -2798,6 +2842,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
+ case CASE_RVV_OPCODE(VADD_VV):
+ case CASE_RVV_OPCODE(VAND_VV):
+ case CASE_RVV_OPCODE(VOR_VV):
+ case CASE_RVV_OPCODE(VXOR_VV):
+ case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
+ case CASE_RVV_OPCODE_MASK(VMSNE_VV):
+ case CASE_RVV_OPCODE(VMIN_VV):
+ case CASE_RVV_OPCODE(VMINU_VV):
+ case CASE_RVV_OPCODE(VMAX_VV):
+ case CASE_RVV_OPCODE(VMAXU_VV):
+ case CASE_RVV_OPCODE(VMUL_VV):
+ case CASE_RVV_OPCODE(VMULH_VV):
+ case CASE_RVV_OPCODE(VMULHU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
+ case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
+ case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
+ // Operands 2 and 3 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
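For reference, how the new CASE_RVV_OPCODE macros expand, derived directly from the definitions above and shown for OP = VADD_VV and LMUL = M1:

    // case CASE_RVV_OPCODE_LMUL(VADD_VV, M1):
    // preprocesses to
    //   case RISCV::PseudoVADD_VV_M1:
    //   case RISCV::PseudoVADD_VV_M1_MASK:
    // so a single CASE_RVV_OPCODE(VADD_VV) label covers the masked and unmasked
    // pseudos for every LMUL from MF8 through M8, and findCommutedOpIndices can
    // report "operands 2 and 3 are commutable" for all of them at once.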
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 686bfd1af0d0..0b8317925097 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2129,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value, SEW=sew in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
@@ -2169,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
- int TargetConstraintType = 1> {
- let VLMul = MInfo.value in {
+ int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let VLMul = MInfo.value, isCommutable = Commutable in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
@@ -2228,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
}
-multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
- defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
+multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> {
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>;
}
multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> {
@@ -2333,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> {
// * The destination EEW is greater than the source EEW, the source EMUL is
// at least 1, and the overlap is in the highest-numbered part of the
// destination register group is legal. Otherwise, it is illegal.
-multiclass VPseudoBinaryW_VV<LMULInfo m> {
+multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", TargetConstraintType=3>;
+ "@earlyclobber $rd", TargetConstraintType=3,
+ Commutable=Commutable>;
}
multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -2455,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}
-multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> {
+multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "",
@@ -2669,8 +2674,10 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
- defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", TargetConstraintType>;
+multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1,
+ bit Commutable = 0> {
+ defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "",
+ TargetConstraintType, Commutable=Commutable>;
}
multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
@@ -2749,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint =
}
}
-multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "",
+ bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m, Constraint>,
+ defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>,
SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
@@ -2802,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM {
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=1>,
SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}
-multiclass VPseudoVMUL_VV_VX {
+multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
@@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
}
}
-multiclass VPseudoVWALU_VV_VX {
+multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX {
}
}
-multiclass VPseudoVWMUL_VV_VX {
+multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
@@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM {
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoTiedBinaryV_VM<m>,
+ defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>,
SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
@@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
sew, Commutable=1>;
}
-multiclass VPseudoTernaryW_VV<LMULInfo m> {
+multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint, /*Commutable*/ 0, TargetConstraintType=3>;
+ constraint, Commutable=Commutable, TargetConstraintType=3>;
}
multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> {
@@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
}
}
-multiclass VPseudoVWMAC_VV_VX {
+multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
- defm "" : VPseudoTernaryW_VV<m>,
+ defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>,
SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
"ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
@@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM {
}
}
-multiclass VPseudoVCMPM_VV_VX_VI {
+multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
- defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
+ defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>,
SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
@@ -6246,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF;
//===----------------------------------------------------------------------===//
// 11.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVADD : VPseudoVALU_VV_VX_VI;
+defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVSUB : VPseudoVALU_VV_VX;
defm PseudoVRSUB : VPseudoVALU_VX_VI;
@@ -6311,9 +6319,9 @@ foreach vti = AllIntegerVectors in {
//===----------------------------------------------------------------------===//
// 11.2. Vector Widening Integer Add/Subtract
//===----------------------------------------------------------------------===//
-defm PseudoVWADDU : VPseudoVWALU_VV_VX;
+defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
-defm PseudoVWADD : VPseudoVWALU_VV_VX;
+defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUB : VPseudoVWALU_VV_VX;
defm PseudoVWADDU : VPseudoVWALU_WV_WX;
defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
@@ -6344,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 11.5. Vector Bitwise Logical Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVAND : VPseudoVALU_VV_VX_VI;
-defm PseudoVOR : VPseudoVALU_VV_VX_VI;
-defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
+defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
+defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
//===----------------------------------------------------------------------===//
// 11.6. Vector Single-Width Bit Shift Instructions
@@ -6364,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
//===----------------------------------------------------------------------===//
// 11.8. Vector Integer Comparison Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
-defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
+defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
@@ -6384,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
//===----------------------------------------------------------------------===//
// 11.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVMUL : VPseudoVMUL_VV_VX;
-defm PseudoVMULH : VPseudoVMUL_VV_VX;
-defm PseudoVMULHU : VPseudoVMUL_VV_VX;
+defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>;
+defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULHSU : VPseudoVMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6400,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX;
//===----------------------------------------------------------------------===//
// 11.12. Vector Widening Integer Multiply Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
-defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
+defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>;
+defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>;
defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;
//===----------------------------------------------------------------------===//
@@ -6415,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
//===----------------------------------------------------------------------===//
// 11.14. Vector Widening Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
-defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>;
+defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>;
defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
defm PseudoVWMACCUS : VPseudoVWMAC_VX;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 39d420c2fbf0..ead91c5656be 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -12,15 +12,24 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
-// Then it removes the -w suffix from opw instructions whenever all users are
-// dependent only on the lower word of the result of the instruction.
-// The cases handled are:
-// * addw because c.add has a larger register encoding than c.addw.
-// * addiw because it helps reduce test differences between RV32 and RV64
-// w/o being a pessimization.
-// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
-// * slliw because c.slliw doesn't exist and c.slli does
+// Then:
+// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
+// it removes the -w suffix from opw instructions whenever all users are
+// dependent only on the lower word of the result of the instruction.
+// The cases handled are:
+// * addw because c.add has a larger register encoding than c.addw.
+// * addiw because it helps reduce test differences between RV32 and RV64
+// w/o being a pessimization.
+// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
+// * slliw because c.slliw doesn't exist and c.slli does
//
+// 2. Or, if explicitly enabled or the target prefers instructions with W suffix,
+// it adds the W suffix to the instruction whenever all users are dependent
+// only on the lower word of the result of the instruction.
+// The cases handled are:
+// * add/addi/sub/mul.
+// * slli with imm < 32.
+// * ld/lwu.
//===---------------------------------------------------------------------===//
#include "RISCV.h"
@@ -60,6 +69,8 @@ public:
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
const RISCVInstrInfo &TII,
const RISCVSubtarget &ST,
MachineRegisterInfo &MRI) {
- if (DisableStripWSuffix || !ST.enableStripWSuffix())
- return false;
-
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
@@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
return MadeChange;
}
+bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned WOpc;
+ // TODO: Add more?
+ switch (MI.getOpcode()) {
+ default:
+ continue;
+ case RISCV::ADD:
+ WOpc = RISCV::ADDW;
+ break;
+ case RISCV::ADDI:
+ WOpc = RISCV::ADDIW;
+ break;
+ case RISCV::SUB:
+ WOpc = RISCV::SUBW;
+ break;
+ case RISCV::MUL:
+ WOpc = RISCV::MULW;
+ break;
+ case RISCV::SLLI:
+        // SLLIW only reads the lowest 5 bits of the shift amount, while SLLI
+        // reads the lowest 6 bits.
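+        // E.g. (illustrative) "slli a0, a1, 40" has no W equivalent and is
+        // skipped here.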
+ if (MI.getOperand(2).getImm() >= 32)
+ continue;
+ WOpc = RISCV::SLLIW;
+ break;
+ case RISCV::LD:
+ case RISCV::LWU:
+ WOpc = RISCV::LW;
+ break;
+ }
+
+ if (hasAllWUsers(MI, ST, MRI)) {
+ LLVM_DEBUG(dbgs() << "Replacing " << MI);
+ MI.setDesc(TII.get(WOpc));
+ MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MI.clearFlag(MachineInstr::MIFlag::IsExact);
+ LLVM_DEBUG(dbgs() << " with " << MI);
+ ++NumTransformedToWInstrs;
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
- MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
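+  // Stripping and appending are mutually exclusive: strip W suffixes unless
+  // disabled or the target prefers W instructions; append them only when the
+  // target prefers W instructions.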
+ if (!(DisableStripWSuffix || ST.preferWInst()))
+ MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
+ if (ST.preferWInst())
+ MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index fbf64f2b1dfb..ae8baa3f1191 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -55,9 +55,9 @@ static std::string computeDataLayout(const Triple &TT) {
// mean anything.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
index e7c9e60ba95f..9e85424e76e6 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp
@@ -13,10 +13,13 @@
#include "X86RegisterBankInfo.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/IntrinsicsX86.h"
#define GET_TARGET_REGBANK_IMPL
#include "X86GenRegisterBank.inc"
@@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
llvm_unreachable("Unsupported register kind yet.");
}
+// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ // TODO: Add more intrinsics.
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ default:
+ return false;
+ // SSE1
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rcp_ps:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_rsqrt_ps:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_max_ps:
+ return true;
+ }
+ return false;
+}
+
+bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ unsigned Op = MI.getOpcode();
+ if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
+ return true;
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI() &&
+ !isPreISelGenericOptimizationHint(Op))
+ return false;
+
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &getRegBank(X86::PSRRegBankID))
+ return true;
+ if (RB == &getRegBank(X86::GPRRegBankID))
+ return false;
+
+ // We don't know anything.
+ //
+  // If we have a phi, we may be able to infer that it will be assigned an FP
+  // type based on its inputs.
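+  //
+  // Illustrative MIR sketch (not from this patch):
+  //   %phi:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2
+  // If %a and %b are defined by FP instructions (e.g. G_FADD), the phi is
+  // treated as FP as well.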
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
+}
+
+bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
+bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI, Depth);
+}
+
X86GenRegisterBankInfo::PartialMappingIdx
X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI,
const LLT &Ty, bool isFP) {
@@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI,
const RegisterBankInfo::InstructionMapping &
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned Opc = MI.getOpcode();
- // Try the default logic for non-generic instructions that are either copies
- // or already have some operands assigned to banks.
+ // Try the default logic for non-generic instructions that are either
+ // copies or already have some operands assigned to banks.
if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
const InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
@@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
- // Instruction having only floating-point operands (all scalars in VECRReg)
+ // Instruction having only floating-point operands (all scalars in
+ // VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
break;
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_FPTOSI: {
- // Some of the floating-point instructions have mixed GPR and FP operands:
- // fine-tune the computed mapping.
+ // Some of the floating-point instructions have mixed GPR and FP
+ // operands: fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
@@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt,
OpRegBankIdx);
- } break;
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+    // instead of blindly mapping every scalar to GPR.
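+    //
+    // Illustrative MIR sketch (not from this patch):
+    //   %v:_(s32) = G_LOAD %p(p0) :: (load (s32))
+    //   %r:_(s32) = G_FADD %v, %x
+    // The loaded value feeds G_FADD, so %v gets the FP bank.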
+ bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP
+ // instruction, assume this was a floating point load
+ // in the IR. If it was not, we would have had a
+ // bitcast before reaching that instruction.
+ return onlyUsesFP(UseMI, MRI, TRI);
+ });
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
+ case TargetOpcode::G_STORE: {
+ // Check if that store is fed by fp instructions.
+ Register VReg = cast<GStore>(MI).getValueReg();
+ if (!VReg)
+ break;
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI);
+ getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx);
+ break;
+ }
default:
- // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
+ // Track the bank of each register, use NotFP mapping (all scalars in
+ // GPRs)
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx);
break;
}
diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
index 989c5956ad59..8f38e717e36b 100644
--- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h
@@ -62,6 +62,22 @@ private:
const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
SmallVectorImpl<const ValueMapping *> &OpdsMapping);
+ // Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only uses FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
+ /// \returns true if \p MI only defines FPRs.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+
public:
X86RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bae8579fc365..ba5db854647a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1774,6 +1774,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *I = moveAddAfterMinMax(II, Builder))
return I;
+ // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
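+  // E.g. (illustrative) umin(X & -8, Y & -8) --> umin(X, Y) & -8; clearing the
+  // same low bits on both operands commutes with the min/max.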
+ const APInt *RHSC;
+ if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
+ match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
+ return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
+ ConstantInt::get(II->getType(), *RHSC));
+
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
// umax(X, -X) --> -abs(X)
@@ -1815,7 +1822,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return NewMinMax;
// Try to fold minmax with constant RHS based on range information
- const APInt *RHSC;
if (match(I1, m_APIntAllowUndef(RHSC))) {
ICmpInst::Predicate Pred =
ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 380bac9c6180..baec51a07fcb 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1019,12 +1019,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
const SmallPtrSetImpl<BasicBlock *> &SuccPreds,
BasicBlock *&CommonPred) {
- // There must be phis in BB, otherwise BB will be merged into Succ directly
- if (BB->phis().empty() || Succ->phis().empty())
+ // When Succ has no phis, BB may be merged into Succ directly. We don't need
+ // to redirect the predecessors of BB in this case.
+ if (Succ->phis().empty())
return false;
- // BB must have predecessors not shared that can be redirected to Succ
- if (!BB->hasNPredecessorsOrMore(2))
+  // BB must have multiple distinct predecessors, so that at least one
+  // predecessor other than the common predecessor can be redirected to Succ.
+ if (BB->getUniquePredecessor() || pred_empty(BB))
return false;
// Get single common predecessors of both BB and Succ
@@ -3627,10 +3629,12 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C,
return createIntegerExpression(C);
auto *FP = dyn_cast<ConstantFP>(&C);
- if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) {
+ if (FP && Ty.isFloatingPointTy() && Ty.getScalarSizeInBits() <= 64) {
const APFloat &APF = FP->getValueAPF();
- return DIB.createConstantValueExpression(
- APF.bitcastToAPInt().getZExtValue());
+ APInt const &API = APF.bitcastToAPInt();
+ if (auto Temp = API.getZExtValue())
+ return DIB.createConstantValueExpression(static_cast<uint64_t>(Temp));
+ return DIB.createConstantValueExpression(*API.getRawData());
}
if (!Ty.isPointerTy())
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c63b500f546f..d0bcdceae392 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15155,8 +15155,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
Type *ValueTy = StoreTy;
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
ValueTy = Trunc->getSrcTy();
- unsigned MinVF = TTI->getStoreMinimumVF(
- R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy);
+ unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
+ R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy));
if (MaxVF < MinVF) {
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF