diff options
Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 985 |
1 files changed, 741 insertions, 244 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 82f412f012..b1773b7090 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -14,6 +14,7 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" @@ -22,7 +23,9 @@ #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" @@ -183,7 +186,11 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } -bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } +const CodeGenOptions &ABIInfo::getCodeGenOpts() const { + return CGT.getCodeGenOpts(); +} + +bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; @@ -398,7 +405,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { - return llvm::CallingConv::C; + // OpenCL kernels are called via an explicit runtime API with arguments + // set with clSetKernelArg(), not as normal sub-functions. + // Return SPIR_KERNEL by default as the kernel calling convention to + // ensure the fingerprint is fixed such way that each OpenCL argument + // gets one matching argument in the produced kernel function argument + // list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. + return llvm::CallingConv::SPIR_KERNEL; } llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, @@ -406,13 +423,36 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule & return llvm::ConstantPointerNull::get(T); } +LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return D ? D->getType().getAddressSpace() : LangAS::Default; +} + llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( - CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy, - QualType DestTy) const { + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, - CGF.ConvertType(DestTy)); + if (auto *C = dyn_cast<llvm::Constant>(Src)) + return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy); +} + +llvm::Constant * +TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + return llvm::ConstantExpr::getPointerCast(Src, DestTy); +} + +llvm::SyncScope::ID +TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { + return C.getOrInsertSyncScopeID(""); /* default sync scope */ } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); @@ -836,7 +876,10 @@ bool IsX86_MMXType(llvm::Type *IRType) { static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { - if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { + bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; @@ -853,8 +896,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) + if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { + if (BT->getKind() == BuiltinType::LongDouble) { + if (&Context.getTargetInfo().getLongDoubleFormat() == + &llvm::APFloat::x87DoubleExtended()) + return false; + } return true; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. @@ -942,8 +991,7 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, - const ABIArgInfo& current) const; + /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -1009,7 +1057,8 @@ public: const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. @@ -1038,14 +1087,14 @@ public: getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "movl\t%ebp, %ebp" - "\t\t## marker for objc_retainAutoreleaseReturnValue"; + "\t\t// marker for objc_retainAutoreleaseReturnValue"; } }; @@ -1527,27 +1576,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } -ABIArgInfo -X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, - const ABIArgInfo ¤t) const { - // Assumes vectorCall calling convention. - const Type *Base = nullptr; - uint64_t NumElts = 0; - - if (!Ty->isBuiltinType() && !Ty->isVectorType() && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - // HVA types get passed directly in registers if there is room. - State.FreeSSERegs -= NumElts; - return getDirectX86Hva(); - } - // If there's no room, the HVA gets passed as normal indirect - // structure. - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - return current; -} - ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1566,35 +1594,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } } - // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets, regcall uses some of the HVA rules. + // Regcall uses the concept of a homogenous vector aggregate, similar + // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if ((State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) && + if (State.CC == llvm::CallingConv::X86_RegCall && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.CC == llvm::CallingConv::X86_RegCall) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { - if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { - // Actual floating-point types get registers first time through if - // there is registers available - State.FreeSSERegs -= NumElts; + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); - } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { - // HVA Types only get registers after everything else has been - // set, so it gets set as indirect for now. - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); - } + return ABIArgInfo::getExpand(); } + return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1675,31 +1688,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, bool &UsedInAlloca) const { - // Vectorcall only allows the first 6 parameters to be passed in registers, - // and homogeneous vector aggregates are only put into registers as a second - // priority. - unsigned Count = 0; - CCState ZeroState = State; - ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; - // HVAs must be done as a second priority for registers, so the deferred - // items are dealt with by going through the pattern a second time. + // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // After that, integers AND HVAs are assigned Left to Right in the same pass. + // Integers are passed as ECX/EDX if one is available (in order). HVAs will + // first take up the remaining YMM/XMM registers. If insufficient registers + // remain but an integer register (ECX/EDX) is available, it will be passed + // in that, else, on the stack. for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = classifyArgumentType(I.type, State); - else - // Parameters after the 6th cannot be passed in registers, - // so pretend there are no registers left for them. - I.info = classifyArgumentType(I.type, ZeroState); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - ++Count; + // First pass do all the vector types. + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = ABIArgInfo::getDirect(); + } else { + I.info = classifyArgumentType(Ty, State); + } + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } - Count = 0; - // Go through the arguments a second time to get HVAs registers if there - // are still some available. + for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, State, I.info); - ++Count; + // Second pass, do the rest! + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); + + if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { + // Assign true HVAs (non vector/native FP types). + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = getDirectX86Hva(); + } else { + I.info = getIndirectResult(Ty, /*ByVal=*/false, State); + } + } else if (!IsHva) { + // Assign all Non-HVAs, so this will exclude Vector/FP args. + I.info = classifyArgumentType(Ty, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } } @@ -1882,7 +1917,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: - case llvm::Triple::Bitrig: case llvm::Triple::Win32: return true; default: @@ -1890,9 +1924,11 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( } } -void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void X86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { // Get the LLVM function. @@ -1901,10 +1937,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2084,9 +2117,14 @@ class X86_64ABIInfo : public SwiftABIInfo { return !getTarget().getTriple().isOSDarwin(); } - /// GCC classifies <1 x long long> as SSE but compatibility with older clang - // compilers require us to classify it as INTEGER. + /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to + /// classify it as INTEGER (for compatibility with older clang compilers). bool classifyIntegerMMXAsSSE() const { + // Clang <= 3.8 did not do this. + if (getCodeGenOpts().getClangABICompat() <= + CodeGenOptions::ClangABI::Ver3_8) + return false; + const llvm::Triple &Triple = getTarget().getTriple(); if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) return false; @@ -2240,23 +2278,28 @@ public: llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig; - if (getABIInfo().has64BitPointers()) - Sig = (0xeb << 0) | // jmp rel8 - (0x0a << 8) | // .+0x0c - ('F' << 16) | - ('T' << 24); - else - Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + unsigned Sig = (0xeb << 0) | // jmp rel8 + (0x06 << 8) | // .+0x08 + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. + llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -2303,7 +2346,8 @@ public: Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -2331,11 +2375,12 @@ static void addStackProbeSizeTargetAttribute(const Decl *D, } } -void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } @@ -2346,7 +2391,8 @@ public: : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; @@ -2375,12 +2421,22 @@ public: } }; -void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. + llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -3162,8 +3218,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, } } - llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); - + llvm::StructType *Result = llvm::StructType::get(Lo, Hi); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && @@ -3238,8 +3293,7 @@ classifyReturnType(QualType RetTy) const { case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), - llvm::Type::getX86_FP80Ty(getVMContext()), - nullptr); + llvm::Type::getX86_FP80Ty(getVMContext())); break; } @@ -3496,18 +3550,27 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned FreeSSERegs = IsRegCall ? 16 : 8; unsigned NeededInt, NeededSSE; - if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && - !FI.getReturnType()->getTypePtr()->isUnionType()) { - FI.getReturnInfo() = - classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); - if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { - FreeIntRegs -= NeededInt; - FreeSSERegs -= NeededSSE; - } else { - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - } - } else if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + if (!getCXXABI().classifyReturnType(FI)) { + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) { + // Complex Long Double Type is passed in Memory when Regcall + // calling convention is used. + const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>(); + if (getContext().getCanonicalType(CT->getElementType()) == + getContext().LongDoubleTy) + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -3735,7 +3798,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; - llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); + llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); @@ -3897,6 +3960,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, bool IsRegCall) const { unsigned Count = 0; for (auto &I : FI.arguments()) { + // Vectorcall in x64 only permits the first 6 arguments to be passed + // as XMM/YMM registers. if (Count < VectorcallMaxParamNumAsReg) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); else { @@ -3909,11 +3974,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, ++Count; } - Count = 0; for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); - ++Count; + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); } } @@ -3974,7 +4036,10 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { -bool IsSoftFloatABI; + bool IsSoftFloatABI; + + CharUnits getParamTypeAlignment(QualType Ty) const; + public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} @@ -3996,13 +4061,46 @@ public: bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; +} + +CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 + : 4); + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. + const Type *AlignTy = nullptr; + if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignTy = EltType; + } + if (AlignTy) + return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); + return CharUnits::fromQuantity(4); } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { + if (getTarget().getTriple().isOSDarwin()) { + auto TI = getContext().getTypeInfoInChars(Ty); + TI.second = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(4); + return emitVoidPtrVAArg(CGF, VAList, Ty, + classifyArgumentType(Ty).isIndirect(), TI, SlotSize, + /*AllowHigherAlign=*/true); + } + const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. @@ -4640,7 +4738,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); - CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); } else CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); @@ -4777,7 +4875,8 @@ class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: @@ -4815,10 +4914,14 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { @@ -4827,6 +4930,9 @@ private: bool isSwiftErrorInRegister() const override { return true; } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4835,7 +4941,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4844,6 +4950,22 @@ public: bool doesReturnSlotInterfereWithArgs() const override { return false; } }; + +class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { +public: + WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) + : AArch64TargetCodeGenInfo(CGT, K) {} + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { @@ -4893,10 +5015,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. - if (isEmptyRecord(getContext(), Ty, true)) { + uint64_t Size = getContext().getTypeSize(Ty); + bool IsEmpty = isEmptyRecord(getContext(), Ty, true); + if (IsEmpty || Size == 0) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); + // GNU C mode. The only argument that gets ignored is an empty one with size + // 0. + if (IsEmpty && Size == 0) + return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } @@ -4909,7 +5037,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { } // Aggregates <= 16 bytes are passed directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. @@ -4917,7 +5044,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { return coerceToIntArray(Ty, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(Ty); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4949,7 +5076,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { : ABIArgInfo::getDirect()); } - if (isEmptyRecord(getContext(), RetTy, true)) + uint64_t Size = getContext().getTypeSize(RetTy); + if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; @@ -4959,7 +5087,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. @@ -4967,7 +5094,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return coerceToIntArray(RetTy, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4995,6 +5122,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -5305,6 +5443,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -5383,6 +5529,8 @@ private: bool isSwiftErrorInRegister() const override { return true; } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5399,7 +5547,7 @@ public: } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; + return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, @@ -5417,7 +5565,10 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -5449,10 +5600,7 @@ public: // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; @@ -5462,7 +5610,8 @@ public: : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -5476,8 +5625,11 @@ public: }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } } @@ -5534,17 +5686,14 @@ void ARMABIInfo::setCCs() { // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. - switch (getABIKind()) { - case APCS: - case AAPCS16_VFP: - if (abiCC != getLLVMDefaultCC()) + + // The Run-time ABI for the ARM Architecture section 4.1.2 requires + // AEABI-complying FP helper functions to use the base AAPCS. + // These AEABI functions are expanded in the ARM llvm backend, all the builtin + // support functions emitted by clang such as the _Complex helpers follow the + // abiCC. + if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; - break; - case AAPCS: - case AAPCS_VFP: - BuiltinCC = llvm::CallingConv::ARM_AAPCS; - break; - } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, @@ -5898,6 +6047,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. @@ -5992,7 +6155,9 @@ public: : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; + private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. @@ -6046,9 +6211,11 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm_unreachable("NVPTX does not support varargs"); } -void NVPTXTargetCodeGenInfo:: -setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const{ +void NVPTXTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6484,14 +6651,17 @@ public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; } -void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { +void MSP430TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { // Handle 'interrupt' attribute: @@ -6550,10 +6720,21 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); + + if (FD->hasAttr<MipsLongCallAttr>()) + Fn->addFnAttr("long-call"); + else if (FD->hasAttr<MipsShortCallAttr>()) + Fn->addFnAttr("short-call"); + + // Other attributes do not have a meaning for declarations. + if (!IsForDefinition) + return; + if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } @@ -6561,6 +6742,11 @@ public: Fn->addFnAttr("nomips16"); } + if (FD->hasAttr<MicroMipsAttr>()) + Fn->addFnAttr("micromips"); + else if (FD->hasAttr<NoMicroMipsAttr>()) + Fn->addFnAttr("nomicromips"); + const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; @@ -6910,7 +7096,10 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const auto *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; auto *Fn = cast<llvm::Function>(GV); @@ -6938,11 +7127,15 @@ public: : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7038,13 +7231,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); @@ -7238,38 +7431,138 @@ public: namespace { class AMDGPUABIInfo final : public DefaultABIInfo { +private: + static const unsigned MaxNumRegsForArgsRet = 16; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override; + public: - explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : + DefaultABIInfo(CGT) {} -private: - ABIArgInfo classifyArgumentType(QualType Ty) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; }; +bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + return true; +} + +bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; +} + +/// Estimate number of registers the type will use when passed in registers. +unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. + QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. + if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + llvm::CallingConv::ID CC = FI.getCallingConvention(); + if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - unsigned CC = FI.getCallingConvention(); - for (auto &Arg : FI.arguments()) - if (CC == llvm::CallingConv::AMDGPU_KERNEL) - Arg.info = classifyArgumentType(Arg.type); - else - Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); + unsigned NumRegsLeft = MaxNumRegsForArgsRet; + for (auto &Arg : FI.arguments()) { + if (CC == llvm::CallingConv::AMDGPU_KERNEL) { + Arg.info = classifyKernelArgumentType(Arg.type); + } else { + Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); + } + } } -/// \brief Classify argument of given type \p Ty. -ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { - llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); - if (!StrTy) { - return DefaultABIInfo::classifyArgumentType(Ty); +ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = RetTy->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // Pack aggregates <= 4 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + } } + // Otherwise just do the default thing. + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + // Coerce single element structs to its element. - if (StrTy->getNumElements() == 1) { - return ABIArgInfo::getDirect(); - } + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we @@ -7277,34 +7570,116 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, + unsigned &NumRegsLeft) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Pack aggregates <= 8 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // XXX: Should this be i64 instead, and should the limit increase? + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + } + + // Otherwise just do the default thing. + ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; unsigned getOpenCLKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const override; + llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const override; }; } -static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); - void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *F = cast<llvm::Function>(GV); - if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); @@ -7343,8 +7718,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } - - appendOpenCLVersionMD(M); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -7369,6 +7742,49 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +LangAS +AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + +llvm::SyncScope::ID +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const { + StringRef Name; + switch (S) { + case SyncScope::OpenCLWorkGroup: + Name = "workgroup"; + break; + case SyncScope::OpenCLDevice: + Name = "agent"; + break; + case SyncScope::OpenCLAllSVMDevices: + Name = ""; + break; + case SyncScope::OpenCLSubGroup: + Name = "subgroup"; + } + return C.getOrInsertSyncScopeID(Name); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -8015,45 +8431,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} - void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; + } // End anonymous namespace. -/// Emit SPIR specific metadata: OpenCL and SPIR version. -void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the - // opencl.spir.version named metadata. - llvm::Metadata *SPIRVerElts[] = { - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))}; - llvm::NamedMDNode *SPIRVerMD = - M.getOrInsertNamedMetadata("opencl.spir.version"); - SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); - appendOpenCLVersionMD(CGM); -} - -static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the - // opencl.ocl.version named metadata node. - llvm::Metadata *OCLVerElts[] = { - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; - llvm::NamedMDNode *OCLVerMD = - M.getOrInsertNamedMetadata("opencl.ocl.version"); - OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + DefaultABIInfo SPIRABI(CGM.getTypes()); + SPIRABI.computeInfo(FI); +} +} } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -8435,6 +8824,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + return SetCGInfo( + new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } @@ -8564,3 +8956,108 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The kernel has the same function type as the block invoke function. Its +/// name is the name of the block invoke function postfixed with "_kernel". +/// It simply calls the block invoke function then returns. +llvm::Function * +TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + for (auto &P : InvokeFT->params()) + ArgTys.push_back(P); + auto &C = CGF.getLLVMContext(); + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + auto &Builder = CGF.Builder; + Builder.SetInsertPoint(BB); + llvm::SmallVector<llvm::Value *, 2> Args; + for (auto &A : F->args()) + Args.push_back(&A); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + return F; +} + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The type of the first argument (the block literal) is the struct type +/// of the block literal instead of a pointer type. The first argument +/// (block literal) is passed directly by value to the kernel. The kernel +/// allocates the same type of struct on stack and stores the block literal +/// to it and passes its pointer to the block invoke function. The kernel +/// has "enqueued-block" function attribute and kernel argument metadata. +llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( + CodeGenFunction &CGF, llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto &Builder = CGF.Builder; + auto &C = CGF.getLLVMContext(); + + auto *BlockTy = BlockLiteral->getType()->getPointerElementType(); + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; + llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgNames; + + ArgTys.push_back(BlockTy); + ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgNames.push_back(llvm::MDString::get(C, "block_literal")); + for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { + ArgTys.push_back(InvokeFT->getParamType(I)); + ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + ArgNames.push_back( + llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); + } + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + F->addFnAttr("enqueued-block"); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + Builder.SetInsertPoint(BB); + unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); + auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); + BlockPtr->setAlignment(BlockAlign); + Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); + auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); + llvm::SmallVector<llvm::Value *, 2> Args; + Args.push_back(Cast); + for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I) + Args.push_back(I); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + + F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); + F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); + F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); + F->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(C, ArgBaseTypeNames)); + F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); + if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) + F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); + + return F; +} |