Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r--  lib/CodeGen/TargetInfo.cpp  985
1 files changed, 741 insertions, 244 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 82f412f012..b1773b7090 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -14,6 +14,7 @@
#include "TargetInfo.h"
#include "ABIInfo.h"
+#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
@@ -22,7 +23,9 @@
#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
@@ -183,7 +186,11 @@ const TargetInfo &ABIInfo::getTarget() const {
return CGT.getTarget();
}
-bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); }
+const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
+ return CGT.getCodeGenOpts();
+}
+
+bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return false;
@@ -398,7 +405,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
}
unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
- return llvm::CallingConv::C;
+ // OpenCL kernels are called via an explicit runtime API with arguments
+ // set with clSetKernelArg(), not as normal sub-functions.
+ // Return SPIR_KERNEL by default as the kernel calling convention to
+ // ensure the fingerprint is fixed in such a way that each OpenCL argument
+ // gets exactly one matching argument in the produced kernel function's
+ // argument list. This makes a feasible implementation of clSetKernelArg()
+ // possible, even with aggregates. If we used the default C calling
+ // convention here, clSetKernelArg() might break depending on the
+ // target-specific conventions; different targets might split structs
+ // passed by value into multiple function arguments, for example.
+ return llvm::CallingConv::SPIR_KERNEL;
}
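
As a hedged illustration of why a fixed one-argument-per-parameter fingerprint matters for clSetKernelArg() (the struct name and signature below are assumptions, not taken from this patch):

    // With the plain C calling convention, a target may split a by-value
    // aggregate into several IR arguments, breaking the 1:1 index mapping
    // that clSetKernelArg() relies on.
    struct Pair { int a; int b; };
    void kernel_fn(Pair p, int *out);  // C CC: 'p' could become two i32 args
    // Under spir_kernel, 'p' stays a single argument, so
    // clSetKernelArg(krnl, /*arg_index=*/0, sizeof(Pair), &p) still refers
    // to 'p', and index 1 still refers to 'out'.
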
llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
@@ -406,13 +423,36 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &
return llvm::ConstantPointerNull::get(T);
}
+LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
+ assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ return D ? D->getType().getAddressSpace() : LangAS::Default;
+}
+
llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
- CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy,
- QualType DestTy) const {
+ CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr,
+ LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src,
- CGF.ConvertType(DestTy));
+ if (auto *C = dyn_cast<llvm::Constant>(Src))
+ return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy);
+}
+
+llvm::Constant *
+TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
+ LangAS SrcAddr, LangAS DestAddr,
+ llvm::Type *DestTy) const {
+ // Since target may map different address spaces in AST to the same address
+ // space, an address space conversion may end up as a bitcast.
+ return llvm::ConstantExpr::getPointerCast(Src, DestTy);
+}
+
+llvm::SyncScope::ID
+TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
+ return C.getOrInsertSyncScopeID(""); /* default sync scope */
}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
@@ -836,7 +876,10 @@ bool IsX86_MMXType(llvm::Type *IRType) {
static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) {
- if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) {
+ bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
+ .Cases("y", "&y", "^Ym", true)
+ .Default(false);
+ if (IsMMXCons && Ty->isVectorTy()) {
if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
// Invalid MMX constraint
return nullptr;
@@ -853,8 +896,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
+ if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
+ if (BT->getKind() == BuiltinType::LongDouble) {
+ if (&Context.getTargetInfo().getLongDoubleFormat() ==
+ &llvm::APFloat::x87DoubleExtended())
+ return false;
+ }
return true;
+ }
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
// registers specially.
@@ -942,8 +991,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
- const ABIArgInfo& current) const;
+
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1009,7 +1057,8 @@ public:
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
@@ -1038,14 +1087,14 @@ public:
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
- ('F' << 16) |
- ('T' << 24);
+ ('v' << 16) |
+ ('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "movl\t%ebp, %ebp"
- "\t\t## marker for objc_retainAutoreleaseReturnValue";
+ "\t\t// marker for objc_retainAutoreleaseReturnValue";
}
};
@@ -1527,27 +1576,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
return true;
}
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
- const ABIArgInfo &current) const {
- // Assumes vectorCall calling convention.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
-
- if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- // HVA types get passed directly in registers if there is room.
- State.FreeSSERegs -= NumElts;
- return getDirectX86Hva();
- }
- // If there's no room, the HVA gets passed as normal indirect
- // structure.
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- return current;
-}
-
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
@@ -1566,35 +1594,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets, regcall uses some of the HVA rules.
+ // Regcall uses the concept of a homogeneous vector aggregate, similar
+ // to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if ((State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) &&
+ if (State.CC == llvm::CallingConv::X86_RegCall &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.CC == llvm::CallingConv::X86_RegCall) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
- return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
-
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
- if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
- // Actual floating-point types get registers first time through if
- // there is registers available
- State.FreeSSERegs -= NumElts;
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
- } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
- // HVA Types only get registers after everything else has been
- // set, so it gets set as indirect for now.
- return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
- }
+ return ABIArgInfo::getExpand();
}
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
@@ -1675,31 +1688,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
bool &UsedInAlloca) const {
- // Vectorcall only allows the first 6 parameters to be passed in registers,
- // and homogeneous vector aggregates are only put into registers as a second
- // priority.
- unsigned Count = 0;
- CCState ZeroState = State;
- ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
- // HVAs must be done as a second priority for registers, so the deferred
- // items are dealt with by going through the pattern a second time.
+ // Vectorcall on x86 works subtly differently from x64, so the scheme here
+ // is a bit different from the x64 version. First, all vector types (but
+ // not HVAs) are assigned, with the first 6 ending up in the YMM0-5 or
+ // XMM0-5 registers. This differs from the x64 implementation, where the
+ // first 6 parameters by INDEX get registers.
+ // After that, integers AND HVAs are assigned left to right in the same
+ // pass. Integers are passed in ECX/EDX if one is available (in order).
+ // HVAs first take up the remaining YMM/XMM registers. If insufficient
+ // registers remain but an integer register (ECX/EDX) is available, the
+ // argument is passed in that; otherwise it goes on the stack.
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = classifyArgumentType(I.type, State);
- else
- // Parameters after the 6th cannot be passed in registers,
- // so pretend there are no registers left for them.
- I.info = classifyArgumentType(I.type, ZeroState);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- ++Count;
+ // First pass: do all the vector types.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = ABIArgInfo::getDirect();
+ } else {
+ I.info = classifyArgumentType(Ty, State);
+ }
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
- Count = 0;
- // Go through the arguments a second time to get HVAs registers if there
- // are still some available.
+
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, State, I.info);
- ++Count;
+ // Second pass, do the rest!
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+ if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+ // Assign true HVAs (non vector/native FP types).
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = getDirectX86Hva();
+ } else {
+ I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ } else if (!IsHva) {
+ // Assign all Non-HVAs, so this will exclude Vector/FP args.
+ I.info = classifyArgumentType(Ty, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
}
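
As a sketch of how the two passes above play out (the declaration and the per-argument outcomes are illustrative assumptions for an x86-32 MSVC-style target, not compiler-verified output):

    struct HVA2 { double x, y; };      // homogeneous FP aggregate
    void __vectorcall f(double a,      // pass 1: takes an SSE register
                        int b,         // pass 2: integer register (ECX/EDX)
                        HVA2 c);       // pass 2: remaining SSE registers if
                                       //         enough are free, else indirect
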
@@ -1882,7 +1917,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
case llvm::Triple::DragonFly:
case llvm::Triple::FreeBSD:
case llvm::Triple::OpenBSD:
- case llvm::Triple::Bitrig:
case llvm::Triple::Win32:
return true;
default:
@@ -1890,9 +1924,11 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
}
}
-void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
+void X86_32TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
// Get the LLVM function.
@@ -1901,10 +1937,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
// Now add the 'alignstack' attribute with a value of 16.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(16);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
@@ -2084,9 +2117,14 @@ class X86_64ABIInfo : public SwiftABIInfo {
return !getTarget().getTriple().isOSDarwin();
}
- /// GCC classifies <1 x long long> as SSE but compatibility with older clang
- // compilers require us to classify it as INTEGER.
+ /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
+ /// classify it as INTEGER (for compatibility with older clang compilers).
bool classifyIntegerMMXAsSSE() const {
+ // Clang <= 3.8 did not do this.
+ if (getCodeGenOpts().getClangABICompat() <=
+ CodeGenOptions::ClangABI::Ver3_8)
+ return false;
+
const llvm::Triple &Triple = getTarget().getTriple();
if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
return false;
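
A small example of the kind of type this hook governs (the typedef and function are illustrative; -fclang-abi-compat=3.8 is the driver option that populates the getClangABICompat() value checked above):

    typedef long long v1i64 __attribute__((vector_size(8)));
    v1i64 id(v1i64 v) { return v; }  // classified as SSE by default; INTEGER
                                     // again under -fclang-abi-compat=3.8
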
@@ -2240,23 +2278,28 @@ public:
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
- unsigned Sig;
- if (getABIInfo().has64BitPointers())
- Sig = (0xeb << 0) | // jmp rel8
- (0x0a << 8) | // .+0x0c
- ('F' << 16) |
- ('T' << 24);
- else
- Sig = (0xeb << 0) | // jmp rel8
- (0x06 << 8) | // .+0x08
- ('F' << 16) |
- ('T' << 24);
+ unsigned Sig = (0xeb << 0) | // jmp rel8
+ (0x06 << 8) | // .+0x08
+ ('v' << 16) |
+ ('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+ // Get the LLVM function.
+ auto *Fn = cast<llvm::Function>(GV);
+
+ // Now add the 'alignstack' attribute with a value of 16.
+ llvm::AttrBuilder B;
+ B.addStackAlignmentAttr(16);
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
@@ -2303,7 +2346,8 @@ public:
Win32StructABI, NumRegisterParameters, false) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -2331,11 +2375,12 @@ static void addStackProbeSizeTargetAttribute(const Decl *D,
}
}
-void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
-
+void WinX86_32TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
addStackProbeSizeTargetAttribute(D, GV, CGM);
}
@@ -2346,7 +2391,8 @@ public:
: TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
@@ -2375,12 +2421,22 @@ public:
}
};
-void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
-
+void WinX86_64TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+ // Get the LLVM function.
+ auto *Fn = cast<llvm::Function>(GV);
+
+ // Now add the 'alignstack' attribute with a value of 16.
+ llvm::AttrBuilder B;
+ B.addStackAlignmentAttr(16);
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
@@ -3162,8 +3218,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
}
}
- llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr);
-
+ llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
// Verify that the second element is at an 8-byte offset.
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
@@ -3238,8 +3293,7 @@ classifyReturnType(QualType RetTy) const {
case ComplexX87:
assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
- llvm::Type::getX86_FP80Ty(getVMContext()),
- nullptr);
+ llvm::Type::getX86_FP80Ty(getVMContext()));
break;
}
@@ -3496,18 +3550,27 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
unsigned NeededInt, NeededSSE;
- if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
- !FI.getReturnType()->getTypePtr()->isUnionType()) {
- FI.getReturnInfo() =
- classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE);
- if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
- FreeIntRegs -= NeededInt;
- FreeSSERegs -= NeededSSE;
- } else {
- FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
- }
- } else if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ if (!getCXXABI().classifyReturnType(FI)) {
+ if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
+ !FI.getReturnType()->getTypePtr()->isUnionType()) {
+ FI.getReturnInfo() =
+ classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE);
+ if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
+ FreeIntRegs -= NeededInt;
+ FreeSSERegs -= NeededSSE;
+ } else {
+ FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
+ }
+ } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) {
+ // Complex Long Double Type is passed in Memory when Regcall
+ // calling convention is used.
+ const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>();
+ if (getContext().getCanonicalType(CT->getElementType()) ==
+ getContext().LongDoubleTy)
+ FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
+ } else
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ }
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
@@ -3735,7 +3798,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
CharUnits::fromQuantity(16));
llvm::Type *DoubleTy = CGF.DoubleTy;
- llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr);
+ llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy);
llvm::Value *V;
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
@@ -3897,6 +3960,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
bool IsRegCall) const {
unsigned Count = 0;
for (auto &I : FI.arguments()) {
+ // Vectorcall in x64 only permits the first 6 arguments to be passed
+ // in XMM/YMM registers.
if (Count < VectorcallMaxParamNumAsReg)
I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
else {
@@ -3909,11 +3974,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
++Count;
}
- Count = 0;
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
- ++Count;
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
}
}
@@ -3974,7 +4036,10 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
-bool IsSoftFloatABI;
+ bool IsSoftFloatABI;
+
+ CharUnits getParamTypeAlignment(QualType Ty) const;
+
public:
PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI)
: DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {}
@@ -3996,13 +4061,46 @@ public:
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
+}
+
+CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
+ // Complex types are passed just like their elements
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+ Ty = CTy->getElementType();
+
+ if (Ty->isVectorType())
+ return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
+ : 4);
+
+ // For single-element float/vector structs, we consider the whole type
+ // to have the same alignment requirements as its single element.
+ const Type *AlignTy = nullptr;
+ if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
+ const BuiltinType *BT = EltType->getAs<BuiltinType>();
+ if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
+ (BT && BT->isFloatingPoint()))
+ AlignTy = EltType;
+ }
+ if (AlignTy)
+ return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
+ return CharUnits::fromQuantity(4);
}
// TODO: this implementation is now likely redundant with
// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
QualType Ty) const {
+ if (getTarget().getTriple().isOSDarwin()) {
+ auto TI = getContext().getTypeInfoInChars(Ty);
+ TI.second = getParamTypeAlignment(Ty);
+
+ CharUnits SlotSize = CharUnits::fromQuantity(4);
+ return emitVoidPtrVAArg(CGF, VAList, Ty,
+ classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
+ /*AllowHigherAlign=*/true);
+ }
+
const unsigned OverflowLimit = 8;
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
// TODO: Implement this. For now ignore.
@@ -4640,7 +4738,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
llvm::Type *CoerceTy;
if (Bits > GPRBits) {
CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
- CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
+ CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
} else
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
@@ -4777,7 +4875,8 @@ class AArch64ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
AAPCS = 0,
- DarwinPCS
+ DarwinPCS,
+ Win64
};
private:
@@ -4815,10 +4914,14 @@ private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
- return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
- : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+ return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
+ : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
+ : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
}
+ Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
bool shouldPassIndirectlyForSwift(CharUnits totalSize,
ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
@@ -4827,6 +4930,9 @@ private:
bool isSwiftErrorInRegister() const override {
return true;
}
+
+ bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+ unsigned elts) const override;
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4835,7 +4941,7 @@ public:
: TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
@@ -4844,6 +4950,22 @@ public:
bool doesReturnSlotInterfereWithArgs() const override { return false; }
};
+
+class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
+public:
+ WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
+ : AArch64TargetCodeGenInfo(CGT, K) {}
+
+ void getDependentLibraryOption(llvm::StringRef Lib,
+ llvm::SmallString<24> &Opt) const override {
+ Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
+ }
+
+ void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
+ llvm::SmallString<32> &Opt) const override {
+ Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
+ }
+};
}
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
@@ -4893,10 +5015,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
// Empty records are always ignored on Darwin, but actually passed in C++ mode
// elsewhere for GNU compatibility.
- if (isEmptyRecord(getContext(), Ty, true)) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
+ if (IsEmpty || Size == 0) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
+ // GNU C mode. The only argument that gets ignored is an empty one with size
+ // 0.
+ if (IsEmpty && Size == 0)
+ return ABIArgInfo::getIgnore();
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
}
@@ -4909,7 +5037,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
}
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
- uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 128) {
// On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
// same size and alignment.
@@ -4917,7 +5044,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(Ty);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -4949,7 +5076,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
: ABIArgInfo::getDirect());
}
- if (isEmptyRecord(getContext(), RetTy, true))
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
@@ -4959,7 +5087,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
return ABIArgInfo::getDirect();
// Aggregates <= 16 bytes are returned directly in registers or on the stack.
- uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 128) {
// On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
// same size and alignment.
@@ -4967,7 +5094,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
unsigned Alignment = getContext().getTypeAlign(RetTy);
- Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
@@ -4995,6 +5122,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
return false;
}
+bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
+ llvm::Type *eltTy,
+ unsigned elts) const {
+ if (!llvm::isPowerOf2_32(elts))
+ return false;
+ if (totalSize.getQuantity() != 8 &&
+ (totalSize.getQuantity() != 16 || elts == 1))
+ return false;
+ return true;
+}
+
bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS64 must have base types of a floating
// point type or a short-vector type. This is the same as the 32-bit ABI,
@@ -5305,6 +5443,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
TyInfo, SlotSize, /*AllowHigherAlign*/ true);
}
+Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ CGF.getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(8),
+ /*allowHigherAlign*/ false);
+}
+
//===----------------------------------------------------------------------===//
// ARM ABI Implementation
//===----------------------------------------------------------------------===//
@@ -5383,6 +5529,8 @@ private:
bool isSwiftErrorInRegister() const override {
return true;
}
+ bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
+ unsigned elts) const override;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5399,7 +5547,7 @@ public:
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue";
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
@@ -5417,7 +5565,10 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
@@ -5449,10 +5600,7 @@ public:
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(8);
- Fn->addAttributes(llvm::AttributeSet::FunctionIndex,
- llvm::AttributeSet::get(CGM.getLLVMContext(),
- llvm::AttributeSet::FunctionIndex,
- B));
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
};
@@ -5462,7 +5610,8 @@ public:
: ARMTargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -5476,8 +5625,11 @@ public:
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
- ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
addStackProbeSizeTargetAttribute(D, GV, CGM);
}
}
@@ -5534,17 +5686,14 @@ void ARMABIInfo::setCCs() {
// AAPCS apparently requires runtime support functions to be soft-float, but
// that's almost certainly for historic reasons (Thumb1 not supporting VFP
// most likely). It's more convenient for AAPCS16_VFP to be hard-float.
- switch (getABIKind()) {
- case APCS:
- case AAPCS16_VFP:
- if (abiCC != getLLVMDefaultCC())
+
+ // The Run-time ABI for the ARM Architecture, section 4.1.2, requires
+ // AEABI-complying FP helper functions to use the base AAPCS.
+ // Those AEABI functions are expanded in the ARM LLVM backend; all the
+ // builtin support functions emitted by clang, such as the _Complex
+ // helpers, follow the abiCC instead.
+ if (abiCC != getLLVMDefaultCC())
BuiltinCC = abiCC;
- break;
- case AAPCS:
- case AAPCS_VFP:
- BuiltinCC = llvm::CallingConv::ARM_AAPCS;
- break;
- }
}
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
@@ -5898,6 +6047,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
return false;
}
+bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
+ llvm::Type *eltTy,
+ unsigned numElts) const {
+ if (!llvm::isPowerOf2_32(numElts))
+ return false;
+ unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
+ if (size > 64)
+ return false;
+ if (vectorSize.getQuantity() != 8 &&
+ (vectorSize.getQuantity() != 16 || numElts == 1))
+ return false;
+ return true;
+}
+
bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
// double, or 64-bit or 128-bit vectors.
@@ -5992,7 +6155,9 @@ public:
: TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
+
private:
// Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
@@ -6046,9 +6211,11 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
llvm_unreachable("NVPTX does not support varargs");
}
-void NVPTXTargetCodeGenInfo::
-setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const{
+void NVPTXTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -6484,14 +6651,17 @@ public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
};
}
-void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {
+void MSP430TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
// Handle 'interrupt' attribute:
@@ -6550,10 +6720,21 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
+
+ if (FD->hasAttr<MipsLongCallAttr>())
+ Fn->addFnAttr("long-call");
+ else if (FD->hasAttr<MipsShortCallAttr>())
+ Fn->addFnAttr("short-call");
+
+ // Other attributes do not have a meaning for declarations.
+ if (!IsForDefinition)
+ return;
+
if (FD->hasAttr<Mips16Attr>()) {
Fn->addFnAttr("mips16");
}
@@ -6561,6 +6742,11 @@ public:
Fn->addFnAttr("nomips16");
}
+ if (FD->hasAttr<MicroMipsAttr>())
+ Fn->addFnAttr("micromips");
+ else if (FD->hasAttr<NoMicroMipsAttr>())
+ Fn->addFnAttr("nomicromips");
+
const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
if (!Attr)
return;
@@ -6910,7 +7096,10 @@ public:
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
auto *Fn = cast<llvm::Function>(GV);
@@ -6938,11 +7127,15 @@ public:
: DefaultTargetCodeGenInfo(CGT) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
};
void TCETargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -7038,13 +7231,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
- return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
-
uint64_t Size = getContext().getTypeSize(Ty);
if (Size > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
@@ -7238,38 +7431,138 @@ public:
namespace {
class AMDGPUABIInfo final : public DefaultABIInfo {
+private:
+ static const unsigned MaxNumRegsForArgsRet = 16;
+
+ unsigned numRegsForType(QualType Ty) const;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+ bool isHomogeneousAggregateSmallEnough(const Type *Base,
+ uint64_t Members) const override;
+
public:
- explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+ explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
+ DefaultABIInfo(CGT) {}
-private:
- ABIArgInfo classifyArgumentType(QualType Ty) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
+bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+ return true;
+}
+
+bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
+ const Type *Base, uint64_t Members) const {
+ uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
+
+ // Homogeneous Aggregates may occupy at most 16 registers.
+ return Members * NumRegs <= MaxNumRegsForArgsRet;
+}
+
+/// Estimate number of registers the type will use when passed in registers.
+unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
+ unsigned NumRegs = 0;
+
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // Compute from the number of elements. The reported size is based on the
+ // in-memory size, which includes the padding 4th element for 3-vectors.
+ QualType EltTy = VT->getElementType();
+ unsigned EltSize = getContext().getTypeSize(EltTy);
+
+ // 16-bit element vectors should be passed as packed.
+ if (EltSize == 16)
+ return (VT->getNumElements() + 1) / 2;
+
+ unsigned EltNumRegs = (EltSize + 31) / 32;
+ return EltNumRegs * VT->getNumElements();
+ }
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ assert(!RD->hasFlexibleArrayMember());
+
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+ NumRegs += numRegsForType(FieldTy);
+ }
+
+ return NumRegs;
+ }
+
+ return (getContext().getTypeSize(Ty) + 31) / 32;
+}
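
A worked example of the estimate above, assuming 32-bit register units (illustrative arithmetic, not part of the patch):

    static_assert((32 + 31) / 32 == 1, "float  -> 1 register");
    static_assert((64 + 31) / 32 == 2, "double -> 2 registers");
    static_assert((4 + 1) / 2 == 2,    "half4  -> 2 registers (packed)");
    static_assert(1 * 3 == 3,          "float3 -> 3 registers (pad element not counted)");
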
+
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ llvm::CallingConv::ID CC = FI.getCallingConvention();
+
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- unsigned CC = FI.getCallingConvention();
- for (auto &Arg : FI.arguments())
- if (CC == llvm::CallingConv::AMDGPU_KERNEL)
- Arg.info = classifyArgumentType(Arg.type);
- else
- Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type);
+ unsigned NumRegsLeft = MaxNumRegsForArgsRet;
+ for (auto &Arg : FI.arguments()) {
+ if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
+ Arg.info = classifyKernelArgumentType(Arg.type);
+ } else {
+ Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
+ }
+ }
}
-/// \brief Classify argument of given type \p Ty.
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
- llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
- if (!StrTy) {
- return DefaultABIInfo::classifyArgumentType(Ty);
+ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
+ if (isAggregateTypeForABI(RetTy)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // returned by value.
+ if (!getRecordArgABI(RetTy, getCXXABI())) {
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just return a regular value.
+ if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ }
+
+ // Pack aggregates <= 8 bytes into a single VGPR or a pair.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ if (Size <= 64) {
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
+ return ABIArgInfo::getDirect();
+ }
}
+ // Otherwise just do the default thing.
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+/// For kernels all parameters are really passed in a special buffer. It doesn't
+/// make sense to pass anything byval, so everything must be direct.
+ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // TODO: Can we omit empty structs?
+
+ // Coerce single-element structs to their element.
- if (StrTy->getNumElements() == 1) {
- return ABIArgInfo::getDirect();
- }
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
@@ -7277,34 +7570,116 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
}
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ unsigned &NumRegsLeft) const {
+ assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
+
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // passed by value.
+ if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just pass a regular value. TODO: We
+ // could do reasonable-size multiple-element structs too, using getExpand(),
+ // though watch out for things like bitfields.
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyArgumentType(Ty);
+ }
+
+ // Pack aggregates <= 8 bytes into single VGPR or pair.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ // XXX: Should this be i64 instead, and should the limit increase?
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (NumRegsLeft > 0) {
+ unsigned NumRegs = numRegsForType(Ty);
+ if (NumRegsLeft >= NumRegs) {
+ NumRegsLeft -= NumRegs;
+ return ABIArgInfo::getDirect();
+ }
+ }
+ }
+
+ // Otherwise just do the default thing.
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
+ if (!ArgInfo.isIndirect()) {
+ unsigned NumRegs = numRegsForType(Ty);
+ NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+ }
+
+ return ArgInfo;
+}
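
Some illustrative declarations (assumed for the sketch, not from the patch) showing how the rules above coerce small aggregates for non-kernel functions:

    struct RGB  { char r, g, b; };    // 24 bits  -> direct i32
    struct Pair { int i; float f; };  // 64 bits  -> direct [2 x i32]
    struct Big  { float v[8]; };      // 256 bits -> direct while at least 8
                                      //             registers remain, else the
                                      //             default (indirect) path
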
+
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
unsigned getOpenCLKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
+
+ LangAS getASTAllocaAddressSpace() const override {
+ return getLangASFromTargetAS(
+ getABIInfo().getDataLayout().getAllocaAddrSpace());
+ }
+ LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const override;
+ llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const override;
+ llvm::Function *
+ createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *BlockInvokeFunc,
+ llvm::Value *BlockLiteral) const override;
};
}
-static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
-
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
- const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
llvm::Function *F = cast<llvm::Function>(GV);
- if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
- unsigned Min = Attr->getMin();
- unsigned Max = Attr->getMax();
+ const auto *ReqdWGS = M.getLangOpts().OpenCL ?
+ FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+ const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+ if (ReqdWGS || FlatWGS) {
+ unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
+ unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
+ if (ReqdWGS && Min == 0 && Max == 0)
+ Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
@@ -7343,8 +7718,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
-
- appendOpenCLVersionMD(M);
}
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -7369,6 +7742,49 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}
+LangAS
+AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
+ assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ LangAS DefaultGlobalAS = getLangASFromTargetAS(
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
+ if (!D)
+ return DefaultGlobalAS;
+
+ LangAS AddrSpace = D->getType().getAddressSpace();
+ assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
+ if (AddrSpace != LangAS::Default)
+ return AddrSpace;
+
+ if (CGM.isTypeConstant(D->getType(), false)) {
+ if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
+ return ConstAS.getValue();
+ }
+ return DefaultGlobalAS;
+}
+
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const {
+ StringRef Name;
+ switch (S) {
+ case SyncScope::OpenCLWorkGroup:
+ Name = "workgroup";
+ break;
+ case SyncScope::OpenCLDevice:
+ Name = "agent";
+ break;
+ case SyncScope::OpenCLAllSVMDevices:
+ Name = "";
+ break;
+ case SyncScope::OpenCLSubGroup:
+ Name = "subgroup";
+ }
+ return C.getOrInsertSyncScopeID(Name);
+}
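
As an assumed example of how this mapping surfaces (the builtin call is standard OpenCL 2.0, but the lowering described is a sketch, not verified output): an atomic such as atomic_fetch_add_explicit(p, 1, memory_order_relaxed, memory_scope_work_group) ends up as an atomicrmw carrying syncscope("workgroup"); memory_scope_device maps to syncscope("agent"), and memory_scope_all_svm_devices maps to the default (empty) sync scope.
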
+
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
@@ -8015,45 +8431,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
- void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
};
+
} // End anonymous namespace.
-/// Emit SPIR specific metadata: OpenCL and SPIR version.
-void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
- // opencl.spir.version named metadata.
- llvm::Metadata *SPIRVerElts[] = {
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))};
- llvm::NamedMDNode *SPIRVerMD =
- M.getOrInsertNamedMetadata("opencl.spir.version");
- SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
- appendOpenCLVersionMD(CGM);
-}
-
-static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
- // opencl.ocl.version named metadata node.
- llvm::Metadata *OCLVerElts[] = {
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))};
- llvm::NamedMDNode *OCLVerMD =
- M.getOrInsertNamedMetadata("opencl.ocl.version");
- OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+ DefaultABIInfo SPIRABI(CGM.getTypes());
+ SPIRABI.computeInfo(FI);
+}
+}
}
unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -8435,6 +8824,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
+ else if (Triple.isOSWindows())
+ return SetCGInfo(
+ new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64));
return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
@@ -8564,3 +8956,108 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
}
}
+
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The kernel has the same function type as the block invoke function. Its
+/// name is the name of the block invoke function postfixed with "_kernel".
+/// It simply calls the block invoke function then returns.
+llvm::Function *
+TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ for (auto &P : InvokeFT->params())
+ ArgTys.push_back(P);
+ auto &C = CGF.getLLVMContext();
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ auto &Builder = CGF.Builder;
+ Builder.SetInsertPoint(BB);
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ for (auto &A : F->args())
+ Args.push_back(&A);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+ return F;
+}
+
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The type of the first argument (the block literal) is the struct type
+/// of the block literal instead of a pointer type. The first argument
+/// (block literal) is passed directly by value to the kernel. The kernel
+/// allocates a struct of the same type on the stack, stores the block
+/// literal to it, and passes its pointer to the block invoke function. The
+/// kernel has the "enqueued-block" function attribute and kernel argument
+/// metadata.
+llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
+ CodeGenFunction &CGF, llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto &Builder = CGF.Builder;
+ auto &C = CGF.getLLVMContext();
+
+ auto *BlockTy = BlockLiteral->getType()->getPointerElementType();
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
+
+ ArgTys.push_back(BlockTy);
+ ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
+ for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
+ ArgTys.push_back(InvokeFT->getParamType(I));
+ ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ ArgNames.push_back(
+ llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
+ }
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ F->addFnAttr("enqueued-block");
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ Builder.SetInsertPoint(BB);
+ unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
+ auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
+ BlockPtr->setAlignment(BlockAlign);
+ Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
+ auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ Args.push_back(Cast);
+ for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
+ Args.push_back(I);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+
+ F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
+ F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
+ F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
+ F->setMetadata("kernel_arg_base_type",
+ llvm::MDNode::get(C, ArgBaseTypeNames));
+ F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
+ if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
+ F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
+
+ return F;
+}
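
Roughly, for an invoke function whose first parameter is the block-literal pointer and whose second is a local pointer, the generated wrapper has the following shape (an illustrative sketch with assumed names and types, not verbatim compiler output):

    define internal void @foo_block_invoke_kernel(%block.literal %lit,
                                                  i8 addrspace(3)* %lp) {
    entry:
      %tmp = alloca %block.literal
      store %block.literal %lit, %block.literal* %tmp
      %cast = bitcast %block.literal* %tmp to i8*
      call void @foo_block_invoke(i8* %cast, i8 addrspace(3)* %lp)
      ret void
    }

together with the "enqueued-block" attribute and the kernel_arg_* metadata attached above.
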