Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/BackendUtil.cpp                      |   50
-rw-r--r--  lib/CodeGen/CGAtomic.cpp                         |    2
-rw-r--r--  lib/CodeGen/CGBlocks.cpp                         |  262
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp                        | 1075
-rw-r--r--  lib/CodeGen/CGCUDANV.cpp                         |   33
-rw-r--r--  lib/CodeGen/CGCXX.cpp                            |    4
-rw-r--r--  lib/CodeGen/CGCall.cpp                           |   93
-rw-r--r--  lib/CodeGen/CGCall.h                             |   14
-rw-r--r--  lib/CodeGen/CGClass.cpp                          |   22
-rw-r--r--  lib/CodeGen/CGCleanup.cpp                        |    4
-rw-r--r--  lib/CodeGen/CGDebugInfo.cpp                      |  286
-rw-r--r--  lib/CodeGen/CGDebugInfo.h                        |   37
-rw-r--r--  lib/CodeGen/CGDecl.cpp                           |  112
-rw-r--r--  lib/CodeGen/CGDeclCXX.cpp                        |   91
-rw-r--r--  lib/CodeGen/CGException.cpp                      |   16
-rw-r--r--  lib/CodeGen/CGExpr.cpp                           |   63
-rw-r--r--  lib/CodeGen/CGExprAgg.cpp                        |    9
-rw-r--r--  lib/CodeGen/CGExprCXX.cpp                        |   27
-rw-r--r--  lib/CodeGen/CGExprComplex.cpp                    |    8
-rw-r--r--  lib/CodeGen/CGExprConstant.cpp                   |   36
-rw-r--r--  lib/CodeGen/CGExprScalar.cpp                     |  434
-rw-r--r--  lib/CodeGen/CGLoopInfo.cpp                       |   10
-rw-r--r--  lib/CodeGen/CGNonTrivialStruct.cpp               |    7
-rw-r--r--  lib/CodeGen/CGObjC.cpp                           |  217
-rw-r--r--  lib/CodeGen/CGObjCMac.cpp                        |   20
-rw-r--r--  lib/CodeGen/CGObjCRuntime.cpp                    |    2
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.cpp                  |   40
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.h                    |    4
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.cpp                  |  334
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.h                    |   79
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp             | 1832
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.h               |   82
-rw-r--r--  lib/CodeGen/CGRecordLayoutBuilder.cpp            |    2
-rw-r--r--  lib/CodeGen/CGStmt.cpp                           |   13
-rw-r--r--  lib/CodeGen/CGStmtOpenMP.cpp                     |  184
-rw-r--r--  lib/CodeGen/CGVTables.cpp                        |   13
-rw-r--r--  lib/CodeGen/CGValue.h                            |    5
-rw-r--r--  lib/CodeGen/CMakeLists.txt                       |    1
-rw-r--r--  lib/CodeGen/CodeGenABITypes.cpp                  |    1
-rw-r--r--  lib/CodeGen/CodeGenAction.cpp                    |   16
-rw-r--r--  lib/CodeGen/CodeGenFunction.cpp                  |   95
-rw-r--r--  lib/CodeGen/CodeGenFunction.h                    |   66
-rw-r--r--  lib/CodeGen/CodeGenModule.cpp                    |  427
-rw-r--r--  lib/CodeGen/CodeGenModule.h                      |   27
-rw-r--r--  lib/CodeGen/CodeGenPGO.h                         |    1
-rw-r--r--  lib/CodeGen/CodeGenTBAA.cpp                      |    2
-rw-r--r--  lib/CodeGen/CodeGenTypes.cpp                     |    3
-rw-r--r--  lib/CodeGen/CodeGenTypes.h                       |    1
-rw-r--r--  lib/CodeGen/ConstantEmitter.h                    |    3
-rw-r--r--  lib/CodeGen/CoverageMappingGen.cpp               |   73
-rw-r--r--  lib/CodeGen/CoverageMappingGen.h                 |    1
-rw-r--r--  lib/CodeGen/ItaniumCXXABI.cpp                    |   73
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.cpp                 |   17
-rw-r--r--  lib/CodeGen/MicrosoftCXXABI.cpp                  |   11
-rw-r--r--  lib/CodeGen/ModuleBuilder.cpp                    |    2
-rw-r--r--  lib/CodeGen/ObjectFilePCHContainerOperations.cpp |    5
-rw-r--r--  lib/CodeGen/SwiftCallingConv.cpp                 |   40
-rw-r--r--  lib/CodeGen/TargetInfo.cpp                       |  211
-rw-r--r--  lib/CodeGen/VarBypassDetector.cpp                |    2
59 files changed, 4740 insertions(+), 1860 deletions(-)
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 45be10f005..52fc08de9b 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/BackendUtil.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetOptions.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearchOptions.h"
@@ -37,6 +37,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -235,11 +236,12 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ bool UseOdrIndicator = CGOpts.SanitizeAddressUseOdrIndicator;
bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts);
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
UseAfterScope));
PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover,
- UseGlobalsGC));
+ UseGlobalsGC, UseOdrIndicator));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -247,7 +249,8 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
PM.add(createAddressSanitizerFunctionPass(
/*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false));
PM.add(createAddressSanitizerModulePass(
- /*CompileKernel*/ true, /*Recover*/ true));
+ /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true,
+ /*UseOdrIndicator*/ false));
}
static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -428,7 +431,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
switch (LangOpts.getDefaultFPContractMode()) {
case LangOptions::FPC_Off:
// Preserve any contraction performed by the front-end. (Strict performs
- // splitting of the muladd instrinsic in the backend.)
+ // splitting of the muladd intrinsic in the backend.)
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
break;
case LangOptions::FPC_On:
@@ -468,7 +471,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
Options.EmitAddrsig = CodeGenOpts.Addrsig;
- if (CodeGenOpts.EnableSplitDwarf)
+ if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
@@ -503,6 +506,8 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+ Options.Filter = CodeGenOpts.ProfileFilterFiles;
+ Options.Exclude = CodeGenOpts.ProfileExcludeFiles;
Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
return Options;
}
@@ -832,7 +837,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
default:
- if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ if (!CodeGenOpts.SplitDwarfFile.empty() &&
+ (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) {
DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
if (!DwoOS)
return;
@@ -930,18 +936,21 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
? DefaultProfileGenName
: CodeGenOpts.InstrProfileOutput,
- "", "", true, CodeGenOpts.DebugInfoForProfiling);
+ "", "", "", true,
+ CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.hasProfileIRUse())
// -fprofile-use.
- PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false,
+ PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "",
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (!CodeGenOpts.SampleProfileFile.empty())
// -fprofile-sample-use
- PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false,
+ PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile,
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.DebugInfoForProfiling)
// -fdebug-info-for-profiling
- PGOOpt = PGOOptions("", "", "", false, true);
+ PGOOpt = PGOOptions("", "", "", "", false, true);
PassBuilder PB(TM.get(), PGOOpt);
@@ -1130,6 +1139,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
const LangOptions &LOpts,
std::unique_ptr<raw_pwrite_stream> OS,
std::string SampleProfile,
+ std::string ProfileRemapping,
BackendAction Action) {
StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
@@ -1147,15 +1157,14 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
continue;
auto GUID = GlobalList.first;
- assert(GlobalList.second.SummaryList.size() == 1 &&
- "Expected individual combined index to have one summary per GUID");
- auto &Summary = GlobalList.second.SummaryList[0];
- // Skip the summaries for the importing module. These are included to
- // e.g. record required linkage changes.
- if (Summary->modulePath() == M->getModuleIdentifier())
- continue;
- // Add an entry to provoke importing by thinBackend.
- ImportList[Summary->modulePath()].insert(GUID);
+ for (auto &Summary : GlobalList.second.SummaryList) {
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == M->getModuleIdentifier())
+ continue;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
+ }
}
std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
@@ -1202,6 +1211,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.CGOptLevel = getCGOptLevel(CGOpts);
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
+ Conf.ProfileRemapping = std::move(ProfileRemapping);
Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
Conf.DebugPassManager = CGOpts.DebugPassManager;
Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness;
@@ -1268,7 +1278,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
if (!CombinedIndex->skipModuleByDistributedBackend()) {
runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
LOpts, std::move(OS), CGOpts.SampleProfileFile,
- Action);
+ CGOpts.ProfileRemappingFile, Action);
return;
}
// Distributed indexing detected that nothing from the module is needed
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 9379c835d3..24056a449d 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -18,7 +18,7 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index dae148cbd1..6631bfb0df 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -175,7 +175,7 @@ static std::string getBlockDescriptorName(const CGBlockInfo &BlockInfo,
/// unsigned long reserved;
/// unsigned long size; // size of Block_literal metadata in bytes.
/// void *copy_func_helper_decl; // optional copy helper.
-/// void *destroy_func_decl; // optioanl destructor helper.
+/// void *destroy_func_decl; // optional destructor helper.
/// void *block_method_encoding_address; // @encode for block literal signature.
/// void *block_layout_info; // encoding of captured block variables.
/// };
@@ -197,7 +197,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
std::string descName;
// If an equivalent block descriptor global variable exists, return it.
- if (C.getLangOpts().ObjC1 &&
+ if (C.getLangOpts().ObjC &&
CGM.getLangOpts().getGC() == LangOptions::NonGC) {
descName = getBlockDescriptorName(blockInfo, CGM);
if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName))
@@ -243,7 +243,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p));
// GC layout.
- if (C.getLangOpts().ObjC1) {
+ if (C.getLangOpts().ObjC) {
if (CGM.getLangOpts().getGC() != LangOptions::NonGC)
elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo));
else
@@ -446,12 +446,25 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
assert(elementTypes.empty());
if (CGM.getLangOpts().OpenCL) {
- // The header is basically 'struct { int; int;
+ // The header is basically 'struct { int; int; generic void *;
// custom_fields; }'. Assert that struct is packed.
+ auto GenericAS =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ auto GenPtrAlign =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
+ auto GenPtrSize =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+ assert(CGM.getIntSize() <= GenPtrSize);
+ assert(CGM.getIntAlign() <= GenPtrAlign);
+ assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
- unsigned Offset = 2 * CGM.getIntSize().getQuantity();
- unsigned BlockAlign = CGM.getIntAlign().getQuantity();
+ elementTypes.push_back(
+ CGM.getOpenCLRuntime()
+ .getGenericVoidPointerType()); /* invoke function */
+ unsigned Offset =
+ 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+ unsigned BlockAlign = GenPtrAlign.getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
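[Editor's note: as a rough C-level view of the OpenCL block header this hunk lays out; this is an illustrative sketch, not a type spelled anywhere in the patch, and "custom fields" are target-defined:]

    /* Illustrative only: the header now starts with size, align, and a
       generic-address-space invoke pointer, followed by any target custom
       fields and then the captures. */
    struct __opencl_block_header_sketch {
      int   size;    /* total block size */
      int   align;   /* block alignment */
      void *invoke;  /* invoke function; generic address space in OpenCL */
      /* target custom fields, then captures ... */
    };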
@@ -493,7 +506,11 @@ static QualType getCaptureFieldType(const CodeGenFunction &CGF,
return CGF.BlockInfo->getCapture(VD).fieldType();
if (auto *FD = CGF.LambdaCaptureFields.lookup(VD))
return FD->getType();
- return VD->getType();
+ // If the captured variable is a non-escaping __block variable, the field
+ // type is the reference type. If the variable is a __block variable that
+ // already has a reference type, the field type is the variable's type.
+ return VD->isNonEscapingByref() ?
+ CGF.getContext().getLValueReferenceType(VD->getType()) : VD->getType();
}
/// Compute the layout of the given block. Attempts to lay the block
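[Editor's note: the escaping/non-escaping distinction used above means a __block variable whose block never outlives the enclosing frame is captured as a plain reference rather than through the byref machinery. A minimal sketch in Blocks C, names illustrative:]

    void noescape(void) {
      __block int i = 0;
      void (^b)(void) = ^{ ++i; };  // b never escapes the frame, so i is a
      b();                          // non-escaping __block variable and is
    }                               // captured as a reference, not a byref
    // If b were copied or returned, i would be escaping and would get the
    // full byref treatment (BLOCK_FIELD_IS_BYREF, copy/dispose helpers).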
@@ -516,7 +533,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
info.CanBeGlobal = true;
return;
}
- else if (C.getLangOpts().ObjC1 &&
+ else if (C.getLangOpts().ObjC &&
CGM.getLangOpts().getGC() == LangOptions::NonGC)
info.HasCapturedVariableLayout = true;
@@ -549,7 +566,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
for (const auto &CI : block->captures()) {
const VarDecl *variable = CI.getVariable();
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// We have to copy/dispose of the __block reference.
info.NeedsCopyDispose = true;
@@ -842,10 +859,12 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
/// Enter a full-expression with a non-trivial number of objects to
/// clean up. This is in this file because, at the moment, the only
/// kind of cleanup object is a BlockDecl*.
-void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
- assert(E->getNumObjects() != 0);
- for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
- enterBlockScope(*this, C);
+void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) {
+ if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) {
+ assert(EWC->getNumObjects() != 0);
+ for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects())
+ enterBlockScope(*this, C);
+ }
}
/// Find the layout for the given block in a linked list and remove it.
@@ -902,12 +921,20 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
+ auto GenVoidPtrTy =
+ IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
+ LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
+ auto GenVoidPtrSize = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerWidth(
+ CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
+ 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
+ auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
@@ -983,12 +1010,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
- if (!IsOpenCL) {
- addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
- getPointerSize(), "block.invoke");
+ addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+ if (!IsOpenCL)
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
- } else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
@@ -1032,7 +1058,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// The lambda capture in a lambda's conversion-to-block-pointer is
// special; we'll simply emit it directly.
src = Address::invalid();
- } else if (CI.isByRef()) {
+ } else if (CI.isEscapingByref()) {
if (BlockInfo && CI.isNested()) {
// We need to use the capture from the enclosing block.
const CGBlockInfo::Capture &enclosingCapture =
@@ -1060,7 +1086,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// the block field. There's no need to chase the forwarding
// pointer at this point, since we're building something that will
// live a shorter life than the stack byref anyway.
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// Get a void* that points to the byref struct.
llvm::Value *byrefPointer;
if (CI.isNested())
@@ -1192,23 +1218,38 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
}
llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
- assert(!getLangOpts().OpenCL && "OpenCL does not need this");
-
if (GenericBlockLiteralType)
return GenericBlockLiteralType;
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ if (getLangOpts().OpenCL) {
+ // struct __opencl_block_literal_generic {
+ // int __size;
+ // int __align;
+ // __generic void *__invoke;
+ // /* custom fields */
+ // };
+ SmallVector<llvm::Type *, 8> StructFields(
+ {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
+ if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldTypes())
+ StructFields.push_back(I);
+ }
+ GenericBlockLiteralType = llvm::StructType::create(
+ StructFields, "struct.__opencl_block_literal_generic");
+ } else {
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ }
return GenericBlockLiteralType;
}
@@ -1219,21 +1260,27 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
E->getCallee()->getType()->getAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
- llvm::Value *FuncPtr = nullptr;
- if (!CGM.getLangOpts().OpenCL) {
- // Get a pointer to the generic block literal.
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
+ // Get a pointer to the generic block literal.
+ // For OpenCL we generate generic AS void ptr to be able to reuse the same
+ // block definition for blocks with captures generated as private AS local
+ // variables and without captures generated as global AS program scope
+ // variables.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+ llvm::Type *BlockLiteralTy =
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
- // Get the function pointer from the literal.
- FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
- }
+ // Bitcast the callee to a block literal.
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+
+ // Get the function pointer from the literal.
+ llvm::Value *FuncPtr =
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
+ CGM.getLangOpts().OpenCL ? 2 : 3);
// Add the block literal.
CallArgList Args;
@@ -1256,11 +1303,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
// Load the function.
- llvm::Value *Func;
- if (CGM.getLangOpts().OpenCL)
- Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
- else
- Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
@@ -1279,8 +1322,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
return EmitCall(FnInfo, Callee, ReturnValue, Args);
}
-Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
- bool isByRef) {
+Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) {
assert(BlockInfo && "evaluating block ref without block information?");
const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable);
@@ -1291,7 +1333,7 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(),
capture.getOffset(), "block.capture.addr");
- if (isByRef) {
+ if (variable->isEscapingByref()) {
// addr should be a void** right now. Load, then cast the result
// to byref*.
@@ -1305,6 +1347,10 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
variable->getName());
}
+ assert((!variable->isNonEscapingByref() ||
+ capture.fieldType()->isReferenceType()) &&
+ "the capture field of a non-escaping variable should have a "
+ "reference type");
if (capture.fieldType()->isReferenceType())
addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType()));
@@ -1373,14 +1419,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Reserved
fields.addInt(CGM.IntTy, 0);
-
- // Function
- fields.add(blockFn);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}
+ // Function
+ fields.add(blockFn);
+
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
@@ -1656,7 +1702,7 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
}
BlockFieldFlags Flags;
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -1773,8 +1819,6 @@ static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
CodeGenModule &CGM) {
std::string Str;
ASTContext &Ctx = CGM.getContext();
- std::unique_ptr<ItaniumMangleContext> MC(
- ItaniumMangleContext::create(Ctx, Ctx.getDiagnostics()));
const BlockDecl::Capture &CI = *E.CI;
QualType CaptureTy = CI.getVariable()->getType();
@@ -1800,7 +1844,7 @@ static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
Str += "c";
SmallString<256> TyStr;
llvm::raw_svector_ostream Out(TyStr);
- MC->mangleTypeName(CaptureTy, Out);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out);
Str += llvm::to_string(TyStr.size()) + TyStr.c_str();
break;
}
@@ -1939,7 +1983,7 @@ static void setBlockHelperAttributesVisibility(bool CapturesNonExternalType,
} else {
Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
Fn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
}
}
@@ -1964,16 +2008,16 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: it would be nice if these were mergeable with things with
// identical semantics.
@@ -1983,20 +2027,20 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get(FuncName);
+ IdentifierInfo *II = &C.Idents.get(FuncName);
+
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
CGM);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
@@ -2102,7 +2146,7 @@ getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -2157,13 +2201,14 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: We'd like to put these into a mergable by content, with
// internal linkage.
@@ -2173,18 +2218,19 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get(FuncName);
+ IdentifierInfo *II = &C.Idents.get(FuncName);
- FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false, false);
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
CGM);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
markAsIgnoreThreadCheckingAtRuntime(Fn);
ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
@@ -2403,19 +2449,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
BlockByrefHelpers &generator) {
ASTContext &Context = CGF.getContext();
- QualType R = Context.VoidTy;
+ QualType ReturnTy = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Dst);
- ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Src);
const CGFunctionInfo &FI =
- CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
+ CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI);
@@ -2428,16 +2472,18 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_copy_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- CGF.StartFunction(FD, R, Fn, FI, args);
+ CGF.StartFunction(FD, ReturnTy, Fn, FI, args);
if (generator.needsCopy()) {
llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0);
@@ -2502,12 +2548,13 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_dispose_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(R, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
@@ -2564,6 +2611,9 @@ BlockByrefHelpers *
CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
const AutoVarEmission &emission) {
const VarDecl &var = *emission.Variable;
+ assert(var.isEscapingByref() &&
+ "only escaping __block variables need byref helpers");
+
QualType type = var.getType();
auto &byrefInfo = getBlockByrefInfo(&var);
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index df71dbb4b4..eea9207a34 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -21,10 +21,11 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
-#include "clang/Analysis/Analyses/OSLog.h"
+#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
return V;
}
-/// Utility to insert an atomic instruction based on Instrinsic::ID
+/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
-static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E) {
+static Value *MakeBinaryAtomicValue(
+ CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
QualType T = E->getType();
assert(E->getArg(0)->getType()->isPointerType());
assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
+ Kind, Args[0], Args[1], Ordering);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
-/// Utility to insert an atomic instruction based Instrinsic::ID and
+/// Utility to insert an atomic instruction based Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
@@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
/// cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
+///
+/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
+/// invoke the function EmitAtomicCmpXchgForMSIntrin.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
bool ReturnBool) {
QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
@@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
ValueType);
}
+/// This function should be invoked to emit atomic cmpxchg for Microsoft's
+/// _InterlockedCompareExchange* intrinsics which have the following signature:
+/// T _InterlockedCompareExchange(T volatile *Destination,
+/// T Exchange,
+/// T Comparand);
+///
+/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
+/// cmpxchg *Destination, Comparand, Exchange.
+/// So we need to swap Comparand and Exchange when invoking
+/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
+/// function MakeAtomicCmpXchgValue since it expects the arguments to be
+/// already swapped.
+
+static
+Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+ assert(CGF.getContext().hasSameUnqualifiedType(
+ E->getType(), E->getArg(0)->getType()->getPointeeType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(1)->getType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(2)->getType()));
+
+ auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
+ auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
+
+ // For Release ordering, the failure ordering should be Monotonic.
+ auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
+ AtomicOrdering::Monotonic :
+ SuccessOrdering;
+
+ auto *Result = CGF.Builder.CreateAtomicCmpXchg(
+ Destination, Comparand, Exchange,
+ SuccessOrdering, FailureOrdering);
+ Result->setVolatile(true);
+ return CGF.Builder.CreateExtractValue(Result, 0);
+}
+
+static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
+}
+
+static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Sub,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
+}
+
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
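[Editor's note: a small usage sketch of the operand swap described in the comment above, assuming an MSVC target; values are hypothetical:]

    //   MSVC:  T _InterlockedCompareExchange(T volatile *Dest, T Exchange, T Comparand)
    //   LLVM:  cmpxchg ptr %Dest, %Comparand, %Exchange
    #include <intrin.h>
    long demo(volatile long *x) {
      // Returns the previous value; stores 1 only if *x was 0. The compare
      // operand comes third in the source but second in the IR, which is
      // why the helper passes Comparand before Exchange to CreateAtomicCmpXchg.
      return _InterlockedCompareExchange(x, /*Exchange=*/1, /*Comparand=*/0);
    }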
@@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
const CallExpr *E, llvm::Constant *calleeValue) {
- CGCallee callee = CGCallee::forDirect(calleeValue, FD);
+ CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
@@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
namespace {
-/// A struct to generically desribe a bit test intrinsic.
+/// A struct to generically describe a bit test intrinsic.
struct BitTest {
enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
enum InterlockingKind : uint8_t {
@@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
} else {
Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
Arg1Ty = CGF.Int8PtrTy;
- Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+ } else
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
}
// Mark the call site and declaration with ReturnsTwice.
@@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
+ _InterlockedExchangeAdd_acq,
+ _InterlockedExchangeAdd_rel,
+ _InterlockedExchangeAdd_nf,
+ _InterlockedExchange_acq,
+ _InterlockedExchange_rel,
+ _InterlockedExchange_nf,
+ _InterlockedCompareExchange_acq,
+ _InterlockedCompareExchange_rel,
+ _InterlockedCompareExchange_nf,
+ _InterlockedOr_acq,
+ _InterlockedOr_rel,
+ _InterlockedOr_nf,
+ _InterlockedXor_acq,
+ _InterlockedXor_rel,
+ _InterlockedXor_nf,
+ _InterlockedAnd_acq,
+ _InterlockedAnd_rel,
+ _InterlockedAnd_nf,
+ _InterlockedIncrement_acq,
+ _InterlockedIncrement_rel,
+ _InterlockedIncrement_nf,
+ _InterlockedDecrement_acq,
+ _InterlockedDecrement_rel,
+ _InterlockedDecrement_nf,
__fastfail,
};
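[Editor's note: the new enumerators follow the MSVC ARM/ARM64 naming scheme, where the suffix selects the memory order: _acq = acquire, _rel = release, _nf = "no fence", lowered below as monotonic. An illustrative use, assuming an MSVC ARM64 target:]

    #include <intrin.h>
    void demo(volatile long *v) {
      _InterlockedIncrement_acq(v); // acquire ordering
      _InterlockedIncrement_rel(v); // release ordering
      _InterlockedIncrement_nf(v);  // "no fence": monotonic ordering
    }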
@@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
-
- case MSVCIntrin::_InterlockedDecrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
- }
- case MSVCIntrin::_InterlockedIncrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
- }
+ case MSVCIntrin::_InterlockedExchangeAdd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchangeAdd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchangeAdd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedExchange_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchange_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchange_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedCompareExchange_acq:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedCompareExchange_rel:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedCompareExchange_nf:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedOr_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedOr_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedOr_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedXor_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedXor_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedXor_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedAnd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedAnd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedAnd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedIncrement_acq:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedIncrement_rel:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedIncrement_nf:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedDecrement_acq:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedDecrement_rel:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedDecrement_nf:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
+
+ case MSVCIntrin::_InterlockedDecrement:
+ return EmitAtomicDecrementValue(*this, E);
+ case MSVCIntrin::_InterlockedIncrement:
+ return EmitAtomicIncrementValue(*this, E);
case MSVCIntrin::__fastfail: {
// Request immediate process termination from the kernel. The instruction
@@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
if (llvm::Function *F = CGM.getModule().getFunction(Name))
return F;
+ llvm::SmallVector<QualType, 4> ArgTys;
llvm::SmallVector<ImplicitParamDecl, 4> Params;
Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+ ArgTys.emplace_back(Ctx.VoidPtrTy);
for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
char Size = Layout.Items[I].getSizeByte();
if (!Size)
continue;
+ QualType ArgTy = getOSLogArgType(Ctx, Size);
Params.emplace_back(
Ctx, nullptr, SourceLocation(),
- &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
- getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
+ &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
+ ImplicitParamDecl::Other);
+ ArgTys.emplace_back(ArgTy);
}
FunctionArgList Args;
for (auto &P : Params)
Args.push_back(&P);
+ QualType ReturnTy = Ctx.VoidTy;
+ QualType FunctionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
+
// The helper function has linkonce_odr linkage to enable the linker to merge
// identical functions. To ensure the merging always happens, 'noinline' is
// attached to the function when compiling with -Oz.
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = llvm::Function::Create(
FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
// Attach 'noinline' at -Oz.
@@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
IdentifierInfo *II = &Ctx.Idents.get(Name);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
- Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ FunctionTy, nullptr, SC_PrivateExtern, false, false);
- StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
+ StartFunction(FD, ReturnTy, Fn, FI, Args);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(*this);
@@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
llvm::Value *ArgVal;
- if (const Expr *TheExpr = Item.getExpr()) {
+ if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
+ uint64_t Val = 0;
+ for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
+ Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
+ ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
+ } else if (const Expr *TheExpr = Item.getExpr()) {
ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
// Check if this is a retainable type.
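[Editor's note: the new MaskKind branch above packs the mask type string into a little-endian 64-bit constant. A standalone sketch of that packing; the helper name is hypothetical:]

    #include <cstdint>
    #include <cstring>
    // Mirrors the loop above: byte I of the mask type string lands at
    // bits [8*I, 8*I+7] of the emitted i64 constant.
    static uint64_t packMaskType(const char *Mask) {
      uint64_t Val = 0;
      for (unsigned I = 0, E = (unsigned)std::strlen(Mask); I < E; ++I)
        Val |= (uint64_t)(unsigned char)Mask[I] << (I * 8);
      return Val;
    }
    // packMaskType("hash") == 0x68736168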
@@ -1252,6 +1410,42 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
return Res;
}
+static bool
+TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
+ llvm::SmallPtrSetImpl<const Decl *> &Seen) {
+ if (const auto *Arr = Ctx.getAsArrayType(Ty))
+ Ty = Ctx.getBaseElementType(Arr);
+
+ const auto *Record = Ty->getAsCXXRecordDecl();
+ if (!Record)
+ return false;
+
+ // We've already checked this type, or are in the process of checking it.
+ if (!Seen.insert(Record).second)
+ return false;
+
+ assert(Record->hasDefinition() &&
+ "Incomplete types should already be diagnosed");
+
+ if (Record->isDynamicClass())
+ return true;
+
+ for (FieldDecl *F : Record->fields()) {
+ if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
+ return true;
+ }
+ return false;
+}
+
+/// Determine if the specified type requires laundering by checking if it is a
+/// dynamic class type or contains a subobject which is a dynamic class type.
+static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
+ if (!CGM.getCodeGenOpts().StrictVTablePointers)
+ return false;
+ llvm::SmallPtrSet<const Decl *, 16> Seen;
+ return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
+}
+
RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
@@ -1267,9 +1461,10 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
}
-RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
- unsigned BuiltinID, const CallExpr *E,
+RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
+ const CallExpr *E,
ReturnValueSlot ReturnValue) {
+ const FunctionDecl *FD = GD.getDecl()->getAsFunction();
// See if we can constant fold this builtin. If so, don't emit it at all.
Expr::EvalResult Result;
if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
@@ -1644,6 +1839,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
+ case Builtin::BI__lzcnt16:
+ case Builtin::BI__lzcnt:
+ case Builtin::BI__lzcnt64: {
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+ llvm::Type *ArgType = ArgValue->getType();
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+ "cast");
+ return RValue::get(Result);
+ }
case Builtin::BI__popcnt16:
case Builtin::BI__popcnt:
case Builtin::BI__popcnt64:
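[Editor's note: in the __lzcnt case above, ctlz is emitted with Builder.getFalse() for is_zero_undef because, unlike __builtin_clz, the Microsoft __lzcnt family is well defined for a zero input. Illustrative, assuming an MSVC target:]

    #include <intrin.h>
    unsigned demo(void) {
      return __lzcnt16(0)   // 16: every bit of a zero input is a leading zero
           + __lzcnt(1);    // 31
    }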
@@ -1662,46 +1872,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
- case Builtin::BI_rotr8:
- case Builtin::BI_rotr16:
- case Builtin::BI_rotr:
- case Builtin::BI_lrotr:
- case Builtin::BI_rotr64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
- case Builtin::BI_rotl8:
- case Builtin::BI_rotl16:
- case Builtin::BI_rotl:
- case Builtin::BI_lrotl:
- case Builtin::BI_rotl64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
case Builtin::BI__builtin_unpredictable: {
// Always return the argument of __builtin_unpredictable. LLVM does not
// handle this builtin. Metadata for this builtin should be added directly
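[Editor's note: the open-coded rotate expansion removed above is superseded by emitRotate, which, per the hunk below, now also serves the Microsoft _rotl/_rotr variants and lowers to the funnel-shift intrinsics. Equivalent behavior, sketched:]

    // A rotate is a funnel shift with both inputs equal:
    //   rotl(v, s) == llvm.fshl(v, v, s);  rotr(v, s) == llvm.fshr(v, v, s)
    unsigned rotl32(unsigned v, unsigned s) {
      return __builtin_rotateleft32(v, s);
    }
    // rotl32(0x80000001u, 1) == 0x00000003u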
@@ -1760,14 +1930,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_rotateleft16:
case Builtin::BI__builtin_rotateleft32:
case Builtin::BI__builtin_rotateleft64:
+ case Builtin::BI_rotl8: // Microsoft variants of rotate left
+ case Builtin::BI_rotl16:
+ case Builtin::BI_rotl:
+ case Builtin::BI_lrotl:
+ case Builtin::BI_rotl64:
return emitRotate(E, false);
case Builtin::BI__builtin_rotateright8:
case Builtin::BI__builtin_rotateright16:
case Builtin::BI__builtin_rotateright32:
case Builtin::BI__builtin_rotateright64:
+ case Builtin::BI_rotr8: // Microsoft variants of rotate right
+ case Builtin::BI_rotr16:
+ case Builtin::BI_rotr:
+ case Builtin::BI_lrotr:
+ case Builtin::BI_rotr64:
return emitRotate(E, true);
+ case Builtin::BI__builtin_constant_p: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ // At -O0, we don't perform inlining, so we don't need to delay the
+ // processing.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ const Expr *Arg = E->getArg(0);
+ QualType ArgType = Arg->getType();
+ if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
+ // We can only reason about scalar types.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ Value *ArgValue = EmitScalarExpr(Arg);
+ Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
+ Value *Result = Builder.CreateCall(F, ArgValue);
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_object_size: {
unsigned Type =
E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
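[Editor's note: the comments in the __builtin_constant_p case capture the key design point: when optimizing, the builtin is kept as llvm.is_constant so the answer can be decided after inlining, while at -O0 anything not already folded by the front end becomes 0. A sketch; slow_scale is a hypothetical fallback:]

    int slow_scale(int);  // hypothetical out-of-line fallback
    static inline int scale(int x) {
      return __builtin_constant_p(x) ? x * 2 : slow_scale(x);
    }
    // scale(21) can fold to 42 once inlining proves x constant; with an
    // unknown argument, llvm.is_constant resolves to false and the call
    // to slow_scale survives.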
@@ -2032,10 +2232,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memcpy_chk: {
// fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2056,10 +2258,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memmove_chk: {
// fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2094,10 +2298,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
}
case Builtin::BI__builtin___memset_chk: {
// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2305,6 +2511,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_launder: {
+ const Expr *Arg = E->getArg(0);
+ QualType ArgTy = Arg->getType()->getPointeeType();
+ Value *Ptr = EmitScalarExpr(Arg);
+ if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
+ Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
+
+ return RValue::get(Ptr);
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
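[Editor's note: TypeRequiresBuiltinLaunder, added earlier in this file, makes the launder a no-op unless -fstrict-vtable-pointers is on and the pointee is, or contains, a dynamic class. A sketch of when the invariant.group launder matters; the types are illustrative:]

    #include <new>
    struct Base    { virtual int f() { return 1; } };
    struct Derived : Base { int f() override { return 2; } };

    int reuse(Base *p) {
      p->~Base();
      new (p) Derived();  // same storage, new dynamic type
      // Without laundering, -fstrict-vtable-pointers could reuse the old
      // vptr load through p; the launder starts a fresh invariant group.
      return __builtin_launder(p)->f(); // 2
    }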
@@ -2999,7 +3214,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedExchangePointer:
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
- case Builtin::BI_InterlockedCompareExchangePointer: {
+ case Builtin::BI_InterlockedCompareExchangePointer:
+ case Builtin::BI_InterlockedCompareExchangePointer_nf: {
llvm::Type *RTy;
llvm::IntegerType *IntType =
IntegerType::get(getLLVMContext(),
@@ -3016,10 +3232,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Result =
- Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
+ auto Ordering =
+ BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
+ AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
+
+ auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ Ordering, Ordering);
Result->setVolatile(true);
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
@@ -3029,16 +3247,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedCompareExchange8:
case Builtin::BI_InterlockedCompareExchange16:
case Builtin::BI_InterlockedCompareExchange:
- case Builtin::BI_InterlockedCompareExchange64: {
- AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(1)),
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
- CXI->setVolatile(true);
- return RValue::get(Builder.CreateExtractValue(CXI, 0));
- }
+ case Builtin::BI_InterlockedCompareExchange64:
+ return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
case Builtin::BI_InterlockedIncrement16:
case Builtin::BI_InterlockedIncrement:
return RValue::get(
@@ -3457,7 +3667,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
- ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
+ ClkEvent = ClkEvent->getType()->isIntegerTy()
+ ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy)
+ : Builder.CreatePointerCast(ClkEvent, EventPtrTy);
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
llvm::Value *Kernel =
@@ -3591,13 +3803,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_os_log_format:
return emitBuiltinOSLogFormat(*E);
- case Builtin::BI__builtin_os_log_format_buffer_size: {
- analyze_os_log::OSLogBufferLayout Layout;
- analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
- return RValue::get(ConstantInt::get(ConvertType(E->getType()),
- Layout.size().getQuantity()));
- }
-
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
@@ -4365,6 +4570,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
@@ -5338,6 +5551,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
+ case NEON::BI__builtin_neon_vfmlal_low_v:
+ case NEON::BI__builtin_neon_vfmlalq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_low_v:
+ case NEON::BI__builtin_neon_vfmlslq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
+ }
+ case NEON::BI__builtin_neon_vfmlal_high_v:
+ case NEON::BI__builtin_neon_vfmlalq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_high_v:
+ case NEON::BI__builtin_neon_vfmlslq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
+ }
}
assert(Int && "Expected valid intrinsic number");
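
All eight vfmlal/vfmlsl cases above build the same second overload type: a half-precision vector with ResultBits/16 lanes, pairing the f32 result vector with the f16 input vector that fmlal/fmlsl widen from. A hypothetical factoring of the repeated computation (not part of the patch):

// The intrinsics are overloaded on { f32 result vector, f16 input vector },
// where the input has twice the lane count of the result.
static llvm::Type *getFP16FMLInputType(llvm::Type *ResTy, llvm::Type *HalfTy) {
  return llvm::VectorType::get(HalfTy, ResTy->getPrimitiveSizeInBits() / 16);
}
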
@@ -5585,10 +5826,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
llvm::FunctionType *FTy =
llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
- APSInt Value;
- if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
llvm_unreachable("Sema will ensure that the parameter is constant");
+ llvm::APSInt Value = Result.Val.getInt();
uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
llvm::InlineAsm *Emit =
@@ -6070,6 +6312,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case ARM::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case ARM::BI_InterlockedExchangeAdd8_acq:
+ case ARM::BI_InterlockedExchangeAdd16_acq:
+ case ARM::BI_InterlockedExchangeAdd_acq:
+ case ARM::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case ARM::BI_InterlockedExchangeAdd8_rel:
+ case ARM::BI_InterlockedExchangeAdd16_rel:
+ case ARM::BI_InterlockedExchangeAdd_rel:
+ case ARM::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case ARM::BI_InterlockedExchangeAdd8_nf:
+ case ARM::BI_InterlockedExchangeAdd16_nf:
+ case ARM::BI_InterlockedExchangeAdd_nf:
+ case ARM::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case ARM::BI_InterlockedExchange8_acq:
+ case ARM::BI_InterlockedExchange16_acq:
+ case ARM::BI_InterlockedExchange_acq:
+ case ARM::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case ARM::BI_InterlockedExchange8_rel:
+ case ARM::BI_InterlockedExchange16_rel:
+ case ARM::BI_InterlockedExchange_rel:
+ case ARM::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case ARM::BI_InterlockedExchange8_nf:
+ case ARM::BI_InterlockedExchange16_nf:
+ case ARM::BI_InterlockedExchange_nf:
+ case ARM::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case ARM::BI_InterlockedCompareExchange8_acq:
+ case ARM::BI_InterlockedCompareExchange16_acq:
+ case ARM::BI_InterlockedCompareExchange_acq:
+ case ARM::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case ARM::BI_InterlockedCompareExchange8_rel:
+ case ARM::BI_InterlockedCompareExchange16_rel:
+ case ARM::BI_InterlockedCompareExchange_rel:
+ case ARM::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case ARM::BI_InterlockedCompareExchange8_nf:
+ case ARM::BI_InterlockedCompareExchange16_nf:
+ case ARM::BI_InterlockedCompareExchange_nf:
+ case ARM::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case ARM::BI_InterlockedOr8_acq:
+ case ARM::BI_InterlockedOr16_acq:
+ case ARM::BI_InterlockedOr_acq:
+ case ARM::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case ARM::BI_InterlockedOr8_rel:
+ case ARM::BI_InterlockedOr16_rel:
+ case ARM::BI_InterlockedOr_rel:
+ case ARM::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case ARM::BI_InterlockedOr8_nf:
+ case ARM::BI_InterlockedOr16_nf:
+ case ARM::BI_InterlockedOr_nf:
+ case ARM::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case ARM::BI_InterlockedXor8_acq:
+ case ARM::BI_InterlockedXor16_acq:
+ case ARM::BI_InterlockedXor_acq:
+ case ARM::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case ARM::BI_InterlockedXor8_rel:
+ case ARM::BI_InterlockedXor16_rel:
+ case ARM::BI_InterlockedXor_rel:
+ case ARM::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case ARM::BI_InterlockedXor8_nf:
+ case ARM::BI_InterlockedXor16_nf:
+ case ARM::BI_InterlockedXor_nf:
+ case ARM::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case ARM::BI_InterlockedAnd8_acq:
+ case ARM::BI_InterlockedAnd16_acq:
+ case ARM::BI_InterlockedAnd_acq:
+ case ARM::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case ARM::BI_InterlockedAnd8_rel:
+ case ARM::BI_InterlockedAnd16_rel:
+ case ARM::BI_InterlockedAnd_rel:
+ case ARM::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case ARM::BI_InterlockedAnd8_nf:
+ case ARM::BI_InterlockedAnd16_nf:
+ case ARM::BI_InterlockedAnd_nf:
+ case ARM::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case ARM::BI_InterlockedIncrement16_acq:
+ case ARM::BI_InterlockedIncrement_acq:
+ case ARM::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case ARM::BI_InterlockedIncrement16_rel:
+ case ARM::BI_InterlockedIncrement_rel:
+ case ARM::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case ARM::BI_InterlockedIncrement16_nf:
+ case ARM::BI_InterlockedIncrement_nf:
+ case ARM::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case ARM::BI_InterlockedDecrement16_acq:
+ case ARM::BI_InterlockedDecrement_acq:
+ case ARM::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case ARM::BI_InterlockedDecrement16_rel:
+ case ARM::BI_InterlockedDecrement_rel:
+ case ARM::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case ARM::BI_InterlockedDecrement16_nf:
+ case ARM::BI_InterlockedDecrement_nf:
+ case ARM::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
}
// Get the last argument, which specifies the vector type.
@@ -6576,11 +6932,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
}
+ if (BuiltinID == AArch64::BI__getReg) {
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable("Sema will ensure that the parameter is constant");
+
+ llvm::APSInt Value = Result.Val.getInt();
+ LLVMContext &Context = CGM.getLLVMContext();
+ std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
+
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Value *F =
+ CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
+ return Builder.CreateCall(F, Metadata);
+ }
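
__getReg renders its constant operand as a register name ("x0".."x30", or "sp" for 31) and reads it through llvm.read_register keyed by an MDString, the same mechanism named-register globals use. Illustrative use and the approximate IR, assuming an AArch64 MSVC-compatible target:

// unsigned long long fp = __getReg(29);  // x29, the AArch64 frame pointer
// lowers roughly to:
//   %fp = call i64 @llvm.read_register.i64(metadata !0)
//   !0 = !{!"x29"}
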
+
if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
return Builder.CreateCall(F);
}
+ if (BuiltinID == AArch64::BI_ReadWriteBarrier)
+ return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::SyncScope::SingleThread);
+
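
_ReadWriteBarrier is a compiler barrier only, so it becomes a fence in the singlethread sync scope: it constrains reordering of the current thread's accesses without emitting a hardware barrier. Sketch of the lowering:

//   _ReadWriteBarrier();
// becomes:
//   fence syncscope("singlethread") seq_cst
// i.e. an ordering constraint only; no DMB instruction is produced.
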
// CRC32
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
@@ -6643,6 +7021,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
+ if (BuiltinID == AArch64::BI_ReadStatusReg ||
+ BuiltinID == AArch64::BI_WriteStatusReg) {
+ LLVMContext &Context = CGM.getLLVMContext();
+
+ unsigned SysReg =
+ E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
+
+ std::string SysRegStr;
+ llvm::raw_string_ostream(SysRegStr) <<
+ ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
+ ((SysReg >> 11) & 7) << ":" <<
+ ((SysReg >> 7) & 15) << ":" <<
+ ((SysReg >> 3) & 15) << ":" <<
+ ( SysReg & 7);
+
+ llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Type *RegisterType = Int64Ty;
+ llvm::Type *ValueType = Int32Ty;
+ llvm::Type *Types[] = { RegisterType };
+
+ if (BuiltinID == AArch64::BI_ReadStatusReg) {
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+ llvm::Value *Call = Builder.CreateCall(F, Metadata);
+
+ return Builder.CreateTrunc(Call, ValueType);
+ }
+
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+ llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
+ ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
+
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+ }
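
_ReadStatusReg/_WriteStatusReg unpack the MSVC ARM64_SYSREG immediate into the "op0:op1:CRn:CRm:op2" string LLVM expects as a register name; op0 is rebuilt as 2 | bit 14, since only op0 values 2 and 3 are encodable. A worked decode, assuming the standard packing ((op0 & 1) << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2):

// TPIDR_EL0 (S3_3_C13_C0_2) packs to 0x5E82:
//   op0: (1 << 1) | ((0x5E82 >> 14) & 1) = 3
//   op1: (0x5E82 >> 11) & 7              = 3
//   CRn: (0x5E82 >>  7) & 15             = 13
//   CRm: (0x5E82 >>  3) & 15             = 0
//   op2:  0x5E82        & 7              = 2
// yielding the metadata string "3:3:13:0:2" handed to
// llvm.read_register / llvm.write_register.
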
+
+ if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+ return Builder.CreateCall(F);
+ }
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6738,7 +7158,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_f16_u32:
case NEON::BI__builtin_neon_vcvth_f16_u64:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_f16_s16:
case NEON::BI__builtin_neon_vcvth_f16_s32:
case NEON::BI__builtin_neon_vcvth_f16_s64: {
@@ -6758,7 +7178,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u16_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s16_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6768,7 +7188,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u32_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s32_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6778,7 +7198,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u64_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s64_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -8493,6 +8913,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case AArch64::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case AArch64::BI_InterlockedExchangeAdd8_acq:
+ case AArch64::BI_InterlockedExchangeAdd16_acq:
+ case AArch64::BI_InterlockedExchangeAdd_acq:
+ case AArch64::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case AArch64::BI_InterlockedExchangeAdd8_rel:
+ case AArch64::BI_InterlockedExchangeAdd16_rel:
+ case AArch64::BI_InterlockedExchangeAdd_rel:
+ case AArch64::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case AArch64::BI_InterlockedExchangeAdd8_nf:
+ case AArch64::BI_InterlockedExchangeAdd16_nf:
+ case AArch64::BI_InterlockedExchangeAdd_nf:
+ case AArch64::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case AArch64::BI_InterlockedExchange8_acq:
+ case AArch64::BI_InterlockedExchange16_acq:
+ case AArch64::BI_InterlockedExchange_acq:
+ case AArch64::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case AArch64::BI_InterlockedExchange8_rel:
+ case AArch64::BI_InterlockedExchange16_rel:
+ case AArch64::BI_InterlockedExchange_rel:
+ case AArch64::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case AArch64::BI_InterlockedExchange8_nf:
+ case AArch64::BI_InterlockedExchange16_nf:
+ case AArch64::BI_InterlockedExchange_nf:
+ case AArch64::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case AArch64::BI_InterlockedCompareExchange8_acq:
+ case AArch64::BI_InterlockedCompareExchange16_acq:
+ case AArch64::BI_InterlockedCompareExchange_acq:
+ case AArch64::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case AArch64::BI_InterlockedCompareExchange8_rel:
+ case AArch64::BI_InterlockedCompareExchange16_rel:
+ case AArch64::BI_InterlockedCompareExchange_rel:
+ case AArch64::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case AArch64::BI_InterlockedCompareExchange8_nf:
+ case AArch64::BI_InterlockedCompareExchange16_nf:
+ case AArch64::BI_InterlockedCompareExchange_nf:
+ case AArch64::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case AArch64::BI_InterlockedOr8_acq:
+ case AArch64::BI_InterlockedOr16_acq:
+ case AArch64::BI_InterlockedOr_acq:
+ case AArch64::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case AArch64::BI_InterlockedOr8_rel:
+ case AArch64::BI_InterlockedOr16_rel:
+ case AArch64::BI_InterlockedOr_rel:
+ case AArch64::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case AArch64::BI_InterlockedOr8_nf:
+ case AArch64::BI_InterlockedOr16_nf:
+ case AArch64::BI_InterlockedOr_nf:
+ case AArch64::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case AArch64::BI_InterlockedXor8_acq:
+ case AArch64::BI_InterlockedXor16_acq:
+ case AArch64::BI_InterlockedXor_acq:
+ case AArch64::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case AArch64::BI_InterlockedXor8_rel:
+ case AArch64::BI_InterlockedXor16_rel:
+ case AArch64::BI_InterlockedXor_rel:
+ case AArch64::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case AArch64::BI_InterlockedXor8_nf:
+ case AArch64::BI_InterlockedXor16_nf:
+ case AArch64::BI_InterlockedXor_nf:
+ case AArch64::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case AArch64::BI_InterlockedAnd8_acq:
+ case AArch64::BI_InterlockedAnd16_acq:
+ case AArch64::BI_InterlockedAnd_acq:
+ case AArch64::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case AArch64::BI_InterlockedAnd8_rel:
+ case AArch64::BI_InterlockedAnd16_rel:
+ case AArch64::BI_InterlockedAnd_rel:
+ case AArch64::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case AArch64::BI_InterlockedAnd8_nf:
+ case AArch64::BI_InterlockedAnd16_nf:
+ case AArch64::BI_InterlockedAnd_nf:
+ case AArch64::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case AArch64::BI_InterlockedIncrement16_acq:
+ case AArch64::BI_InterlockedIncrement_acq:
+ case AArch64::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case AArch64::BI_InterlockedIncrement16_rel:
+ case AArch64::BI_InterlockedIncrement_rel:
+ case AArch64::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case AArch64::BI_InterlockedIncrement16_nf:
+ case AArch64::BI_InterlockedIncrement_nf:
+ case AArch64::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case AArch64::BI_InterlockedDecrement16_acq:
+ case AArch64::BI_InterlockedDecrement_acq:
+ case AArch64::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case AArch64::BI_InterlockedDecrement16_rel:
+ case AArch64::BI_InterlockedDecrement_rel:
+ case AArch64::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case AArch64::BI_InterlockedDecrement16_nf:
+ case AArch64::BI_InterlockedDecrement_nf:
+ case AArch64::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
+
+ case AArch64::BI_InterlockedAdd: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add, Arg0, Arg1,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return Builder.CreateAdd(RMWI, Arg1);
+ }
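
_InterlockedAdd is defined to return the new value, while LLVM's atomicrmw add yields the value the memory held beforehand, hence the trailing CreateAdd. The same contract in portable C++ (a sketch, not the emitted code):

#include <atomic>
long interlocked_add_equiv(std::atomic<long> &Target, long Value) {
  // fetch_add returns the old value, matching atomicrmw add;
  // adding Value back produces the new value _InterlockedAdd returns.
  return Target.fetch_add(Value, std::memory_order_seq_cst) + Value;
}
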
}
}
@@ -8947,7 +9490,7 @@ static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E,
Value *Res;
if (IsAddition) {
// ADDUS: a > (a+b) ? ~0 : (a+b)
- // If Ops[0] > Add, overflow occured.
+ // If Ops[0] > Add, overflow occurred.
Value *Add = CGF.Builder.CreateAdd(Ops[0], Ops[1]);
Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Add);
Value *Max = llvm::Constant::getAllOnesValue(ResultType);
@@ -9018,17 +9561,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
-uint32_t
+uint64_t
CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
- uint32_t FeaturesMask = 0;
+ uint64_t FeaturesMask = 0;
for (const StringRef &FeatureStr : FeatureStrs) {
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
;
- FeaturesMask |= (1U << Feature);
+ FeaturesMask |= (1ULL << Feature);
}
return FeaturesMask;
}
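
Widening the mask to uint64_t is not cosmetic: once X86TargetParser.def assigns feature values of 32 or more, 1U << Feature is undefined behavior and in practice wraps, aliasing feature N with feature N - 32. Minimal illustration:

#include <cstdint>
uint64_t feature_bit(unsigned Feature) {
  // 1U << Feature is UB for Feature >= 32 and would collide with low bits;
  // the 64-bit shift is well defined for Feature in 0..63.
  return 1ULL << Feature;
}
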
@@ -9037,31 +9580,54 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
}
-llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
-
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
+ uint32_t Features1 = Lo_32(FeaturesMask);
+ uint32_t Features2 = Hi_32(FeaturesMask);
+
+ Value *Result = Builder.getTrue();
+
+ if (Features1 != 0) {
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+
+ // Grab the first (0th) element from the field __cpu_features off of the
+ // global in the struct STy.
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
+ Builder.getInt32(0)};
+ Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features1);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+
+ if (Features2 != 0) {
+ llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
+ "__cpu_features2");
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features2);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
- // Grab the first (0th) element from the field __cpu_features off of the
- // global in the struct STy.
- Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
- ConstantInt::get(Int32Ty, 0)};
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features =
- Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
-
- // Check the value of the bit corresponding to the feature requested.
- Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
- return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+ return Result;
}
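
The 64-bit mask is now tested in two halves: the low word against __cpu_model.__cpu_features[0] and the high word against the separate runtime global __cpu_features2, AND-ing the per-word "all requested bits set" results. A C-level sketch of the check the emitted IR performs, with the globals as compiler-rt/libgcc declare them:

extern struct {
  unsigned int __cpu_vendor, __cpu_type, __cpu_subtype, __cpu_features[1];
} __cpu_model;
extern unsigned int __cpu_features2;

static bool cpuSupports(unsigned long long Mask) {
  unsigned Lo = (unsigned)Mask, Hi = (unsigned)(Mask >> 32);
  bool Result = true;
  if (Lo) Result &= (__cpu_model.__cpu_features[0] & Lo) == Lo;
  if (Hi) Result &= (__cpu_features2 & Hi) == Hi;
  return Result;
}
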
Value *CodeGenFunction::EmitX86CpuInit() {
@@ -10425,30 +10991,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI__builtin_ia32_addcarryx_u32:
case X86::BI__builtin_ia32_addcarryx_u64:
- case X86::BI__builtin_ia32_addcarry_u32:
- case X86::BI__builtin_ia32_addcarry_u64:
case X86::BI__builtin_ia32_subborrow_u32:
case X86::BI__builtin_ia32_subborrow_u64: {
Intrinsic::ID IID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_addcarryx_u32:
- IID = Intrinsic::x86_addcarryx_u32;
+ IID = Intrinsic::x86_addcarry_32;
break;
case X86::BI__builtin_ia32_addcarryx_u64:
- IID = Intrinsic::x86_addcarryx_u64;
- break;
- case X86::BI__builtin_ia32_addcarry_u32:
- IID = Intrinsic::x86_addcarry_u32;
- break;
- case X86::BI__builtin_ia32_addcarry_u64:
- IID = Intrinsic::x86_addcarry_u64;
+ IID = Intrinsic::x86_addcarry_64;
break;
case X86::BI__builtin_ia32_subborrow_u32:
- IID = Intrinsic::x86_subborrow_u32;
+ IID = Intrinsic::x86_subborrow_32;
break;
case X86::BI__builtin_ia32_subborrow_u64:
- IID = Intrinsic::x86_subborrow_u64;
+ IID = Intrinsic::x86_subborrow_64;
break;
}
@@ -11314,12 +11872,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ case AMDGPU::BI__builtin_amdgcn_update_dpp: {
+ llvm::SmallVector<llvm::Value *, 6> Args;
+ for (unsigned I = 0; I != E->getNumArgs(); ++I)
Args.push_back(EmitScalarExpr(E->getArg(I)));
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
- Args[0]->getType());
+ assert(Args.size() == 5 || Args.size() == 6);
+ if (Args.size() == 5)
+ Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+ Value *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
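
The legacy five-operand mov_dpp builtin is folded into amdgcn_update_dpp by prepending an undef "old" operand, so both builtins share one six-operand intrinsic. Rough shape of the mapping (operand names are illustrative):

// __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl)
//   -> llvm.amdgcn.update.dpp(undef, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl)
// __builtin_amdgcn_update_dpp(old, src, ...) maps its operands through 1:1.
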
case AMDGPU::BI__builtin_amdgcn_div_fixup:
@@ -11685,7 +12247,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Y, M4Value});
}
- // Vector intrisincs that output the post-instruction CC value.
+ // Vector intrinsics that output the post-instruction CC value.
#define INTRINSIC_WITH_CC(NAME) \
case SystemZ::BI__builtin_##NAME: \
@@ -12145,7 +12707,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
bool isColMajor = isColMajorArg.getSExtValue();
unsigned IID;
unsigned NumResults = 8;
- // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
+ // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
@@ -12423,6 +12985,191 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
return Builder.CreateCall(Callee, {Addr, Count});
}
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
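
Scalar and SIMD forms alike funnel into wasm.trunc.saturate.{signed,unsigned}, overloaded on the <result, source> type pair; unlike plain fptosi/fptoui, the saturating conversions clamp out-of-range inputs and map NaN to zero instead of trapping. For example:

//   __builtin_wasm_trunc_saturate_s_i32_f64(x)
//     -> call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double %x)
//   x = 1e300 -> INT32_MAX;  x = -1e300 -> INT32_MIN;  x = NaN -> 0
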
+ case WebAssembly::BI__builtin_wasm_min_f32:
+ case WebAssembly::BI__builtin_wasm_min_f64:
+ case WebAssembly::BI__builtin_wasm_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_min_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_max_f32:
+ case WebAssembly::BI__builtin_wasm_max_f64:
+ case WebAssembly::BI__builtin_wasm_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_max_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Extract = Builder.CreateExtractElement(Vec, Lane);
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ return Builder.CreateSExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ return Builder.CreateZExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
+ return Extract;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
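
Sub-word lanes come out of the extractelement as i8/i16 values, so the _s/_u builtins widen with sext/zext respectively, while 32-bit, 64-bit, and floating lanes are returned unchanged. Lane semantics sketched at the C level (v an i8x16 vector, w an i32x4 vector):

//   __builtin_wasm_extract_lane_s_i8x16(v, 1) == (int)(signed char)v[1]
//   __builtin_wasm_extract_lane_u_i8x16(v, 1) == (int)(unsigned char)v[1]
//   __builtin_wasm_extract_lane_i32x4(w, 0)   == w[0]   // no extension
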
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
+ llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
+ Value *Trunc = Builder.CreateTrunc(Val, ElemType);
+ return Builder.CreateInsertElement(Vec, Trunc, Lane);
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
+ return Builder.CreateInsertElement(Vec, Val, Lane);
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ IntNo = Intrinsic::sadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ IntNo = Intrinsic::uadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_bitselect: {
+ Value *V1 = EmitScalarExpr(E->getArg(0));
+ Value *V2 = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {V1, V2, C});
+ }
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ IntNo = Intrinsic::wasm_anytrue;
+ break;
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2:
+ IntNo = Intrinsic::wasm_alltrue;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_abs_f32x4:
+ case WebAssembly::BI__builtin_wasm_abs_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
+ case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
default:
return nullptr;
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 3f3f2c5e43..1c578bd151 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -137,7 +137,7 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),
- RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
+ RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -353,8 +353,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// global variable and save a reference in GpuBinaryHandle to be cleaned up
// in destructor on exit. Then associate all known kernels with the GPU binary
// handle so CUDA runtime can figure out what to call on the GPU side.
- std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
- if (!IsHIP) {
+ std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
+ if (!CudaGpuBinaryFileName.empty()) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
@@ -388,15 +388,23 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
ModuleIDSectionName = "__hip_module_id";
ModuleIDPrefix = "__hip_";
- // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
- // The external symbol is supposed to contain the fat binary but will be
- // populated somewhere else, e.g. by lld through link script.
- FatBinStr = new llvm::GlobalVariable(
+ if (CudaGpuBinary) {
+ // If fatbin is available from early finalization, create a string
+ // literal containing the fat binary loaded from the given file.
+ FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatbinConstantName, 8);
+ } else {
+ // If fatbin is not available, create an external symbol
+ // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
+ // to contain the fat binary but will be populated somewhere else,
+      // e.g. by lld through a linker script.
+ FatBinStr = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
"__hip_fatbin", nullptr,
llvm::GlobalVariable::NotThreadLocal);
- cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ }
FatMagic = HIPFatMagic;
} else {
@@ -447,6 +455,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// thread safety of the loaded program. Therefore we can assume sequential
// execution of constructor functions here.
if (IsHIP) {
+ auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
+ llvm::GlobalValue::LinkOnceAnyLinkage;
llvm::BasicBlock *IfBlock =
llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
llvm::BasicBlock *ExitBlock =
@@ -455,12 +465,13 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
- llvm::GlobalValue::LinkOnceAnyLinkage,
+ Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
// Prevent the weak symbol in different shared libraries being merged.
- GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ if (Linkage != llvm::GlobalValue::InternalLinkage)
+ GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(
GpuBinaryHandle,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
@@ -509,7 +520,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Generate a unique module ID.
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
- OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
+ OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
llvm::Constant *ModuleIDConstant =
makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index d5945be434..8b0733fbec 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -23,7 +23,7 @@
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -276,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
llvm::Value *VFunc =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
- CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 8857ffdde4..64e18e171e 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -23,11 +23,11 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -59,6 +59,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_X86Pascal: return llvm::CallingConv::C;
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
+ case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
@@ -67,11 +68,13 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
}
}
-/// Derives the 'this' type for codegen purposes, i.e. ignoring method
+/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR
/// qualification.
-/// FIXME: address space qualification?
-static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) {
+static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD,
+ const CXXMethodDecl *MD) {
QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
+ if (MD)
+ RecTy = Context.getAddrSpaceQualType(RecTy, MD->getType().getAddressSpace());
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
}
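
GetThisType now threads the method's address-space qualifier onto the record type before forming the pointer, rather than always producing a default-address-space 'this'. Illustrative effect under OpenCL C++-style method qualifiers (syntax abbreviated):

// struct S { int get() __global; };
// 'this' inside S::get is typed '__global S *' rather than 'S *',
// because MD->getType().getAddressSpace() is copied onto RecTy above.
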
@@ -214,6 +217,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
if (PcsAttr *PCS = D->getAttr<PcsAttr>())
return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
+ if (D->hasAttr<AArch64VectorPcsAttr>())
+ return CC_AArch64VectorCall;
+
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
@@ -246,7 +252,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
// Add the 'this' pointer.
if (RD)
- argTypes.push_back(GetThisType(Context, RD));
+ argTypes.push_back(GetThisType(Context, RD, MD));
else
argTypes.push_back(Context.VoidPtrTy);
@@ -302,7 +308,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
SmallVector<CanQualType, 16> argTypes;
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
- argTypes.push_back(GetThisType(Context, MD->getParent()));
+ argTypes.push_back(GetThisType(Context, MD->getParent(), MD));
bool PassParams = true;
@@ -529,7 +535,7 @@ const CGFunctionInfo &
CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
- CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
+ CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
/*chainCall=*/false, ArgTys,
FTP->getExtInfo(), {}, RequiredArgs(1));
@@ -543,7 +549,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CanQual<FunctionProtoType> FTP = GetFormalType(CD);
SmallVector<CanQualType, 2> ArgTys;
const CXXRecordDecl *RD = CD->getParent();
- ArgTys.push_back(GetThisType(Context, RD));
+ ArgTys.push_back(GetThisType(Context, RD, CD));
if (CT == Ctor_CopyingClosure)
ArgTys.push_back(*FTP->param_type_begin());
if (RD->getNumVBases() > 0)
@@ -741,8 +747,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
FunctionType::ExtInfo info,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
- assert(std::all_of(argTypes.begin(), argTypes.end(),
- [](CanQualType T) { return T.isCanonicalAsParam(); }));
+ assert(llvm::all_of(argTypes,
+ [](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
@@ -1253,8 +1259,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// Otherwise do coercion through memory. This is stupid, but simple.
Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateElementBitCast(Tmp, CGF.Int8Ty);
+  Address SrcCasted = CGF.Builder.CreateElementBitCast(Src, CGF.Int8Ty);
CGF.Builder.CreateMemCpy(Casted, SrcCasted,
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
false);
@@ -1335,8 +1341,8 @@ static void CreateCoercedStore(llvm::Value *Src,
// to that information.
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateElementBitCast(Tmp, CGF.Int8Ty);
+  Address DstCasted = CGF.Builder.CreateElementBitCast(Dst, CGF.Int8Ty);
CGF.Builder.CreateMemCpy(DstCasted, Casted,
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
false);
@@ -1709,6 +1715,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.DisableRedZone)
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.IndirectTlsSegRefs)
+ FuncAttrs.addAttribute("indirect-tls-seg-refs");
if (CodeGenOpts.NoImplicitFloat)
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
@@ -1785,6 +1793,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.Backchain)
FuncAttrs.addAttribute("backchain");
+ // FIXME: The interaction of this attribute with the SLH command line flag
+ // has not been determined.
if (CodeGenOpts.SpeculativeLoadHardening)
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
}
@@ -1831,7 +1841,7 @@ void CodeGenModule::ConstructAttributeList(
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
- const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
+ const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
bool HasOptnone = false;
// FIXME: handle sseregparm someday...
@@ -1848,6 +1858,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
@@ -1939,7 +1951,7 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute("disable-tail-calls",
llvm::toStringRef(DisableTailCalls));
- GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
+ GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs);
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -3066,8 +3078,9 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
QualType type = param->getType();
- assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
- "cannot emit delegate call arguments for inalloca arguments!");
+ if (isInAllocaArgument(CGM.getCXXABI(), type)) {
+ CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter");
+ }
// GetAddrOfLocalVar returns a pointer-to-pointer for references,
// but the argument needs to be the original pointer.
@@ -3948,15 +3961,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
+
if ((!ArgInfo.getIndirectByVal() &&
(LV.getAlignment() >=
- getContext().getTypeAlignInChars(I->Ty))) ||
- (ArgInfo.getIndirectByVal() &&
- ((AS != LangAS::Default && AS != LangAS::opencl_private &&
- AS != CGM.getASTAllocaAddressSpace())))) {
+ getContext().getTypeAlignInChars(I->Ty)))) {
+ NeedCopy = true;
+ }
+ if (!getLangOpts().OpenCL) {
+ if ((ArgInfo.getIndirectByVal() &&
+ (AS != LangAS::Default &&
+ AS != CGM.getASTAllocaAddressSpace()))) {
+ NeedCopy = true;
+ }
+ }
+      // For OpenCL, even if RV is located in the default or alloca address
+      // space, we don't want to perform an address space cast for it.
+ else if ((ArgInfo.getIndirectByVal() &&
+ Addr.getType()->getAddressSpace() != IRFuncTy->
+ getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
}
}
+
if (NeedCopy) {
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTempWithoutCast(
@@ -4238,6 +4264,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
#endif
+ // Update the largest vector width if any arguments have vector types.
+ for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+ if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+ }
+
// Compute the calling convention and attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
@@ -4251,8 +4284,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Apply always_inline to all calls within flatten functions.
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
- !(Callee.getAbstractInfo().getCalleeDecl() &&
- Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) {
+ !(Callee.getAbstractInfo().getCalleeDecl().getDecl() &&
+ Callee.getAbstractInfo()
+ .getCalleeDecl()
+ .getDecl()
+ ->hasAttr<NoInlineAttr>())) {
Attrs =
Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::AlwaysInline);
@@ -4318,6 +4354,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!CI->getType()->isVoidTy())
CI->setName("call");
+ // Update largest vector width from the return type.
+ if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
// IPVK_IndirectCallTarget in InstrProfData.inc.
@@ -4332,7 +4373,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Suppress tail calls if requested.
if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) {
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>())
Call->setTailCallKind(llvm::CallInst::TCK_NoTail);
}
@@ -4479,7 +4520,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} ();
// Emit the assume_aligned check on the return value.
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (Ret.isScalar() && TargetDecl) {
if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) {
llvm::Value *OffsetValue = nullptr;
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 99a36e4e12..c300808bea 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -46,21 +46,21 @@ class CGCalleeInfo {
/// The function prototype of the callee.
const FunctionProtoType *CalleeProtoTy;
/// The function declaration of the callee.
- const Decl *CalleeDecl;
+ GlobalDecl CalleeDecl;
public:
- explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
+ explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl)
: CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
- CGCalleeInfo(const Decl *calleeDecl)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl() {}
+ CGCalleeInfo(GlobalDecl calleeDecl)
: CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
const FunctionProtoType *getCalleeFunctionProtoType() const {
return CalleeProtoTy;
}
- const Decl *getCalleeDecl() const { return CalleeDecl; }
+ const GlobalDecl getCalleeDecl() const { return CalleeDecl; }
};
/// All available information about a concrete callee.
@@ -171,7 +171,7 @@ public:
}
CGCalleeInfo getAbstractInfo() const {
if (isVirtual())
- return VirtualInfo.MD.getDecl();
+ return VirtualInfo.MD;
assert(isOrdinary());
return AbstractInfo;
}
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 468d81cbbb..cfc912cc9a 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -16,14 +16,15 @@
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
+#include "TargetInfo.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -2012,8 +2013,19 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool NewPointerIsChecked) {
CallArgList Args;
+ LangAS SlotAS = E->getType().getAddressSpace();
+ QualType ThisType = D->getThisType(getContext());
+ LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace();
+ llvm::Value *ThisPtr = This.getPointer();
+ if (SlotAS != ThisAS) {
+ unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS);
+ llvm::Type *NewType =
+ ThisPtr->getType()->getPointerElementType()->getPointerTo(TargetThisAS);
+ ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(),
+ ThisAS, SlotAS, NewType);
+ }
// Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ Args.add(RValue::get(ThisPtr), D->getThisType(getContext()));
// If this is a trivial constructor, emit a memcpy now before we lose
// the alignment information on the argument.
@@ -2122,7 +2134,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, D);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
EmitCall(Info, Callee, ReturnValueSlot(), Args);
// Generate vtable assumptions if we're constructing a complete object
@@ -2808,7 +2820,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(
// variadic arguments.
// Now emit our call.
- auto callee = CGCallee::forDirect(calleePtr, callOperator);
+ auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator));
RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs);
// If necessary, copy the returned value into the slot.
@@ -2839,7 +2851,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
CallArgList CallArgs;
QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda));
- Address ThisPtr = GetAddrOfBlockDecl(variable, false);
+ Address ThisPtr = GetAddrOfBlockDecl(variable);
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 0a766d1762..3743d24f11 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -366,7 +366,7 @@ static llvm::SwitchInst *TransitionToCleanupSwitch(CodeGenFunction &CGF,
llvm::BasicBlock *Block) {
// If it's a branch, turn it into a switch whose default
// destination is its original target.
- llvm::TerminatorInst *Term = Block->getTerminator();
+ llvm::Instruction *Term = Block->getTerminator();
assert(Term && "can't transition block without terminator");
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
@@ -589,7 +589,7 @@ static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit,
llvm::BasicBlock *To) {
// Exit is the exit block of a cleanup, so it always terminates in
// an unconditional branch or a switch.
- llvm::TerminatorInst *Term = Exit->getTerminator();
+ llvm::Instruction *Term = Exit->getTerminator();
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
assert(Br->isUnconditional() && Br->getSuccessor(0) == From);
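
These hunks track LLVM's removal of the TerminatorInst class:
getTerminator() now returns a plain Instruction*, and being a terminator is a
predicate rather than a type. A small sketch of the replacement idiom
(illustrative, with an invented function name):

    #include <cassert>
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    void inspectTerminator(llvm::BasicBlock *Block) {
      llvm::Instruction *Term = Block->getTerminator();
      if (!Term)
        return;                      // block not yet terminated
      assert(Term->isTerminator());  // holds for any getTerminator() result
      if (auto *Br = llvm::dyn_cast<llvm::BranchInst>(Term))
        (void)Br->isUnconditional(); // concrete kinds remain dyn_cast-able
    }
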
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 7d6eb83f12..f3a07a30eb 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -25,10 +25,10 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
@@ -41,6 +41,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
@@ -180,8 +181,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {
SourceManager &SM = CGM.getContext().getSourceManager();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
-
- if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename())
+ if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc))
return;
if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) {
@@ -220,7 +220,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl),
- getOrCreateMainFile());
+ TheCU->getFile());
return Default;
}
@@ -234,6 +234,9 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
if (CGM.getCodeGenOpts().EmitCodeView)
PP.MSVCFormatting = true;
+ // Apply -fdebug-prefix-map.
+ PP.RemapFilePaths = true;
+ PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); };
return PP;
}
@@ -401,19 +404,18 @@ Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If Location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
+ StringRef FileName = PLoc.getFilename();
+ if (PLoc.isInvalid() || FileName.empty())
// If the location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
// Cache the results.
- const char *fname = PLoc.getFilename();
- auto It = DIFileCache.find(fname);
-
+ auto It = DIFileCache.find(FileName.data());
if (It != DIFileCache.end()) {
// Verify that the information still exists.
if (llvm::Metadata *V = It->second)
@@ -426,22 +428,48 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
-
- llvm::DIFile *F = DBuilder.createFile(
- remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getFileID(Loc)));
-
- DIFileCache[fname].reset(F);
+ return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
+}
+
+llvm::DIFile *
+CGDebugInfo::createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source) {
+ StringRef Dir;
+ StringRef File;
+ std::string RemappedFile = remapDIPath(FileName);
+ std::string CurDir = remapDIPath(getCurrentDirname());
+ SmallString<128> DirBuf;
+ SmallString<128> FileBuf;
+ if (llvm::sys::path::is_absolute(RemappedFile)) {
+ // Strip the common prefix (if it is more than just "/") from current
+ // directory and FileName for a more space-efficient encoding.
+ auto FileIt = llvm::sys::path::begin(RemappedFile);
+ auto FileE = llvm::sys::path::end(RemappedFile);
+ auto CurDirIt = llvm::sys::path::begin(CurDir);
+ auto CurDirE = llvm::sys::path::end(CurDir);
+ for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt)
+ llvm::sys::path::append(DirBuf, *CurDirIt);
+ if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) {
+ // The common prefix is only the root; stripping it would cause
+ // LLVM diagnostic locations to be more confusing.
+ Dir = {};
+ File = RemappedFile;
+ } else {
+ for (; FileIt != FileE; ++FileIt)
+ llvm::sys::path::append(FileBuf, *FileIt);
+ Dir = DirBuf;
+ File = FileBuf;
+ }
+ } else {
+ Dir = CurDir;
+ File = RemappedFile;
+ }
+ llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source);
+ DIFileCache[FileName.data()].reset(F);
return F;
}
-llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
- return DBuilder.createFile(
- remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksum(),
- CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
-}
-
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
for (const auto &Entry : DebugPrefixMap)
if (Path.startswith(Entry.first))
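
The new createFile() splits an absolute path against the compilation
directory so the shared prefix is stored once as the DIFile directory; the
real code additionally keeps the path whole when the only shared component is
the root. A standalone sketch of the split under those assumptions (the
helper name is invented):

    #include <string>
    #include <utility>
    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Path.h"

    // splitAgainst("/home/user/build", "/home/user/src/a.c")
    //   -> {"/home/user", "src/a.c"}
    std::pair<std::string, std::string> splitAgainst(llvm::StringRef CurDir,
                                                     llvm::StringRef File) {
      llvm::SmallString<128> Dir, Rest;
      auto FI = llvm::sys::path::begin(File), FE = llvm::sys::path::end(File);
      auto DI = llvm::sys::path::begin(CurDir), DE = llvm::sys::path::end(CurDir);
      for (; DI != DE && FI != FE && *DI == *FI; ++DI, ++FI)
        llvm::sys::path::append(Dir, *DI);  // grow the common prefix
      for (; FI != FE; ++FI)
        llvm::sys::path::append(Rest, *FI); // remainder becomes the filename
      return {Dir.str().str(), Rest.str().str()};
    }
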
@@ -527,11 +555,11 @@ void CGDebugInfo::CreateCompileUnit() {
llvm::dwarf::SourceLanguage LangTag;
const LangOptions &LO = CGM.getLangOpts();
if (LO.CPlusPlus) {
- if (LO.ObjC1)
+ if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
- } else if (LO.ObjC1) {
+ } else if (LO.ObjC) {
LangTag = llvm::dwarf::DW_LANG_ObjC;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
@@ -545,7 +573,7 @@ void CGDebugInfo::CreateCompileUnit() {
// Figure out which version of the ObjC runtime we have.
unsigned RuntimeVers = 0;
- if (LO.ObjC1)
+ if (LO.ObjC)
RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1;
llvm::DICompileUnit::DebugEmissionKind EmissionKind;
@@ -566,26 +594,33 @@ void CGDebugInfo::CreateCompileUnit() {
break;
}
+ uint64_t DwoId = 0;
+ auto &CGOpts = CGM.getCodeGenOpts();
+ // The DIFile used by the CU is distinct from the main source
+ // file. Its directory part specifies what becomes the
+ // DW_AT_comp_dir (the compilation directory), even if the source
+ // file was specified with an absolute path.
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
+ llvm::DIFile *CUFile = DBuilder.createFile(
+ remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getMainFileID()));
// Create new compile unit.
- // FIXME - Eliminate TheCU.
- auto &CGOpts = CGM.getCodeGenOpts();
TheCU = DBuilder.createCompileUnit(
- LangTag,
- DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getMainFileID())),
- CGOpts.EmitVersionIdentMetadata ? Producer : "",
+ LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
- CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
- 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling,
+ (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
+ ? ""
+ : CGOpts.SplitDwarfFile,
+ EmissionKind, DwoId, CGOpts.SplitDwarfInlining,
+ CGOpts.DebugInfoForProfiling,
CGM.getTarget().getTriple().isNVPTX()
? llvm::DICompileUnit::DebugNameTableKind::None
: static_cast<llvm::DICompileUnit::DebugNameTableKind>(
- CGOpts.DebugNameTable));
+ CGOpts.DebugNameTable),
+ CGOpts.DebugRangesBaseAddress);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -603,9 +638,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return nullptr;
case BuiltinType::ObjCClass:
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
return ClassTy;
case BuiltinType::ObjCId: {
// typedef struct objc_class *Class;
@@ -617,21 +652,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return ObjTy;
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
auto *ISATy = DBuilder.createPointerType(ClassTy, Size);
- ObjTy = DBuilder.createStructType(
- TheCU, "objc_object", getOrCreateMainFile(), 0, 0, 0,
- llvm::DINode::FlagZero, nullptr, llvm::DINodeArray());
+ ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0,
+ 0, 0, llvm::DINode::FlagZero, nullptr,
+ llvm::DINodeArray());
DBuilder.replaceArrays(
ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType(
- ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0,
+ ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0,
llvm::DINode::FlagZero, ISATy)));
return ObjTy;
}
@@ -639,7 +674,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
if (!SelTy)
SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_selector", TheCU,
- getOrCreateMainFile(), 0);
+ TheCU->getFile(), 0);
return SelTy;
}
@@ -658,6 +693,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:
@@ -956,7 +995,7 @@ llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
if (Cache)
return Cache;
Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name,
- TheCU, getOrCreateMainFile(), 0);
+ TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
Cache = DBuilder.createPointerType(Cache, Size);
return Cache;
@@ -1093,6 +1132,7 @@ static unsigned getDwarfCC(CallingConv CC) {
case CC_X86_64SysV:
return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
+ case CC_AArch64VectorCall:
return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
@@ -1485,16 +1525,16 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// Collect virtual method info.
llvm::DIType *ContainingType = nullptr;
- unsigned Virtuality = 0;
unsigned VIndex = 0;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
int ThisAdjustment = 0;
if (Method->isVirtual()) {
if (Method->isPure())
- Virtuality = llvm::dwarf::DW_VIRTUALITY_pure_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagPureVirtual;
else
- Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagVirtual;
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
// It doesn't make sense to give a virtual destructor a vtable index,
@@ -1546,12 +1586,13 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
Flags |= llvm::DINode::FlagLValueReference;
if (Method->getRefQualifier() == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram *SP = DBuilder.createMethod(
RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
- MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality,
- VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize,
+ MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags,
TParamsArray.get());
SPCache[Method->getCanonicalDecl()].reset(SP);
@@ -1776,6 +1817,29 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
return llvm::DINodeArray();
}
+llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
+ llvm::DIFile *Unit) {
+ if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) {
+ auto T = TS->getSpecializedTemplateOrPartial();
+ auto TA = TS->getTemplateArgs().asArray();
+ // Collect parameters for a partial specialization
+ if (T.is<VarTemplatePartialSpecializationDecl *>()) {
+ const TemplateParameterList *TList =
+ T.get<VarTemplatePartialSpecializationDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+
+ // Collect parameters for an explicit specialization
+ if (T.is<VarTemplateDecl *>()) {
+ const TemplateParameterList *TList = T.get<VarTemplateDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+ }
+ return llvm::DINodeArray();
+}
+
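
Source-level illustration of what CollectVarTemplateParams describes: the
template arguments of a variable template specialization now become template
parameter nodes on the variable's debug info entry. Example (C++14,
illustrative):

    template <typename T> constexpr T pi = T(3.1415926535897932385L);
    template <> constexpr int pi<int> = 3;  // explicit specialization

    double two_pi = 2 * pi<double>;         // implicit specialization
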
llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
// Always get the full list of parameters, not just the ones from
@@ -1931,8 +1995,17 @@ static bool isDefinedInClangModule(const RecordDecl *RD) {
if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (!CXXDecl->isCompleteDefinition())
return false;
+ // Check whether RD is a template.
auto TemplateKind = CXXDecl->getTemplateSpecializationKind();
if (TemplateKind != TSK_Undeclared) {
+ // Unfortunately getOwningModule() isn't accurate enough to find the
+ // owning module of a ClassTemplateSpecializationDecl that is inside a
+ // namespace spanning multiple modules.
+ bool Explicit = false;
+ if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl))
+ Explicit = TD->isExplicitInstantiationOrSpecialization();
+ if (!Explicit && CXXDecl->getEnclosingNamespaceContext())
+ return false;
// This is a template, check the origin of the first member.
if (CXXDecl->field_begin() == CXXDecl->field_end())
return TemplateKind == TSK_ExplicitInstantiationDeclaration;
@@ -2480,9 +2553,9 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
Count = CAT->getSize().getZExtValue();
else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
if (Expr *Size = VAT->getSizeExpr()) {
- llvm::APSInt V;
- if (Size->EvaluateAsInt(V, CGM.getContext()))
- Count = V.getExtValue();
+ Expr::EvalResult Result;
+ if (Size->EvaluateAsInt(Result, CGM.getContext()))
+ Count = Result.Val.getInt().getExtValue();
}
}
@@ -2548,9 +2621,9 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
const FunctionProtoType *FPT =
Ty->getPointeeType()->getAs<FunctionProtoType>();
return DBuilder.createMemberPointerType(
- getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
- Ty->getClass(), FPT->getTypeQuals())),
- FPT, U),
+ getOrCreateInstanceMethodType(
+ CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
+ FPT, U),
ClassType, Size, /*Align=*/0, Flags);
}
@@ -3070,6 +3143,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T,
StringRef &Name, StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
llvm::DIScope *&VDContext) {
Unit = getOrCreateFile(VD->getLocation());
LineNo = getLineNumber(VD->getLocation());
@@ -3093,6 +3167,13 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
if (LinkageName == Name)
LinkageName = StringRef();
+ if (isa<VarTemplateSpecializationDecl>(VD)) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ } else {
+ TemplateParameters = nullptr;
+ }
+
// Since we emit declarations (DW_AT_members) for static members, place the
// definition of those static members in the namespace they were declared in
// in the source code (the lexical decl context).
@@ -3119,6 +3200,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
@@ -3135,20 +3217,23 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
+ if (!FD->isExternallyVisible())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
if (Stub) {
+ Flags |= getCallSiteRelatedAttrs();
+ SPFlags |= llvm::DISubprogram::SPFlagDefinition;
return DBuilder.createFunction(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
}
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
@@ -3173,12 +3258,14 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
+ llvm::MDTuple *TemplateParameters = nullptr;
- collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext);
+ collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters,
+ DContext);
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
- !VD->isExternallyVisible(), nullptr, Align);
+ !VD->isExternallyVisible(), nullptr, TemplateParameters, Align);
FwdDeclReplaceMap.emplace_back(
std::piecewise_construct,
std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
@@ -3334,6 +3421,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
bool HasDecl = (D != nullptr);
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *FDContext = Unit;
llvm::DINodeArray TParamsArray;
@@ -3373,6 +3461,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (CurFuncIsThunk)
Flags |= llvm::DINode::FlagThunk;
+ if (Fn->hasLocalLinkage())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
+ llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs();
+ llvm::DISubprogram::DISPFlags SPFlagsForDef =
+ SPFlags | llvm::DISubprogram::SPFlagDefinition;
+
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
@@ -3383,9 +3480,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
- TParamsArray.get(), getFunctionDeclaration(D));
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef,
+ SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D));
Fn->setSubprogram(SP);
// We might get here with a VarDecl in the case we're generating
// code for the initialization of globals. Do not record these decls
@@ -3405,8 +3501,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
cast<llvm::DICompositeType>(It->second);
llvm::DISubprogram *FD = DBuilder.createFunction(
InterfaceDecl, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get());
DBuilder.finalizeSubprogram(FD);
ObjCMethodCache[ID].push_back(FD);
@@ -3455,11 +3550,13 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
}
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = 0;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
DBuilder.retainType(DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/,
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(D)));
}
@@ -3488,7 +3585,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
- if (CurLoc.isInvalid() || CurLoc.isMacroID())
+ if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
@@ -4089,7 +4186,9 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
unsigned LineNo;
StringRef DeclName, LinkageName;
QualType T;
- collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, DContext);
+ llvm::MDTuple *TemplateParameters = nullptr;
+ collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName,
+ TemplateParameters, DContext);
// Attempt to store one global variable for the declaration - even if we
// emit a lot of fields.
@@ -4115,7 +4214,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
Var->hasLocalLinkage(),
Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
- getOrCreateStaticDataMemberDeclarationOrNull(D), Align);
+ getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
+ Align);
Var->addDebugInfo(GVE);
}
DeclCache[D->getCanonicalDecl()].reset(GVE);
@@ -4172,10 +4272,19 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
InitExpr = DBuilder.createConstantValueExpression(
Init.getFloat().bitcastToAPInt().getZExtValue());
}
+
+ llvm::MDTuple *TemplateParameters = nullptr;
+
+ if (isa<VarTemplateSpecializationDecl>(VD))
+ if (VarD) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ }
+
GV.reset(DBuilder.createGlobalVariableExpression(
DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
- Align));
+ TemplateParameters, Align));
}
llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
@@ -4364,7 +4473,7 @@ void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
- if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
+ if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile()))
// Don't ignore in case of explicit cast where it is referenced indirectly.
DBuilder.retainType(DieTy);
}
@@ -4376,3 +4485,22 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
llvm::MDNode *Scope = LexicalBlockStack.back();
return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
+
+llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const {
+ // Call site-related attributes are only useful in optimized programs, and
+ // when there's a possibility of debugging backtraces.
+ if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo ||
+ DebugKind == codegenoptions::LocTrackingOnly)
+ return llvm::DINode::FlagZero;
+
+ // Call site-related attributes are available in DWARF v5. Some debuggers,
+ // while not fully DWARF v5-compliant, may accept these attributes as if they
+ // were part of DWARF v4.
+ bool SupportsDWARFv4Ext =
+ CGM.getCodeGenOpts().DwarfVersion == 4 &&
+ CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB;
+ if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5)
+ return llvm::DINode::FlagZero;
+
+ return llvm::DINode::FlagAllCallsDescribed;
+}
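
A condensed restatement of the gating in getCallSiteRelatedAttrs(), written
as a free function for clarity (illustrative, not the patch's code; the
function and parameter names are invented):

    #include "llvm/Target/TargetOptions.h" // llvm::DebuggerKind

    bool wantsCallSiteInfo(bool Optimized, bool HasRealDebugInfo,
                           unsigned DwarfVersion, llvm::DebuggerKind Tuning) {
      if (!Optimized || !HasRealDebugInfo)
        return false;
      bool DWARF4LLDBExtension =
          DwarfVersion == 4 && Tuning == llvm::DebuggerKind::LLDB;
      return DwarfVersion >= 5 || DWARF4LLDBExtension;
    }
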
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 8641c2d896..031e40b9dd 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -20,8 +20,8 @@
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeOrdering.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
@@ -76,6 +76,9 @@ class CGDebugInfo {
llvm::DIType *OCLQueueDITy = nullptr;
llvm::DIType *OCLNDRangeDITy = nullptr;
llvm::DIType *OCLReserveIDDITy = nullptr;
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ llvm::DIType *Id##Ty = nullptr;
+#include "clang/Basic/OpenCLExtensionTypes.def"
/// Cache of previously constructed Types.
llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;
@@ -248,6 +251,11 @@ class CGDebugInfo {
llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD,
llvm::DIFile *Unit);
+ /// A helper function to collect debug info for variable template
+ /// parameters.
+ llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD,
+ llvm::DIFile *Unit);
+
/// A helper function to collect debug info for template
/// parameters.
llvm::DINodeArray
@@ -333,6 +341,9 @@ public:
void finalize();
+ /// Remap a given path with the current debug prefix map
+ std::string remapDIPath(StringRef) const;
+
/// Register VLA size expression debug node with the qualified type.
void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
SizeExprCache[Ty] = SizeExpr;
@@ -520,9 +531,6 @@ private:
/// Create new compile unit.
void CreateCompileUnit();
- /// Remap a given path with the current debug prefix map
- std::string remapDIPath(StringRef) const;
-
/// Compute the file checksum debug info for input file ID.
Optional<llvm::DIFile::ChecksumKind>
computeChecksum(FileID FID, SmallString<32> &Checksum) const;
@@ -530,11 +538,15 @@ private:
/// Get the source of the given file ID.
Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
- /// Get the file debug info descriptor for the input location.
+ /// Convenience function to get the file debug info descriptor for the input
+ /// location.
llvm::DIFile *getOrCreateFile(SourceLocation Loc);
- /// Get the file info for main compile unit.
- llvm::DIFile *getOrCreateMainFile();
+ /// Create a file debug info descriptor for a source file.
+ llvm::DIFile *
+ createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source);
/// Get the type from the cache or create a new type if necessary.
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
@@ -603,6 +615,11 @@ private:
unsigned LineNo, StringRef LinkageName,
llvm::GlobalVariable *Var, llvm::DIScope *DContext);
+
+ /// Return flags which enable debug info emission for call sites, provided
+ /// that it is supported and enabled.
+ llvm::DINode::DIFlags getCallSiteRelatedAttrs() const;
+
/// Get the printing policy for producing names for debug info.
PrintingPolicy getPrintingPolicy() const;
@@ -645,7 +662,9 @@ private:
/// Collect various properties of a VarDecl.
void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T, StringRef &Name,
- StringRef &LinkageName, llvm::DIScope *&VDContext);
+ StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
+ llvm::DIScope *&VDContext);
/// Allocate a copy of \p A using the DebugInfoNames allocator
/// and return a reference to it. If multiple arguments are given the strings
@@ -725,7 +744,7 @@ public:
/// function \p InlinedFn. The current debug location becomes the inlined call
/// site of the inlined function.
ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
- /// Restore everything back to the orginial state.
+ /// Restore everything back to the original state.
~ApplyInlineDebugLocation();
};
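
remapDIPath() is now public so callers outside CGDebugInfo can apply the
-fdebug-prefix-map rewriting. A minimal sketch of the first-match rewrite it
performs (assumed shape, standalone for illustration):

    #include <string>
    #include <utility>
    #include <vector>
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"

    std::string remap(
        llvm::StringRef Path,
        const std::vector<std::pair<std::string, std::string>> &Map) {
      for (const auto &Entry : Map)
        if (Path.startswith(Entry.first))
          return (llvm::Twine(Entry.second) +
                  Path.substr(Entry.first.size())).str();
      return Path.str(); // no entry matched: keep the path unchanged
    }
    // remap("/build/src/a.c", {{"/build/src", "."}}) == "./a.c"
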
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index e74066ef43..f4fef45a12 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -26,10 +26,10 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -754,9 +754,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
- if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) {
- enterFullExpression(ewc);
- init = ewc->getSubExpr();
+ if (const FullExpr *fe = dyn_cast<FullExpr>(init)) {
+ enterFullExpression(fe);
+ init = fe->getSubExpr();
}
CodeGenFunction::RunCleanupsScope Scope(*this);
@@ -963,6 +963,49 @@ static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init,
return llvm::isBytewiseValue(Init);
}
+static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D,
+ CGBuilderTy &Builder,
+ llvm::Constant *Constant,
+ CharUnits Align) {
+ auto FunctionName = [&](const DeclContext *DC) -> std::string {
+ if (const auto *FD = dyn_cast<FunctionDecl>(DC)) {
+ if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD))
+ return CC->getNameAsString();
+ if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD))
+ return CD->getNameAsString();
+ return CGM.getMangledName(FD);
+ } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) {
+ return OM->getNameAsString();
+ } else if (isa<BlockDecl>(DC)) {
+ return "<block>";
+ } else if (isa<CapturedDecl>(DC)) {
+ return "<captured>";
+ } else {
+ llvm::llvm_unreachable_internal("expected a function or method");
+ }
+ };
+
+ auto *Ty = Constant->getType();
+ bool isConstant = true;
+ llvm::GlobalVariable *InsertBefore = nullptr;
+ unsigned AS = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage,
+ Constant,
+ "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." +
+ D.getName(),
+ InsertBefore, llvm::GlobalValue::NotThreadLocal, AS);
+ GV->setAlignment(Align.getQuantity());
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ Address SrcPtr = Address(GV, Align);
+ llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS);
+ if (SrcPtr.getType() != BP)
+ SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+ return SrcPtr;
+}
+
static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
Address Loc, bool isVolatile,
CGBuilderTy &Builder,
@@ -1002,25 +1045,10 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
return;
}
- // Otherwise, create a temporary global with the initializer then memcpy from
- // the global to the alloca.
- std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = CGM.getContext().getTargetAddressSpace(
- CGM.getStringLiteralAddressSpace());
- llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS);
-
- llvm::GlobalVariable *GV = new llvm::GlobalVariable(
- CGM.getModule(), constant->getType(), true,
- llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
- llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- Address SrcPtr = Address(GV, Loc.getAlignment());
- if (SrcPtr.getType() != BP)
- SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
-
- Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+ Builder.CreateMemCpy(
+ Loc,
+ createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()),
+ SizeVal, isVolatile);
}
/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
@@ -1066,6 +1094,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// For each dimension stores its QualType and corresponding
// size-expression Value.
SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+ SmallVector<IdentifierInfo *, 4> VLAExprNames;
// Break down the array into individual dimensions.
QualType Type1D = D.getType();
@@ -1074,8 +1103,14 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
else {
- auto SizeExprAddr = CreateDefaultAlignTempAlloca(
- VlaSize.NumElts->getType(), "__vla_expr");
+ // Generate a locally unique name for the size expression.
+ Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++);
+ SmallString<12> Buffer;
+ StringRef NameRef = Name.toStringRef(Buffer);
+ auto &Ident = getContext().Idents.getOwn(NameRef);
+ VLAExprNames.push_back(&Ident);
+ auto SizeExprAddr =
+ CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef);
Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
Dimensions.emplace_back(SizeExprAddr.getPointer(),
Type1D.getUnqualifiedType());
@@ -1089,20 +1124,20 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// Register each dimension's size-expression with a DILocalVariable,
// so that it can be used by CGDebugInfo when instantiating a DISubrange
// to describe this array.
+ unsigned NameIdx = 0;
for (auto &VlaSize : Dimensions) {
llvm::Metadata *MD;
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
MD = llvm::ConstantAsMetadata::get(C);
else {
// Create an artificial VarDecl to generate debug info for.
- IdentifierInfo &NameIdent = getContext().Idents.getOwn(
- cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+ IdentifierInfo *NameIdent = VLAExprNames[NameIdx++];
auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
auto QT = getContext().getIntTypeForBitwidth(
VlaExprTy->getScalarSizeInBits(), false);
auto *ArtificialDecl = VarDecl::Create(
getContext(), const_cast<DeclContext *>(D.getDeclContext()),
- D.getLocation(), D.getLocation(), &NameIdent, QT,
+ D.getLocation(), D.getLocation(), NameIdent, QT,
getContext().CreateTypeSourceInfo(QT), SC_Auto);
ArtificialDecl->setImplicit();
@@ -1125,8 +1160,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
AutoVarEmission emission(D);
- bool isByRef = D.hasAttr<BlocksAttr>();
- emission.IsByRef = isByRef;
+ bool isEscapingByRef = D.isEscapingByref();
+ emission.IsEscapingByRef = isEscapingByRef;
CharUnits alignment = getContext().getDeclAlign(&D);
@@ -1165,8 +1200,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// in OpenCL.
if ((!getLangOpts().OpenCL ||
Ty.getAddressSpace() == LangAS::opencl_constant) &&
- (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
- CGM.isTypeConstant(Ty, true))) {
+ (CGM.getCodeGenOpts().MergeAllConstants && !NRVO &&
+ !isEscapingByRef && CGM.isTypeConstant(Ty, true))) {
EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
// Signal this condition to later callbacks.
@@ -1218,7 +1253,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
} else {
CharUnits allocaAlignment;
llvm::Type *allocaTy;
- if (isByRef) {
+ if (isEscapingByRef) {
auto &byrefInfo = getBlockByrefInfo(&D);
allocaTy = byrefInfo.Type;
allocaAlignment = byrefInfo.ByrefAlignment;
@@ -1418,7 +1453,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
}
// Initialize the structure of a __block variable.
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
emitByrefStructureInit(emission);
// Initialize the variable here if it doesn't have a initializer and it is a
@@ -1428,7 +1463,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
type.isNonTrivialToPrimitiveDefaultInitialize() ==
QualType::PDIK_Struct) {
LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
drillIntoBlockVariable(*this, Dst, &D);
defaultInitNonTrivialCStructVar(Dst);
return;
@@ -1440,7 +1475,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
// Check whether this is a byref variable that's potentially
// captured and moved by its own initializer. If so, we'll need to
// emit the initializer first, then copy into the variable.
- bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init);
+ bool capturedByInit = emission.IsEscapingByRef && isCapturedBy(D, Init);
Address Loc =
capturedByInit ? emission.Addr : emission.getObjectAddress(*this);
@@ -1634,7 +1669,8 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
// If this is a block variable, call _Block_object_destroy
// (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
// mode.
- if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+ if (emission.IsEscapingByRef &&
+ CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (emission.Variable->getType().isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -2149,5 +2185,5 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
}
void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
- //Do nothing - here to avoid build errors
+ getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
}
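
Effect of the VLA renaming above at the source level: every runtime array
dimension gets its own uniquely numbered artificial size variable instead of
all dimensions sharing the name "__vla_expr". Sketch (VLAs are a GNU
extension when compiled as C++):

    void f(int n, int m) {
      int a[n][m]; // two runtime dimensions -> __vla_expr0 and __vla_expr1
      (void)a;
    }
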
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 3f96fea608..9aa31f181e 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -15,7 +15,7 @@
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenMPRuntime.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
@@ -26,7 +26,10 @@ using namespace CodeGen;
static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
ConstantAddress DeclPtr) {
- assert(D.hasGlobalStorage() && "VarDecl must have global storage!");
+ assert(
+ (D.hasGlobalStorage() ||
+ (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) &&
+ "VarDecl must have global or local (in the case of OpenCL) storage!");
assert(!D.getType()->isReferenceType() &&
"Should not call EmitDeclInit on a reference!");
@@ -63,15 +66,24 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
/// Emit code to cause the destruction of the given variable with
/// static storage duration.
static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
- ConstantAddress addr) {
+ ConstantAddress Addr) {
+ // Honor __attribute__((no_destroy)) and bail instead of attempting
+ // to emit a reference to a possibly nonexistent destructor, which
+ // in turn can cause a crash. This will result in a global constructor
+ // that isn't balanced out by a destructor call as intended by the
+ // attribute. This also checks for -fno-c++-static-destructors and
+ // bails even if the attribute is not present.
+ if (D.isNoDestroy(CGF.getContext()))
+ return;
+
CodeGenModule &CGM = CGF.CGM;
// FIXME: __attribute__((cleanup)) ?
- QualType type = D.getType();
- QualType::DestructionKind dtorKind = type.isDestructedType();
+ QualType Type = D.getType();
+ QualType::DestructionKind DtorKind = Type.isDestructedType();
- switch (dtorKind) {
+ switch (DtorKind) {
case QualType::DK_none:
return;
@@ -86,13 +98,14 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
return;
}
- llvm::Constant *function;
- llvm::Constant *argument;
+ llvm::Constant *Func;
+ llvm::Constant *Argument;
// Special-case non-array C++ destructors, if they have the right signature.
// Under some ABIs, destructors return this instead of void, and cannot be
- // passed directly to __cxa_atexit if the target does not allow this mismatch.
- const CXXRecordDecl *Record = type->getAsCXXRecordDecl();
+ // passed directly to __cxa_atexit if the target does not allow this
+ // mismatch.
+ const CXXRecordDecl *Record = Type->getAsCXXRecordDecl();
bool CanRegisterDestructor =
Record && (!CGM.getCXXABI().HasThisReturn(
GlobalDecl(Record->getDestructor(), Dtor_Complete)) ||
@@ -103,43 +116,47 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit;
if (Record && (CanRegisterDestructor || UsingExternalHelper)) {
assert(!Record->hasTrivialDestructor());
- CXXDestructorDecl *dtor = Record->getDestructor();
+ CXXDestructorDecl *Dtor = Record->getDestructor();
- function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete);
- argument = llvm::ConstantExpr::getBitCast(
- addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo());
+ Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete);
+ Argument = llvm::ConstantExpr::getBitCast(
+ Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
// Otherwise, the standard logic requires a helper function.
} else {
- function = CodeGenFunction(CGM)
- .generateDestroyHelper(addr, type, CGF.getDestroyer(dtorKind),
- CGF.needsEHCleanup(dtorKind), &D);
- argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
+ Func = CodeGenFunction(CGM)
+ .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind),
+ CGF.needsEHCleanup(DtorKind), &D);
+ Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
}
- CGM.getCXXABI().registerGlobalDtor(CGF, D, function, argument);
+ CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument);
}
/// Emit code to cause the variable at the given address to be considered as
/// constant from this point onwards.
static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Addr) {
+ return CGF.EmitInvariantStart(
+ Addr, CGF.getContext().getTypeSizeInChars(D.getType()));
+}
+
+void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
// Do not emit the intrinsic if we're not optimizing.
- if (!CGF.CGM.getCodeGenOpts().OptimizationLevel)
+ if (!CGM.getCodeGenOpts().OptimizationLevel)
return;
// Grab the llvm.invariant.start intrinsic.
llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
// Overloaded address space type.
- llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy};
- llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr);
+ llvm::Type *ObjectPtr[1] = {Int8PtrTy};
+ llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);
// Emit a call with the size in bytes of the object.
- CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType());
- uint64_t Width = WidthChars.getQuantity();
- llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width),
- llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)};
- CGF.Builder.CreateCall(InvariantStart, Args);
+ uint64_t Width = Size.getQuantity();
+ llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
+ llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
+ Builder.CreateCall(InvariantStart, Args);
}
void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
@@ -360,11 +377,21 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
auto RASignKind = getCodeGenOpts().getSignReturnAddress();
- if (RASignKind != CodeGenOptions::SignReturnAddressScope::None)
+ if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) {
Fn->addFnAttr("sign-return-address",
RASignKind == CodeGenOptions::SignReturnAddressScope::All
? "all"
: "non-leaf");
+ auto RASignKey = getCodeGenOpts().getSignReturnAddressKey();
+ Fn->addFnAttr("sign-return-address-key",
+ RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ ? "a_key"
+ : "b_key");
+ }
+
+ if (getCodeGenOpts().BranchTargetEnforcement)
+ Fn->addFnAttr("branch-target-enforcement");
+
return Fn;
}
@@ -597,7 +624,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
void
CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
ArrayRef<llvm::Function *> Decls,
- Address Guard) {
+ ConstantAddress Guard) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -621,6 +648,12 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
// initializers use previously-initialized thread_local vars, that's
// probably supposed to be OK, but the standard doesn't say.
Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
+
+ // The guard variable can't ever change again.
+ EmitInvariantStart(
+ Guard.getPointer(),
+ CharUnits::fromQuantity(
+ CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
}
RunCleanupsScope Scope(*this);
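
Source-level illustration of the no_destroy path added to EmitDeclDestroy:
when the attribute (or -fno-c++-static-destructors) is in effect, no exit-time
destructor is registered and no reference to the destructor is emitted.
Illustrative:

    struct Logger { ~Logger(); };
    [[clang::no_destroy]] Logger TheLogger; // constructed, never destroyed at exit
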
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 4ee835259a..f1298d1201 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -66,7 +66,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() {
name = "__std_terminate";
else
name = "?terminate@@YAXXZ";
- } else if (getLangOpts().ObjC1 &&
+ } else if (getLangOpts().ObjC &&
getLangOpts().ObjCRuntime.hasTerminate())
name = "objc_terminate";
else
@@ -224,7 +224,7 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
if (FD && FD->usesSEHTry())
return getSEHPersonalityMSVC(T);
- if (L.ObjC1)
+ if (L.ObjC)
return L.CPlusPlus ? getObjCXXPersonality(Target, L)
: getObjCPersonality(Target, L);
return L.CPlusPlus ? getCXXPersonality(Target, L)
@@ -250,7 +250,11 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM,
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
llvm::Constant *Fn = getPersonalityFn(CGM, Personality);
- return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ llvm::PointerType* Int8PtrTy = llvm::PointerType::get(
+ llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+ CGM.getDataLayout().getProgramAddressSpace());
+
+ return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy);
}
/// Check whether a landingpad instruction only uses C++ features.
@@ -315,7 +319,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) {
/// when it really needs it.
void CodeGenModule::SimplifyPersonality() {
// If we're not in ObjC++ -fexceptions, there's nothing to do.
- if (!LangOpts.CPlusPlus || !LangOpts.ObjC1 || !LangOpts.Exceptions)
+ if (!LangOpts.CPlusPlus || !LangOpts.ObjC || !LangOpts.Exceptions)
return;
// Both the problem this endeavors to fix and the way the logic
@@ -1248,7 +1252,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// we follow the false destination for each of the cond branches to reach
// the rethrow block.
llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock;
- while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) {
+ while (llvm::Instruction *TI = RethrowBlock->getTerminator()) {
auto *BI = cast<llvm::BranchInst>(TI);
assert(BI->isConditional());
RethrowBlock = BI->getSuccessor(1);
@@ -1874,7 +1878,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc());
CurSEHParent = ParentCGF.CurSEHParent;
- CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn);
EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter);
}
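
The getOpaquePersonalityFn() change matters on Harvard-style targets such as
AVR, where functions live in a separate address space: an i8* that holds a
function pointer must be created in the DataLayout's program address space. A
small helper sketch under that assumption (the function name is invented):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    llvm::PointerType *programI8PtrTy(llvm::LLVMContext &Ctx,
                                      const llvm::DataLayout &DL) {
      return llvm::PointerType::get(llvm::Type::getInt8Ty(Ctx),
                                    DL.getProgramAddressSpace());
    }
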
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index b74937e9ca..6ef1091cc0 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -26,7 +26,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
@@ -1260,6 +1260,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitVAArgExprLValue(cast<VAArgExpr>(E));
case Expr::DeclRefExprClass:
return EmitDeclRefLValue(cast<DeclRefExpr>(E));
+ case Expr::ConstantExprClass:
+ return EmitLValue(cast<ConstantExpr>(E)->getSubExpr());
case Expr::ParenExprClass:
return EmitLValue(cast<ParenExpr>(E)->getSubExpr());
case Expr::GenericSelectionExprClass:
@@ -1491,6 +1493,16 @@ CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) {
return ConstantEmission();
}
+llvm::Value *CodeGenFunction::emitScalarConstant(
+ const CodeGenFunction::ConstantEmission &Constant, Expr *E) {
+ assert(Constant && "not a constant");
+ if (Constant.isReference())
+ return EmitLoadOfLValue(Constant.getReferenceLValue(*this, E),
+ E->getExprLoc())
+ .getScalarVal();
+ return Constant.getValue();
+}
+
llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
SourceLocation Loc) {
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
@@ -2486,7 +2498,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
assert(isa<BlockDecl>(CurCodeDecl));
- Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>());
+ Address addr = GetAddrOfBlockDecl(VD);
return MakeAddrLValue(addr, T, AlignmentSource::Decl);
}
}
@@ -2538,7 +2550,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
// Drill into block byref variables.
- bool isBlockByref = VD->hasAttr<BlocksAttr>();
+ bool isBlockByref = VD->isEscapingByref();
if (isBlockByref) {
addr = emitBlockByrefAddress(addr, VD);
}
@@ -2601,7 +2613,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
// of a pointer to object; as in void foo (__weak id *param); *param = 0;
// But, we continue to generate __strong write barrier on indirect write
// into a pointer to object.
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC &&
LV.isObjCWeak())
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
@@ -2662,7 +2674,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
if (FnName.startswith("\01"))
FnName = FnName.substr(1);
StringRef NameItems[] = {
- PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
+ PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
@@ -2867,6 +2879,11 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
CheckRecoverableKind RecoverKind, bool IsFatal,
llvm::BasicBlock *ContBB) {
assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable);
+ Optional<ApplyDebugLocation> DL;
+ if (!CGF.Builder.getCurrentDebugLocation()) {
+ // Ensure that the call has at least an artificial debug location.
+ DL.emplace(CGF, SourceLocation());
+ }
bool NeedsAbortSuffix =
IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable;
bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime;
@@ -3478,7 +3495,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo);
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC) {
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
setObjCGCLValueClass(getContext(), E, LV);
@@ -3931,7 +3948,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo,
FieldTBAAInfo);
if (RecordCVR & Qualifiers::Volatile)
- RefLVal.getQuals().setVolatile(true);
+ RefLVal.getQuals().addVolatile();
addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
// Qualifiers on the struct don't apply to the referencee.
@@ -4151,8 +4168,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
- case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
return EmitUnsupportedLValue(E, "unexpected cast lvalue");
case CK_Dependent:
@@ -4246,6 +4264,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
+ case CK_AddressSpaceConversion: {
+ LValue LV = EmitLValue(E->getSubExpr());
+ QualType DestTy = getContext().getPointerType(E->getType());
+ llvm::Value *V = getTargetHooks().performAddrSpaceCast(
+ *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(),
+ E->getType().getAddressSpace(), ConvertType(DestTy));
+ return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()),
+ E->getType(), LV.getBaseInfo(), LV.getTBAAInfo());
+ }
case CK_ObjCObjectLValueCast: {
LValue LV = EmitLValue(E->getSubExpr());
Address V = Builder.CreateElementBitCast(LV.getAddress(),
@@ -4253,10 +4280,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
- case CK_ZeroToOCLQueue:
- llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
- case CK_ZeroToOCLEvent:
- llvm_unreachable("NULL to OpenCL event lvalue cast is not valid");
+ case CK_ZeroToOCLOpaqueType:
+ llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
}
llvm_unreachable("Unhandled lvalue cast kind?");
@@ -4363,7 +4388,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) {
}
llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD);
- return CGCallee::forDirect(calleePtr, FD);
+ return CGCallee::forDirect(calleePtr, GlobalDecl(FD));
}
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
@@ -4407,8 +4432,13 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
calleePtr = EmitLValue(E).getPointer();
}
assert(functionType->isFunctionType());
- CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(),
- E->getReferencedDeclOfCallee());
+
+ GlobalDecl GD;
+ if (const auto *VD =
+ dyn_cast_or_null<VarDecl>(E->getReferencedDeclOfCallee()))
+ GD = GlobalDecl(VD);
+
+ CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD);
CGCallee callee(calleeInfo, calleePtr);
return callee;
}
@@ -4593,7 +4623,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
assert(CalleeType->isFunctionPointerType() &&
"Call must have function pointer type!");
- const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl =
+ OrigCallee.getAbstractInfo().getCalleeDecl().getDecl();
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
// We can only guarantee that a function is called from the correct
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 6264110286..db49b3f28a 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -125,6 +125,10 @@ public:
return Visit(E->getReplacement());
}
+ void VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+
// l-values.
void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); }
void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); }
@@ -847,10 +851,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("cast kind invalid for aggregate types");
}
}
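The VisitConstantExpr additions in this and the following emitters all look through the new ConstantExpr wrapper node. A hedged illustration of where the wrapper shows up, assuming it is introduced in contexts the language requires to be constant-evaluated, such as array bounds and case labels:

    constexpr int N = 4;
    int Arr[N + 1];                     // bound wrapped in a ConstantExpr
    int f(int x) {
      switch (x) { case N: return 1; }  // case value likewise wrapped
      return 0;
    }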
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 393a4aa787..2e0d4ca767 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -17,8 +17,8 @@
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "ConstantEmitter.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
@@ -177,7 +177,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
if (MD->isStatic()) {
// The method is static, emit it as we would a regular call.
- CGCallee callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD), MD);
+ CGCallee callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD));
return EmitCall(getContext().getPointerType(MD->getType()), callee, CE,
ReturnValue);
}
@@ -353,13 +354,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
else if (!DevirtualizedMethod)
Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty),
- Dtor);
+ GlobalDecl(Dtor, Dtor_Complete));
else {
const CXXDestructorDecl *DDtor =
cast<CXXDestructorDecl>(DevirtualizedMethod);
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
- DDtor);
+ CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
+ GlobalDecl(DDtor, Dtor_Complete));
}
EmitCXXMemberOrOperatorCall(
CalleeDecl, Callee, ReturnValue, This.getPointer(),
@@ -371,8 +372,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
CGCallee Callee;
if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) {
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
- Ctor);
+ CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
+ GlobalDecl(Ctor, Ctor_Complete));
} else if (UseVirtualCall) {
Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
} else {
@@ -389,11 +390,12 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
else if (!DevirtualizedMethod)
- Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), MD);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), GlobalDecl(MD));
else {
- Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
- DevirtualizedMethod);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
+ GlobalDecl(DevirtualizedMethod));
}
}
@@ -1293,7 +1295,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
const CallArgList &Args) {
llvm::Instruction *CallOrInvoke;
llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, CalleeDecl);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl));
RValue RV =
CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
Args, CalleeType, /*chainCall=*/false),
@@ -2252,7 +2254,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
}
void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) {
- RunCleanupsScope Scope(*this);
LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType());
CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin();
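Context for the GlobalDecl changes above: one declaration can stand for several emitted symbols, which a bare Decl pointer cannot distinguish. A minimal example (the comments state the Itanium ABI convention, not anything introduced by this patch):

    struct S {
      ~S(); // emitted as both a complete-object (D1) and a base-object (D2)
            // destructor under the Itanium C++ ABI; GlobalDecl(Dtor,
            // Dtor_Complete) names exactly the variant being called.
    };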
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index fb176093a7..2db693b44c 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -101,6 +101,9 @@ public:
llvm_unreachable("Stmt can't have complex result type!");
}
ComplexPairTy VisitExpr(Expr *S);
+ ComplexPairTy VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());}
ComplexPairTy VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
return Visit(GE->getResultExpr());
@@ -505,10 +508,11 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("invalid cast kind for complex value");
case CK_FloatingRealToComplex:
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 651b05a26f..c9475840ae 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -47,7 +47,7 @@ class ConstStructBuilder {
public:
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy);
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
@@ -76,7 +76,7 @@ private:
void ConvertStructToPacked();
bool Build(InitListExpr *ILE);
- bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base,
+ bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base,
InitListExpr *Updater);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
@@ -566,7 +566,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy) {
ConstStructBuilder Builder(Emitter);
@@ -723,6 +723,10 @@ public:
return nullptr;
}
+ llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) {
+ return Visit(CE->getSubExpr(), T);
+ }
+
llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) {
return Visit(PE->getSubExpr(), T);
}
@@ -869,8 +873,9 @@ public:
case CK_FloatingToIntegral:
case CK_FloatingToBoolean:
case CK_FloatingCast:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
+ case CK_ZeroToOCLOpaqueType:
return nullptr;
}
llvm_unreachable("Invalid CastKind");
@@ -1026,8 +1031,8 @@ public:
}
if (destType->isRecordType())
- return ConstStructBuilder::BuildStruct(Emitter, this,
- dyn_cast<llvm::ConstantStruct>(Base), Updater, destType);
+ return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater,
+ destType);
return nullptr;
}
@@ -1102,7 +1107,7 @@ public:
} // end anonymous namespace.
bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater) {
assert(Base && "base expression should not be empty");
@@ -1110,7 +1115,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl();
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout(
- Base->getType());
+ cast<llvm::StructType>(Base->getType()));
unsigned FieldNo = -1;
unsigned ElementNo = 0;
@@ -1131,7 +1136,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
if (Field->isUnnamedBitfield())
continue;
- llvm::Constant *EltInit = Base->getOperand(ElementNo);
+ llvm::Constant *EltInit = Base->getAggregateElement(ElementNo);
// Bail out if the type of the ConstantStruct does not have the same layout
// as the type of the InitListExpr.
@@ -1450,6 +1455,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) {
if (CD->isTrivial() && CD->isDefaultConstructor())
return CGM.EmitNullConstant(D.getType());
}
+ InConstantContext = true;
}
QualType destType = D.getType();
@@ -1547,7 +1553,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
if (destType->isReferenceType())
Success = E->EvaluateAsLValue(Result, CGM.getContext());
else
- Success = E->EvaluateAsRValue(Result, CGM.getContext());
+ Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext);
llvm::Constant *C;
if (Success && !Result.HasSideEffects)
@@ -1600,6 +1606,7 @@ private:
ConstantLValue tryEmitBase(const APValue::LValueBase &base);
ConstantLValue VisitStmt(const Stmt *S) { return nullptr; }
+ ConstantLValue VisitConstantExpr(const ConstantExpr *E);
ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
ConstantLValue VisitStringLiteral(const StringLiteral *E);
ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E);
@@ -1755,6 +1762,11 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
}
ConstantLValue
+ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+}
+
+ConstantLValue
ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E);
}
@@ -1782,7 +1794,7 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) {
return cast<ConstantAddress>(Res.getAddress());
}
- auto kind = E->getIdentType();
+ auto kind = E->getIdentKind();
if (kind == PredefinedExpr::PrettyFunction) {
return CGM.GetAddrOfConstantCString("top level", ".tmp");
}
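Widening ConstStructBuilder::Build from llvm::ConstantStruct to llvm::Constant works because element access no longer relies on the operand list. A minimal sketch of the accessor contrast (helper name invented):

    #include "llvm/IR/Constants.h"
    // getAggregateElement() is uniform across constant aggregate kinds
    // (ConstantStruct, ConstantAggregateZero, UndefValue, ...), whereas
    // getOperand() exists only on the ConstantStruct representation.
    static llvm::Constant *elementAt(llvm::Constant *Base, unsigned Idx) {
      return Base->getAggregateElement(Idx); // null when Idx is out of range
    }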
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 4995db28ba..f53bb33e46 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
-#include "CGCleanup.h"
#include "CGCXXABI.h"
+#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -23,8 +23,9 @@
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/FixedPoint.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -302,7 +303,11 @@ public:
/// Known implicit conversion check kinds.
/// Keep in sync with the enum of the same name in ubsan_handlers.h
enum ImplicitConversionCheckKind : unsigned char {
- ICCK_IntegerTruncation = 0,
+ ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7.
+ ICCK_UnsignedIntegerTruncation = 1,
+ ICCK_SignedIntegerTruncation = 2,
+ ICCK_IntegerSignChange = 3,
+ ICCK_SignedIntegerTruncationOrSignChange = 4,
};
/// Emit a check that an [implicit] truncation of an integer does not
@@ -310,21 +315,39 @@ public:
void EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst,
QualType DstType, SourceLocation Loc);
+ /// Emit a check that an [implicit] conversion of an integer does not change
+ /// the sign of the value. It is not UB, so we use the value after conversion.
+ /// NOTE: Src and Dst may be the exact same value! (point to the same thing)
+ void EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, SourceLocation Loc);
+
/// Emit a conversion from the specified type to the specified destination
/// type, both of which are LLVM scalar types.
struct ScalarConversionOpts {
bool TreatBooleanAsSigned;
bool EmitImplicitIntegerTruncationChecks;
+ bool EmitImplicitIntegerSignChangeChecks;
ScalarConversionOpts()
: TreatBooleanAsSigned(false),
- EmitImplicitIntegerTruncationChecks(false) {}
+ EmitImplicitIntegerTruncationChecks(false),
+ EmitImplicitIntegerSignChangeChecks(false) {}
+
+ ScalarConversionOpts(clang::SanitizerSet SanOpts)
+ : TreatBooleanAsSigned(false),
+ EmitImplicitIntegerTruncationChecks(
+ SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)),
+ EmitImplicitIntegerSignChangeChecks(
+ SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {}
};
Value *
EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy,
SourceLocation Loc,
ScalarConversionOpts Opts = ScalarConversionOpts());
+ Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy,
+ SourceLocation Loc);
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src,
@@ -382,6 +405,9 @@ public:
}
Value *VisitExpr(Expr *S);
+ Value *VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
Value *VisitParenExpr(ParenExpr *PE) {
return Visit(PE->getSubExpr());
}
@@ -450,19 +476,10 @@ public:
return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal();
}
- Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant,
- Expr *E) {
- assert(Constant && "not a constant");
- if (Constant.isReference())
- return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E),
- E->getExprLoc());
- return Constant.getValue();
- }
-
// l-values.
Value *VisitDeclRefExpr(DeclRefExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E))
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
return EmitLoadOfLValue(E);
}
@@ -664,7 +681,7 @@ public:
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), Ops))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
@@ -941,48 +958,233 @@ void ScalarExprEmitter::EmitFloatConversionCheck(
SanitizerHandler::FloatCastOverflow, StaticArgs, OrigSrc);
}
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the truncation Src -> Dst was lossy.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+ (void)DstTy; // Only used in assert()
+
+ // This should be truncation of integral types.
+ assert(Src != Dst);
+ assert(SrcTy->getScalarSizeInBits() > Dst->getType()->getScalarSizeInBits());
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+ // If both (src and dst) types are unsigned, then it's an unsigned truncation.
+ // Else, it is a signed truncation.
+ ScalarExprEmitter::ImplicitConversionCheckKind Kind;
+ SanitizerMask Mask;
+ if (!SrcSigned && !DstSigned) {
+ Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitUnsignedIntegerTruncation;
+ } else {
+ Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitSignedIntegerTruncation;
+ }
+
+ llvm::Value *Check = nullptr;
+ // 1. Extend the truncated value back to the same width as the Src.
+ Check = Builder.CreateIntCast(Dst, SrcTy, DstSigned, "anyext");
+ // 2. Equality-compare with the original source value
+ Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ return std::make_pair(Kind, std::make_pair(Check, Mask));
+}
+
void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType,
Value *Dst, QualType DstType,
SourceLocation Loc) {
- if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation))
+ if (!CGF.SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation))
return;
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst->getType();
-
// We only care about int->int conversions here.
// We ignore conversions to/from pointer and/or bool.
if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
return;
- assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
- "clang integer type lowered to non-integer llvm type");
-
- unsigned SrcBits = SrcTy->getScalarSizeInBits();
- unsigned DstBits = DstTy->getScalarSizeInBits();
+ unsigned SrcBits = Src->getType()->getScalarSizeInBits();
+ unsigned DstBits = Dst->getType()->getScalarSizeInBits();
// This must be truncation. Else we do not care.
if (SrcBits <= DstBits)
return;
assert(!DstType->isBooleanType() && "we should not get here with booleans.");
+ // If the integer sign change sanitizer is enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let that next sanitizer deal with it.
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange) &&
+ (!SrcSigned && DstSigned))
+ return;
+
CodeGenFunction::SanitizerScope SanScope(&CGF);
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+
+ // Do we care about this type of truncation?
+ if (!CGF.SanOpts.has(Check.second.second))
+ return;
+
+ llvm::Constant *StaticArgs[] = {
+ CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
+ CGF.EmitCheckTypeDescriptor(DstType),
+ llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)};
+ CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
+}
+
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the conversion Src -> Dst changed the sign.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ (void)SrcSigned; // Only used in assert()
+ (void)DstSigned; // Only used in assert()
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+ (void)SrcBits; // Only used in assert()
+ (void)DstBits; // Only used in assert()
+
+ assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
+ "either the widths should be different, or the signednesses.");
+
+ // NOTE: zero value is considered to be non-negative.
+ auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType,
+ const char *Name) -> Value * {
+ // Is this value a signed type?
+ bool VSigned = VType->isSignedIntegerOrEnumerationType();
+ llvm::Type *VTy = V->getType();
+ if (!VSigned) {
+ // If the value is unsigned, then it is never negative.
+ // FIXME: can we encounter non-scalar VTy here?
+ return llvm::ConstantInt::getFalse(VTy->getContext());
+ }
+ // Get the zero of the same type with which we will be comparing.
+ llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0);
+ // %V.isnegative = icmp slt %V, 0
+    // I.e., is %V *strictly* less than zero; does it have a negative value?
+ return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
+ llvm::Twine(Name) + "." + V->getName() +
+ ".negativitycheck");
+ };
+
+ // 1. Was the old Value negative?
+ llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src");
+ // 2. Is the new Value negative?
+ llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst");
+ // 3. Now, was the 'negativity status' preserved during the conversion?
+ // NOTE: conversion from negative to zero is considered to change the sign.
+ // (We want to get 'false' when the conversion changed the sign)
+ // So we should just equality-compare the negativity statuses.
llvm::Value *Check = nullptr;
+ Check = Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "signchangecheck");
+ // If the comparison result is 'false', then the conversion changed the sign.
+ return std::make_pair(
+ ScalarExprEmitter::ICCK_IntegerSignChange,
+ std::make_pair(Check, SanitizerKind::ImplicitIntegerSignChange));
+}
- // 1. Extend the truncated value back to the same width as the Src.
- bool InputSigned = DstType->isSignedIntegerOrEnumerationType();
- Check = Builder.CreateIntCast(Dst, SrcTy, InputSigned, "anyext");
- // 2. Equality-compare with the original source value
- Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
- // If the comparison result is 'i1 false', then the truncation was lossy.
+void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType,
+ Value *Dst, QualType DstType,
+ SourceLocation Loc) {
+ if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange))
+ return;
+
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ // We only care about int->int conversions here.
+ // We ignore conversions to/from pointer and/or bool.
+ if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
+ return;
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+
+ // Now, we do not need to emit the check in *all* of the cases.
+  // We can avoid emitting it in some obvious cases where it would always be
+  // dropped by the optimizer (instcombine) anyway.
+ // If it's a cast between effectively the same type, no check.
+ // NOTE: this is *not* equivalent to checking the canonical types.
+ if (SrcSigned == DstSigned && SrcBits == DstBits)
+ return;
+ // At least one of the values needs to have signed type.
+ // If both are unsigned, then obviously, neither of them can be negative.
+ if (!SrcSigned && !DstSigned)
+ return;
+ // If the conversion is to *larger* *signed* type, then no check is needed.
+ // Because either sign-extension happens (so the sign will remain),
+ // or zero-extension will happen (the sign bit will be zero.)
+ if ((DstBits > SrcBits) && DstSigned)
+ return;
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && SrcSigned) {
+ // If the signed integer truncation sanitizer is enabled,
+ // and this is a truncation from signed type, then no check is needed.
+ // Because here sign change check is interchangeable with truncation check.
+ return;
+ }
+ // That's it. We can't rule out any more cases with the data we have.
+
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check;
+
+ // Each of these checks needs to return 'false' when an issue was detected.
+ ImplicitConversionCheckKind CheckKind;
+ llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
+ // So we can 'and' all the checks together, and still get 'false',
+ // if at least one of the checks detected an issue.
+
+ Check = EmitIntegerSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = Check.first;
+ Checks.emplace_back(Check.second);
+
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && !SrcSigned && DstSigned) {
+ // If the signed integer truncation sanitizer was enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let's handle the case we skipped in that check.
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = ICCK_SignedIntegerTruncationOrSignChange;
+ Checks.emplace_back(Check.second);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ }
llvm::Constant *StaticArgs[] = {
CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
CGF.EmitCheckTypeDescriptor(DstType),
- llvm::ConstantInt::get(Builder.getInt8Ty(), ICCK_IntegerTruncation)};
- CGF.EmitCheck(std::make_pair(Check, SanitizerKind::ImplicitIntegerTruncation),
- SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst});
+ llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)};
+ // EmitCheck() will 'and' all the checks together.
+ CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
}
/// Emit a conversion from the specified type to the specified destination type,
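For reference, source-level conversions that each new check kind is meant to catch; a sketch assuming the -fsanitize=implicit-conversion group wires up these kinds as the enum above suggests:

    unsigned char u_trunc(unsigned x) { return x; } // unsigned truncation
    signed char   s_trunc(int x)      { return x; } // signed truncation
    unsigned      schange(int x)      { return x; } // sign change when x < 0
    signed char   both(unsigned x)    { return x; } // combined kind: the
        // truncation check defers this case to the sign-change check, which
        // then reports ICCK_SignedIntegerTruncationOrSignChange.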
@@ -991,6 +1193,27 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
QualType DstType,
SourceLocation Loc,
ScalarConversionOpts Opts) {
+  // All conversions involving fixed point types should be handled by the
+  // EmitFixedPoint family of functions. This keeps the function from bloating
+  // further, and although fixed point numbers are represented by integers,
+  // we do not want to follow any logic that assumes they should be treated
+  // as integers.
+ // TODO(leonardchan): When necessary, add another if statement checking for
+ // conversions to fixed point types from other types.
+ if (SrcType->isFixedPointType()) {
+ if (DstType->isFixedPointType()) {
+ return EmitFixedPointConversion(Src, SrcType, DstType, Loc);
+ } else if (DstType->isBooleanType()) {
+ // We do not need to check the padding bit on unsigned types if unsigned
+ // padding is enabled because overflow into this bit is undefined
+ // behavior.
+ return Builder.CreateIsNotNull(Src, "tobool");
+ }
+
+ llvm_unreachable(
+ "Unhandled scalar conversion involving a fixed point type.");
+ }
+
QualType NoncanonicalSrcType = SrcType;
QualType NoncanonicalDstType = DstType;
@@ -1036,8 +1259,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
}
// Ignore conversions like int -> uint.
- if (SrcTy == DstTy)
+ if (SrcTy == DstTy) {
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Src,
+ NoncanonicalDstType, Loc);
+
return Src;
+ }
// Handle pointer conversions next: pointers can only be converted to/from
// other pointers and integers. Check for pointer types in terms of LLVM, as
@@ -1181,9 +1409,91 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
EmitIntegerTruncationCheck(Src, NoncanonicalSrcType, Res,
NoncanonicalDstType, Loc);
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Res,
+ NoncanonicalDstType, Loc);
+
return Res;
}
+Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy,
+ QualType DstTy,
+ SourceLocation Loc) {
+ using llvm::APInt;
+ using llvm::ConstantInt;
+ using llvm::Value;
+
+ assert(SrcTy->isFixedPointType());
+ assert(DstTy->isFixedPointType());
+
+ FixedPointSemantics SrcFPSema =
+ CGF.getContext().getFixedPointSemantics(SrcTy);
+ FixedPointSemantics DstFPSema =
+ CGF.getContext().getFixedPointSemantics(DstTy);
+ unsigned SrcWidth = SrcFPSema.getWidth();
+ unsigned DstWidth = DstFPSema.getWidth();
+ unsigned SrcScale = SrcFPSema.getScale();
+ unsigned DstScale = DstFPSema.getScale();
+ bool SrcIsSigned = SrcFPSema.isSigned();
+ bool DstIsSigned = DstFPSema.isSigned();
+
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstWidth);
+
+ Value *Result = Src;
+ unsigned ResultWidth = SrcWidth;
+
+ if (!DstFPSema.isSaturated()) {
+ // Downscale.
+ if (DstScale < SrcScale)
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+
+ // Resize.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+
+ // Upscale.
+ if (DstScale > SrcScale)
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else {
+ // Adjust the number of fractional bits.
+ if (DstScale > SrcScale) {
+ ResultWidth = SrcWidth + DstScale - SrcScale;
+ llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth);
+ Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize");
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else if (DstScale < SrcScale) {
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+ }
+
+ // Handle saturation.
+ bool LessIntBits = DstFPSema.getIntegralBits() < SrcFPSema.getIntegralBits();
+ if (LessIntBits) {
+ Value *Max = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMax(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooHigh = SrcIsSigned ? Builder.CreateICmpSGT(Result, Max)
+ : Builder.CreateICmpUGT(Result, Max);
+ Result = Builder.CreateSelect(TooHigh, Max, Result, "satmax");
+ }
+ // Cannot overflow min to dest type if src is unsigned since all fixed
+ // point types can cover the unsigned min of 0.
+ if (SrcIsSigned && (LessIntBits || !DstIsSigned)) {
+ Value *Min = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMin(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooLow = Builder.CreateICmpSLT(Result, Min);
+ Result = Builder.CreateSelect(TooLow, Min, Result, "satmin");
+ }
+
+ // Resize the integer part to get the final destination size.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+ }
+ return Result;
+}
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *ScalarExprEmitter::EmitComplexToScalarConversion(
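A worked instance of the non-saturating rescaling path above, mirrored in plain integer arithmetic. The scales are an assumption about typical defaults (short _Accum: 16 bits, scale 7; _Accum: 32 bits, scale 15), not taken from this patch:

    int upscale_short_accum(short Raw) {
      int Resized = Raw;   // "resize": sign-extend 16 -> 32 bits
      return Resized << 8; // "upscale": shl by 15 - 7
    }
    short downscale_accum(int Raw) {
      int Down = Raw >> 8; // "downscale": arithmetic shift right by 8
      return (short)Down;  // "resize": truncate back to 16 bits
    }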
@@ -1405,10 +1715,11 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) {
CGF.EmitIgnoredExpr(E->getBase());
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
} else {
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ llvm::APSInt Value = Result.Val.getInt();
CGF.EmitIgnoredExpr(E->getBase());
return Builder.getInt(Value);
}
@@ -1874,11 +2185,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Builder.CreateVectorSplat(NumElements, Elt, "splat");
}
+ case CK_FixedPointCast:
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
+ case CK_FixedPointToBoolean:
+ assert(E->getType()->isFixedPointType() &&
+ "Expected src type to be fixed point type");
+ assert(DestTy->isBooleanType() && "Expected dest type to be boolean type");
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
case CK_IntegralCast: {
ScalarConversionOpts Opts;
- if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) {
- if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE))
- Opts.EmitImplicitIntegerTruncationChecks = !ICE->isPartOfExplicitCast();
+ if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) {
+ if (!ICE->isPartOfExplicitCast())
+ Opts = ScalarConversionOpts(CGF.SanOpts);
}
return EmitScalarConversion(Visit(E), E->getType(), DestTy,
CE->getExprLoc(), Opts);
@@ -1919,13 +2241,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CE->getExprLoc());
}
- case CK_ZeroToOCLEvent: {
- assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type");
- return llvm::Constant::getNullValue(ConvertType(DestTy));
- }
-
- case CK_ZeroToOCLQueue: {
- assert(DestTy->isQueueT() && "CK_ZeroToOCLQueue cast on non queue_t type");
+ case CK_ZeroToOCLOpaqueType: {
+ assert((DestTy->isEventT() || DestTy->isQueueT() ||
+ DestTy->isOCLIntelSubgroupAVCType()) &&
+           "CK_ZeroToOCLOpaqueType cast on non-opaque type");
return llvm::Constant::getNullValue(ConvertType(DestTy));
}
@@ -1984,7 +2303,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(InVal, Amount, Name);
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
@@ -2279,9 +2598,11 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
// Try folding the offsetof to a constant.
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext()))
+ Expr::EvalResult EVResult;
+ if (E->EvaluateAsInt(EVResult, CGF.getContext())) {
+ llvm::APSInt Value = EVResult.Val.getInt();
return Builder.getInt(Value);
+ }
// Loop over the components of the offsetof to compute the value.
unsigned n = E->getNumComponents();
@@ -2550,9 +2871,10 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
- // Convert the result back to the LHS type.
- Result =
- EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, Loc);
+ // Convert the result back to the LHS type,
+ // potentially with Implicit Conversion sanitizer check.
+ Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
+ Loc, ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
@@ -2990,7 +3312,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
@@ -3025,7 +3347,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index 8f9a9b9607..169ae4fcde 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -10,8 +10,8 @@
#include "CGLoopInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
-#include "clang/Sema/LoopHint.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -335,10 +335,10 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
if (!L.getLoopID())
return;
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) {
- for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i)
- if (TI->getSuccessor(i) == L.getHeader()) {
- TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
+ if (I->isTerminator()) {
+ for (BasicBlock *Succ : successors(I))
+ if (Succ == L.getHeader()) {
+ I->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
break;
}
return;
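The hunk above adapts to LLVM's removal of the TerminatorInst class. The replacement idiom in isolation, as a self-contained sketch (function name invented; assumes LLVM headers from the same era as this change):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"

    // Ask the instruction itself whether it terminates a block, and walk
    // its successors through the free function in llvm/IR/CFG.h.
    static void tagLoopBackedge(llvm::Instruction *I, llvm::BasicBlock *Header,
                                llvm::MDNode *LoopID) {
      if (!I->isTerminator())
        return;
      for (llvm::BasicBlock *Succ : llvm::successors(I))
        if (Succ == Header) {
          I->setMetadata(llvm::LLVMContext::MD_loop, LoopID);
          break;
        }
    }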
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
index e9f60a9113..c6a96a9126 100644
--- a/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -187,6 +187,7 @@ template <class Derived> struct GenFuncNameBase {
if (!FK)
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset);
+ asDerived().flushTrivialFields();
CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
ASTContext &Ctx = asDerived().getContext();
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
@@ -336,6 +337,7 @@ template <class Derived> struct GenFuncBase {
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset,
Addrs);
+ asDerived().flushTrivialFields(Addrs);
CodeGenFunction &CGF = *this->CGF;
ASTContext &Ctx = CGF.getContext();
@@ -456,12 +458,13 @@ template <class Derived> struct GenFuncBase {
llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
FuncName, &CGM.getModule());
F->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
IdentifierInfo *II = &Ctx.Idents.get(FuncName);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
- II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr,
+ SC_PrivateExtern, false, false);
CodeGenFunction NewCGF(CGM);
setCGF(&NewCGF);
CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index c62f40b790..cc582b926b 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -352,6 +352,56 @@ static const Expr *findWeakLValue(const Expr *E) {
return nullptr;
}
+/// The ObjC runtime may provide entrypoints that are likely to be faster
+/// than an ordinary message send of the appropriate selector.
+///
+/// The entrypoints are guaranteed to be equivalent to just sending the
+/// corresponding message. If the entrypoint is implemented naively as just a
+/// message send, using it is a trade-off: it sacrifices a few cycles of
+/// overhead to save a small amount of code. However, it's possible for
+/// runtimes to detect and special-case classes that use "standard"
+/// behavior; if that's dynamically a large proportion of all objects, using
+/// the entrypoint will also be faster than using a message send.
+///
+/// If the runtime does support a required entrypoint, then this method will
+/// generate a call and return the resulting value. Otherwise it will return
+/// None and the caller can generate a msgSend instead.
+static Optional<llvm::Value *>
+tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
+ llvm::Value *Receiver,
+ const CallArgList& Args, Selector Sel,
+ const ObjCMethodDecl *method) {
+ auto &CGM = CGF.CGM;
+ if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls)
+ return None;
+
+ auto &Runtime = CGM.getLangOpts().ObjCRuntime;
+ switch (Sel.getMethodFamily()) {
+ case OMF_alloc:
+ if (Runtime.shouldUseRuntimeFunctionsForAlloc() &&
+ ResultType->isObjCObjectPointerType()) {
+ // [Foo alloc] -> objc_alloc(Foo)
+ if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc")
+ return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType));
+ // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo)
+ if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 &&
+ Args.size() == 1 && Args.front().getType()->isPointerType() &&
+ Sel.getNameForSlot(0) == "allocWithZone") {
+ const llvm::Value* arg = Args.front().getKnownRValue().getScalarVal();
+ if (isa<llvm::ConstantPointerNull>(arg))
+ return CGF.EmitObjCAllocWithZone(Receiver,
+ CGF.ConvertType(ResultType));
+ return None;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ return None;
+}
+
RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
ReturnValueSlot Return) {
// Only the lookup mechanism and first two arguments of the method
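Concretely, the rewrites enabled by tryGenerateSpecializedMessageSend look as follows. The entrypoint declarations are assumptions about the Objective-C runtime's C interface, not part of this patch:

    #include <objc/objc.h> // for id and Class (Apple/GNUstep runtimes)
    extern "C" id objc_alloc(Class cls);
    extern "C" id objc_allocWithZone(Class cls);
    // [Foo alloc]             ==> objc_alloc(Foo)
    // [Foo allocWithZone:nil] ==> objc_allocWithZone(Foo)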
@@ -474,10 +524,16 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
Args,
method);
} else {
- result = Runtime.GenerateMessageSend(*this, Return, ResultType,
- E->getSelector(),
- Receiver, Args, OID,
- method);
+ // Call runtime methods directly if we can.
+ if (Optional<llvm::Value *> SpecializedResult =
+ tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args,
+ E->getSelector(), method)) {
+ result = RValue::get(SpecializedResult.getValue());
+ } else {
+ result = Runtime.GenerateMessageSend(*this, Return, ResultType,
+ E->getSelector(), Receiver, Args,
+ OID, method);
+ }
}
// For delegate init calls in ARC, implicitly store the result of
@@ -568,7 +624,7 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF,
LValue lvalue, QualType type);
/// Generate an Objective-C method. An Objective-C method is a C function with
-/// its pointer, name, and types registered in the class struture.
+/// its pointer, name, and types registered in the class structure.
void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) {
StartObjCMethod(OMD, OMD->getClassInterface());
PGO.assignRegionCounters(GlobalDecl(OMD), CurFn);
@@ -883,9 +939,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// If there's a non-trivial 'get' expression, we just have to emit that.
if (!hasTrivialGetExpr(propImpl)) {
if (!AtomicHelperFn) {
- ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(),
- /*nrvo*/ nullptr);
- EmitReturnStmt(ret);
+ auto *ret = ReturnStmt::Create(getContext(), SourceLocation(),
+ propImpl->getGetterCXXConstructor(),
+ /* NRVOCandidate=*/nullptr);
+ EmitReturnStmt(*ret);
}
else {
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
@@ -1844,6 +1901,7 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
/// where a null input causes a no-op and returns null.
static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
llvm::Value *value,
+ llvm::Type *returnType,
llvm::Constant *&fn,
StringRef fnName,
bool isTailCall = false) {
@@ -1857,7 +1915,7 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
}
// Cast the argument to 'id'.
- llvm::Type *origType = value->getType();
+ llvm::Type *origType = returnType ? returnType : value->getType();
value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy);
// Call the function.
@@ -1963,7 +2021,7 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) {
/// Retain the given object, with normal retain semantics.
/// call i8* \@objc_retain(i8* %value)
llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retain,
"objc_retain");
}
@@ -1977,7 +2035,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
bool mandatory) {
llvm::Value *result
- = emitARCValueOperation(*this, value,
+ = emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainBlock,
"objc_retainBlock");
@@ -2047,7 +2105,7 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
"objc_retainAutoreleasedReturnValue");
}
@@ -2062,7 +2120,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
llvm::Value *
CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue,
"objc_unsafeClaimAutoreleasedReturnValue");
}
@@ -2177,7 +2235,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst,
/// Autorelease the given object.
/// call i8* \@objc_autorelease(i8* %value)
llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autorelease,
"objc_autorelease");
}
@@ -2186,7 +2244,7 @@ llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) {
/// call i8* \@objc_autoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autoreleaseReturnValue,
"objc_autoreleaseReturnValue",
/*isTailCall*/ true);
@@ -2196,7 +2254,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
/// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue,
"objc_retainAutoreleaseReturnValue",
/*isTailCall*/ true);
@@ -2225,7 +2283,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type,
/// call i8* \@objc_retainAutorelease(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutorelease,
"objc_retainAutorelease");
}
@@ -2384,6 +2442,24 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() {
return InitRV.getScalarVal();
}
+/// Allocate the given objc object.
+/// call i8* \@objc_alloc(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitARCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_alloc,
+ "objc_alloc");
+}
+
+/// Allocate the given objc object.
+/// call i8* \@objc_allocWithZone(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitARCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_allocWithZone,
+ "objc_allocWithZone");
+}
+
/// Produce the code to do a primitive release.
/// [tmp drain];
void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) {
@@ -2446,27 +2522,36 @@ void CodeGenFunction::EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr) {
EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, Ptr);
}
-static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
- LValue lvalue,
- QualType type) {
- switch (type.getObjCLifetime()) {
+static bool shouldRetainObjCLifetime(Qualifiers::ObjCLifetime lifetime) {
+ switch (lifetime) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Autoreleasing:
- return TryEmitResult(CGF.EmitLoadOfLValue(lvalue,
- SourceLocation()).getScalarVal(),
- false);
+ return true;
case Qualifiers::OCL_Weak:
- return TryEmitResult(CGF.EmitARCLoadWeakRetained(lvalue.getAddress()),
- true);
+ return false;
}
llvm_unreachable("impossible lifetime!");
}
static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
+ LValue lvalue,
+ QualType type) {
+ llvm::Value *result;
+ bool shouldRetain = shouldRetainObjCLifetime(type.getObjCLifetime());
+ if (shouldRetain) {
+ result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal();
+ } else {
+ assert(type.getObjCLifetime() == Qualifiers::OCL_Weak);
+ result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress());
+ }
+ return TryEmitResult(result, !shouldRetain);
+}
+
+static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
const Expr *e) {
e = e->IgnoreParens();
QualType type = e->getType();
@@ -2500,6 +2585,16 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
cast<BinaryOperator>(e)->getOpcode() == BO_Assign)
return TryEmitResult(CGF.EmitScalarExpr(e), false);
+  // Try to emit the scalar constant directly instead of emitting an lvalue
+  // and loading it, because we are not guaranteed to have an lvalue. One such
+  // case is a DeclRefExpr referencing a non-odr-used constant-evaluated variable.
+ if (const auto *decl_expr = dyn_cast<DeclRefExpr>(e)) {
+ auto *DRE = const_cast<DeclRefExpr *>(decl_expr);
+ if (CodeGenFunction::ConstantEmission constant = CGF.tryEmitAsConstant(DRE))
+ return TryEmitResult(CGF.emitScalarConstant(constant, DRE),
+ !shouldRetainObjCLifetime(type.getObjCLifetime()));
+ }
+
return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type);
}
@@ -3229,29 +3324,32 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
ASTContext &C = getContext();
IdentifierInfo *II
= &CGM.getContext().Idents.get("__assign_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
@@ -3262,7 +3360,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
DeclRefExpr DstExpr(&DstDecl, false, DestTy,
VK_RValue, SourceLocation());
@@ -3301,50 +3399,51 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
return nullptr;
llvm::Constant *HelperFn = nullptr;
-
if (hasTrivialGetExpr(PID))
return nullptr;
assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null");
if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty)))
return HelperFn;
-
ASTContext &C = getContext();
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ IdentifierInfo *II =
+ &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__copy_helper_atomic_property_", &CGM.getModule());
+ llvm::Function *Fn = llvm::Function::Create(
+ LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_",
+ &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
VK_RValue, SourceLocation());
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 2b6d895f93..d91eb43ca3 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -23,9 +23,9 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
@@ -7188,15 +7188,21 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name,
Weak ? llvm::GlobalValue::ExternalWeakLinkage
: llvm::GlobalValue::ExternalLinkage;
-
-
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
- if (!GV) {
- GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy,
- false, L, nullptr, Name);
+ if (!GV || GV->getType() != ObjCTypes.ClassnfABITy->getPointerTo()) {
+ auto *NewGV = new llvm::GlobalVariable(ObjCTypes.ClassnfABITy, false, L,
+ nullptr, Name);
if (DLLImport)
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+
+ if (GV) {
+ GV->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(NewGV, GV->getType()));
+ GV->eraseFromParent();
+ }
+ GV = NewGV;
+ CGM.getModule().getGlobalList().push_back(GV);
}
assert(GV->getLinkage() == L);
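The GetClassGlobal change above applies a standard retyping idiom: when a global of the requested name already exists with a different type, create a fresh one, redirect all uses through a bitcast, and drop the stale declaration. In isolation (function name invented):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Module.h"

    static llvm::GlobalVariable *
    getOrRetypeGlobal(llvm::Module &M, llvm::StringRef Name, llvm::Type *Ty,
                      llvm::GlobalValue::LinkageTypes L) {
      llvm::GlobalVariable *GV = M.getGlobalVariable(Name);
      if (GV && GV->getValueType() == Ty)
        return GV;
      // Created detached from the module so the name is not uniqued away.
      auto *NewGV = new llvm::GlobalVariable(Ty, /*isConstant=*/false, L,
                                             /*Initializer=*/nullptr, Name);
      if (GV) {
        GV->replaceAllUsesWith(
            llvm::ConstantExpr::getBitCast(NewGV, GV->getType()));
        GV->eraseFromParent();
      }
      M.getGlobalList().push_back(NewGV);
      return NewGV;
    }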
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index 8390bca737..4b6f24a03f 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -296,7 +296,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF,
switch (paramDecl->getType().getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
exn = CGF.EmitARCRetainNonBlock(exn);
- // fallthrough
+ LLVM_FALLTHROUGH;
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 1da19a90c3..7f6f595dd5 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -62,6 +62,11 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
case BuiltinType::OCLReserveID:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return llvm::PointerType::get( \
+ llvm::StructType::create(Ctx, "opencl." #ExtType), AddrSpc);
+#include "clang/Basic/OpenCLExtensionTypes.def"
}
}
@@ -118,25 +123,6 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
-// Get the block literal from an expression derived from the block expression.
-// OpenCL v2.0 s6.12.5:
-// Block variable declarations are implicitly qualified with const. Therefore
-// all block variables must be initialized at declaration time and may not be
-// reassigned.
-static const BlockExpr *getBlockExpr(const Expr *E) {
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
- E = cast<VarDecl>(DR->getDecl())->getInit();
- }
- E = E->IgnoreImplicit();
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- return cast<BlockExpr>(E);
-}
-
/// Record emitted llvm invoke function and llvm block literal for the
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -151,15 +137,21 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
EnqueuedBlockMap[E].Kernel = nullptr;
}
-llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
- return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
-}
-
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
CGF.EmitScalarExpr(E);
- const BlockExpr *Block = getBlockExpr(E);
+  // The block literal may be assigned to a const variable. Chase down the
+  // initializer to get the block literal.
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ E = E->IgnoreImplicit();
+ if (auto Cast = dyn_cast<CastExpr>(E)) {
+ E = Cast->getSubExpr();
+ }
+ auto *Block = cast<BlockExpr>(E);
+
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
"Block expression not emitted");
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index a513340827..3da55af065 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -91,10 +91,6 @@ public:
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
-
- /// \return LLVM block invoke function emitted for an expression derived from
- /// the block expression.
- llvm::Function *getInvokeFunction(const Expr *E);
};
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index d421729faa..66f0783e27 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1223,6 +1223,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
void CGOpenMPRuntime::clear() {
InternalVars.clear();
+ // Clean up non-target variable declarations possibly used only in debug info.
+ for (const auto &Data : EmittedNonTargetVariables) {
+ if (!Data.getValue().pointsToAliveValue())
+ continue;
+ auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
+ if (!GV)
+ continue;
+ if (!GV->isDeclaration() || GV->getNumUses() > 0)
+ continue;
+ GV->eraseFromParent();
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
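EmittedNonTargetVariables (declared further down in CGOpenMPRuntime.h) stores llvm::WeakTrackingVH handles, so clear() can test pointsToAliveValue() before touching a global that may already have been deleted, and then drop declarations that only debug info pulled in. A sketch of the pattern, with assumed names:

  #include "llvm/ADT/StringMap.h"
  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/ValueHandle.h"

  // Sketch only: erase tracked globals that remained unused declarations.
  void pruneUnusedDecls(llvm::StringMap<llvm::WeakTrackingVH> &Tracked) {
    for (auto &Entry : Tracked) {
      if (!Entry.getValue().pointsToAliveValue())
        continue; // already deleted; the weak handle nulled itself
      auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(Entry.getValue());
      if (GV && GV->isDeclaration() && GV->use_empty())
        GV->eraseFromParent(); // safe: an unused external declaration
    }
  }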
@@ -1456,7 +1467,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
- llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
+ unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
+ FlagsTy FlagsKey(Flags, Reserved2Flags);
+ llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
if (!Entry) {
if (!DefaultOpenMPPSource) {
// Initialize default location for psource field of ident_t structure of
@@ -1469,22 +1482,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
- llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::get(CGM.Int32Ty, Flags),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- DefaultOpenMPPSource};
+ llvm::Constant *Data[] = {
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
llvm::GlobalValue *DefaultOpenMPLocation =
- createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "",
+ createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
llvm::GlobalValue::PrivateLinkage);
DefaultOpenMPLocation->setUnnamedAddr(
llvm::GlobalValue::UnnamedAddr::Global);
- OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
+ OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
}
return Address(Entry, Align);
}
+void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
+
+ llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
+ if (AtCurrentPoint) {
+ Elem.second.ServiceInsertPt = new llvm::BitCastInst(
+ Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
+ } else {
+ Elem.second.ServiceInsertPt =
+ new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
+ Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
+ }
+}
+
+void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (Elem.second.ServiceInsertPt) {
+ llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
+ Elem.second.ServiceInsertPt = nullptr;
+ Ptr->eraseFromParent();
+ }
+}
+
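The service insert point is the same trick CodeGenFunction uses for AllocaInsertPt: a dead bitcast of undef acts as a movable cursor that later emission can SetInsertPoint() at and that is erased once the function is finished. A sketch with illustrative names:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"

  // Sketch only: create, use, and retire a placeholder insertion point.
  void demo(llvm::IRBuilder<> &Builder, llvm::BasicBlock *Entry,
            llvm::Type *I32Ty) {
    llvm::Value *Undef = llvm::UndefValue::get(I32Ty);
    // A no-op instruction nothing uses; it only marks a stable position.
    auto *InsertPt = new llvm::BitCastInst(Undef, I32Ty, "svcpt", Entry);
    Builder.SetInsertPoint(InsertPt);
    // ...emit service calls here; they land ahead of later user code...
    InsertPt->eraseFromParent(); // retire the placeholder when done
  }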
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
@@ -1511,8 +1549,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGF.getTypeSize(IdentQTy));
}
@@ -1582,21 +1622,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = Call;
return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- if (OpenMPLocThreadIDMap.count(CGF.CurFn))
+ if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
+ clearLocThreadIdInsertPt(CGF);
OpenMPLocThreadIDMap.erase(CGF.CurFn);
+ }
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for (auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
@@ -2470,8 +2514,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
VD = VD->getDefinition(CGM.getContext());
- if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
- ThreadPrivateWithDefinition.insert(VD);
+ if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
@@ -2617,7 +2660,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
- if (VD && !DeclareTargetWithDefinition.insert(VD).second)
+ if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
return CGM.getLangOpts().OpenMPIsDevice;
QualType ASTTy = VD->getType();
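Keying these bookkeeping sets by mangled name instead of by Decl pointer makes the "done already?" test stable across redeclarations, and StringSet's insert().second reports whether this was the first insertion. A minimal sketch with assumed names:

  #include "llvm/ADT/StringSet.h"

  llvm::StringSet<> Emitted; // sketch: one entry per mangled name

  bool firstTime(llvm::StringRef MangledName) {
    // true only on the first insertion, so per-entity work runs once
    return Emitted.insert(MangledName).second;
  }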
@@ -3171,13 +3214,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
-void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPDirectiveKind Kind, bool EmitChecks,
- bool ForceSimpleCall) {
- if (!CGF.HaveInsertPoint())
- return;
- // Build call __kmpc_cancel_barrier(loc, thread_id);
- // Build call __kmpc_barrier(loc, thread_id);
+unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
unsigned Flags;
if (Kind == OMPD_for)
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
@@ -3189,6 +3226,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
Flags = OMP_IDENT_BARRIER_EXPL;
else
Flags = OMP_IDENT_BARRIER_IMPL;
+ return Flags;
+}
+
+void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks,
+ bool ForceSimpleCall) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call __kmpc_cancel_barrier(loc, thread_id);
+ // Build call __kmpc_barrier(loc, thread_id);
+ unsigned Flags = getDefaultFlagsForBarriers(Kind);
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
@@ -3261,6 +3309,18 @@ bool CGOpenMPRuntime::isStaticNonchunked(
return Schedule == OMP_dist_sch_static;
}
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+ return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+ OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_dist_sch_static_chunked;
+}
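isStaticChunked distinguishes schedule(static, n) from plain schedule(static): only the chunked form maps to OMP_sch_static_chunked and forces round-robin chunk dispatch. A reduced sketch of the mapping being queried (the enumerator values are assumptions following libomp's sched_type convention):

  // Sketch only; values follow libomp's sched_type convention.
  enum OpenMPSchedType {
    OMP_sch_static_chunked = 33, // schedule(static, n): fixed chunks, round robin
    OMP_sch_static = 34          // schedule(static): one block per thread
  };

  OpenMPSchedType runtimeSchedule(bool Chunked) {
    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
  }

  bool isStaticChunked(bool Chunked) {
    return runtimeSchedule(Chunked) == OMP_sch_static_chunked;
  }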
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
OpenMPSchedType Schedule =
@@ -3881,6 +3941,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
llvm::LLVMContext &C = M.getContext();
SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
OrderedEntries(OffloadEntriesInfoManager.size());
+ llvm::SmallVector<StringRef, 16> ParentFunctions(
+ OffloadEntriesInfoManager.size());
// Auxiliary methods to create metadata values and strings.
auto &&GetMDInt = [this](unsigned V) {
@@ -3895,7 +3957,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
+ [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -3915,6 +3977,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Save this entry in the right position of the ordered entries array.
OrderedEntries[E.getOrder()] = &E;
+ ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -3956,6 +4019,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
E)) {
if (!CE->getID() || !CE->getAddress()) {
+ // Do not blame the entry if the parent function is not emitted.
+ StringRef FnName = ParentFunctions[CE->getOrder()];
+ if (!CGM.GetGlobalValue(FnName))
+ continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
"Offloading entry for target region is incorrect: either the "
@@ -5215,8 +5282,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
LBLVal.getPointer(),
UBLVal.getPointer(),
CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getNullValue(
- CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -6735,10 +6802,11 @@ private:
}
// Check if the length evaluates to 1.
- llvm::APSInt ConstLength;
- if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
+ Expr::EvalResult Result;
+ if (!Length->EvaluateAsInt(Result, CGF.getContext()))
return true; // Can have a size greater than 1.
+ llvm::APSInt ConstLength = Result.Val.getInt();
return ConstLength.getSExtValue() != 1;
}
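This hunk and the emitDeclareSimdFunction hunk further down track the same clang API change: Expr::EvaluateAsInt now fills an Expr::EvalResult instead of an llvm::APSInt out-parameter, and callers fetch the value through Result.Val.getInt(). The updated calling pattern in isolation (a sketch, with assumed names):

  #include "clang/AST/ASTContext.h"
  #include "clang/AST/Expr.h"

  bool getConstInt(const clang::Expr *E, const clang::ASTContext &Ctx,
                   int64_t &Out) {
    clang::Expr::EvalResult Result;
    if (!E->EvaluateAsInt(Result, Ctx)) // new signature: EvalResult out-param
      return false;
    Out = Result.Val.getInt().getSExtValue(); // integer now lives in .Val
    return true;
  }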
@@ -7489,6 +7557,82 @@ public:
}
}
+ /// Emit capture info for variables captured by reference in lambdas.
+ void generateInfoForLambdaCaptures(
+ const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types,
+ llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ return;
+ Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
+ LValue VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
+ LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
+ BasePointers.push_back(ThisLVal.getPointer());
+ Pointers.push_back(ThisLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
+ LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
+ BasePointers.push_back(VarLVal.getPointer());
+ Pointers.push_back(VarLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(
+ VD->getType().getCanonicalType().getNonReferenceType()));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ }
+
+ /// Set correct indices for lambdas captures.
+ void adjustMemberOfForLambdaCaptures(
+ const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
+ MapFlagsArrayTy &Types) const {
+ for (unsigned I = 0, E = Types.size(); I < E; ++I) {
+ // Set correct member_of idx for all implicit lambda captures.
+ if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
+ continue;
+ llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
+ assert(BasePtr && "Unable to find base lambda address.");
+ int TgtIdx = -1;
+ for (unsigned J = I; J > 0; --J) {
+ unsigned Idx = J - 1;
+ if (Pointers[Idx] != BasePtr)
+ continue;
+ TgtIdx = Idx;
+ break;
+ }
+ assert(TgtIdx != -1 && "Unable to find parent lambda.");
+ // All other current entries will be MEMBER_OF the combined entry
+ // (except for PTR_AND_OBJ entries which do not have a placeholder value
+ // 0xFFFF in the MEMBER_OF field).
+ OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
+ setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+ }
+ }
+
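The two helpers above exist because a lambda mapped onto a target region carries hidden pointer fields: each by-reference capture stores the host address of the captured variable, which must be retargeted to the device copy. A source-level scenario, purely illustrative:

  void example() {
    int x = 0;
    auto L = [&x]() { x += 1; }; // L holds a field that points at x
  #pragma omp target map(to : L)
    L(); // the capture field must point at the device copy of x, hence the
         // PTR_AND_OBJ | MEMBER_OF | IMPLICIT entries generated above
  }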
/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
@@ -8061,6 +8205,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto CV = CapturedVars.begin();
@@ -8090,6 +8235,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
if (CurBasePointers.empty())
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
CurPointers, CurSizes, CurMapTypes);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(
+ CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
+ CurMapTypes, LambdaPointers);
}
// We expect to have at least an element of information for this capture.
assert(!CurBasePointers.empty() &&
@@ -8111,6 +8262,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
Sizes.append(CurSizes.begin(), CurSizes.end());
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
+ // Adjust MEMBER_OF flags for the lambda captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
+ Pointers, MapTypes);
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
@@ -8304,14 +8458,15 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice)
return false;
- // Try to detect target regions in the function.
const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
+ StringRef Name = CGM.getMangledName(GD);
+ // Try to detect target regions in the function.
if (const auto *FD = dyn_cast<FunctionDecl>(VD))
- scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
+ scanForTargetRegionsFunctions(FD->getBody(), Name);
// Do not emit the function if it is not marked as declare target.
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
- AlreadyEmittedTargetFunctions.count(VD->getCanonicalDecl()) == 0;
+ AlreadyEmittedTargetFunctions.count(Name) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -8348,54 +8503,62 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
- if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
- StringRef VarName;
- CharUnits VarSize;
- llvm::GlobalValue::LinkageTypes Linkage;
- switch (*Res) {
- case OMPDeclareTargetDeclAttr::MT_To:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
- VarName = CGM.getMangledName(VD);
- if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
- VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
- assert(!VarSize.isZero() && "Expected non-zero size of the variable");
- } else {
- VarSize = CharUnits::Zero();
- }
- Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
- // Temp solution to prevent optimizations of the internal variables.
- if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
- std::string RefName = getName({VarName, "ref"});
- if (!CGM.GetGlobalValue(RefName)) {
- llvm::Constant *AddrRef =
- getOrCreateInternalVariable(Addr->getType(), RefName);
- auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
- GVAddrRef->setConstant(/*Val=*/true);
- GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
- GVAddrRef->setInitializer(Addr);
- CGM.addCompilerUsedGlobal(GVAddrRef);
- }
- }
- break;
- case OMPDeclareTargetDeclAttr::MT_Link:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
- if (CGM.getLangOpts().OpenMPIsDevice) {
- VarName = Addr->getName();
- Addr = nullptr;
- } else {
- VarName = getAddrOfDeclareTargetLink(VD).getName();
- Addr =
- cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res) {
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ // Register non-target variables being emitted in device code (debug info
+ // may cause this).
+ StringRef VarName = CGM.getMangledName(VD);
+ EmittedNonTargetVariables.try_emplace(VarName, Addr);
+ }
+ return;
+ }
+ // Register declare target variables.
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
+ StringRef VarName;
+ CharUnits VarSize;
+ llvm::GlobalValue::LinkageTypes Linkage;
+ switch (*Res) {
+ case OMPDeclareTargetDeclAttr::MT_To:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ VarName = CGM.getMangledName(VD);
+ if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
+ VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
+ assert(!VarSize.isZero() && "Expected non-zero size of the variable");
+ } else {
+ VarSize = CharUnits::Zero();
+ }
+ Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ // Temporary solution to prevent optimization of the internal variables.
+ if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
+ std::string RefName = getName({VarName, "ref"});
+ if (!CGM.GetGlobalValue(RefName)) {
+ llvm::Constant *AddrRef =
+ getOrCreateInternalVariable(Addr->getType(), RefName);
+ auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
+ GVAddrRef->setConstant(/*Val=*/true);
+ GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
+ GVAddrRef->setInitializer(Addr);
+ CGM.addCompilerUsedGlobal(GVAddrRef);
}
- VarSize = CGM.getPointerSize();
- Linkage = llvm::GlobalValue::WeakAnyLinkage;
- break;
}
- OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
- VarName, Addr, VarSize, Flags, Linkage);
+ break;
+ case OMPDeclareTargetDeclAttr::MT_Link:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ VarName = Addr->getName();
+ Addr = nullptr;
+ } else {
+ VarName = getAddrOfDeclareTargetLink(VD).getName();
+ Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ }
+ VarSize = CGM.getPointerSize();
+ Linkage = llvm::GlobalValue::WeakAnyLinkage;
+ break;
}
+ OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
+ VarName, Addr, VarSize, Flags, Linkage);
}
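The restructured function now handles three cases with early returns: no declare-target attribute (recorded in EmittedNonTargetVariables when compiling for the device), 'to', and 'link'. For reference, the MT_Link path corresponds to source like the following (illustrative):

  int BigTable[1 << 20];
  #pragma omp declare target link(BigTable)
  // The host keeps the definition; device code reaches it through the weak
  // pointer variable registered by the MT_Link case above.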
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
@@ -8422,6 +8585,12 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const {
}
}
+void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+ " Expected target-based directive.");
+}
+
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
CodeGenModule &CGM)
: CGM(CGM) {
@@ -8440,21 +8609,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
return true;
+ StringRef Name = CGM.getMangledName(GD);
const auto *D = cast<FunctionDecl>(GD.getDecl());
- const FunctionDecl *FD = D->getCanonicalDecl();
// Do not emit the function if it is marked as declare target as it was
// already emitted.
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
- if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
- if (auto *F = dyn_cast_or_null<llvm::Function>(
- CGM.GetGlobalValue(CGM.getMangledName(GD))))
+ if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
return !F->isDeclaration();
return false;
}
return true;
}
- return !AlreadyEmittedTargetFunctions.insert(FD).second;
+ return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
@@ -9002,8 +9170,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamAttrTy &ParamAttr = ParamAttrs[Pos];
ParamAttr.Kind = Linear;
if (*SI) {
- if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
- Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
if (const auto *DRE =
cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
@@ -9012,6 +9180,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamPositions[StridePVD->getCanonicalDecl()]);
}
}
+ } else {
+ ParamAttr.StrideOrArg = Result.Val.getInt();
}
}
++SI;
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 35f75a9ec0..d9ac5df36b 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -19,8 +19,8 @@
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
@@ -278,12 +278,39 @@ protected:
/// stored.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+ void setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint = false);
+ void clearLocThreadIdInsertPt(CodeGenFunction &CGF);
+
+ /// Check if the default location must be constant.
+ /// Default is false to support OMPT/OMPD.
+ virtual bool isDefaultLocationConstant() const { return false; }
+
+ /// Returns additional flags that can be stored in reserved_2 field of the
+ /// default location.
+ virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
+
+ /// Returns default flags for the barriers depending on the directive, for
+ /// which this barier is going to be emitted.
+ static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind);
+
+ /// Get the LLVM type for the critical name.
+ llvm::ArrayType *getKmpCriticalNameTy() const { return KmpCriticalNameTy; }
+
+ /// Returns corresponding lock object for the specified critical region
+ /// name. If the lock object does not exist it is created, otherwise the
+ /// reference to the existing copy is returned.
+ /// \param CriticalName Name of the critical region.
+ ///
+ llvm::Value *getCriticalRegionLock(StringRef CriticalName);
+
private:
/// Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource = nullptr;
+ using FlagsTy = std::pair<unsigned, unsigned>;
/// Map of flags and corresponding default locations.
- typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
+ using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
Address getOrCreateDefaultLocation(unsigned Flags);
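OpenMPDefaultLocMap is now keyed by a (Flags, Reserved2Flags) pair so a subclass (the NVPTX runtime overrides getDefaultLocationReserved2Flags) can stash extra bits in ident_t's reserved_2 field without colliding with the plain host entries; llvm::DenseMap accepts std::pair keys out of the box. A sketch with assumed names:

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/IR/Value.h"
  #include <utility>

  using FlagsTy = std::pair<unsigned, unsigned>;
  llvm::DenseMap<FlagsTy, llvm::Value *> DefaultLocMap; // sketch

  llvm::Value *lookupLoc(unsigned Flags, unsigned Reserved2) {
    // DenseMapInfo<std::pair<T, U>> ships with LLVM, so pairs just work.
    return DefaultLocMap.lookup(FlagsTy(Flags, Reserved2));
  }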
@@ -300,6 +327,8 @@ private:
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
+ /// Insert point for the service instructions.
+ llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr;
};
/// Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
@@ -596,7 +625,11 @@ private:
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
bool ShouldMarkAsGlobal = true;
- llvm::SmallDenseSet<const Decl *> AlreadyEmittedTargetFunctions;
+ /// Names of the target functions already emitted for the device.
+ llvm::StringSet<> AlreadyEmittedTargetFunctions;
+ /// List of the global variables with their addresses that should not be
+ /// emitted for the target.
+ llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables;
/// List of variables that can become declare target implicitly and, thus,
/// must be emitted.
@@ -673,10 +706,10 @@ private:
const llvm::Twine &Name);
/// Set of threadprivate variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition;
+ llvm::StringSet<> ThreadPrivateWithDefinition;
/// Set of declare target variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition;
+ llvm::StringSet<> DeclareTargetWithDefinition;
/// Emits initialization code for the threadprivate variables.
/// \param VDAddr Address of the global variable \a VD.
@@ -688,13 +721,6 @@ private:
llvm::Value *Ctor, llvm::Value *CopyCtor,
llvm::Value *Dtor, SourceLocation Loc);
- /// Returns corresponding lock object for the specified critical region
- /// name. If the lock object does not exist it is created, otherwise the
- /// reference to the existing copy is returned.
- /// \param CriticalName Name of the critical region.
- ///
- llvm::Value *getCriticalRegionLock(StringRef CriticalName);
-
struct TaskResultTy {
llvm::Value *NewTask = nullptr;
llvm::Value *TaskEntry = nullptr;
@@ -884,6 +910,20 @@ public:
virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
bool Chunked) const;
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
/// Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1500,7 +1540,7 @@ public:
/// schedule clause.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const {}
+ const Expr *&ChunkExpr) const {}
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
@@ -1517,12 +1557,23 @@ public:
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD);
- /// Marks the declaration as alread emitted for the device code and returns
+ /// Marks the declaration as already emitted for the device code and returns
/// true, if it was marked already, and false, otherwise.
bool markAsGlobalTarget(GlobalDecl GD);
/// Emit deferred declare target variables marked for deferred emission.
void emitDeferredTargetDecls() const;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ virtual void
+ adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) const;
+
+ /// Perform a check on the requires decl to ensure that the target
+ /// architecture supports unified addressing.
+ virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const {}
};
/// Class that supports the emission of SIMD-only code.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 14fd4a3113..b055132ef0 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -17,6 +17,7 @@
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
@@ -32,8 +33,8 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// Call to void __kmpc_spmd_kernel_deinit();
- OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
+ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function, int16_t
/// IsOMPRuntimeInitialized);
@@ -61,31 +62,21 @@ enum OpenMPRTLFunctionNVPTX {
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
- /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32
- /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
- /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- /// int32_t num_vars, size_t reduce_size, void *reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- /// int32_t index, int32_t width),
- /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
- /// index, int32_t width, int32_t reduce))
- OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
+ /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ /// global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple,
+ /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc,
+ /// kmp_int32 global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple,
/// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
OMPRTL_NVPTX__kmpc_end_reduce_nowait,
/// Call to void __kmpc_data_sharing_init_stack();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
/// Call to void __kmpc_data_sharing_init_stack_spmd();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
- /// Call to void* __kmpc_data_sharing_push_stack(size_t size,
+ /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
- OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
@@ -100,6 +91,13 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_parallel_level,
/// Call to int8_t __kmpc_is_spmd_exec_mode();
OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+ /// Call to void __kmpc_get_team_static_memory(const void *buf, size_t size,
+ /// int16_t is_shared, const void **res);
+ OMPRTL_NVPTX__kmpc_get_team_static_memory,
+ /// Call to void __kmpc_restore_team_static_memory(int16_t is_shared);
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+ /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_barrier,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -142,19 +140,35 @@ public:
/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
/// to the target region and used by containing directives such as 'parallel'
/// to emit optimized code.
-class ExecutionModeRAII {
+class ExecutionRuntimeModesRAII {
private:
- CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
- CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode =
+ CGOpenMPRuntimeNVPTX::EM_Unknown;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode;
+ bool SavedRuntimeMode = false;
+ bool *RuntimeMode = nullptr;
public:
- ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
- : Mode(Mode) {
- SavedMode = Mode;
- Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
- : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ /// Constructor for Non-SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode)
+ : ExecMode(ExecMode) {
+ SavedExecMode = ExecMode;
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ }
+ /// Constructor for SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode,
+ bool &RuntimeMode, bool FullRuntimeMode)
+ : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+ SavedExecMode = ExecMode;
+ SavedRuntimeMode = RuntimeMode;
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD;
+ RuntimeMode = FullRuntimeMode;
+ }
+ ~ExecutionRuntimeModesRAII() {
+ ExecMode = SavedExecMode;
+ if (RuntimeMode)
+ *RuntimeMode = SavedRuntimeMode;
}
- ~ExecutionModeRAII() { Mode = SavedMode; }
};
/// GPU Configuration: This information can be derived from cuda registers,
@@ -169,16 +183,34 @@ enum MachineConfiguration : unsigned {
LaneIDMask = WarpSize - 1,
/// Global memory alignment for performance.
- GlobalMemoryAlignment = 256,
-};
+ GlobalMemoryAlignment = 128,
-enum NamedBarrier : unsigned {
- /// Synchronize on this barrier #ID using a named barrier primitive.
- /// Only the subset of active threads in a parallel region arrive at the
- /// barrier.
- NB_Parallel = 1,
+ /// Maximal size of the shared memory buffer.
+ SharedMemorySize = 128,
};
+static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
+ RefExpr = RefExpr->IgnoreParens();
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
+ const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
+ const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ }
+ RefExpr = RefExpr->IgnoreParenImpCasts();
+ if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
+ return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+ const auto *ME = cast<MemberExpr>(RefExpr);
+ return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+}
+
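getPrivateItem normalizes the various spellings a private/reduction list item can take back to its underlying declaration. The forms it strips, for reference (illustrative clause spellings):

  // reduction(+ : a[2])    ArraySubscriptExpr  -> ValueDecl for 'a'
  // reduction(+ : b[0:n])  OMPArraySectionExpr -> ValueDecl for 'b'
  // reduction(+ : s.f)     MemberExpr          -> ValueDecl for 'f'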
typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
return P1.first > P2.first;
@@ -186,20 +218,31 @@ static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
static RecordDecl *buildRecordForGlobalizedVars(
ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+ ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
&MappedDeclsFields) {
- if (EscapedDecls.empty())
+ if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
return nullptr;
SmallVector<VarsDataTy, 4> GlobalizedVars;
for (const ValueDecl *D : EscapedDecls)
+ GlobalizedVars.emplace_back(
+ CharUnits::fromQuantity(std::max(
+ C.getDeclAlign(D).getQuantity(),
+ static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
+ D);
+ for (const ValueDecl *D : EscapedDeclsForTeams)
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
stable_sort_comparator);
// Build struct _globalized_locals_ty {
- // /* globalized vars */
+ // /* globalized vars */[WarpSize] align (max(decl_align,
+ // GlobalMemoryAlignment))
+ // /* globalized vars */ for EscapedDeclsForTeams
// };
RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
GlobalizedRD->startDefinition();
+ llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
+ EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
for (const auto &Pair : GlobalizedVars) {
const ValueDecl *VD = Pair.second;
QualType Type = VD->getType();
@@ -208,19 +251,39 @@ static RecordDecl *buildRecordForGlobalizedVars(
else
Type = Type.getNonReferenceType();
SourceLocation Loc = VD->getLocation();
- auto *Field =
- FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
- C.getTrivialTypeSourceInfo(Type, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- GlobalizedRD->addDecl(Field);
- if (VD->hasAttrs()) {
- for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
- E(VD->getAttrs().end());
- I != E; ++I)
- Field->addAttr(*I);
+ FieldDecl *Field;
+ if (SingleEscaped.count(VD)) {
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ } else {
+ llvm::APInt ArraySize(32, WarpSize);
+ Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0);
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
+ static_cast<CharUnits::QuantityType>(
+ GlobalMemoryAlignment)));
+ Field->addAttr(AlignedAttr::CreateImplicit(
+ C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true,
+ IntegerLiteral::Create(C, Align,
+ C.getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation())));
}
+ GlobalizedRD->addDecl(Field);
MappedDeclsFields.try_emplace(VD, Field);
}
GlobalizedRD->completeDefinition();
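With the IsInTTDRegion split, a variable escaping from a parallel region gets one slot per lane (a [WarpSize] array aligned to GlobalMemoryAlignment), while a teams-level escape keeps a single slot. The implicit record built above therefore looks roughly like this (illustrative layout; 32 and 128 follow from the WarpSize and GlobalMemoryAlignment enumerators):

  struct _globalized_locals_ty {
    // EscapedDeclsForTeams: one shared slot, natural alignment.
    double TeamVar;
    // Parallel-region escape: a slot per lane, padded for coalesced access.
    int ParVar[32] __attribute__((aligned(128)));
  };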
@@ -256,9 +319,11 @@ class CheckVarsEscapingDeclContext final
const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
if (!Attr)
return;
- if (!isOpenMPPrivate(
- static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
- Attr->getCaptureKind() == OMPC_map)
+ if (((Attr->getCaptureKind() != OMPC_map) &&
+ !isOpenMPPrivate(
+ static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) ||
+ ((Attr->getCaptureKind() == OMPC_map) &&
+ !FD->getType()->isAnyPointerType()))
return;
}
if (!FD->getType()->isReferenceType()) {
@@ -340,15 +405,24 @@ class CheckVarsEscapingDeclContext final
}
}
- void buildRecordForGlobalizedVars() {
+ void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
+ ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
+ if (IsInTTDRegion)
+ EscapedDeclsForTeams = EscapedDecls.getArrayRef();
+ else
+ EscapedDeclsForParallel = EscapedDecls.getArrayRef();
GlobalizedRD = ::buildRecordForGlobalizedVars(
- CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields);
+ CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
+ MappedDeclsFields);
}
public:
- CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {}
+ CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
+ ArrayRef<const ValueDecl *> TeamsReductions)
+ : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
+ }
virtual ~CheckVarsEscapingDeclContext() = default;
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
@@ -490,9 +564,9 @@ public:
/// Returns the record that handles all the escaped local variables and used
/// instead of their original storage.
- const RecordDecl *getGlobalizedRecord() {
+ const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
if (!GlobalizedRD)
- buildRecordForGlobalizedVars();
+ buildRecordForGlobalizedVars(IsInTTDRegion);
return GlobalizedRD;
}
@@ -568,29 +642,15 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
-}
-
-/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
-/// a CTA.
-static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
- llvm::Value *NumThreads) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier),
- Args);
+ llvm::Function *F = llvm::Intrinsic::getDeclaration(
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0);
+ F->addFnAttr(llvm::Attribute::Convergent);
+ CGF.EmitRuntimeCall(F);
}
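Marking the barrier intrinsic convergent is what stops optimizations such as jump threading from letting a subset of threads bypass or duplicate the bar.sync, which would deadlock the CTA. The emission pattern in isolation (a sketch; header layout varies across LLVM versions):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  // Sketch only: emit a CTA-wide barrier that optimizers must keep convergent.
  void emitCTABarrier(llvm::IRBuilder<> &B, llvm::Module &M) {
    llvm::Function *F =
        llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::nvvm_barrier0);
    F->addFnAttr(llvm::Attribute::Convergent);
    B.CreateCall(F);
  }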
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-/// Synchronize worker threads in a parallel region.
-static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
- return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
-}
-
/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
@@ -652,12 +712,58 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
+// Checks if the expression is constant or does not have non-trivial function
+// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
-/// iff there is only one.
-static const Stmt *getSingleCompoundChild(const Stmt *Body) {
- if (const auto *C = dyn_cast<CompoundStmt>(Body))
- if (C->size() == 1)
- return C->body_front();
+/// iff there is only one that is not evaluatable at compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // Found multiple children - cannot return a single child.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
return Body;
}
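Relaxing getSingleCompoundChild matters for SPMD detection: statements that cannot observe or affect the execution mode no longer hide the single nested directive. For example, a body like the following is still treated as having one child (illustrative):

  void kernel(int *A, int N) {
  #pragma omp target teams
    {
      constexpr int Step = 1; // skipped: trivial constexpr declaration
      ;                       // skipped: null statement
  #pragma omp distribute parallel for
      for (int I = 0; I < N; I += Step)
        A[I] = 0;
    }
  }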
@@ -686,7 +792,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- const Stmt *ChildStmt = getSingleCompoundChild(Body);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -700,7 +806,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
@@ -781,10 +887,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
- return !hasParallelIfNumThreadsClause(Ctx, D);
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
- // Distribute with lastprivates requires non-SPMD execution mode.
return !hasParallelIfNumThreadsClause(Ctx, D);
case OMPD_target_simd:
case OMPD_target_teams_distribute:
@@ -861,7 +965,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- const Stmt *ChildStmt = getSingleCompoundChild(Body);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -876,7 +980,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@@ -888,7 +992,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
@@ -900,7 +1004,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@@ -921,7 +1025,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
/*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPWorksharingDirective(DKind) &&
@@ -1069,7 +1173,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
EntryFunctionState EST;
WorkerFunctionState WST(CGM, D.getBeginLoc());
Work.clear();
@@ -1085,17 +1189,35 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
: EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryHeader(CGF, EST, WST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.emitNonSPMDEntryHeader(CGF, EST, WST);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryFooter(CGF, EST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.clearLocThreadIdInsertPt(CGF);
+ RT.emitNonSPMDEntryFooter(CGF, EST);
}
} Action(EST, WST);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve a place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
// Now change the name of the worker function to correspond to this target
// region's entry function.
@@ -1183,7 +1305,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
+ ExecutionRuntimeModesRAII ModeRAII(
+ CurrentExecutionMode, RequiresFullRuntime,
+ CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
+ !supportsLightweightRuntime(CGM.getContext(), D));
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -1199,14 +1324,30 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
: RT(RT), EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
RT.emitSPMDEntryHeader(CGF, EST, D);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ RT.clearLocThreadIdInsertPt(CGF);
RT.emitSPMDEntryFooter(CGF, EST);
}
} Action(*this, EST, D);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve a place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
}
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
@@ -1218,14 +1359,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
EST.ExitBB = CGF.createBasicBlock(".exit");
- // Initialize the OMP state in the runtime; called by all active threads.
- bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
- !supportsLightweightRuntime(CGF.getContext(), D);
- llvm::Value *Args[] = {
- getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
- /*RequiresOMPRuntime=*/
- Bld.getInt16(RequiresFullRuntime ? 1 : 0),
- /*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
+ llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
+ /*RequiresOMPRuntime=*/
+ Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+ /*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
@@ -1256,8 +1393,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
CGF.EmitBlock(OMPDeInitBB);
// DeInitialize the OMP state in the runtime; called by all active threads.
+ llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(EST.ExitBB);
@@ -1344,6 +1484,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
+ // Skip initialization.
+ setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
// Process work items: outlined parallel functions.
for (llvm::Function *W : Work) {
@@ -1404,6 +1546,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Exit target region.
CGF.EmitBlock(ExitBB);
+ // Skip initialization.
+ clearLocThreadIdInsertPt(CGF);
}
/// Returns specified OpenMP runtime function for the current OpenMP
@@ -1440,11 +1584,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
- case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
- // Build void __kmpc_spmd_kernel_deinit();
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+ // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
@@ -1536,83 +1681,37 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
break;
}
- case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
- // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t Algorithm Version),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
break;
}
- case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
- // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- // int32_t num_vars, size_t reduce_size, void *reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t shortCircuit),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width),
- // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width, int32_t reduce))
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy,
- CGM.Int32Ty, CGM.Int32Ty};
- auto *CopyToScratchpadFnTy =
- llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams,
- /*isVarArg=*/false);
- llvm::Type *LoadReduceTypeParams[] = {
- CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty};
- auto *LoadReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo(),
- CopyToScratchpadFnTy->getPointerTo(),
- LoadReduceFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple");
break;
}
- case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
- // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
@@ -1630,14 +1729,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
break;
}
- case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
- // Build void *__kmpc_data_sharing_push_stack(size_t size,
+ case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+ // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
// int16_t UseSharedMemory);
llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
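The renamed push entry point still pairs with the unchanged pop; a minimal sketch with hypothetical host-side stand-ins (the real functions are in the device runtime) showing the pairing around a globalized frame:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-ins; a heap allocation models the coalesced stack.
    extern "C" void *
    __kmpc_data_sharing_coalesced_push_stack(size_t Size, int16_t UseShared) {
      return new char[Size];
    }
    extern "C" void __kmpc_data_sharing_pop_stack(void *Frame) {
      delete[] static_cast<char *>(Frame);
    }

    void globalizedRegionShape() {
      void *Frame =
          __kmpc_data_sharing_coalesced_push_stack(64, /*UseSharedMemory=*/0);
      // ... escaped locals live in *Frame for the duration of the region ...
      __kmpc_data_sharing_pop_stack(Frame);
    }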
@@ -1687,6 +1786,33 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
break;
}
+ case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+ // Build void __kmpc_get_team_static_memory(const void *buf, size_t size,
+ // int16_t is_shared, const void **res);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.SizeTy, CGM.Int16Ty,
+ CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+ // Build void __kmpc_restore_team_static_memory(int16_t is_shared);
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CGM.Int16Ty, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+ break;
+ }
+ case OMPRTL__kmpc_barrier: {
+ // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
}
return RTLFn;
}
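All the declarations above follow the same FunctionType/CreateRuntimeFunction pattern; as a minimal standalone sketch (plain LLVM IR APIs, outside of Clang), this is how the __kmpc_barrier declaration and its convergent attribute could be built. The opaque ident_t struct is a stand-in for the layout Clang constructs itself:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("example", Ctx);
      // Opaque stand-in for ident_t.
      auto *IdentTy = llvm::StructType::create(Ctx, "struct.ident_t");
      llvm::Type *Params[] = {IdentTy->getPointerTo(),
                              llvm::Type::getInt32Ty(Ctx)};
      auto *FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), Params,
                                           /*isVarArg=*/false);
      auto *Fn = llvm::Function::Create(FnTy, llvm::Function::ExternalLinkage,
                                        "__kmpc_barrier", &M);
      // Convergent keeps optimizations from making the barrier's control
      // dependencies non-uniform, mirroring the addFnAttr call above.
      Fn->addFnAttr(llvm::Attribute::Convergent);
    }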
@@ -1733,6 +1859,37 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Enum for accessing the reserved_2 field of the ident_t struct.
+enum ModeFlagsTy : unsigned {
+ /// Bit set to 1 when in SPMD mode.
+ KMP_IDENT_SPMD_MODE = 0x01,
+ /// Bit set to 1 when a simplified runtime is used.
+ KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
+};
+
+/// Special Undefined mode: the combination of Non-SPMD mode and SimpleRuntime.
+static const ModeFlagsTy UndefinedMode =
+ (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
+} // anonymous namespace
+
+unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
+ switch (getExecutionMode()) {
+ case EM_SPMD:
+ if (requiresFullRuntime())
+ return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
+ return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
+ case EM_NonSPMD:
+ assert(requiresFullRuntime() && "Expected full runtime.");
+ return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
+ case EM_Unknown:
+ return UndefinedMode;
+ }
+ llvm_unreachable("Unknown flags are requested.");
+}
+
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
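The reserved_2 encoding above packs two independent bits; a standalone sketch (names mirror the enum, the helper is illustrative) of the values getDefaultLocationReserved2Flags produces for each mode:

    #include <cassert>

    enum : unsigned {
      KMP_IDENT_SPMD_MODE = 0x01,
      KMP_IDENT_SIMPLE_RT_MODE = 0x02,
    };

    // SPMD bit set iff in SPMD mode; simple-runtime bit set iff the full
    // runtime is not required.
    unsigned reserved2Flags(bool IsSPMD, bool RequiresFullRuntime) {
      unsigned Flags = 0;
      if (IsSPMD)
        Flags |= KMP_IDENT_SPMD_MODE;
      if (!RequiresFullRuntime)
        Flags |= KMP_IDENT_SIMPLE_RT_MODE;
      return Flags;
    }

    int main() {
      assert(reserved2Flags(true, true) == 0x01);   // EM_SPMD, full runtime
      assert(reserved2Flags(true, false) == 0x03);  // EM_SPMD, simple runtime
      assert(reserved2Flags(false, true) == 0x00);  // EM_NonSPMD, full runtime
      assert(reserved2Flags(false, false) == 0x02); // UndefinedMode (EM_Unknown)
    }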
@@ -1784,12 +1941,15 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
}
} Action(IsInParallelRegion);
CodeGen.setAction(Action);
+ bool PrevIsInTTDRegion = IsInTTDRegion;
+ IsInTTDRegion = false;
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+ IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
!IsInParallelRegion) {
llvm::Function *WrapperFun =
@@ -1803,13 +1963,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
/// Get list of lastprivate variables from the teams distribute ... or
/// teams {distribute ...} directives.
static void
-getDistributeLastprivateVars(const OMPExecutableDirective &D,
+getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
"expected teams directive.");
const OMPExecutableDirective *Dir = &D;
if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
if (const Stmt *S = getSingleCompoundChild(
+ Ctx,
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
/*IgnoreCaptured=*/true))) {
Dir = dyn_cast<OMPExecutableDirective>(S);
@@ -1819,12 +1980,21 @@ getDistributeLastprivateVars(const OMPExecutableDirective &D,
}
if (!Dir)
return;
- for (const OMPLastprivateClause *C :
- Dir->getClausesOfKind<OMPLastprivateClause>()) {
- for (const Expr *E : C->getVarRefs()) {
- const auto *DE = cast<DeclRefExpr>(E->IgnoreParens());
- Vars.push_back(cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()));
- }
+ for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *E : C->getVarRefs())
+ Vars.push_back(getPrivateItem(E));
+ }
+}
+
+/// Get list of reduction variables from the teams ... directives.
+static void
+getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *E : C->privates())
+ Vars.push_back(getPrivateItem(E));
}
}
@@ -1834,13 +2004,22 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
SourceLocation Loc = D.getBeginLoc();
const RecordDecl *GlobalizedRD = nullptr;
- llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
+ llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+  // Globalize team reduction variables unconditionally in all modes.
+ getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
- getDistributeLastprivateVars(D, LastPrivates);
- if (!LastPrivates.empty())
- GlobalizedRD = buildRecordForGlobalizedVars(
- CGM.getContext(), LastPrivates, MappedDeclsFields);
+ getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+ if (!LastPrivatesReductions.empty()) {
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGM.getContext(), llvm::None, LastPrivatesReductions,
+ MappedDeclsFields);
+ }
+ } else if (!LastPrivatesReductions.empty()) {
+ assert(!TeamAndReductions.first &&
+ "Previous team declaration is not expected.");
+ TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
+ std::swap(TeamAndReductions.second, LastPrivatesReductions);
}
// Emit target region as a standalone region.
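As a concrete illustration (not taken from the patch), a directive like the one below exercises both helpers: the reduction private for sum is collected unconditionally, and in SPMD mode the lastprivate last is collected as well. It compiles and runs as ordinary host OpenMP too:

    #include <cstdio>

    int main() {
      int sum = 0, last = 0;
    #pragma omp target teams distribute parallel for reduction(+ : sum)        \
        lastprivate(last)
      for (int i = 0; i < 100; ++i) {
        sum += i;
        last = i;
      }
      std::printf("sum=%d last=%d\n", sum, last); // sum=4950 last=99
    }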
@@ -1869,9 +2048,9 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
for (const auto &Pair : MappedDeclsFields) {
assert(Pair.getFirst()->isCanonicalDecl() &&
"Expected canonical declaration");
- Data.insert(std::make_pair(
- Pair.getFirst(),
- std::make_pair(Pair.getSecond(), Address::invalid())));
+ Data.insert(std::make_pair(Pair.getFirst(),
+ MappedVarData(Pair.getSecond(),
+ /*IsOnePerTeam=*/true)));
}
}
Rt.emitGenericVarsProlog(CGF, Loc);
@@ -1905,74 +2084,184 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
if (I == FunctionGlobalizedDecls.end())
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType SecGlobalRecTy;
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ llvm::PointerType *GlobalRecPtrTy =
+ CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
llvm::Value *GlobalRecCastAddr;
- if (WithSPMDCheck ||
- getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ llvm::Value *IsTTD = nullptr;
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ IsTTD = Bld.CreateIsNull(PL);
+ }
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(SPMDBB);
- Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack");
+ Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
+ CharUnits::fromQuantity(Alignment));
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+ if (const RecordDecl *SecGlobalizedVarsRecord =
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+ SecGlobalRecTy =
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ Size = Bld.CreateSelect(
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+ }
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue =
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ GlobalRecValue, GlobalRecPtrTy);
CGF.EmitBlock(ExitBB);
- auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(),
+ auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
/*NumReservedValues=*/2, "_select_stack");
Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
GlobalRecCastAddr = Phi;
I->getSecond().GlobalRecordAddr = Phi;
I->getSecond().IsInSPMDModeFlag = IsSPMD;
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().Records.size() < 2 &&
+ "Expected less than 2 globalized records: one for target and one "
+ "for teams.");
+ unsigned Offset = 0;
+ for (const RecordDecl *RD : GlobalizedRecords.back().Records) {
+ QualType RDTy = CGM.getContext().getRecordType(RD);
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+ unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+ Offset =
+ llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
+ }
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
+ Offset = llvm::alignTo(Offset, Alignment);
+ GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
+ ++GlobalizedRecords.back().RegionCounter;
+ if (GlobalizedRecords.back().Records.size() == 1) {
+ assert(KernelStaticGlobalized &&
+ "Kernel static pointer must be initialized already.");
+ auto *UseSharedMemory = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$is_shared");
+ UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, Loc);
+ auto *StaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+ llvm::GlobalValue::CommonLinkage, nullptr);
+ auto *RecSize = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$size");
+ RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ llvm::Value *Ld = CGF.EmitLoadOfScalar(
+ Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
+ llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ KernelStaticGlobalized, CGM.VoidPtrPtrTy);
+ llvm::Value *GlobalRecordSizeArg[] = {StaticGlobalized, Ld,
+ IsInSharedMemory, ResAddr};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_get_team_static_memory),
+ GlobalRecordSizeArg);
+ GlobalizedRecords.back().Buffer = StaticGlobalized;
+ GlobalizedRecords.back().RecSize = RecSize;
+ GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+ GlobalizedRecords.back().Loc = Loc;
+ }
+ assert(KernelStaticGlobalized && "Global address must be set already.");
+ Address FrameAddr = CGF.EmitLoadOfPointer(
+ Address(KernelStaticGlobalized, CGM.getPointerAlign()),
+ CGM.getContext()
+ .getPointerType(CGM.getContext().VoidPtrTy)
+ .castAs<PointerType>());
+ llvm::Value *GlobalRecValue =
+ Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One())
+ .getPointer();
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo());
} else {
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue =
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ GlobalRecValue, GlobalRecPtrTy);
I->getSecond().GlobalRecordAddr = GlobalRecValue;
I->getSecond().IsInSPMDModeFlag = nullptr;
}
LValue Base =
- CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+ CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
+ LValue SecBase;
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+ if (IsTTD) {
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();
+ llvm::PointerType *SecGlobalRecPtrTy =
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+ SecGlobalRecTy);
+ }
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
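The static-buffer offset computation in the TTD path above is plain align-and-advance arithmetic; a standalone sketch with made-up record sizes (the rounding rule matches llvm::alignTo):

    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of Align, as llvm::alignTo does.
    constexpr uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      // One prior record of 12 bytes with 8-byte alignment already sits in
      // the shared static buffer: advance past it, padding included...
      uint64_t Offset = alignTo(alignTo(0, 8) + 12, 8); // -> 16
      assert(Offset == 16);
      // ...so a new record requiring 16-byte alignment starts at offset 16.
      assert(alignTo(Offset, 16) == 16);
    }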
@@ -1982,14 +2271,51 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
}
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
+ LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
+      // Emit VarAddr based on lane-id if required.
+ QualType VarTy;
+ if (Rec.second.IsOnePerTeam) {
+ VarTy = Rec.second.FD->getType();
+ } else {
+ llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
+ VarAddr.getAddress().getPointer(),
+ {Bld.getInt32(0), getNVPTXLaneID(CGF)});
+ VarTy =
+ Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
+ VarAddr = CGF.MakeAddrLValue(
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+ AlignmentSource::Decl);
+ }
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ assert(I->getSecond().IsInSPMDModeFlag &&
+ "Expected unknown execution mode or required SPMD check.");
+ if (IsTTD) {
+ assert(SecIt->second.IsOnePerTeam &&
+ "Secondary glob data must be one per team.");
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+ VarAddr.setAddress(
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
+ VarAddr.getPointer()),
+ VarAddr.getAlignment()));
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ }
+ Address GlobalPtr = Rec.second.PrivateAddr;
+ Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
+ Rec.second.PrivateAddr = Address(
+ Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
+ LocalAddr.getPointer(), GlobalPtr.getPointer()),
+ LocalAddr.getAlignment());
+ }
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
+ if (IsTTD)
+ ++SecIt;
}
}
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
@@ -2011,7 +2337,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@@ -2043,8 +2370,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
- if (WithSPMDCheck ||
- getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
CGBuilderTy &Bld = CGF.Builder;
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
@@ -2057,6 +2385,23 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().RegionCounter > 0 &&
+ "region counter must be > 0.");
+ --GlobalizedRecords.back().RegionCounter;
+ // Emit the restore function only in the target region.
+ if (GlobalizedRecords.back().RegionCounter == 0) {
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(GlobalizedRecords.back().UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory),
+ IsInSharedMemory);
+ }
} else {
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
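A tiny standalone model of the RegionCounter bookkeeping in this epilog (illustrative names): the restore call fires only when the outermost globalized region, i.e. the target region itself, unwinds:

    #include <cassert>

    struct TeamStaticMemoryModel {
      unsigned RegionCounter = 0;
      bool Restored = false;
      void prolog() { ++RegionCounter; }  // __kmpc_get_team_static_memory
      void epilog() {
        assert(RegionCounter > 0 && "region counter must be > 0.");
        if (--RegionCounter == 0)
          Restored = true;                // __kmpc_restore_team_static_memory
      }
    };

    int main() {
      TeamStaticMemoryModel M;
      M.prolog();           // target region
      M.prolog();           // nested teams region
      M.epilog();           // teams exits: no restore yet
      assert(!M.Restored);
      M.epilog();           // target exits: restore fires
      assert(M.Restored);
    }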
@@ -2155,7 +2500,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// passed from the outside of the target region.
CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
- // There's somehting to share.
+ // There's something to share.
if (!CapturedVars.empty()) {
// Prepare for parallel region. Indicate the outlined function.
Address SharedArgs =
@@ -2209,30 +2554,24 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
Work.emplace_back(WFn);
};
- auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
- &ThreadIDAddr](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
- RegionCodeGenTy RCG(CodeGen);
+ auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
if (IsInParallelRegion) {
SeqGen(CGF, Action);
} else if (IsInTargetMasterThreadRegion) {
L0ParallelGen(CGF, Action);
- } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
- RCG(CGF);
} else {
// Check for master and then parallelism:
// if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) {
- // Serialized execution.
- // } else if (master) {
- // Worker call.
+ // Serialized execution.
// } else {
- // Outlined function call.
+ // Worker call.
// }
CGBuilderTy &Bld = CGF.Builder;
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
- llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+ llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
@@ -2245,29 +2584,17 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
llvm::Value *Res = Bld.CreateIsNotNull(PL);
- Bld.CreateCondBr(Res, SeqBB, MasterCheckBB);
+ Bld.CreateCondBr(Res, SeqBB, MasterBB);
CGF.EmitBlock(SeqBB);
SeqGen(CGF, Action);
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(MasterCheckBB);
- llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then");
- llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
- llvm::Value *IsMaster =
- Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
- Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock);
- CGF.EmitBlock(MasterThenBB);
+ CGF.EmitBlock(MasterBB);
L0ParallelGen(CGF, Action);
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(ElseBlock);
- // In the worker need to use the real thread id.
- ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
- RCG(CGF);
- // There is no need to emit line number for unconditional branch.
- (void)ApplyDebugLocation::CreateEmpty(CGF);
// Emit the continuation block for code after the if.
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
@@ -2338,6 +2665,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
}
}
+void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool,
+ bool) {
+ // Always emit simple barriers!
+ if (!CGF.HaveInsertPoint())
+ return;
+  // Build call __kmpc_barrier(loc, thread_id);
+ unsigned Flags = getDefaultFlagsForBarriers(Kind);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
+ getThreadID(CGF, Loc)};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+}
+
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
CodeGenFunction &CGF, StringRef CriticalName,
const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
@@ -2380,14 +2721,16 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
CGF.EmitBlock(BodyBB);
// Output the critical statement.
- CriticalOpGen(CGF);
+ CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
+ Hint);
// After the body surrounded by the critical region, the single executing
// thread will jump to the synchronisation point.
// Block waits for all threads in current team to finish then increments the
// counter variable and returns to the loop.
CGF.EmitBlock(SyncBB);
- getNVPTXCTABarrier(CGF);
+ emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
llvm::Value *IncCounterVal =
CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
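A single-threaded model of the round-robin protocol this hunk rewires onto __kmpc_barrier (a sketch, assuming the counter/body/sync structure visible in the surrounding code): each thread runs the critical body only on the iteration matching its id, with a team barrier separating iterations.

    #include <cassert>
    #include <vector>

    int main() {
      const int NumThreads = 4;
      std::vector<int> Order;
      for (int Counter = 0; Counter < NumThreads; ++Counter) {
        for (int Tid = 0; Tid < NumThreads; ++Tid)
          if (Tid == Counter)
            Order.push_back(Tid); // <critical body> runs for exactly one thread
        // emitBarrierCall(...): all threads wait before the counter advances.
      }
      assert((Order == std::vector<int>{0, 1, 2, 3}));
    }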
@@ -2509,11 +2852,12 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
IntType, Offset, Loc);
CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
- Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
- ElemPtr =
+ Address LocalPtr =
+ Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
+ Address LocalElemPtr =
Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
- PhiSrc->addIncoming(Ptr.getPointer(), ThenBB);
- PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB);
+ PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
+ PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
CGF.EmitBranch(PreCondBB);
CGF.EmitBlock(ExitBB);
} else {
@@ -2739,223 +3083,6 @@ static void emitReductionListCopy(
}
}
-/// This function emits a helper that loads data from the scratchpad array
-/// and (optionally) reduces it with the input operand.
-///
-/// load_and_reduce(local, scratchpad, index, width, should_reduce)
-/// reduce_data remote;
-/// for elem in remote:
-/// remote.elem = Scratchpad[elem_id][index]
-/// if (should_reduce)
-/// local = local @ remote
-/// else
-/// local = remote
-static llvm::Value *emitReduceScratchpadFunction(
- CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
- ASTContext &C = CGM.getContext();
- QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
-
- // Destination of the copy.
- ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // Base address of the scratchpad array, with each element storing a
- // Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // A source index into the scratchpad array.
- ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // Row width of an element in the scratchpad array, typically
- // the number of teams.
- ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // If should_reduce == 1, then it's load AND reduce,
- // If should_reduce == 0 (or otherwise), then it only loads (+ copy).
- // The latter case is used for initialization.
- ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- Int32Ty, ImplicitParamDecl::Other);
-
- FunctionArgList Args;
- Args.push_back(&ReduceListArg);
- Args.push_back(&ScratchPadArg);
- Args.push_back(&IndexArg);
- Args.push_back(&WidthArg);
- Args.push_back(&ShouldReduceArg);
-
- const CGFunctionInfo &CGFI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- "_omp_reduction_load_and_reduce", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
- Fn->setDoesNotRecurse();
- CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
- CGBuilderTy &Bld = CGF.Builder;
-
- // Get local Reduce list pointer.
- Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
- Address ReduceListAddr(
- Bld.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, Loc),
- CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
- CGF.getPointerAlign());
-
- Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
- llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-
- Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
- CGM.SizeTy, /*isSigned=*/true);
-
- Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
- llvm::Value *WidthVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc),
- CGM.SizeTy, /*isSigned=*/true);
-
- Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
- llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
- AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc);
-
- // The absolute ptr address to the base addr of the next element to copy.
- llvm::Value *CumulativeElemBasePtr =
- Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
- Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
-
- // Create a Remote Reduce list to store the elements read from the
- // scratchpad array.
- Address RemoteReduceList =
- CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list");
-
- // Assemble remote Reduce list from scratchpad array.
- emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates,
- SrcDataAddr, RemoteReduceList,
- {/*RemoteLaneOffset=*/nullptr,
- /*ScratchpadIndex=*/IndexVal,
- /*ScratchpadWidth=*/WidthVal});
-
- llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-
- llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal);
- Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
-
- CGF.EmitBlock(ThenBB);
- // We should reduce with the local Reduce list.
- // reduce_function(LocalReduceList, RemoteReduceList)
- llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- ReduceListAddr.getPointer(), CGF.VoidPtrTy);
- llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
- CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(ElseBB);
- // No reduction; just copy:
- // Local Reduce list = Remote Reduce list.
- emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
- RemoteReduceList, ReduceListAddr);
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(MergeBB);
-
- CGF.FinishFunction();
- return Fn;
-}
-
-/// This function emits a helper that stores reduced data from the team
-/// master to a scratchpad array in global memory.
-///
-/// for elem in Reduce List:
-/// scratchpad[elem_id][index] = elem
-///
-static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy,
- SourceLocation Loc) {
-
- ASTContext &C = CGM.getContext();
- QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
-
- // Source of the copy.
- ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // Base address of the scratchpad array, with each element storing a
- // Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // A destination index into the scratchpad array, typically the team
- // identifier.
- ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // Row width of an element in the scratchpad array, typically
- // the number of teams.
- ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
-
- FunctionArgList Args;
- Args.push_back(&ReduceListArg);
- Args.push_back(&ScratchPadArg);
- Args.push_back(&IndexArg);
- Args.push_back(&WidthArg);
-
- const CGFunctionInfo &CGFI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- "_omp_reduction_copy_to_scratchpad", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
- Fn->setDoesNotRecurse();
- CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
- CGBuilderTy &Bld = CGF.Builder;
-
- Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
- Address SrcDataAddr(
- Bld.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, Loc),
- CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
- CGF.getPointerAlign());
-
- Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
- llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-
- Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
- CGF.SizeTy, /*isSigned=*/true);
-
- Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
- llvm::Value *WidthVal =
- Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
- Int32Ty, SourceLocation()),
- CGF.SizeTy, /*isSigned=*/true);
-
- // The absolute ptr address to the base addr of the next element to copy.
- llvm::Value *CumulativeElemBasePtr =
- Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
- Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
-
- emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates,
- SrcDataAddr, DestDataAddr,
- {/*RemoteLaneOffset=*/nullptr,
- /*ScratchpadIndex=*/IndexVal,
- /*ScratchpadWidth=*/WidthVal});
-
- CGF.FinishFunction();
- return Fn;
-}
-
/// This function emits a helper that gathers Reduce lists from the first
/// lane of every active warp to lanes in the first warp.
///
@@ -3013,11 +3140,10 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
if (!TransferMedium) {
- auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize);
+ auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
TransferMedium = new llvm::GlobalVariable(
- M, Ty,
- /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
+ M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
llvm::Constant::getNullValue(Ty), TransferMediumName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
SharedAddressSpace);
@@ -3035,7 +3161,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
Address LocalReduceList(
Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation()),
+ C.VoidPtrTy, Loc),
CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
CGF.getPointerAlign());
@@ -3045,121 +3171,153 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// Warp master copies reduce element to transfer medium in __shared__
// memory.
//
- llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-
- // if (lane_id == 0)
- llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
- Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
- CGF.EmitBlock(ThenBB);
-
- // Reduce element = LocalReduceList[i]
- Address ElemPtrPtrAddr =
- Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
- llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
- ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- // elemptr = (type[i]*)(elemptrptr)
- Address ElemPtr =
- Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
- ElemPtr = Bld.CreateElementBitCast(
- ElemPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // Get pointer to location in transfer medium.
- // MediumPtr = &medium[warp_id]
- llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
- TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
- Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType()));
- // Casting to actual data type.
- // MediumPtr = (type[i]*)MediumPtrAddr;
- MediumPtr = Bld.CreateElementBitCast(
- MediumPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // elem = *elemptr
- //*MediumPtr = elem
- if (Private->getType()->isScalarType()) {
- llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
- Private->getType(), Loc);
- // Store the source element value to the dest element address.
- CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false,
- Private->getType());
- } else {
- CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()),
- CGF.MakeAddrLValue(MediumPtr, Private->getType()),
- Private->getType(), AggValueSlot::DoesNotOverlap);
- }
-
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(ElseBB);
- Bld.CreateBr(MergeBB);
+ unsigned RealTySize =
+ C.getTypeSizeInChars(Private->getType())
+ .alignTo(C.getTypeAlignInChars(Private->getType()))
+ .getQuantity();
+    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
+ unsigned NumIters = RealTySize / TySize;
+ if (NumIters == 0)
+ continue;
+ QualType CType = C.getIntTypeForBitwidth(
+ C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1);
+ llvm::Type *CopyType = CGF.ConvertTypeForMem(CType);
+ CharUnits Align = CharUnits::fromQuantity(TySize);
+ llvm::Value *Cnt = nullptr;
+ Address CntAddr = Address::invalid();
+ llvm::BasicBlock *PrecondBB = nullptr;
+ llvm::BasicBlock *ExitBB = nullptr;
+ if (NumIters > 1) {
+ CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr");
+ CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr,
+ /*Volatile=*/false, C.IntTy);
+ PrecondBB = CGF.createBasicBlock("precond");
+ ExitBB = CGF.createBasicBlock("exit");
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body");
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(PrecondBB);
+ Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc);
+ llvm::Value *Cmp =
+ Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
+ Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
+ CGF.EmitBlock(BodyBB);
+ }
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
- CGF.EmitBlock(MergeBB);
+ // if (lane_id == 0)
+ llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
+ Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
+ CGF.EmitBlock(ThenBB);
- Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
- llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
- AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
+ // Reduce element = LocalReduceList[i]
+ Address ElemPtrPtrAddr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+ ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ // elemptr = ((CopyType*)(elemptrptr)) + I
+ Address ElemPtr = Address(ElemPtrPtr, Align);
+ ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType);
+ if (NumIters > 1) {
+ ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt),
+ ElemPtr.getAlignment());
+ }
- llvm::Value *NumActiveThreads = Bld.CreateNSWMul(
- NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
- // named_barrier_sync(ParallelBarrierID, num_active_threads)
- syncParallelThreads(CGF, NumActiveThreads);
+ // Get pointer to location in transfer medium.
+ // MediumPtr = &medium[warp_id]
+ llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
+ Address MediumPtr(MediumPtrVal, Align);
+ // Casting to actual data type.
+ // MediumPtr = (CopyType*)MediumPtrAddr;
+ MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType);
+
+ // elem = *elemptr
+ //*MediumPtr = elem
+ llvm::Value *Elem =
+ CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
+
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+
+ //
+ // Warp 0 copies reduce element from transfer medium.
+ //
+ llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
+
+ Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+ llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+ AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
+
+ // Up to 32 threads in warp 0 are active.
+ llvm::Value *IsActiveThread =
+ Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
+ Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
+
+ CGF.EmitBlock(W0ThenBB);
+
+ // SrcMediumPtr = &medium[tid]
+ llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium,
+ {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
+ Address SrcMediumPtr(SrcMediumPtrVal, Align);
+ // SrcMediumVal = *SrcMediumPtr;
+ SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType);
+
+ // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I
+ Address TargetElemPtrPtr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
+ TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc);
+ Address TargetElemPtr = Address(TargetElemPtrVal, Align);
+ TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType);
+ if (NumIters > 1) {
+ TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt),
+ TargetElemPtr.getAlignment());
+ }
- //
- // Warp 0 copies reduce element from transfer medium.
- //
- llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
-
- // Up to 32 threads in warp 0 are active.
- llvm::Value *IsActiveThread =
- Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
- Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
-
- CGF.EmitBlock(W0ThenBB);
-
- // SrcMediumPtr = &medium[tid]
- llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
- TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
- Address SrcMediumPtr(SrcMediumPtrVal,
- C.getTypeAlignInChars(Private->getType()));
- // SrcMediumVal = *SrcMediumPtr;
- SrcMediumPtr = Bld.CreateElementBitCast(
- SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // TargetElemPtr = (type[i]*)(SrcDataAddr[i])
- Address TargetElemPtrPtr =
- Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
- llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
- TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- Address TargetElemPtr =
- Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType()));
- TargetElemPtr = Bld.CreateElementBitCast(
- TargetElemPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // *TargetElemPtr = SrcMediumVal;
- if (Private->getType()->isScalarType()) {
- llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
- SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc);
+ // *TargetElemPtr = SrcMediumVal;
+ llvm::Value *SrcMediumValue =
+ CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc);
CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
- Private->getType());
- } else {
- CGF.EmitAggregateCopy(
- CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()),
- CGF.MakeAddrLValue(TargetElemPtr, Private->getType()),
- Private->getType(), AggValueSlot::DoesNotOverlap);
+ CType);
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0ElseBB);
+ Bld.CreateBr(W0MergeBB);
+
+ CGF.EmitBlock(W0MergeBB);
+
+ // While warp 0 copies values from transfer medium, all other warps must
+ // wait.
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ if (NumIters > 1) {
+ Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
+ CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
+ CGF.EmitBranch(PrecondBB);
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB);
+ }
+ RealTySize %= TySize;
}
- Bld.CreateBr(W0MergeBB);
-
- CGF.EmitBlock(W0ElseBB);
- Bld.CreateBr(W0MergeBB);
-
- CGF.EmitBlock(W0MergeBB);
-
- // While warp 0 copies values from transfer medium, all other warps must
- // wait.
- syncParallelThreads(CGF, NumActiveThreads);
++Idx;
}
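The 32-bit transfer medium forces the loop above to decompose each reduce element into 4-, 2-, and 1-byte chunks; a standalone host-side sketch of the same decomposition (std::memcpy stands in for the staged shared-memory transfer):

    #include <cassert>
    #include <cstring>

    // Copy Size bytes in the largest power-of-two chunks (4, 2, 1), mirroring
    // the chunking loop in emitInterWarpCopyFunction.
    void chunkedCopy(char *Dst, const char *Src, unsigned Size) {
      for (unsigned TySize = 4; TySize > 0 && Size > 0; TySize /= 2) {
        unsigned NumIters = Size / TySize;
        for (unsigned I = 0; I < NumIters; ++I) {
          std::memcpy(Dst, Src, TySize); // one staged transfer per chunk
          Dst += TySize;
          Src += TySize;
        }
        Size %= TySize; // leftover bytes fall through to the smaller chunk
      }
    }

    int main() {
      char Src[7] = {1, 2, 3, 4, 5, 6, 7}, Dst[7] = {};
      chunkedCopy(Dst, Src, 7); // one 4-byte, one 2-byte, one 1-byte transfer
      assert(std::memcmp(Src, Dst, 7) == 0);
    }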
@@ -3633,125 +3791,115 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
return;
bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+#ifndef NDEBUG
bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
- bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind);
- assert((TeamsReduction || ParallelReduction || SimdReduction) &&
- "Invalid reduction selection in emitReduction.");
+#endif
if (Options.SimpleReduction) {
+ assert(!TeamsReduction && !ParallelReduction &&
+ "Invalid reduction selection in emitReduction.");
CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
ReductionOps, Options);
return;
}
- ASTContext &C = CGM.getContext();
-
- // 1. Build a list of reduction variables.
- // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
- auto Size = RHSExprs.size();
- for (const Expr *E : Privates) {
- if (E->getType()->isVariablyModifiedType())
- // Reserve place for array size.
- ++Size;
- }
- llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
- QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
- Address ReductionList =
- CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
- auto IPriv = Privates.begin();
- unsigned Idx = 0;
- for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
- Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
- CGF.getPointerSize());
- CGF.Builder.CreateStore(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
- Elem);
- if ((*IPriv)->getType()->isVariablyModifiedType()) {
- // Store array size.
- ++Idx;
- Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
- CGF.getPointerSize());
- llvm::Value *Size = CGF.Builder.CreateIntCast(
- CGF.getVLASize(
- CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
- .NumElts,
- CGF.SizeTy, /*isSigned=*/false);
- CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
- Elem);
- }
- }
-
- // 2. Emit reduce_func().
- llvm::Value *ReductionFn = emitReductionFunction(
- CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
- Privates, LHSExprs, RHSExprs, ReductionOps);
+ assert((TeamsReduction || ParallelReduction) &&
+ "Invalid reduction selection in emitReduction.");
- // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
+ // Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
+ // or
+ // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
- llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
- llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- ReductionList.getPointer(), CGF.VoidPtrTy);
-
- llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
- llvm::Value *InterWarpCopyFn =
- emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
-
- llvm::Value *Args[] = {ThreadId,
- CGF.Builder.getInt32(RHSExprs.size()),
- ReductionArrayTySize,
- RL,
- ShuffleAndReduceFn,
- InterWarpCopyFn};
-
- llvm::Value *Res = nullptr;
- if (ParallelReduction)
- Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
- Args);
- else if (SimdReduction)
- Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait),
- Args);
- if (TeamsReduction) {
- llvm::Value *ScratchPadCopyFn =
- emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
- llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction(
+ llvm::Value *Res;
+ if (ParallelReduction) {
+ ASTContext &C = CGM.getContext();
+ // 1. Build a list of reduction variables.
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+ auto Size = RHSExprs.size();
+ for (const Expr *E : Privates) {
+ if (E->getType()->isVariablyModifiedType())
+ // Reserve place for array size.
+ ++Size;
+ }
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+ QualType ReductionArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ Address ReductionList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+ auto IPriv = Privates.begin();
+ unsigned Idx = 0;
+ for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+ Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
+ if ((*IPriv)->getType()->isVariablyModifiedType()) {
+ // Store array size.
+ ++Idx;
+ Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ llvm::Value *Size = CGF.Builder.CreateIntCast(
+ CGF.getVLASize(
+ CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+ .NumElts,
+ CGF.SizeTy, /*isSigned=*/false);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+ Elem);
+ }
+ }
+
+ llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ReductionList.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *ReductionFn = emitReductionFunction(
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
+ llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
+ llvm::Value *InterWarpCopyFn =
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
llvm::Value *Args[] = {ThreadId,
CGF.Builder.getInt32(RHSExprs.size()),
ReductionArrayTySize,
RL,
ShuffleAndReduceFn,
- InterWarpCopyFn,
- ScratchPadCopyFn,
- LoadAndReduceFn};
+ InterWarpCopyFn};
+
+ Res = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
+ Args);
+ } else {
+ assert(TeamsReduction && "expected teams reduction.");
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ std::string Name = getName({"reduction"});
+ llvm::Value *Lock = getCriticalRegionLock(Name);
+ llvm::Value *Args[] = {RTLoc, ThreadId, Lock};
Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait),
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple),
Args);
}
- // 5. Build switch(res)
- llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
- llvm::SwitchInst *SwInst =
- CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+ // 5. Build if (res == 1)
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done");
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then");
+ llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
+ Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1));
+ CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);
- // 6. Build case 1: where we have reduced values in the master
+ // 6. Build then branch: where we have reduced values in the master
// thread in each team.
// __kmpc_end_reduce{_nowait}(<gtid>);
// break;
- llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
- SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
- CGF.EmitBlock(Case1BB);
+ CGF.EmitBlock(ThenBB);
// Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
- llvm::Value *EndArgs[] = {ThreadId};
auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
this](CodeGenFunction &CGF, PrePostActionTy &Action) {
auto IPriv = Privates.begin();
@@ -3765,15 +3913,33 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
++IRHS;
}
};
- RegionCodeGenTy RCG(CodeGen);
- NVPTXActionTy Action(
- nullptr, llvm::None,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
- EndArgs);
- RCG.setAction(Action);
- RCG(CGF);
- CGF.EmitBranch(DefaultBB);
- CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+ if (ParallelReduction) {
+ llvm::Value *EndArgs[] = {ThreadId};
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ } else {
+ assert(TeamsReduction && "expected teams reduction.");
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ std::string Name = getName({"reduction"});
+ llvm::Value *Lock = getCriticalRegionLock(Name);
+ llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock};
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ }
+  // There is no need to emit a line number for the unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
const VarDecl *
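
The parallel branch above packs each reduction variable (plus one size slot per
VLA) into an array of void*, hands it to the parallel-reduce entry point, and
finalizes only on the thread that ends up owning the combined values. A minimal
sketch of the emitted sequence, with illustrative names standing in for the
runtime symbols resolved through createNVPTXRuntimeFunction:

    void *RedList[N];                // one void* slot per reduction variable,
    RedList[0] = &rhs0; /* ... */    // plus one extra slot per VLA for its size
    int Res = __kmpc_parallel_reduce_nowait(ThreadId, N, sizeof(RedList),
                                            RedList, ShuffleAndReduceFn,
                                            InterWarpCopyFn);
    if (Res == 1) {                  // this thread holds the combined values
      /* fold RedList back into the original variables via ReductionOps */
      __kmpc_end_reduce_nowait(ThreadId);
    }
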
@@ -4000,6 +4166,8 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
assert(D && "Expected function or captured|block decl.");
assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
"Function is registered already.");
+ assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
+ "Team is set but not processed.");
const Stmt *Body = nullptr;
bool NeedToDelayGlobalization = false;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
@@ -4015,9 +4183,12 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
}
if (!Body)
return;
- CheckVarsEscapingDeclContext VarChecker(CGF);
+ CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
VarChecker.Visit(Body);
- const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
+ const RecordDecl *GlobalizedVarsRecord =
+ VarChecker.getGlobalizedRecord(IsInTTDRegion);
+ TeamAndReductions.first = nullptr;
+ TeamAndReductions.second.clear();
ArrayRef<const ValueDecl *> EscapedVariableLengthDecls =
VarChecker.getEscapedVariableLengthDecls();
if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
@@ -4035,7 +4206,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
+ }
+ if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
+ CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
+ VarChecker.Visit(Body);
+ I->getSecond().SecondaryGlobalRecord =
+ VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true);
+ I->getSecond().SecondaryLocalVarData.emplace();
+ DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+ Data.insert(
+ std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true)));
+ }
}
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
@@ -4062,7 +4247,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address::invalid();
auto VDI = I->getSecond().LocalVarData.find(VD);
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
if (VD->hasAttrs()) {
for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()),
E(VD->attr_end());
@@ -4071,7 +4256,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
->getCanonicalDecl());
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
}
}
return Address::invalid();
@@ -4091,16 +4276,285 @@ void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
S.getIterationVariable()->getType(), S.getBeginLoc());
+ return;
}
+ CGOpenMPRuntime::getDefaultDistScheduleAndChunk(
+ CGF, S, ScheduleKind, Chunk);
}
void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
CodeGenFunction &CGF, const OMPLoopDirective &S,
OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const {
- if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
- ScheduleKind = OMPC_SCHEDULE_static;
- Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
- S.getIterationVariable()->getType()), 1);
+ const Expr *&ChunkExpr) const {
+ ScheduleKind = OMPC_SCHEDULE_static;
+ // Chunk size is 1 in this case.
+ llvm::APInt ChunkSize(32, 1);
+ ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
+}
+
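
getDefaultScheduleAndChunk now hands the chunk back as an AST IntegerLiteral
rather than a pre-emitted llvm::Value, so the caller can still constant-fold
it (see HasChunkSizeOne in CGStmtOpenMP.cpp below). The user-visible effect,
as a sketch for a loop lowered by this runtime:

    #pragma omp parallel for        // no schedule clause written
    for (int i = 0; i < n; ++i)
      a[i] = b[i] + c[i];
    // behaves as if schedule(static, 1) were specified: consecutive
    // iterations go to consecutive threads, which keeps GPU memory
    // accesses coalesced.
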
+void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+         "Expected target-based directive.");
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
+ for (const CapturedStmt::Capture &C : CS->captures()) {
+ // Capture variables captured by reference in lambdas for target-based
+ // directives.
+ if (!C.capturesVariable())
+ continue;
+ const VarDecl *VD = C.getCapturedVar();
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ continue;
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ LValue VDLVal;
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
+ else
+ VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ llvm::Value *CXXThis = CGF.LoadCXXThis();
+ CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ if (!CS->capturesVariable(VD))
+ continue;
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
+ VD->getType().getCanonicalType())
+ .getAddress();
+ CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
+ }
+ }
+}
+
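
A sketch of the pattern adjustTargetSpecificDataForLambdas repairs (mapping
details elided; names illustrative): a by-reference lambda capture holds a
host address, so each capture field must be re-pointed at the device copy
before the lambda runs inside the target region.

    int x = 0;
    auto fn = [&x]() { x = 42; };
    #pragma omp target map(x)
    fn();   // the lambda's field for x (and a captured 'this', if any)
            // is rewritten to the device copy by the loop above
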
+// Get the current CudaArch, ignoring any unknown values.
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+ if (!CGM.getTarget().hasFeature("ptx"))
+ return CudaArch::UNKNOWN;
+ llvm::StringMap<bool> Features;
+ CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+ CGM.getTarget().getTargetOpts().CPU,
+ CGM.getTarget().getTargetOpts().Features);
+ for (const auto &Feature : Features) {
+ if (Feature.getValue()) {
+ CudaArch Arch = StringToCudaArch(Feature.getKey());
+ if (Arch != CudaArch::UNKNOWN)
+ return Arch;
+ }
+ }
+ return CudaArch::UNKNOWN;
+}
+
+/// Check whether the target architecture supports unified addressing, which
+/// is required by the OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+ CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+ for (const OMPClause *Clause : D->clauselists()) {
+ if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ CGM.Error(Clause->getBeginLoc(),
+ "Target architecture does not support unified addressing");
+ return;
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
+ }
+ }
+ }
+}
+
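
The check fires on code such as the following when the offload architecture is
sm_62 or older (the first group of cases above); sm_70 and newer, and the
listed GFX architectures, pass silently:

    #pragma omp requires unified_shared_memory
    // compiled for e.g. sm_35, this patch now reports:
    //   "Target architecture does not support unified addressing"
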
+/// Get number of SMs and number of blocks per SM.
+static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
+ std::pair<unsigned, unsigned> Data;
+ if (CGM.getLangOpts().OpenMPCUDANumSMs)
+ Data.first = CGM.getLangOpts().OpenMPCUDANumSMs;
+ if (CGM.getLangOpts().OpenMPCUDABlocksPerSM)
+ Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
+ if (Data.first && Data.second)
+ return Data;
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ return {16, 16};
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ return {56, 32};
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ return {84, 32};
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
+ }
+ llvm_unreachable("Unexpected NVPTX target without ptx feature.");
+}
+
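
A usage sketch for the pair (values from the switch above; the two LangOpts
overrides win when both are set):

    std::pair<unsigned, unsigned> Data = getSMsBlocksPerSM(CGM);
    // e.g. {84, 32} for sm_70: 84 SMs with at most 32 resident blocks each.
    // clear() below sizes the global static buffer with one slot per
    // potentially resident team:
    unsigned MaxResidentTeams = Data.first * Data.second;
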
+void CGOpenMPRuntimeNVPTX::clear() {
+ if (!GlobalizedRecords.empty()) {
+ ASTContext &C = CGM.getContext();
+ llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
+ llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;
+ RecordDecl *StaticRD = C.buildImplicitRecord(
+ "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+ StaticRD->startDefinition();
+ RecordDecl *SharedStaticRD = C.buildImplicitRecord(
+ "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+ SharedStaticRD->startDefinition();
+ for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
+ if (Records.Records.empty())
+ continue;
+ unsigned Size = 0;
+ unsigned RecAlignment = 0;
+ for (const RecordDecl *RD : Records.Records) {
+ QualType RDTy = C.getRecordType(RD);
+ unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
+ RecAlignment = std::max(RecAlignment, Alignment);
+ unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
+ Size =
+ llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
+ }
+ Size = llvm::alignTo(Size, RecAlignment);
+ llvm::APInt ArySize(/*numBits=*/64, Size);
+ QualType SubTy = C.getConstantArrayType(
+ C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ const bool UseSharedMemory = Size <= SharedMemorySize;
+ auto *Field =
+ FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
+ SourceLocation(), SourceLocation(), nullptr, SubTy,
+ C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (UseSharedMemory) {
+ SharedStaticRD->addDecl(Field);
+ SharedRecs.push_back(&Records);
+ } else {
+ StaticRD->addDecl(Field);
+ GlobalRecs.push_back(&Records);
+ }
+ Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
+ Records.UseSharedMemory->setInitializer(
+ llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
+ }
+ SharedStaticRD->completeDefinition();
+ if (!SharedStaticRD->field_empty()) {
+ QualType StaticTy = C.getRecordType(SharedStaticRD);
+ llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), LLVMStaticTy,
+ /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(LLVMStaticTy),
+ "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ C.getTargetAddressSpace(LangAS::cuda_shared));
+ auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ GV, CGM.VoidPtrTy);
+ for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
+ Rec->Buffer->replaceAllUsesWith(Replacement);
+ Rec->Buffer->eraseFromParent();
+ }
+ }
+ StaticRD->completeDefinition();
+ if (!StaticRD->field_empty()) {
+ QualType StaticTy = C.getRecordType(StaticRD);
+ std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
+ llvm::APInt Size1(32, SMsBlockPerSM.second);
+ QualType Arr1Ty =
+ C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ llvm::APInt Size2(32, SMsBlockPerSM.first);
+ QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), LLVMArr2Ty,
+ /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(LLVMArr2Ty),
+ "_openmp_static_glob_rd_$_");
+ auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ GV, CGM.VoidPtrTy);
+ for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
+ Rec->Buffer->replaceAllUsesWith(Replacement);
+ Rec->Buffer->eraseFromParent();
+ }
+ }
}
+ CGOpenMPRuntime::clear();
}
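
The net effect of clear() on the module, roughly (a sketch; member counts,
sizes, and the shared-memory threshold depend on the translation unit, and
'$' is legal in the emitted IR names but not in C++):

    // one padded member per distinct globalized-record size
    union _openmp_static_memory_type_$_ { char rec0[Size0]; char rec1[Size1]; };
    // records above the threshold: a global-memory array with one slot
    // per potentially resident team
    _openmp_static_memory_type_$_ _openmp_static_glob_rd_$_[NumSMs][BlocksPerSM];
    // smaller records share one buffer in CUDA shared memory instead
    // (_openmp_shared_static_glob_rd_$_); each placeholder Buffer global
    // is replaced with the matching array and then erased.
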
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index fc8cd2467b..8fb3b0a061 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -56,6 +56,8 @@ private:
ExecutionMode getExecutionMode() const;
+ bool requiresFullRuntime() const { return RequiresFullRuntime; }
+
/// Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -180,8 +182,19 @@ protected:
return "__omp_outlined__";
}
+ /// Check if the default location must be constant.
+  /// It is constant for NVPTX to enable better optimization.
+ bool isDefaultLocationConstant() const override { return true; }
+
+  /// Returns additional flags that can be stored in the reserved_2 field of
+  /// the default location.
+  /// For the NVPTX target it encodes the SPMD/non-SPMD execution mode and the
+  /// full/lightweight runtime mode, which enables better optimization.
+ unsigned getDefaultLocationReserved2Flags() const override;
+
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+ void clear() override;
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
@@ -261,6 +274,18 @@ public:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+ /// Emit an implicit/explicit barrier for OpenMP threads.
+ /// \param Kind Directive for which this implicit barrier call must be
+ /// generated. Must be OMPD_barrier for explicit barrier generation.
+  /// \param EmitChecks true if checks for cancellation barriers must be
+  /// emitted.
+  /// \param ForceSimpleCall true if a simple barrier call must be emitted;
+  /// false if the runtime class decides which one to emit (simple or with
+  /// cancellation checks).
+ ///
+ void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks = true,
+ bool ForceSimpleCall = false) override;
+
/// Emits a critical region.
/// \param CriticalName Name of the critical region.
/// \param CriticalOpGen Generator for the statement associated with the given
@@ -348,7 +373,17 @@ public:
/// Choose a default value for the schedule clause.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const override;
+ const Expr *&ChunkExpr) const override;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ void adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+
+  /// Perform a check on the requires decl to ensure that the target
+  /// architecture supports unified addressing.
+ void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const override;
private:
/// Track the execution mode when codegening directives within a target
@@ -357,9 +392,15 @@ private:
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
+  /// Check if the full runtime is required (the default is yes).
+ bool RequiresFullRuntime = true;
+
/// true if we're emitting the code for the target region and next parallel
/// region is L0 for sure.
bool IsInTargetMasterThreadRegion = false;
+  /// true if currently emitting code for a target/teams/distribute region,
+  /// false otherwise.
+ bool IsInTTDRegion = false;
/// true if we're definitely in the parallel region.
bool IsInParallelRegion = false;
@@ -373,17 +414,31 @@ private:
llvm::Function *createParallelDataSharingWrapper(
llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
+  /// The data for a single globalized variable.
+ struct MappedVarData {
+ /// Corresponding field in the global record.
+ const FieldDecl *FD = nullptr;
+ /// Corresponding address.
+ Address PrivateAddr = Address::invalid();
+    /// true if only one element is required (for lastprivates in SPMD mode),
+    /// false if the number of elements must be based on the warp size.
+ bool IsOnePerTeam = false;
+ MappedVarData() = delete;
+ MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false)
+ : FD(FD), IsOnePerTeam(IsOnePerTeam) {}
+ };
/// The map of local variables to their addresses in the global memory.
- using DeclToAddrMapTy = llvm::MapVector<const Decl *,
- std::pair<const FieldDecl *, Address>>;
+ using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
/// Set of the parameters passed by value escaping OpenMP context.
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
+ llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
EscapedParamsTy EscapedParameters;
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
const RecordDecl *GlobalRecord = nullptr;
+ llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
llvm::Value *GlobalRecordAddr = nullptr;
llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
@@ -391,6 +446,27 @@ private:
/// Maps the function to the list of the globalized variables with their
/// addresses.
llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
+ /// List of records for the globalized variables in target/teams/distribute
+  /// contexts. Inner records are joined into a single record per context, and
+  /// the resulting records are joined into a single union. This union (one per
+  /// CU) is the entry point for the static memory management runtime functions.
+ struct GlobalPtrSizeRecsTy {
+ llvm::GlobalVariable *UseSharedMemory = nullptr;
+ llvm::GlobalVariable *RecSize = nullptr;
+ llvm::GlobalVariable *Buffer = nullptr;
+ SourceLocation Loc;
+ llvm::SmallVector<const RecordDecl *, 2> Records;
+ unsigned RegionCounter = 0;
+ };
+ llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
+  /// Pointer, kept in shared memory, to the slot of the global memory buffer
+  /// used for the given kernel.
+ llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
+  /// Pair of the non-SPMD team declaration and all reduction variables in this
+  /// team region.
+ std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
+ TeamAndReductions;
};
} // CodeGen namespace.
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 58aaae6925..c754541ac1 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -20,7 +20,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 39a2cc145f..bc7a18af1e 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -19,8 +19,6 @@
#include "clang/Basic/Builtins.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Sema/LoopHint.h"
-#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -1047,10 +1045,9 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
// exception to our over-conservative rules about not jumping to
// statements following block literals with non-trivial cleanups.
RunCleanupsScope cleanupScope(*this);
- if (const ExprWithCleanups *cleanups =
- dyn_cast_or_null<ExprWithCleanups>(RV)) {
- enterFullExpression(cleanups);
- RV = cleanups->getSubExpr();
+ if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) {
+ enterFullExpression(fe);
+ RV = fe->getSubExpr();
}
// FIXME: Clean this up by using an LValue for ReturnTemp,
@@ -1823,9 +1820,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput(
// If this can't be a register or memory, i.e., has to be a constant
// (immediate or symbolic), try to emit it as such.
if (!Info.allowsRegister() && !Info.allowsMemory()) {
- llvm::APSInt Result;
+ Expr::EvalResult Result;
if (InputExpr->EvaluateAsInt(Result, getContext()))
- return llvm::ConstantInt::get(getLLVMContext(), Result);
+ return llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt());
assert(!Info.requiresImmediateConstant() &&
"Required-immediate inlineasm arg isn't constant?");
}
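
The EvaluateAsInt call sites touched in this patch all follow the same API
migration: the result now arrives in an Expr::EvalResult instead of a bare
llvm::APSInt. The new pattern, as a sketch:

    Expr::EvalResult Result;
    if (InputExpr->EvaluateAsInt(Result, getContext())) {
      llvm::APSInt Value = Result.Val.getInt();  // previously filled in directly
      // ... use Value ...
    }
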
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 79ffa7c8e9..4f635efe71 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -385,12 +385,12 @@ static llvm::Function *emitOutlinedFunctionPrologue(
FunctionDecl *DebugFunctionDecl = nullptr;
if (!FO.UIntPtrCastRequired) {
FunctionProtoType::ExtProtoInfo EPI;
+ QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
DebugFunctionDecl = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
- SourceLocation(), DeclarationName(), Ctx.VoidTy,
- Ctx.getTrivialTypeSourceInfo(
- Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
- SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
+ SourceLocation(), DeclarationName(), FunctionTy,
+ Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
+ /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
}
for (const FieldDecl *FD : RD->fields()) {
QualType ArgType = FD->getType();
@@ -1738,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPReductionClauseInit(S, LoopScope);
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
[&S](CodeGenFunction &CGF) {
@@ -2006,7 +2008,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
// for combined 'distribute' and 'for' the increment expression of distribute
- // is store in DistInc. For 'distribute' alone, it is in Inc.
+ // is stored in DistInc. For 'distribute' alone, it is in Inc.
Expr *IncExpr;
if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
IncExpr = S.getDistInc();
@@ -2296,24 +2298,34 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
EmitOMPPrivateLoopCounters(S, LoopScope);
EmitOMPLinearClause(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the loop schedule kind and chunk.
- llvm::Value *Chunk = nullptr;
+ const Expr *ChunkExpr = nullptr;
OpenMPScheduleTy ScheduleKind;
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
ScheduleKind.Schedule = C->getScheduleKind();
ScheduleKind.M1 = C->getFirstScheduleModifier();
ScheduleKind.M2 = C->getSecondScheduleModifier();
- if (const Expr *Ch = C->getChunkSize()) {
- Chunk = EmitScalarExpr(Ch);
- Chunk = EmitScalarConversion(Chunk, Ch->getType(),
- S.getIterationVariable()->getType(),
- S.getBeginLoc());
- }
+ ChunkExpr = C->getChunkSize();
} else {
// Default behaviour for schedule clause.
CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
- *this, S, ScheduleKind.Schedule, Chunk);
+ *this, S, ScheduleKind.Schedule, ChunkExpr);
+ }
+ bool HasChunkSizeOne = false;
+ llvm::Value *Chunk = nullptr;
+ if (ChunkExpr) {
+ Chunk = EmitScalarExpr(ChunkExpr);
+ Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+ S.getIterationVariable()->getType(),
+ S.getBeginLoc());
+ Expr::EvalResult Result;
+ if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
+ llvm::APSInt EvaluatedChunk = Result.Val.getInt();
+ HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
+ }
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2321,8 +2333,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- if (RT.isStaticNonchunked(ScheduleKind.Schedule,
- /* Chunked */ Chunk != nullptr) &&
+  bool StaticChunkedOne =
+      RT.isStaticChunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) &&
+      HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunkedOne) &&
!Ordered) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2349,38 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// unspecified in this case.
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
- UB.getAddress(), ST.getAddress());
+ UB.getAddress(), ST.getAddress(),
+ StaticChunkedOne ? Chunk : nullptr);
RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ if (!StaticChunkedOne)
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
- // while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
+ // For unchunked static schedule generate:
+ //
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // For static schedule with chunk one:
+ //
+ // while (IV <= PrevUB) {
+ // BODY;
+ // IV += ST;
+ // }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+ StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+ StaticChunkedOne ? S.getDistInc() : S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
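
StaticChunkedOne targets loop-bound-sharing combined constructs whose chunk
provably evaluates to 1, e.g.:

    #pragma omp target teams distribute parallel for schedule(static, 1)
    for (int i = 0; i < n; ++i)
      a[i] = b[i] + c[i];
    // the inner loop becomes: while (IV <= PrevUB) { BODY; IV += ST; }
    // skipping the per-chunk UB = min(UB, GlobalUB) clamp of the
    // unchunked form.
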
@@ -2564,6 +2595,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// Emit static non-chunked loop.
OpenMPScheduleTy ScheduleKind;
@@ -2922,7 +2955,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
RedCG.emitAggregateType(CGF, Cnt);
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
+ // initializer/combiner/finalizer.
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
@@ -2968,10 +3001,10 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
RedCG.emitSharedLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
- // privatized already during procoessing of the firstprivates.
+ // privatized already during processing of the firstprivates.
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
+ // initializer/combiner/finalizer.
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
llvm::Value *ReductionsPtr =
@@ -3317,6 +3350,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the distribute schedule kind and chunk.
llvm::Value *Chunk = nullptr;
@@ -3345,13 +3380,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// iteration space is divided into chunks that are approximately equal
// in size, and at most one chunk is distributed to each team of the
// league. The size of the chunks is unspecified in this case.
+ bool StaticChunked = RT.isStaticChunked(
+ ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
if (RT.isStaticNonchunked(ScheduleKind,
- /* Chunked */ Chunk != nullptr)) {
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunked) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
- LB.getAddress(), UB.getAddress(), ST.getAddress());
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ StaticChunked ? Chunk : nullptr);
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
StaticInit);
JumpDest LoopExit =
@@ -3370,15 +3410,45 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
? S.getCombinedCond()
: S.getCond();
- // for distribute alone, codegen
- // while (idx <= UB) { BODY; ++idx; }
- // when combined with 'for' (e.g. as in 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ if (StaticChunked)
+ Cond = S.getCombinedDistCond();
+
+ // For static unchunked schedules generate:
+ //
+ // 1. For distribute alone, codegen
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // idx += ST;
+ // }
+ //
+ // For static chunk one schedule generate:
+ //
+ // while (IV <= GlobalUB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // LB += ST;
+ // UB += ST;
+ // UB = min(UB, GlobalUB);
+ // IV = LB;
+ // }
+ //
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
CodeGenLoop(CGF, S, LoopExit);
},
- [](CodeGenFunction &) {});
+ [&S, StaticChunked](CodeGenFunction &CGF) {
+ if (StaticChunked) {
+ CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+ CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedInit());
+ }
+ });
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
@@ -3400,20 +3470,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
!isOpenMPParallelDirective(S.getDirectiveKind()) &&
!isOpenMPTeamsDirective(S.getDirectiveKind())) {
- OpenMPDirectiveKind ReductionKind = OMPD_unknown;
- if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
- isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for_simd;
- } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for;
- } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_simd;
- } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
- S.hasClausesOfKind<OMPReductionClause>()) {
- llvm_unreachable(
- "No reduction clauses is allowed in distribute directive.");
- }
- EmitOMPReductionClauseFinal(S, ReductionKind);
+ EmitOMPReductionClauseFinal(S, OMPD_simd);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
*this, S, [IL, &S](CodeGenFunction &CGF) {
@@ -3912,6 +3969,10 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_use_device_ptr:
case OMPC_is_device_ptr:
case OMPC_unified_address:
+ case OMPC_unified_shared_memory:
+ case OMPC_reverse_offload:
+ case OMPC_dynamic_allocators:
+ case OMPC_atomic_default_mem_order:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -3928,13 +3989,13 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
}
const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
- enterFullExpression(EWC);
+ if (const auto *FE = dyn_cast<FullExpr>(CS))
+ enterFullExpression(FE);
// Processing for statements under 'atomic capture'.
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
for (const Stmt *C : Compound->body()) {
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(C))
- enterFullExpression(EWC);
+ if (const auto *FE = dyn_cast<FullExpr>(C))
+ enterFullExpression(FE);
}
}
@@ -4021,6 +4082,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}
@@ -4101,6 +4164,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
@@ -4659,6 +4724,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// TODO: Add support for clauses.
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -4959,10 +5026,16 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
} else {
+ OMPPrivateScope LoopGlobals(CGF);
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
for (const Expr *E : LD->counters()) {
- if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
- cast<DeclRefExpr>(E)->getDecl())) {
+ const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
+ LValue GlobLVal = CGF.EmitLValue(E);
+ LoopGlobals.addPrivate(
+ VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
+ }
+ if (isa<OMPCapturedExprDecl>(VD)) {
// Emit only those that were not explicitly referenced in clauses.
if (!CGF.LocalDeclMap.count(VD))
CGF.EmitVarDecl(*VD);
@@ -4983,6 +5056,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
}
}
}
+ LoopGlobals.Privatize();
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
}
};
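
The new LoopGlobals scope covers loop counters that lack local storage; a
sketch of the shape involved (assuming the directive is emitted through
EmitSimpleOMPExecutableDirective):

    int i;                         // counter with global storage
    void foo(int n) {
    #pragma omp teams distribute   // non-simd loop directive
      for (i = 0; i < n; ++i)
        ;
    }
    // 'i' is not in LocalDeclMap, so its lvalue is captured and privatized
    // via LoopGlobals.addPrivate before the captured statement is emitted.
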
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index e29a035e31..09535900b5 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -16,9 +16,9 @@
#include "CodeGenModule.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Format.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -304,7 +304,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
}
- EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr);
+ EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr);
return;
}
@@ -350,13 +350,12 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
: FPT->getReturnType();
ReturnValueSlot Slot;
if (!ResultType->isVoidType() &&
- CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
- !hasScalarEvaluationKind(CurFnInfo->getReturnType()))
+ CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());
// Now emit our call.
llvm::Instruction *CallOrInvoke;
- CGCallee Callee = CGCallee::forDirect(CalleePtr, MD);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD);
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke);
// Consider return adjustment if we have ThunkInfo.
@@ -375,7 +374,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
FinishThunk();
}
-void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
+void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD,
llvm::Value *AdjustedThisPtr,
llvm::Value *CalleePtr) {
// Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery
@@ -412,7 +411,7 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
// Apply the standard set of call attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
- CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs,
+ CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs,
CallingConv, /*AttrOnCallSite=*/true);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 0dcbea423a..da8a8efb84 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -562,7 +562,10 @@ public:
}
void setVolatile(bool flag) {
- Quals.setVolatile(flag);
+ if (flag)
+ Quals.addVolatile();
+ else
+ Quals.removeVolatile();
}
Qualifiers::ObjCLifetime getObjCLifetime() const {
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 2a0f4f0e83..29c6793c60 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -102,4 +102,5 @@ add_clang_library(clangCodeGen
clangBasic
clangFrontend
clangLex
+ clangSerialization
)
diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp
index c152291b15..27f5d53ffe 100644
--- a/lib/CodeGen/CodeGenABITypes.cpp
+++ b/lib/CodeGen/CodeGenABITypes.cpp
@@ -20,7 +20,6 @@
#include "CGRecordLayout.h"
#include "CodeGenModule.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/PreprocessorOptions.h"
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 1a2b0616dc..fd4506f2d1 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -549,12 +549,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
SourceLocation DILoc;
if (D.isLocationAvailable()) {
- D.getLocation(&Filename, &Line, &Column);
- const FileEntry *FE = FileMgr.getFile(Filename);
- if (FE && Line > 0) {
- // If -gcolumn-info was not used, Column will be 0. This upsets the
- // source manager, so pass 1 if Column is not set.
- DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+ D.getLocation(Filename, Line, Column);
+ if (Line > 0) {
+ const FileEntry *FE = FileMgr.getFile(Filename);
+ if (!FE)
+ FE = FileMgr.getFile(D.getAbsolutePath());
+ if (FE) {
+ // If -gcolumn-info was not used, Column will be 0. This upsets the
+ // source manager, so pass 1 if Column is not set.
+ DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+ }
}
BadDebugInfo = DILoc.isInvalid();
}
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 77f978f687..f012384f3d 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -28,10 +28,10 @@
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
@@ -430,10 +430,25 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
NormalCleanupDest = Address::invalid();
}
- // Add the required-vector-width attribute.
- if (LargestVectorWidth != 0)
- CurFn->addFnAttr("min-legal-vector-width",
- llvm::utostr(LargestVectorWidth));
+ // Scan function arguments for vector width.
+ for (llvm::Argument &A : CurFn->args())
+ if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
+ // Update vector width based on return type.
+ if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
+  // Add the min-legal-vector-width attribute. This contains the max width from:
+ // 1. min-vector-width attribute used in the source program.
+ // 2. Any builtins used that have a vector width specified.
+ // 3. Values passed in and out of inline assembly.
+ // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector arguments and return types for functions called by this
+ // function.
+ CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
}
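
The attribute is now emitted unconditionally and also reflects vector argument
and return types. A sketch:

    typedef float v8f __attribute__((vector_size(32)));
    v8f add(v8f a, v8f b) { return a + b; }
    // add() gets "min-legal-vector-width"="256" even with no
    // min_vector_width attribute or vector builtin in its body, because
    // the 256-bit argument and return types are scanned directly.
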
/// ShouldInstrumentFunction - Return true if the current function should be
@@ -1058,9 +1073,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// Count the implicit return.
if (!endsWithReturn(D))
++NumReturnExprs;
- } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
- !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
- // Indirect aggregate return; emit returned value directly into sret slot.
+ } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) {
+ // Indirect return; emit returned value directly into sret slot.
// This reduces code size, and affects correctness in C++.
auto AI = CurFn->arg_begin();
if (CurFnInfo->getReturnInfo().isSRetAfterThis())
@@ -1188,8 +1202,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
LargestVectorWidth = VecWidth->getVectorWidth();
}
-void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
- const Stmt *Body) {
+void CodeGenFunction::EmitFunctionBody(const Stmt *Body) {
incrementProfileCounter(Body);
if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
EmitCompoundStmtWithoutScope(*S);
@@ -1357,7 +1370,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// copy-constructors.
emitImplicitAssignmentOperatorBody(Args);
} else if (Body) {
- EmitFunctionBody(Args, Body);
+ EmitFunctionBody(Body);
} else
llvm_unreachable("no definition for emitted function");
@@ -1498,10 +1511,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
bool AllowLabels) {
// FIXME: Rename and handle conversion of other evaluatable things
// to bool.
- llvm::APSInt Int;
- if (!Cond->EvaluateAsInt(Int, getContext()))
+ Expr::EvalResult Result;
+ if (!Cond->EvaluateAsInt(Result, getContext()))
return false; // Not foldable, not integer or not fully evaluatable.
+ llvm::APSInt Int = Result.Val.getInt();
if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond))
return false; // Contains a label.
@@ -1686,7 +1700,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
// create metadata that specifies that the branch is unpredictable.
// Don't bother if not optimizing because that metadata would not be used.
llvm::MDNode *Unpredictable = nullptr;
- auto *Call = dyn_cast<CallExpr>(Cond);
+ auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts());
if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
@@ -2276,7 +2290,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
// Now build up the set of caller features and verify that all the required
// features are there.
llvm::StringMap<bool> CallerFeatureMap;
- CGM.getFunctionFeatureMap(CallerFeatureMap, FD);
+ CGM.getFunctionFeatureMap(CallerFeatureMap, GlobalDecl().getWithDecl(FD));
// If we have at least one of the features in the feature list return
// true, otherwise return false.
@@ -2284,14 +2298,13 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) {
SmallVector<StringRef, 1> OrFeatures;
Feature.split(OrFeatures, '|');
- return std::any_of(OrFeatures.begin(), OrFeatures.end(),
- [&](StringRef Feature) {
- if (!CallerFeatureMap.lookup(Feature)) {
- FirstMissing = Feature.str();
- return false;
- }
- return true;
- });
+ return llvm::any_of(OrFeatures, [&](StringRef Feature) {
+ if (!CallerFeatureMap.lookup(Feature)) {
+ FirstMissing = Feature.str();
+ return false;
+ }
+ return true;
+ });
});
}
@@ -2378,6 +2391,29 @@ CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
return Condition;
}
+static void CreateMultiVersionResolverReturn(CodeGenModule &CGM,
+ llvm::Function *Resolver,
+ CGBuilderTy &Builder,
+ llvm::Function *FuncToReturn,
+ bool SupportsIFunc) {
+ if (SupportsIFunc) {
+ Builder.CreateRet(FuncToReturn);
+ return;
+ }
+
+ llvm::SmallVector<llvm::Value *, 10> Args;
+ llvm::for_each(Resolver->args(),
+ [&](llvm::Argument &Arg) { Args.push_back(&Arg); });
+
+ llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+ Result->setTailCallKind(llvm::CallInst::TCK_MustTail);
+
+ if (Resolver->getReturnType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Result);
+}
+
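
On targets without ifunc support the resolver cannot hand back a function
pointer at load time, so each case now forwards with a musttail call instead;
roughly (a sketch, illustrative names):

    // ifunc flavor: the resolver returns &foo_avx2 once at load time.
    // forwarding flavor: the resolver is the entry point itself:
    int foo(int x) {          // one such return per resolver option
      return foo_avx2(x);     // musttail call, preserving the ABI exactly
    }
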
void CodeGenFunction::EmitMultiVersionResolver(
llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
assert((getContext().getTargetInfo().getTriple().getArch() ==
@@ -2385,6 +2421,9 @@ void CodeGenFunction::EmitMultiVersionResolver(
getContext().getTargetInfo().getTriple().getArch() ==
llvm::Triple::x86_64) &&
"Only implemented for x86 targets");
+
+ bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
+
// Main function's basic block.
llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
Builder.SetInsertPoint(CurBlock);
@@ -2398,13 +2437,15 @@ void CodeGenFunction::EmitMultiVersionResolver(
if (!Condition) {
assert(&RO == Options.end() - 1 &&
"Default or Generic case must be last");
- Builder.CreateRet(RO.Function);
+ CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function,
+ SupportsIFunc);
return;
}
llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
- llvm::IRBuilder<> RetBuilder(RetBlock);
- RetBuilder.CreateRet(RO.Function);
+ CGBuilderTy RetBuilder(*this, RetBlock);
+ CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function,
+ SupportsIFunc);
CurBlock = createBasicBlock("resolver_else", Resolver);
Builder.CreateCondBr(Condition, RetBlock, CurBlock);
}
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 6ea2d75b31..8971accdcd 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -29,9 +29,9 @@
#include "clang/AST/Type.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/CapturedStmt.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
@@ -1197,6 +1197,8 @@ public:
private:
CGDebugInfo *DebugInfo;
+ /// Used to create unique names for artificial VLA size debug info variables.
+ unsigned VLAExprCounter = 0;
bool DisableDebugInfo = false;
/// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid
@@ -1787,7 +1789,7 @@ public:
llvm::Value *ptr);
Address LoadBlockStruct();
- Address GetAddrOfBlockDecl(const VarDecl *var, bool ByRef);
+ Address GetAddrOfBlockDecl(const VarDecl *var);
/// BuildBlockByrefAddress - Computes the location of the
/// data in a variable which is declared as __block.
@@ -1825,7 +1827,7 @@ public:
void EmitConstructorBody(FunctionArgList &Args);
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
- void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body);
+ void EmitFunctionBody(const Stmt *Body);
void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S);
void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
@@ -1854,7 +1856,7 @@ public:
void FinishThunk();
/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
- void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr,
+ void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr,
llvm::Value *Callee);
/// Generate a thunk for the given method.
@@ -2683,8 +2685,9 @@ public:
llvm::Value *NRVOFlag;
- /// True if the variable is a __block variable.
- bool IsByRef;
+ /// True if the variable is a __block variable that is captured by an
+ /// escaping block.
+ bool IsEscapingByRef;
/// True if the variable is of aggregate type and has a constant
/// initializer.
@@ -2704,7 +2707,7 @@ public:
AutoVarEmission(const VarDecl &variable)
: Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
- IsByRef(false), IsConstantAggregate(false),
+ IsEscapingByRef(false), IsConstantAggregate(false),
SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {}
bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
@@ -2734,7 +2737,7 @@ public:
/// Note that this does not chase the forwarding pointer for
/// __block decls.
Address getObjectAddress(CodeGenFunction &CGF) const {
- if (!IsByRef) return Addr;
+ if (!IsEscapingByRef) return Addr;
return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false);
}
@@ -3523,6 +3526,7 @@ public:
ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr);
ConstantEmission tryEmitAsConstant(const MemberExpr *ME);
+ llvm::Value *emitScalarConstant(const ConstantEmission &Constant, Expr *E);
RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e,
AggValueSlot slot = AggValueSlot::ignored());
@@ -3677,9 +3681,8 @@ public:
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue);
- RValue EmitBuiltinExpr(const FunctionDecl *FD,
- unsigned BuiltinID, const CallExpr *E,
- ReturnValueSlot ReturnValue);
+ RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
+ const CallExpr *E, ReturnValueSlot ReturnValue);
RValue emitRotate(const CallExpr *E, bool IsRotateRight);
@@ -3802,6 +3805,10 @@ public:
std::pair<LValue,llvm::Value*>
EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored);
+ llvm::Value *EmitObjCAlloc(llvm::Value *value,
+ llvm::Type *returnType);
+ llvm::Value *EmitObjCAllocWithZone(llvm::Value *value,
+ llvm::Type *returnType);
llvm::Value *EmitObjCThrowOperand(const Expr *expr);
llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr);
llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr);
@@ -3891,6 +3898,8 @@ public:
AddInitializerToStaticVarDecl(const VarDecl &D,
llvm::GlobalVariable *GV);
+ // Emit an @llvm.invariant.start call for the given memory region.
+ void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size);
/// EmitCXXGlobalVarDeclInit - Create the initializer for a C++
/// variable with global storage.
@@ -3926,9 +3935,10 @@ public:
/// GenerateCXXGlobalInitFunc - Generates code for initializing global
/// variables.
- void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
- ArrayRef<llvm::Function *> CXXThreadLocals,
- Address Guard = Address::invalid());
+ void
+ GenerateCXXGlobalInitFunc(llvm::Function *Fn,
+ ArrayRef<llvm::Function *> CXXThreadLocals,
+ ConstantAddress Guard = ConstantAddress::invalid());
/// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
/// variables.
@@ -3946,11 +3956,13 @@ public:
void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);
- void enterFullExpression(const ExprWithCleanups *E) {
- if (E->getNumObjects() == 0) return;
+ void enterFullExpression(const FullExpr *E) {
+ if (const auto *EWC = dyn_cast<ExprWithCleanups>(E))
+ if (EWC->getNumObjects() == 0)
+ return;
enterNonTrivialFullExpression(E);
}
- void enterNonTrivialFullExpression(const ExprWithCleanups *E);
+ void enterNonTrivialFullExpression(const FullExpr *E);
void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true);
@@ -4273,6 +4285,7 @@ public:
struct MultiVersionResolverOption {
llvm::Function *Function;
+ FunctionDecl *FD;
struct Conds {
StringRef Architecture;
llvm::SmallVector<StringRef, 8> Features;
@@ -4292,22 +4305,7 @@ public:
void EmitMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
- struct CPUDispatchMultiVersionResolverOption {
- llvm::Function *Function;
- // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask
- // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports,
- // this can be extended to 64 bits.
- uint32_t FeatureMask;
- CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask)
- : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {}
- bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const {
- return FeatureMask > Other.FeatureMask;
- }
- };
- void EmitCPUDispatchMultiVersionResolver(
- llvm::Function *Resolver,
- ArrayRef<CPUDispatchMultiVersionResolverOption> Options);
- static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
+ static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
private:
QualType getVarArgType(const Expr *Arg);
@@ -4324,7 +4322,7 @@ private:
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
- llvm::Value *EmitX86CpuSupports(uint32_t Mask);
+ llvm::Value *EmitX86CpuSupports(uint64_t Mask);
llvm::Value *EmitX86CpuInit();
llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
};
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index c2fb4797cc..df814d6386 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -36,14 +36,14 @@
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
-#include "clang/Frontend/CodeGenOptions.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -126,7 +126,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
- if (LangOpts.ObjC1)
+ if (LangOpts.ObjC)
createObjCRuntime();
if (LangOpts.OpenCL)
createOpenCLRuntime();
@@ -149,12 +149,12 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
Block.GlobalUniqueCount = 0;
- if (C.getLangOpts().ObjC1)
+ if (C.getLangOpts().ObjC)
ObjCData.reset(new ObjCEntrypoints());
if (CodeGenOpts.hasProfileClangUse()) {
auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
- CodeGenOpts.ProfileInstrumentUsePath);
+ CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile);
if (auto E = ReaderOrErr.takeError()) {
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"Could not read profile %0: %1");
@@ -322,8 +322,6 @@ void CodeGenModule::checkAliases() {
assert(FTy);
if (!FTy->getReturnType()->isPointerTy())
Diags.Report(Location, diag::err_ifunc_resolver_return);
- if (FTy->getNumParams())
- Diags.Report(Location, diag::err_ifunc_resolver_params);
}
llvm::Constant *Aliasee = Alias->getIndirectSymbol();
@@ -460,6 +458,9 @@ void CodeGenModule::Release() {
// Indicate that we want CodeView in the metadata.
getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1);
}
+ if (CodeGenOpts.CodeViewGHash) {
+ getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1);
+ }
if (CodeGenOpts.ControlFlowGuard) {
// We want function ID tables for Control Flow Guard.
getModule().addModuleFlag(llvm::Module::Warning, "cfguardtable", 1);
@@ -589,6 +590,9 @@ void CodeGenModule::Release() {
if (getCodeGenOpts().EmitVersionIdentMetadata)
EmitVersionIdentMetadata();
+ if (!getCodeGenOpts().RecordCommandLine.empty())
+ EmitCommandLineMetadata();
+
EmitTargetMetadata();
}
@@ -893,11 +897,13 @@ static std::string getCPUSpecificMangling(const CodeGenModule &CGM,
static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
const CPUSpecificAttr *Attr,
+ unsigned CPUIndex,
raw_ostream &Out) {
- // cpu_specific gets the current name, dispatch gets the resolver.
+ // cpu_specific gets the current name, dispatch gets the resolver if IFunc is
+ // supported.
if (Attr)
- Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
- else
+ Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName());
+ else if (CGM.getTarget().supportsIFunc())
Out << ".resolver";
}
@@ -963,11 +969,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
if (const auto *FD = dyn_cast<FunctionDecl>(ND))
if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
- if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
- AppendCPUSpecificCPUDispatchMangling(
- CGM, FD->getAttr<CPUSpecificAttr>(), Out);
- else
+ switch (FD->getMultiVersionKind()) {
+ case MultiVersionKind::CPUDispatch:
+ case MultiVersionKind::CPUSpecific:
+ AppendCPUSpecificCPUDispatchMangling(CGM,
+ FD->getAttr<CPUSpecificAttr>(),
+ GD.getMultiVersionIndex(), Out);
+ break;
+ case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+ break;
+ case MultiVersionKind::None:
+ llvm_unreachable("None multiversion type isn't valid here");
+ }
}
return Out.str();
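Note: the switch above derives the per-CPU suffix from GD.getMultiVersionIndex()
instead of a mutable ActiveArgIndex on the attribute, so each CPU's GlobalDecl can
be mangled independently. A user-level sketch of the construct involved (the
per-CPU suffix details are an assumption, not exact output):

    __attribute__((cpu_specific(ivybridge))) void work(void) {}  // own suffixed symbol
    __attribute__((cpu_specific(atom)))      void work(void) {}  // own suffixed symbol
    __attribute__((cpu_dispatch(ivybridge, atom))) void work(void);  // resolver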
@@ -992,8 +1006,10 @@ void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
"Other GD should now be a multiversioned function");
// OtherFD is the version of this function that was mangled BEFORE
// becoming a MultiVersion function. It potentially needs to be updated.
- const FunctionDecl *OtherFD =
- OtherGD.getCanonicalDecl().getDecl()->getAsFunction();
+ const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl()
+ .getDecl()
+ ->getAsFunction()
+ ->getMostRecentDecl();
std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD);
// This is so that if the initial version was already the 'default'
// version, we don't try to update it.
@@ -1025,26 +1041,6 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
}
}
- const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
- // Since CPUSpecific can require multiple emits per decl, store the manglings
- // separately.
- if (FD &&
- (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
- const auto *SD = FD->getAttr<CPUSpecificAttr>();
-
- std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
- CanonicalGD,
- SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
-
- auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
- if (FoundName != CPUSpecificMangledDeclNames.end())
- return FoundName->second;
-
- auto Result = CPUSpecificManglings.insert(
- std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD));
- return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first();
- }
-
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
@@ -1106,11 +1102,12 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
// Ctor function type is void()*.
llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false);
- llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy);
+ llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy,
+ TheModule.getDataLayout().getProgramAddressSpace());
// Get the type of a ctor entry, { i32, void ()*, i8* }.
llvm::StructType *CtorStructTy = llvm::StructType::get(
- Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy);
+ Int32Ty, CtorPFTy, VoidPtrTy);
// Construct the constructor and destructor arrays.
ConstantInitBuilder builder(*this);
@@ -1166,12 +1163,12 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
}
-void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
+void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
const CGFunctionInfo &Info,
llvm::Function *F) {
unsigned CallingConv;
llvm::AttributeList PAL;
- ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false);
+ ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, false);
F->setAttributes(PAL);
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
}
@@ -1301,9 +1298,19 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// Otherwise, propagate the inline hint attribute and potentially use its
// absence to mark things as noinline.
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
- if (any_of(FD->redecls(), [&](const FunctionDecl *Redecl) {
- return Redecl->isInlineSpecified();
- })) {
+ // Search function and template pattern redeclarations for inline.
+ auto CheckForInline = [](const FunctionDecl *FD) {
+ auto CheckRedeclForInline = [](const FunctionDecl *Redecl) {
+ return Redecl->isInlineSpecified();
+ };
+ if (any_of(FD->redecls(), CheckRedeclForInline))
+ return true;
+ const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern();
+ if (!Pattern)
+ return false;
+ return any_of(Pattern->redecls(), CheckRedeclForInline);
+ };
+ if (CheckForInline(FD)) {
B.addAttribute(llvm::Attribute::InlineHint);
} else if (CodeGenOpts.getInlining() ==
CodeGenOptions::OnlyHintInlining &&
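Note: the hint check now also walks the template instantiation pattern's
redeclarations. Minimal illustration, assuming the specialization itself has no
inline redeclaration of its own:

    template <typename T> inline T twice(T x) { return x + x; }
    int user(int x) { return twice(x); }  // twice<int> now receives InlineHint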
@@ -1377,26 +1384,27 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
if (CodeGenOpts.KeepStaticConsts && D && isa<VarDecl>(D)) {
const auto *VD = cast<VarDecl>(D);
- if (VD->getType().isConstQualified() && VD->getStorageClass() == SC_Static)
+ if (VD->getType().isConstQualified() &&
+ VD->getStorageDuration() == SD_Static)
addUsedGlobal(GV);
}
}
-bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D,
+bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &Attrs) {
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
// parse that and add it to the feature set.
StringRef TargetCPU = getTarget().getTargetOpts().CPU;
std::vector<std::string> Features;
- const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
bool AddedAttr = false;
if (TD || SD) {
llvm::StringMap<bool> FeatureMap;
- getFunctionFeatureMap(FeatureMap, FD);
+ getFunctionFeatureMap(FeatureMap, GD);
// Produce the canonical string for this set of features.
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
@@ -1452,7 +1460,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
F->addFnAttr("implicit-section-name", SA->getName());
llvm::AttrBuilder Attrs;
- if (GetCPUAndFeaturesAttributes(D, Attrs)) {
+ if (GetCPUAndFeaturesAttributes(GD, Attrs)) {
// We know that GetCPUAndFeaturesAttributes will always have the
// newest set, since it has the newest possible FunctionDecl, so the
// new ones should replace the old.
@@ -1475,7 +1483,7 @@ void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD,
llvm::Function *F,
const CGFunctionInfo &FI) {
const Decl *D = GD.getDecl();
- SetLLVMFunctionAttributes(D, FI, F);
+ SetLLVMFunctionAttributes(GD, FI, F);
SetLLVMFunctionAttributesForDefinition(D, F);
F->setLinkage(llvm::Function::InternalLinkage);
@@ -1537,7 +1545,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
const auto *FD = cast<FunctionDecl>(GD.getDecl());
if (!IsIncompleteFunction) {
- SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
+ SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F);
// Setup target-specific attributes.
if (F->isDeclaration())
getTargetCodeGenInfo().setTargetAttributes(FD, F, *this);
@@ -2015,7 +2023,7 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
if (CodeGenOpts.KeepStaticConsts) {
const auto *VD = dyn_cast<VarDecl>(Global);
if (VD && VD->getType().isConstQualified() &&
- VD->getStorageClass() == SC_Static)
+ VD->getStorageDuration() == SD_Static)
return true;
}
@@ -2413,6 +2421,19 @@ bool CodeGenModule::shouldOpportunisticallyEmitVTables() {
return CodeGenOpts.OptimizationLevel > 0;
}
+void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
+ llvm::GlobalValue *GV) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+
+ if (FD->isCPUSpecificMultiVersion()) {
+ auto *Spec = FD->getAttr<CPUSpecificAttr>();
+ for (unsigned I = 0; I < Spec->cpus_size(); ++I)
+ EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
+ // CPU-specific versions require one emit per CPU, so GV is not reused.
+ } else
+ EmitGlobalFunctionDefinition(GD, GV);
+}
+
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
@@ -2420,7 +2441,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
Context.getSourceManager(),
"Generating code for declaration");
- if (isa<FunctionDecl>(D)) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// At -O0, don't generate IR for functions with available_externally
// linkage.
if (!shouldEmitFunction(GD))
@@ -2433,6 +2454,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType()));
else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method))
ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType()));
+ else if (FD->isMultiVersion())
+ EmitMultiVersionFunctionDefinition(GD, GV);
else
EmitGlobalFunctionDefinition(GD, GV);
@@ -2442,6 +2465,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
return;
}
+ if (FD->isMultiVersion())
+ return EmitMultiVersionFunctionDefinition(GD, GV);
return EmitGlobalFunctionDefinition(GD, GV);
}
@@ -2499,13 +2524,19 @@ void CodeGenModule::emitMultiVersionFunctions() {
TA->getArchitecture(), Feats);
});
- llvm::Function *ResolverFunc = cast<llvm::Function>(
- GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+ llvm::Function *ResolverFunc;
+ const TargetInfo &TI = getTarget();
+
+ if (TI.supportsIFunc() || FD->isTargetMultiVersion())
+ ResolverFunc = cast<llvm::Function>(
+ GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+ else
+ ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
+
if (supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));
- const TargetInfo &TI = getTarget();
std::stable_sort(
Options.begin(), Options.end(),
[&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
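Note: on targets without ifunc support, cpu_dispatch now emits the dispatch body
under the function's own mangled name, while target multiversioning keeps its
separate ".resolver" symbol. A target-multiversion sketch that reaches this code
(x86 assumed):

    __attribute__((target("avx2")))    int fast(void) { return 2; }
    __attribute__((target("default"))) int fast(void) { return 1; }
    int call(void) { return fast(); }  // routed through the resolver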
@@ -2522,26 +2553,58 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
assert(FD && "Not a FunctionDecl?");
const auto *DD = FD->getAttr<CPUDispatchAttr>();
assert(DD && "Not a cpu_dispatch Function?");
- llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
+ QualType CanonTy = Context.getCanonicalType(FD->getType());
+ llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD);
+
+ if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD);
+ DeclTy = getTypes().GetFunctionType(FInfo);
+ }
StringRef ResolverName = getMangledName(GD);
- llvm::Type *ResolverType = llvm::FunctionType::get(
- llvm::PointerType::get(DeclTy,
- Context.getTargetAddressSpace(FD->getType())),
- false);
- auto *ResolverFunc = cast<llvm::Function>(
- GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
- /*ForVTable=*/false));
+
+ llvm::Type *ResolverType;
+ GlobalDecl ResolverGD;
+ if (getTarget().supportsIFunc())
+ ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(DeclTy,
+ Context.getTargetAddressSpace(FD->getType())),
+ false);
+ else {
+ ResolverType = DeclTy;
+ ResolverGD = GD;
+ }
+
+ auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
+ ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
const TargetInfo &Target = getTarget();
+ unsigned Index = 0;
for (const IdentifierInfo *II : DD->cpus()) {
// Get the name of the target function so we can look it up/create it.
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
getCPUSpecificMangling(*this, II->getName());
- llvm::Constant *Func = GetOrCreateLLVMFunction(
- MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
- /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+
+ llvm::Constant *Func = GetGlobalValue(MangledName);
+
+ if (!Func) {
+ GlobalDecl ExistingDecl = Manglings.lookup(MangledName);
+ if (ExistingDecl.getDecl() &&
+ ExistingDecl.getDecl()->getAsFunction()->isDefined()) {
+ EmitGlobalFunctionDefinition(ExistingDecl, nullptr);
+ Func = GetGlobalValue(MangledName);
+ } else {
+ if (!ExistingDecl.getDecl())
+ ExistingDecl = GD.getWithMultiVersionIndex(Index);
+
+ Func = GetOrCreateLLVMFunction(
+ MangledName, DeclTy, ExistingDecl,
+ /*ForVTable=*/false, /*DontDefer=*/true,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ }
+ }
+
llvm::SmallVector<StringRef, 32> Features;
Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
llvm::transform(Features, Features.begin(),
@@ -2551,29 +2614,53 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
return !Target.validateCpuSupports(Feat);
}), Features.end());
Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features);
+ ++Index;
}
llvm::sort(
- Options.begin(), Options.end(),
- [](const CodeGenFunction::MultiVersionResolverOption &LHS,
- const CodeGenFunction::MultiVersionResolverOption &RHS) {
+ Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS,
+ const CodeGenFunction::MultiVersionResolverOption &RHS) {
return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) >
CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features);
});
+
+ // If the list contains multiple 'default' versions, such as when it contains
+ // 'pentium' and 'generic', don't emit the call to the generic one (since we
+ // always run on at least a 'pentium'). We do this by deleting the 'least
+ // advanced' (read, lowest mangling letter).
+ while (Options.size() > 1 &&
+ CodeGenFunction::GetX86CpuSupportsMask(
+ (Options.end() - 2)->Conditions.Features) == 0) {
+ StringRef LHSName = (Options.end() - 2)->Function->getName();
+ StringRef RHSName = (Options.end() - 1)->Function->getName();
+ if (LHSName.compare(RHSName) < 0)
+ Options.erase(Options.end() - 2);
+ else
+ Options.erase(Options.end() - 1);
+ }
+
CodeGenFunction CGF(*this);
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
}
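Note: the collapse loop above keeps a single zero-mask fallback in the resolver.
Sketch of the case described in the comment (which entry survives depends on the
mangling letters):

    __attribute__((cpu_dispatch(generic, pentium, ivybridge))) void work(void);
    // 'generic' and 'pentium' both test a zero feature mask; the one with the
    // lower mangling letter is dropped from the resolver.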
-/// If an ifunc for the specified mangled name is not in the module, create and
-/// return an llvm IFunc Function with the specified type.
-llvm::Constant *
-CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
- const FunctionDecl *FD) {
+/// If a dispatcher for the specified mangled name is not in the module, create
+/// and return an llvm Function with the specified type.
+llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
+ GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) {
std::string MangledName =
getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
- std::string IFuncName = MangledName + ".ifunc";
- if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
- return IFuncGV;
+
+ // Holds the name of the resolver; in ifunc mode this is the ifunc itself
+ // (which has a separate resolver).
+ std::string ResolverName = MangledName;
+ if (getTarget().supportsIFunc())
+ ResolverName += ".ifunc";
+ else if (FD->isTargetMultiVersion())
+ ResolverName += ".resolver";
+
+ // If this already exists, just return that one.
+ if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))
+ return ResolverGV;
// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
@@ -2581,20 +2668,28 @@ CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
MultiVersionFuncs.push_back(GD);
- std::string ResolverName = MangledName + ".resolver";
- llvm::Type *ResolverType = llvm::FunctionType::get(
- llvm::PointerType::get(DeclTy,
- Context.getTargetAddressSpace(FD->getType())),
- false);
- llvm::Constant *Resolver =
- GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
- /*ForVTable=*/false);
- llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
- DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
- GIF->setName(IFuncName);
- SetCommonAttributes(FD, GIF);
+ if (getTarget().supportsIFunc()) {
+ llvm::Type *ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(
+ DeclTy, getContext().getTargetAddressSpace(FD->getType())),
+ false);
+ llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+ MangledName + ".resolver", ResolverType, GlobalDecl{},
+ /*ForVTable=*/false);
+ llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
+ DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+ GIF->setName(ResolverName);
+ SetCommonAttributes(FD, GIF);
- return GIF;
+ return GIF;
+ }
+
+ llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+ ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false);
+ assert(isa<llvm::GlobalValue>(Resolver) &&
+ "Resolver should be created for the first time");
+ SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver));
+ return Resolver;
}
/// GetOrCreateLLVMFunction - If the specified mangled name is not in the
@@ -2634,7 +2729,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
if (TA && TA->isDefaultVersion())
UpdateMultiVersionNames(GD, FD);
if (!IsForDefinition)
- return GetOrCreateMultiVersionIFunc(GD, Ty, FD);
+ return GetOrCreateMultiVersionResolver(GD, Ty, FD);
}
}
@@ -3393,8 +3488,15 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// CUDA E.2.4.1 "__shared__ variables cannot have an initialization
// as part of their declaration." Sema has already checked for
// error cases, so we just need to set Init to UndefValue.
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
- D->hasAttr<CUDASharedAttr>())
+ bool IsCUDASharedVar =
+ getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>();
+ // Shadows of initialized device-side global variables are also left
+ // undefined.
+ bool IsCUDAShadowVar =
+ !getLangOpts().CUDAIsDevice &&
+ (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
+ D->hasAttr<CUDASharedAttr>());
+ if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar))
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
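Note: host-side shadows of device globals are now also left undefined; their real
values are filled in by CUDA runtime registration. Sketch of the three cases
(host compilation assumed for the first two):

    __device__   int   devCounter = 42;   // host shadow: undef
    __constant__ float scale      = 1.f;  // host shadow: undef
    __global__ void k() { __shared__ int tile[64]; }  // no initializer allowed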
@@ -3672,6 +3774,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
return llvm::GlobalVariable::WeakAnyLinkage;
}
+ if (const auto *FD = D->getAsFunction())
+ if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally)
+ return llvm::GlobalVariable::LinkOnceAnyLinkage;
+
// We are guaranteed to have a strong definition somewhere else,
// so we can use available_externally linkage.
if (Linkage == GVA_AvailableExternally)
@@ -3908,15 +4014,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
-
- if (D->isCPUSpecificMultiVersion()) {
- auto *Spec = D->getAttr<CPUSpecificAttr>();
- // If there is another specific version we need to emit, do so here.
- if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) {
- ++Spec->ActiveArgIndex;
- EmitGlobalFunctionDefinition(GD, nullptr);
- }
- }
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -4109,51 +4206,82 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty);
llvm::Constant *Zeros[] = { Zero, Zero };
-
+
+ const ASTContext &Context = getContext();
+ const llvm::Triple &Triple = getTriple();
+
+ const auto CFRuntime = getLangOpts().CFRuntime;
+ const bool IsSwiftABI =
+ static_cast<unsigned>(CFRuntime) >=
+ static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift);
+ const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1;
+
// If we don't already have it, get __CFConstantStringClassReference.
if (!CFConstantStringClassRef) {
+ const char *CFConstantStringClassName = "__CFConstantStringClassReference";
llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy);
Ty = llvm::ArrayType::get(Ty, 0);
- llvm::Constant *C =
- CreateRuntimeVariable(Ty, "__CFConstantStringClassReference");
-
- if (getTriple().isOSBinFormatELF() || getTriple().isOSBinFormatCOFF()) {
+
+ switch (CFRuntime) {
+ default: break;
+ case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH;
+ case LangOptions::CoreFoundationABI::Swift5_0:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN"
+ : "$s10Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ case LangOptions::CoreFoundationABI::Swift4_2:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN"
+ : "$S10Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ case LangOptions::CoreFoundationABI::Swift4_1:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "__T015SwiftFoundation19_NSCFConstantStringCN"
+ : "__T010Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ }
+
+ llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName);
+
+ if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) {
llvm::GlobalValue *GV = nullptr;
-
+
if ((GV = dyn_cast<llvm::GlobalValue>(C))) {
- IdentifierInfo &II = getContext().Idents.get(GV->getName());
- TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl();
+ IdentifierInfo &II = Context.Idents.get(GV->getName());
+ TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
const VarDecl *VD = nullptr;
for (const auto &Result : DC->lookup(&II))
if ((VD = dyn_cast<VarDecl>(Result)))
break;
-
- if (getTriple().isOSBinFormatELF()) {
+
+ if (Triple.isOSBinFormatELF()) {
if (!VD)
GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- }
- else {
- if (!VD || !VD->hasAttr<DLLExportAttr>()) {
+ } else {
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ if (!VD || !VD->hasAttr<DLLExportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else {
+ else
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- }
}
-
+
setDSOLocal(GV);
}
}
-
+
// Decay array -> ptr
CFConstantStringClassRef =
- llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros);
+ IsSwiftABI ? llvm::ConstantExpr::getPtrToInt(C, Ty)
+ : llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros);
}
- QualType CFTy = getContext().getCFConstantStringType();
+ QualType CFTy = Context.getCFConstantStringType();
auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy));
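Note: under the Swift CF runtimes the literal gains a pointer-sized flags word
(0x05 for Swift 4.1, else 0x01) ahead of the 64-bit CFString flags, and the
length field narrows to i32 for Swift 4.1/4.2. A literal that takes this path,
assuming CoreFoundation headers and a Swift CF runtime ABI selected at the
driver level:

    #include <CoreFoundation/CFString.h>
    static const CFStringRef kName = CFSTR("example");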
@@ -4164,7 +4292,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
Fields.add(cast<llvm::ConstantExpr>(CFConstantStringClassRef));
// Flags.
- Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8);
+ if (IsSwiftABI) {
+ Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01);
+ Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8);
+ } else {
+ Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8);
+ }
// String pointer.
llvm::Constant *C = nullptr;
@@ -4185,22 +4318,21 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
- CharUnits Align = isUTF16
- ? getContext().getTypeAlignInChars(getContext().ShortTy)
- : getContext().getTypeAlignInChars(getContext().CharTy);
+ CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy)
+ : Context.getTypeAlignInChars(Context.CharTy);
GV->setAlignment(Align.getQuantity());
// FIXME: We set the section explicitly to avoid a bug in ld64 224.1.
// Without it LLVM can merge the string with a non unnamed_addr one during
// LTO. Doing that changes the section it ends in, which surprises ld64.
- if (getTriple().isOSBinFormatMachO())
+ if (Triple.isOSBinFormatMachO())
GV->setSection(isUTF16 ? "__TEXT,__ustring"
: "__TEXT,__cstring,cstring_literals");
// Make sure the literal ends up in .rodata to allow for safe ICF and for
// the static linker to adjust permissions to read-only later on.
- else if (getTriple().isOSBinFormatELF())
+ else if (Triple.isOSBinFormatELF())
GV->setSection(".rodata");
-
+
// String.
llvm::Constant *Str =
llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
@@ -4211,8 +4343,17 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
Fields.add(Str);
// String length.
- auto Ty = getTypes().ConvertType(getContext().LongTy);
- Fields.addInt(cast<llvm::IntegerType>(Ty), StringLength);
+ llvm::IntegerType *LengthTy =
+ llvm::IntegerType::get(getModule().getContext(),
+ Context.getTargetInfo().getLongWidth());
+ if (IsSwiftABI) {
+ if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 ||
+ CFRuntime == LangOptions::CoreFoundationABI::Swift4_2)
+ LengthTy = Int32Ty;
+ else
+ LengthTy = IntPtrTy;
+ }
+ Fields.addInt(LengthTy, StringLength);
CharUnits Alignment = getPointerAlign();
@@ -4220,7 +4361,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment,
/*isConstant=*/false,
llvm::GlobalVariable::PrivateLinkage);
- switch (getTriple().getObjectFormat()) {
+ switch (Triple.getObjectFormat()) {
case llvm::Triple::UnknownObjectFormat:
llvm_unreachable("unknown file format");
case llvm::Triple::COFF:
@@ -4713,6 +4854,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::TypeAliasTemplate:
case Decl::Block:
case Decl::Empty:
+ case Decl::Binding:
break;
case Decl::Using: // using X; [C++]
if (CGDebugInfo *DI = getModuleDebugInfo())
@@ -4879,7 +5021,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::OMPDeclareReduction:
EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D));
break;
-
+
case Decl::OMPRequires:
EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D));
break;
@@ -5078,6 +5220,16 @@ void CodeGenModule::EmitVersionIdentMetadata() {
IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode));
}
+void CodeGenModule::EmitCommandLineMetadata() {
+ llvm::NamedMDNode *CommandLineMetadata =
+ TheModule.getOrInsertNamedMetadata("llvm.commandline");
+ std::string CommandLine = getCodeGenOpts().RecordCommandLine;
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+
+ llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)};
+ CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode));
+}
+
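Note: EmitCommandLineMetadata mirrors EmitVersionIdentMetadata, adding a single
MDString operand to a named "llvm.commandline" node. Standalone sketch of the
same shape (the module M and the sample string are assumptions):

    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    void recordCommandLine(llvm::Module &M) {
      llvm::LLVMContext &Ctx = M.getContext();
      llvm::NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.commandline");
      llvm::Metadata *Ops[] = {
          llvm::MDString::get(Ctx, "clang -O2 -frecord-command-line")};
      NMD->addOperand(llvm::MDNode::get(Ctx, Ops));
    }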
void CodeGenModule::EmitTargetMetadata() {
// Warning, new MangledDeclNames may be appended within this loop.
// We rely on MapVector insertions adding new elements to the end
@@ -5293,8 +5445,9 @@ TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const Targ
// Fills in the supplied string map with the set of target features for the
// passed in function.
void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
- const FunctionDecl *FD) {
+ GlobalDecl GD) {
StringRef TargetCPU = Target.getTargetOpts().CPU;
+ const FunctionDecl *FD = GD.getDecl()->getAsFunction();
if (const auto *TD = FD->getAttr<TargetAttr>()) {
TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
@@ -5316,8 +5469,8 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
ParsedAttr.Features);
} else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
llvm::SmallVector<StringRef, 32> FeaturesTmp;
- Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
- FeaturesTmp);
+ Target.getCPUSpecificCPUDispatchFeatures(
+ SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
} else {
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index baf3619ca8..0f6c3bec9e 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -119,7 +119,13 @@ struct OrderGlobalInits {
struct ObjCEntrypoints {
ObjCEntrypoints() { memset(this, 0, sizeof(*this)); }
- /// void objc_autoreleasePoolPop(void*);
+ /// id objc_alloc(id);
+ llvm::Constant *objc_alloc;
+
+ /// id objc_allocWithZone(id);
+ llvm::Constant *objc_allocWithZone;
+
+ /// void objc_autoreleasePoolPop(void*);
llvm::Constant *objc_autoreleasePoolPop;
/// void *objc_autoreleasePoolPush(void);
@@ -1043,8 +1049,7 @@ public:
const CGFunctionInfo &FI);
/// Set the LLVM function attributes (sext, zext, etc).
- void SetLLVMFunctionAttributes(const Decl *D,
- const CGFunctionInfo &Info,
+ void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info,
llvm::Function *F);
/// Set the LLVM function attributes which only apply to a function
@@ -1104,8 +1109,7 @@ public:
// Fills in the supplied string map with the set of target features for the
// passed in function.
- void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
- const FunctionDecl *FD);
+ void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, GlobalDecl GD);
StringRef getMangledName(GlobalDecl GD);
StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD);
@@ -1293,9 +1297,9 @@ private:
llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
ForDefinition_t IsForDefinition = NotForDefinition);
- llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD,
- llvm::Type *DeclTy,
- const FunctionDecl *FD);
+ llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD,
+ llvm::Type *DeclTy,
+ const FunctionDecl *FD);
void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
@@ -1304,7 +1308,7 @@ private:
ForDefinition_t IsForDefinition
= NotForDefinition);
- bool GetCPUAndFeaturesAttributes(const Decl *D,
+ bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &AttrBuilder);
void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
@@ -1315,6 +1319,8 @@ private:
void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+ void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+
void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
@@ -1402,6 +1408,9 @@ private:
/// Emit the Clang version as llvm.ident metadata.
void EmitVersionIdentMetadata();
+ /// Emit the Clang commandline as llvm.commandline metadata.
+ void EmitCommandLineMetadata();
+
/// Emits target specific Metadata for global declarations.
void EmitTargetMetadata();
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 0759e65388..120ab651a4 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -17,7 +17,6 @@
#include "CGBuilder.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include <array>
#include <memory>
diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp
index ec48231e52..27d39716d2 100644
--- a/lib/CodeGen/CodeGenTBAA.cpp
+++ b/lib/CodeGen/CodeGenTBAA.cpp
@@ -20,7 +20,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 1a1395e6ae..2acf1ac161 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -503,6 +503,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 626869f000..8e344e91b8 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -17,7 +17,6 @@
#include "CGCall.h"
#include "clang/Basic/ABI.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Sema/Sema.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Module.h"
diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h
index b4d1b65743..7ad8e5d37c 100644
--- a/lib/CodeGen/ConstantEmitter.h
+++ b/lib/CodeGen/ConstantEmitter.h
@@ -38,6 +38,9 @@ private:
/// Whether the constant-emission failed.
bool Failed = false;
+ /// Whether we're in a constant context.
+ bool InConstantContext = false;
+
/// The AST address space where this (non-abstract) initializer is going.
/// Used for generating appropriate placeholders.
LangAS DestAddressSpace;
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index e75f2c8091..35962c73d9 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -552,6 +552,15 @@ struct CounterCoverageMappingBuilder
completeDeferred(Count, DeferredEndLoc);
}
+ size_t locationDepth(SourceLocation Loc) {
+ size_t Depth = 0;
+ while (Loc.isValid()) {
+ Loc = getIncludeOrExpansionLoc(Loc);
+ Depth++;
+ }
+ return Depth;
+ }
+
/// Pop regions from the stack into the function's list of regions.
///
/// Adds all regions from \c ParentIndex to the top of the stack to the
@@ -566,19 +575,41 @@ struct CounterCoverageMappingBuilder
SourceLocation EndLoc = Region.hasEndLoc()
? Region.getEndLoc()
: RegionStack[ParentIndex].getEndLoc();
+ size_t StartDepth = locationDepth(StartLoc);
+ size_t EndDepth = locationDepth(EndLoc);
while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) {
- // The region ends in a nested file or macro expansion. Create a
- // separate region for each expansion.
- SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
- assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));
-
- if (!isRegionAlreadyAdded(NestedLoc, EndLoc))
- SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
-
- EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
- if (EndLoc.isInvalid())
- llvm::report_fatal_error("File exit not handled before popRegions");
+ bool UnnestStart = StartDepth >= EndDepth;
+ bool UnnestEnd = EndDepth >= StartDepth;
+ if (UnnestEnd) {
+ // The region ends in a nested file or macro expansion. Create a
+ // separate region for each expansion.
+ SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
+ assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));
+
+ if (!isRegionAlreadyAdded(NestedLoc, EndLoc))
+ SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
+
+ EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
+ if (EndLoc.isInvalid())
+ llvm::report_fatal_error("File exit not handled before popRegions");
+ EndDepth--;
+ }
+ if (UnnestStart) {
+ // The region begins in a nested file or macro expansion. Create a
+ // separate region for each expansion.
+ SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc);
+ assert(SM.isWrittenInSameFile(StartLoc, NestedLoc));
+
+ if (!isRegionAlreadyAdded(StartLoc, NestedLoc))
+ SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc);
+
+ StartLoc = getIncludeOrExpansionLoc(StartLoc);
+ if (StartLoc.isInvalid())
+ llvm::report_fatal_error("File exit not handled before popRegions");
+ StartDepth--;
+ }
}
+ Region.setStartLoc(StartLoc);
Region.setEndLoc(EndLoc);
MostRecentLocation = EndLoc;
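Note: popRegions can now unnest both endpoints; locationDepth decides which side
is deeper and peels it first instead of assuming only EndLoc is nested. A
start-side case that previously fell through to the fatal error (illustrative):

    #define OPEN (x
    int probe(int x) {
      return OPEN + 1);  // region starts inside the macro expansion,
    }                    // ends in the main file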
@@ -625,12 +656,15 @@ struct CounterCoverageMappingBuilder
return RegionStack.back();
}
- /// Propagate counts through the children of \c S.
- Counter propagateCounts(Counter TopCount, const Stmt *S) {
+ /// Propagate counts through the children of \p S if \p VisitChildren is true.
+ /// Otherwise, only emit a count for \p S itself.
+ Counter propagateCounts(Counter TopCount, const Stmt *S,
+ bool VisitChildren = true) {
SourceLocation StartLoc = getStart(S);
SourceLocation EndLoc = getEnd(S);
size_t Index = pushRegion(TopCount, StartLoc, EndLoc);
- Visit(S);
+ if (VisitChildren)
+ Visit(S);
Counter ExitCount = getRegion().getCounter();
popRegions(Index);
@@ -843,7 +877,16 @@ struct CounterCoverageMappingBuilder
if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
return;
- propagateCounts(getRegionCounter(Body), Body);
+ // Do not visit the artificial child nodes of defaulted methods. The
+ // lexer may not be able to report precise token end locations for
+ // these nodes (llvm.org/PR39822), and moreover users will not be able
+ // to see coverage for them.
+ bool Defaulted = false;
+ if (auto *Method = dyn_cast<CXXMethodDecl>(D))
+ Defaulted = Method->isDefaulted();
+
+ propagateCounts(getRegionCounter(Body), Body,
+ /*VisitChildren=*/!Defaulted);
assert(RegionStack.empty() && "Regions entered but never exited");
// Discard the last uncompleted deferred region in a decl, if one exists.
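Note: a defaulted method body still receives one counted region; only the
descent into its artificial children is skipped. Illustration:

    struct S {
      int v = 0;
      S &operator=(const S &) = default;  // counted, not descended into
    };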
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h
index b08ad896d7..c62db09695 100644
--- a/lib/CodeGen/CoverageMappingGen.h
+++ b/lib/CodeGen/CoverageMappingGen.h
@@ -16,7 +16,6 @@
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Lex/PPCallbacks.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/GlobalValue.h"
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index a7e3c8d58b..b53304528c 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -287,6 +287,7 @@ public:
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override;
+ bool canSpeculativelyEmitVTableAsBaseClass(const CXXRecordDecl *RD) const;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD,
bool ReturnAdjustment) override {
@@ -1562,9 +1563,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
Type != Dtor_Base && DD->isVirtual())
Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
else
- Callee =
- CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
- DD);
+ Callee = CGCallee::forDirect(
+ CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD);
CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(),
This.getPointer(), VTT, VTTTy,
@@ -1750,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = VFuncLoad;
}
- CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
@@ -1778,7 +1778,8 @@ void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD);
}
-bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
+bool ItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass(
+ const CXXRecordDecl *RD) const {
// We don't emit available_externally vtables if we are in -fapple-kext mode
// because kext mode does not permit devirtualization.
if (CGM.getLangOpts().AppleKext)
@@ -1796,7 +1797,43 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
// to emit an available_externally copy of vtable.
// FIXME we can still emit a copy of the vtable if we
// can emit definition of the inline functions.
- return !hasAnyUnusedVirtualInlineFunction(RD);
+ if (hasAnyUnusedVirtualInlineFunction(RD))
+ return false;
+
+ // For a class with virtual bases, we must also be able to speculatively
+ // emit the VTT, because CodeGen doesn't have separate notions of "can emit
+ // the vtable" and "can emit the VTT". For a base subobject, this means we
+ // need to be able to emit non-virtual base vtables.
+ if (RD->getNumVBases()) {
+ for (const auto &B : RD->bases()) {
+ auto *BRD = B.getType()->getAsCXXRecordDecl();
+ assert(BRD && "no class for base specifier");
+ if (B.isVirtual() || !BRD->isDynamicClass())
+ continue;
+ if (!canSpeculativelyEmitVTableAsBaseClass(BRD))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
+ if (!canSpeculativelyEmitVTableAsBaseClass(RD))
+ return false;
+
+ // For a complete-object vtable (or more specifically, for the VTT), we need
+ // to be able to speculatively emit the vtables of all dynamic virtual bases.
+ for (const auto &B : RD->vbases()) {
+ auto *BRD = B.getType()->getAsCXXRecordDecl();
+ assert(BRD && "no class for base specifier");
+ if (!BRD->isDynamicClass())
+ continue;
+ if (!canSpeculativelyEmitVTableAsBaseClass(BRD))
+ return false;
+ }
+
+ return true;
}
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
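Note: the base-subobject check is now recursive over non-virtual dynamic bases,
and the complete-object check additionally requires the vtables of all dynamic
virtual bases, since the VTT cannot be emitted without them. Shape of the
guarded case (declarations only, illustrative):

    struct A { virtual void f(); };
    struct B : virtual A { void f() override; };
    // Speculatively emitting B's vtable also needs A's to be emittable.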
@@ -1916,7 +1953,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
// Handle the array cookie specially in ASan.
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
(expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() ||
- CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) {
+ CGM.getCodeGenOpts().SanitizeAddressPoisonCustomArrayCookie)) {
// The store to the CookiePtr does not need to be instrumented.
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
llvm::FunctionType *FTy =
@@ -2315,11 +2352,13 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() {
FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(),
SourceLocation());
ASTContext &Ctx = getContext();
+ QualType ReturnTy = Ctx.VoidTy;
+ QualType FunctionTy = Ctx.getFunctionType(ReturnTy, llvm::None, {});
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
- &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static,
+ &Ctx.Idents.get(GlobalInitFnName), FunctionTy, nullptr, SC_Static,
false, false);
- CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn,
+ CGF.StartFunction(GlobalDecl(FD), ReturnTy, GlobalInitFn,
getTypes().arrangeNullaryFunction(), FunctionArgList(),
SourceLocation(), SourceLocation());
@@ -2418,7 +2457,7 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM),
WrapperName.str(), &CGM.getModule());
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper);
if (VD->hasDefinition())
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
@@ -2472,8 +2511,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CharUnits GuardAlign = CharUnits::One();
Guard->setAlignment(GuardAlign.getQuantity());
- CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits,
- Address(Guard, GuardAlign));
+ CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(
+ InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign));
// On Darwin platforms, use CXX_FAST_TLS calling convention.
if (CGM.getTarget().getTriple().isOSDarwin()) {
InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
@@ -2525,7 +2564,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
llvm::GlobalVariable::ExternalWeakLinkage,
InitFnName.str(), &CGM.getModule());
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI,
+ cast<llvm::Function>(Init));
}
if (Init) {
@@ -2812,6 +2852,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
@@ -3088,7 +3131,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
}
assert(isa<ObjCInterfaceType>(Ty));
- // Fall through.
+ LLVM_FALLTHROUGH;
case Type::ObjCInterface:
if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) {
@@ -4025,7 +4068,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
switch (CatchType.getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
CastExn = CGF.EmitARCRetainNonBlock(CastExn);
- // fallthrough
+ LLVM_FALLTHROUGH;
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp
index 48dea7d54b..013ca15e23 100644
--- a/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/lib/CodeGen/MacroPPCallbacks.cpp
@@ -14,7 +14,8 @@
#include "MacroPPCallbacks.h"
#include "CGDebugInfo.h"
#include "clang/CodeGen/ModuleBuilder.h"
-#include "clang/Parse/Parser.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
using namespace clang;
@@ -88,16 +89,6 @@ SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) {
return SourceLocation();
}
-static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) {
- StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
- return Filename.equals("<built-in>");
-}
-
-static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) {
- StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
- return Filename.equals("<command line>");
-}
-
void MacroPPCallbacks::updateStatusToNextScope() {
switch (Status) {
case NoScope:
@@ -127,7 +118,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) {
updateStatusToNextScope();
return;
case BuiltinScope:
- if (isCommandLineFile(PP.getSourceManager(), Loc))
+ if (PP.getSourceManager().isWrittenInCommandLineFile(Loc))
return;
updateStatusToNextScope();
LLVM_FALLTHROUGH;
@@ -147,7 +138,7 @@ void MacroPPCallbacks::FileExited(SourceLocation Loc) {
default:
llvm_unreachable("Do not expect to exit a file from current scope");
case BuiltinScope:
- if (!isBuiltinFile(PP.getSourceManager(), Loc))
+ if (!PP.getSourceManager().isWrittenInBuiltinFile(Loc))
// Skip next scope and change status to MainFileScope.
Status = MainFileScope;
return;
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 3b894fcdc1..0ad19ad5ab 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1552,9 +1552,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0)
Type = Dtor_Base;
- CGCallee Callee = CGCallee::forDirect(
- CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
- DD);
+ CGCallee Callee =
+ CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
+ GlobalDecl(DD, Type));
if (DD->isVirtual()) {
assert(Type != CXXDtorType::Dtor_Deleting &&
@@ -1872,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
}
- CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
@@ -3956,7 +3956,8 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
// Call the destructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
+ CGCallee Callee =
+ CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete));
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 511cf75d6a..1264893ec1 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -17,9 +17,9 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index c164cec5d9..6f00c836f9 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -14,14 +14,13 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
-#include "clang/Serialization/ASTWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -156,6 +155,8 @@ public:
LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule;
CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
+ CodeGenOpts.DebugPrefixMap =
+ CI.getInvocation().getCodeGenOpts().DebugPrefixMap;
}
~PCHContainerGenerator() override = default;
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index b411a501ea..75a0fa5ce1 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -415,6 +415,40 @@ static bool areBytesInSameUnit(CharUnits first, CharUnits second,
== getOffsetAtStartOfUnit(second, chunkSize);
}
+static bool isMergeableEntryType(llvm::Type *type) {
+ // Opaquely-typed memory is always mergeable.
+ if (type == nullptr) return true;
+
+ // Pointers and integers are always mergeable. In theory we should not
+ // merge pointers, but (1) it doesn't currently matter in practice because
+ // the chunk size is never greater than the size of a pointer and (2)
+ // Swift IRGen uses integer types for a lot of things that are "really"
+ // just storing pointers (like Optional<SomePointer>). If we ever have a
+ // target that would otherwise combine pointers, we should put some effort
+ // into fixing those cases in Swift IRGen and then call out pointer types
+ // here.
+
+ // Floating-point and vector types should never be merged.
+ // Most such types are too large and highly-aligned to ever trigger merging
+ // in practice, but it's important for the rule to cover at least 'half'
+ // and 'float', as well as things like small vectors of 'i1' or 'i8'.
+ return (!type->isFloatingPointTy() && !type->isVectorTy());
+}
+
+bool SwiftAggLowering::shouldMergeEntries(const StorageEntry &first,
+ const StorageEntry &second,
+ CharUnits chunkSize) {
+ // Only merge entries that overlap the same chunk. We test this first,
+ // even though it is a bit more expensive, because it is the condition
+ // that most often rules out merging.
+ if (!areBytesInSameUnit(first.End - CharUnits::One(), second.Begin,
+ chunkSize))
+ return false;
+
+ return (isMergeableEntryType(first.Type) &&
+ isMergeableEntryType(second.Type));
+}
+
void SwiftAggLowering::finish() {
if (Entries.empty()) {
Finished = true;
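Note: merging is now gated on entry type, not only on chunk overlap, so small
floats and vectors keep distinct entries. An aggregate whose swiftcall lowering
changes under this rule (assuming an 8-byte chunk size):

    struct P { float a; float b; };  // stays (float, float), not an opaque i64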
@@ -425,12 +459,12 @@ void SwiftAggLowering::finish() {
// which is generally the size of a pointer.
const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM);
- // First pass: if two entries share a chunk, make them both opaque
+ // First pass: if two entries should be merged, make them both opaque
// and stretch one to meet the next.
+ // Also, remember if there are any opaque entries.
bool hasOpaqueEntries = (Entries[0].Type == nullptr);
for (size_t i = 1, e = Entries.size(); i != e; ++i) {
- if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(),
- Entries[i].Begin, chunkSize)) {
+ if (shouldMergeEntries(Entries[i - 1], Entries[i], chunkSize)) {
Entries[i - 1].Type = nullptr;
Entries[i].Type = nullptr;
Entries[i - 1].End = Entries[i].Begin;
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index ada42fd2ae..ae080f5bbd 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -19,9 +19,9 @@
#include "CGValue.h"
#include "CodeGenFunction.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -2337,7 +2337,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
bool Quote = (Lib.find(" ") != StringRef::npos);
std::string ArgStr = Quote ? "\"" : "";
ArgStr += Lib;
- if (!Lib.endswith_lower(".lib"))
+ if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a"))
ArgStr += ".lib";
ArgStr += Quote ? "\"" : "";
return ArgStr;
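The behavioral change here is small: archive names ending in ".a" (common for MinGW import libraries) no longer get a spurious ".lib" appended. A standalone model of the resulting behavior (the real helper operates on llvm::StringRef):

#include <cassert>
#include <cctype>
#include <string>

static bool endsWithLower(const std::string &S, const std::string &Suf) {
  if (S.size() < Suf.size()) return false;
  std::string Tail = S.substr(S.size() - Suf.size());
  for (char &C : Tail) C = (char)std::tolower((unsigned char)C);
  return Tail == Suf;
}

// Model of qualifyWindowsLibrary after this change.
static std::string qualify(const std::string &Lib) {
  bool Quote = Lib.find(' ') != std::string::npos;
  std::string Arg = Quote ? "\"" : "";
  Arg += Lib;
  if (!endsWithLower(Lib, ".lib") && !endsWithLower(Lib, ".a"))
    Arg += ".lib";
  Arg += Quote ? "\"" : "";
  return Arg;
}

int main() {
  assert(qualify("msvcrt") == "msvcrt.lib");
  assert(qualify("libfoo.a") == "libfoo.a");   // new: .a is preserved
  assert(qualify("my lib") == "\"my lib.lib\"");
}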
@@ -3944,18 +3944,39 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
- // Bool type is always extended to the ABI, other builtin types are not
- // extended.
- const BuiltinType *BT = Ty->getAs<BuiltinType>();
- if (BT && BT->getKind() == BuiltinType::Bool)
- return ABIArgInfo::getExtend(Ty);
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ switch (BT->getKind()) {
+ case BuiltinType::Bool:
+ // Bool is always extended for the ABI; other builtin types are not
+ // extended.
+ return ABIArgInfo::getExtend(Ty);
- // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It
- // passes them indirectly through memory.
- if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) {
- const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
- if (LDF == &llvm::APFloat::x87DoubleExtended())
- return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ case BuiltinType::LongDouble:
+ // Mingw64 GCC uses the old 80-bit extended-precision x87 format for
+ // long double and passes such values indirectly through memory.
+ if (IsMingw64) {
+ const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
+ if (LDF == &llvm::APFloat::x87DoubleExtended())
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ }
+ break;
+
+ case BuiltinType::Int128:
+ case BuiltinType::UInt128:
+ // If this is a parameter, the normal ABI rule is that arguments larger
+ // than 8 bytes are passed indirectly; GCC follows that rule, and so do
+ // we, even though it isn't particularly efficient.
+ if (!IsReturnType)
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+
+ // Mingw64 GCC returns i128 in XMM0; coerce the return value to v2i64
+ // so that Clang matches it for compatibility.
+ return ABIArgInfo::getDirect(
+ llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2));
+
+ default:
+ break;
+ }
}
return ABIArgInfo::getDirect();
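To make the new Int128 handling concrete, here is what it implies for a win64 function (the lowering described in the comments is inferred from the classification above, not verified compiler output):

// Targeting e.g. x86_64-pc-windows-gnu:
__int128 mul(__int128 a, __int128 b) {
  // Parameters: larger than 8 bytes, so each is passed indirectly
  // (a pointer in a register, ByVal=false), matching GCC.
  // Return: coerced to <2 x i64> so the value comes back in XMM0,
  // matching Mingw64 GCC.
  return a * b;
}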
@@ -4978,13 +4999,21 @@ public:
llvm::Function *Fn = cast<llvm::Function>(GV);
auto Kind = CGM.getCodeGenOpts().getSignReturnAddress();
- if (Kind == CodeGenOptions::SignReturnAddressScope::None)
- return;
+ if (Kind != CodeGenOptions::SignReturnAddressScope::None) {
+ Fn->addFnAttr("sign-return-address",
+ Kind == CodeGenOptions::SignReturnAddressScope::All
+ ? "all"
+ : "non-leaf");
- Fn->addFnAttr("sign-return-address",
- Kind == CodeGenOptions::SignReturnAddressScope::All
- ? "all"
- : "non-leaf");
+ auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
+ Fn->addFnAttr("sign-return-address-key",
+ Key == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ ? "a_key"
+ : "b_key");
+ }
+
+ if (CGM.getCodeGenOpts().BranchTargetEnforcement)
+ Fn->addFnAttr("branch-target-enforcement");
}
};
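A standalone sketch of the attribute selection this hunk now performs (the enum shapes and attribute spellings are taken from the hunk itself; the free function is illustrative, not the real hook):

#include <string>
#include <utility>
#include <vector>

// Illustrative model: which function attributes the AArch64 hook adds.
enum class SignScope { None, NonLeaf, All };
enum class SignKey { AKey, BKey };

std::vector<std::pair<std::string, std::string>>
returnAddressSigningAttrs(SignScope Scope, SignKey Key, bool BTI) {
  std::vector<std::pair<std::string, std::string>> Attrs;
  if (Scope != SignScope::None) {
    Attrs.emplace_back("sign-return-address",
                       Scope == SignScope::All ? "all" : "non-leaf");
    Attrs.emplace_back("sign-return-address-key",
                       Key == SignKey::AKey ? "a_key" : "b_key");
  }
  if (BTI) // BranchTargetEnforcement
    Attrs.emplace_back("branch-target-enforcement", "");
  return Attrs;
}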
@@ -4993,6 +5022,9 @@ public:
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
: AArch64TargetCodeGenInfo(CGT, K) {}
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override;
+
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
@@ -5003,6 +5035,14 @@ public:
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
+
+void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
+ return;
+ addStackProbeTargetAttributes(D, GV, CGM);
+}
}
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
@@ -8209,6 +8249,137 @@ SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
return false;
}
+// ARC ABI implementation.
+namespace {
+
+class ARCABIInfo : public DefaultABIInfo {
+public:
+ using DefaultABIInfo::DefaultABIInfo;
+
+private:
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
+ void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const {
+ if (!State.FreeRegs)
+ return;
+ if (Info.isIndirect() && Info.getInReg())
+ State.FreeRegs--;
+ else if (Info.isDirect() && Info.getInReg()) {
+ unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32;
+ if (sz < State.FreeRegs)
+ State.FreeRegs -= sz;
+ else
+ State.FreeRegs = 0;
+ }
+ }
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ CCState State(FI.getCallingConvention());
+ // ARC uses 8 registers to pass arguments.
+ State.FreeRegs = 8;
+
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ updateState(FI.getReturnInfo(), FI.getReturnType(), State);
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type, State.FreeRegs);
+ updateState(I.info, I.type, State);
+ }
+ }
+
+ ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const;
+ ABIArgInfo getIndirectByValue(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+};
+
+class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ ARCTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new ARCABIInfo(CGT)) {}
+};
+
+ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const {
+ return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) :
+ getNaturalAlignIndirect(Ty, false);
+}
+
+ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const {
+ // Compute the byval alignment.
+ const unsigned MinABIStackAlignInBytes = 4;
+ unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+ return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
+ TypeAlign > MinABIStackAlignInBytes);
+}
+
+Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), true);
+}
+
+ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
+ uint8_t FreeRegs) const {
+ // Handle the generic C++ ABI.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect)
+ return getIndirectByRef(Ty, FreeRegs > 0);
+
+ if (RAA == CGCXXABI::RAA_DirectInMemory)
+ return getIndirectByValue(Ty);
+ }
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Structures with flexible arrays are always indirect.
+ if (RT && RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectByValue(Ty);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ llvm::LLVMContext &LLVMContext = getVMContext();
+
+ llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
+ SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
+ llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
+
+ return FreeRegs >= SizeInRegs ?
+ ABIArgInfo::getDirectInReg(Result) :
+ ABIArgInfo::getDirect(Result, 0, nullptr, false);
+ }
+
+ return Ty->isPromotableIntegerType() ?
+ (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) :
+ ABIArgInfo::getExtend(Ty)) :
+ (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() :
+ ABIArgInfo::getDirect());
+}
+
+ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ return ABIArgInfo::getDirectInReg();
+
+ // Return values needing more than 4 registers are returned indirectly.
+ auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
+ if (RetSize > 4)
+ return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
+
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+} // End anonymous namespace.
//===----------------------------------------------------------------------===//
// XCore ABI Implementation
@@ -9230,6 +9401,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
+ case llvm::Triple::arc:
+ return SetCGInfo(new ARCTargetCodeGenInfo(Types));
case llvm::Triple::spir:
case llvm::Triple::spir64:
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
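The ARC calling convention added above is easiest to see through its register accounting: eight 32-bit argument registers, with a direct in-reg value consuming one register per 32-bit word and an indirect in-reg value consuming one for the pointer. A minimal sketch of that arithmetic (simplified from updateState; it assumes every value shown is classified in-reg):

#include <cassert>

// Model of the ARC accounting: 8 registers, 32 bits each. A direct in-reg
// argument of N bits consumes ceil(N/32) registers; if it does not fit,
// the register file is considered exhausted.
struct State { unsigned FreeRegs = 8; };

void takeDirect(State &S, unsigned Bits) {
  unsigned Words = (Bits + 31) / 32;
  S.FreeRegs = Words < S.FreeRegs ? S.FreeRegs - Words : 0;
}

int main() {
  State S;
  takeDirect(S, 64);  // a 64-bit argument: 2 registers
  takeDirect(S, 32);  // an int: 1 register
  assert(S.FreeRegs == 5);
  takeDirect(S, 512); // a 16-word struct exhausts the remaining registers
  assert(S.FreeRegs == 0);
}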
diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp
index 2f8a591a3e..859cdd4282 100644
--- a/lib/CodeGen/VarBypassDetector.cpp
+++ b/lib/CodeGen/VarBypassDetector.cpp
@@ -78,7 +78,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
return false;
++StmtsToSkip;
}
- // Fall through
+ LLVM_FALLTHROUGH;
case Stmt::GotoStmtClass:
FromScopes.push_back({S, ParentScope});
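For reference, LLVM_FALLTHROUGH comes from llvm/Support/Compiler.h and expands to the standard [[fallthrough]] attribute when the compiler supports it, silencing -Wimplicit-fallthrough while documenting that the fall-through is deliberate. A minimal sketch of the pattern:

// [[fallthrough]] is the C++17 spelling LLVM_FALLTHROUGH expands to
// when available.
int classify(int s) {
  switch (s) {
  case 0:
    ++s;
    [[fallthrough]]; // deliberate: case 0 also runs the case 1 logic
  case 1:
    return s;
  default:
    return -1;
  }
}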