diff options
Diffstat (limited to 'lib/CodeGen')
59 files changed, 4740 insertions, 1860 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 45be10f005..52fc08de9b 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/BackendUtil.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetOptions.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -37,6 +37,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" @@ -235,11 +236,12 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + bool UseOdrIndicator = CGOpts.SanitizeAddressUseOdrIndicator; bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, - UseGlobalsGC)); + UseGlobalsGC, UseOdrIndicator)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -247,7 +249,8 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, PM.add(createAddressSanitizerFunctionPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); PM.add(createAddressSanitizerModulePass( - /*CompileKernel*/ true, /*Recover*/ true)); + /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true, + /*UseOdrIndicator*/ 
false)); } static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -428,7 +431,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, switch (LangOpts.getDefaultFPContractMode()) { case LangOptions::FPC_Off: // Preserve any contraction performed by the front-end. (Strict performs - // splitting of the muladd instrinsic in the backend.) + // splitting of the muladd intrinsic in the backend.) Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; break; case LangOptions::FPC_On: @@ -468,7 +471,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; - if (CodeGenOpts.EnableSplitDwarf) + if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; @@ -503,6 +506,8 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; + Options.Filter = CodeGenOpts.ProfileFilterFiles; + Options.Exclude = CodeGenOpts.ProfileExcludeFiles; Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; return Options; } @@ -832,7 +837,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; default: - if (!CodeGenOpts.SplitDwarfFile.empty()) { + if (!CodeGenOpts.SplitDwarfFile.empty() && + (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) { DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); if (!DwoOS) return; @@ -930,18 +936,21 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() ? 
DefaultProfileGenName : CodeGenOpts.InstrProfileOutput, - "", "", true, CodeGenOpts.DebugInfoForProfiling); + "", "", "", true, + CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) // -fprofile-use. - PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false, + PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, false, CodeGenOpts.DebugInfoForProfiling); else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use - PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false, + PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, + CodeGenOpts.ProfileRemappingFile, false, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", false, true); + PGOOpt = PGOOptions("", "", "", "", false, true); PassBuilder PB(TM.get(), PGOOpt); @@ -1130,6 +1139,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, const LangOptions &LOpts, std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile, + std::string ProfileRemapping, BackendAction Action) { StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; @@ -1147,15 +1157,14 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, continue; auto GUID = GlobalList.first; - assert(GlobalList.second.SummaryList.size() == 1 && - "Expected individual combined index to have one summary per GUID"); - auto &Summary = GlobalList.second.SummaryList[0]; - // Skip the summaries for the importing module. These are included to - // e.g. record required linkage changes. - if (Summary->modulePath() == M->getModuleIdentifier()) - continue; - // Add an entry to provoke importing by thinBackend. - ImportList[Summary->modulePath()].insert(GUID); + for (auto &Summary : GlobalList.second.SummaryList) { + // Skip the summaries for the importing module. 
These are included to + // e.g. record required linkage changes. + if (Summary->modulePath() == M->getModuleIdentifier()) + continue; + // Add an entry to provoke importing by thinBackend. + ImportList[Summary->modulePath()].insert(GUID); + } } std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports; @@ -1202,6 +1211,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.CGOptLevel = getCGOptLevel(CGOpts); initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; @@ -1268,7 +1278,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, if (!CombinedIndex->skipModuleByDistributedBackend()) { runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, LOpts, std::move(OS), CGOpts.SampleProfileFile, - Action); + CGOpts.ProfileRemappingFile, Action); return; } // Distributed indexing detected that nothing from the module is needed diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 9379c835d3..24056a449d 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -18,7 +18,7 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index dae148cbd1..6631bfb0df 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -175,7 +175,7 @@ static std::string getBlockDescriptorName(const CGBlockInfo &BlockInfo, /// unsigned long reserved; /// unsigned long size; // size of Block_literal metadata in bytes. 
/// void *copy_func_helper_decl; // optional copy helper. -/// void *destroy_func_decl; // optioanl destructor helper. +/// void *destroy_func_decl; // optional destructor helper. /// void *block_method_encoding_address; // @encode for block literal signature. /// void *block_layout_info; // encoding of captured block variables. /// }; @@ -197,7 +197,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, std::string descName; // If an equivalent block descriptor global variable exists, return it. - if (C.getLangOpts().ObjC1 && + if (C.getLangOpts().ObjC && CGM.getLangOpts().getGC() == LangOptions::NonGC) { descName = getBlockDescriptorName(blockInfo, CGM); if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName)) @@ -243,7 +243,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p)); // GC layout. - if (C.getLangOpts().ObjC1) { + if (C.getLangOpts().ObjC) { if (CGM.getLangOpts().getGC() != LangOptions::NonGC) elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo)); else @@ -446,12 +446,25 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { - // The header is basically 'struct { int; int; + // The header is basically 'struct { int; int; generic void *; // custom_fields; }'. Assert that struct is packed. 
+ auto GenericAS = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto GenPtrAlign = + CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); + auto GenPtrSize = + CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); + assert(CGM.getIntSize() <= GenPtrSize); + assert(CGM.getIntAlign() <= GenPtrAlign); + assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ - unsigned Offset = 2 * CGM.getIntSize().getQuantity(); - unsigned BlockAlign = CGM.getIntAlign().getQuantity(); + elementTypes.push_back( + CGM.getOpenCLRuntime() + .getGenericVoidPointerType()); /* invoke function */ + unsigned Offset = + 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); + unsigned BlockAlign = GenPtrAlign.getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { @@ -493,7 +506,11 @@ static QualType getCaptureFieldType(const CodeGenFunction &CGF, return CGF.BlockInfo->getCapture(VD).fieldType(); if (auto *FD = CGF.LambdaCaptureFields.lookup(VD)) return FD->getType(); - return VD->getType(); + // If the captured variable is a non-escaping __block variable, the field + // type is the reference type. If the variable is a __block variable that + // already has a reference type, the field type is the variable's type. + return VD->isNonEscapingByref() ? + CGF.getContext().getLValueReferenceType(VD->getType()) : VD->getType(); } /// Compute the layout of the given block. 
Attempts to lay the block @@ -516,7 +533,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, info.CanBeGlobal = true; return; } - else if (C.getLangOpts().ObjC1 && + else if (C.getLangOpts().ObjC && CGM.getLangOpts().getGC() == LangOptions::NonGC) info.HasCapturedVariableLayout = true; @@ -549,7 +566,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, for (const auto &CI : block->captures()) { const VarDecl *variable = CI.getVariable(); - if (CI.isByRef()) { + if (CI.isEscapingByref()) { // We have to copy/dispose of the __block reference. info.NeedsCopyDispose = true; @@ -842,10 +859,12 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { /// Enter a full-expression with a non-trivial number of objects to /// clean up. This is in this file because, at the moment, the only /// kind of cleanup object is a BlockDecl*. -void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) { - assert(E->getNumObjects() != 0); - for (const ExprWithCleanups::CleanupObject &C : E->getObjects()) - enterBlockScope(*this, C); +void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) { + if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) { + assert(EWC->getNumObjects() != 0); + for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects()) + enterBlockScope(*this, C); + } } /// Find the layout for the given block in a linked list and remove it. @@ -902,12 +921,20 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; + auto GenVoidPtrTy = + IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; + LangAS GenVoidPtrAddr = IsOpenCL ? 
LangAS::opencl_generic : LangAS::Default; + auto GenVoidPtrSize = CharUnits::fromQuantity( + CGM.getTarget().getPointerWidth( + CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / + 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); + auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) @@ -983,12 +1010,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } - if (!IsOpenCL) { - addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), - getPointerSize(), "block.invoke"); + addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); + if (!IsOpenCL) addHeaderField(descriptor, getPointerSize(), "block.descriptor"); - } else if (auto *Helper = - CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, @@ -1032,7 +1058,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); - } else if (CI.isByRef()) { + } else if (CI.isEscapingByref()) { if (BlockInfo && CI.isNested()) { // We need to use the capture from the enclosing block. const CGBlockInfo::Capture &enclosingCapture = @@ -1060,7 +1086,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // the block field. 
There's no need to chase the forwarding // pointer at this point, since we're building something that will // live a shorter life than the stack byref anyway. - if (CI.isByRef()) { + if (CI.isEscapingByref()) { // Get a void* that points to the byref struct. llvm::Value *byrefPointer; if (CI.isNested()) @@ -1192,23 +1218,38 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { } llvm::Type *CodeGenModule::getGenericBlockLiteralType() { - assert(!getLangOpts().OpenCL && "OpenCL does not need this"); - if (GenericBlockLiteralType) return GenericBlockLiteralType; llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); - // struct __block_literal_generic { - // void *__isa; - // int __flags; - // int __reserved; - // void (*__invoke)(void *); - // struct __block_descriptor *__descriptor; - // }; - GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, - IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + if (getLangOpts().OpenCL) { + // struct __opencl_block_literal_generic { + // int __size; + // int __align; + // __generic void *__invoke; + // /* custom fields */ + // }; + SmallVector<llvm::Type *, 8> StructFields( + {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()}); + if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) + StructFields.push_back(I); + } + GenericBlockLiteralType = llvm::StructType::create( + StructFields, "struct.__opencl_block_literal_generic"); + } else { + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // }; + GenericBlockLiteralType = + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + } return GenericBlockLiteralType; } @@ -1219,21 +1260,27 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, 
E->getCallee()->getType()->getAs<BlockPointerType>(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - llvm::Value *FuncPtr = nullptr; - if (!CGM.getLangOpts().OpenCL) { - // Get a pointer to the generic block literal. - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0); + // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + llvm::Type *BlockLiteralTy = + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - // Get the function pointer from the literal. - FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - } + // Bitcast the callee to a block literal. + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + + // Get the function pointer from the literal. + llvm::Value *FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, + CGM.getLangOpts().OpenCL ? 2 : 3); // Add the block literal. CallArgList Args; @@ -1256,11 +1303,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); // Load the function. 
- llvm::Value *Func; - if (CGM.getLangOpts().OpenCL) - Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); - else - Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1279,8 +1322,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, return EmitCall(FnInfo, Callee, ReturnValue, Args); } -Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, - bool isByRef) { +Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); @@ -1291,7 +1333,7 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); - if (isByRef) { + if (variable->isEscapingByref()) { // addr should be a void** right now. Load, then cast the result // to byref*. 
@@ -1305,6 +1347,10 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, variable->getName()); } + assert((!variable->isNonEscapingByref() || + capture.fieldType()->isReferenceType()) && + "the capture field of a non-escaping variable should have a " + "reference type"); if (capture.fieldType()->isReferenceType()) addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType())); @@ -1373,14 +1419,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, // Reserved fields.addInt(CGM.IntTy, 0); - - // Function - fields.add(blockFn); } else { fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); } + // Function + fields.add(blockFn); + if (!IsOpenCL) { // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); @@ -1656,7 +1702,7 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); } BlockFieldFlags Flags; - if (CI.isByRef()) { + if (CI.isEscapingByref()) { Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; @@ -1773,8 +1819,6 @@ static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E, CodeGenModule &CGM) { std::string Str; ASTContext &Ctx = CGM.getContext(); - std::unique_ptr<ItaniumMangleContext> MC( - ItaniumMangleContext::create(Ctx, Ctx.getDiagnostics())); const BlockDecl::Capture &CI = *E.CI; QualType CaptureTy = CI.getVariable()->getType(); @@ -1800,7 +1844,7 @@ static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E, Str += "c"; SmallString<256> TyStr; llvm::raw_svector_ostream Out(TyStr); - MC->mangleTypeName(CaptureTy, Out); + CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out); Str += llvm::to_string(TyStr.size()) + TyStr.c_str(); break; } @@ -1939,7 +1983,7 @@ static void setBlockHelperAttributesVisibility(bool CapturesNonExternalType, } else { 
Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); Fn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); } } @@ -1964,16 +2008,16 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); + QualType ReturnTy = C.VoidTy; + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. 
@@ -1983,20 +2027,20 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); - IdentifierInfo *II - = &CGM.getContext().Idents.get(FuncName); + IdentifierInfo *II = &C.Idents.get(FuncName); + + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(C.VoidPtrTy); + ArgTys.push_back(C.VoidPtrTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); @@ -2102,7 +2146,7 @@ getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI, static std::pair<BlockCaptureEntityKind, BlockFieldFlags> computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { - if (CI.isByRef()) { + if (CI.isEscapingByref()) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; @@ -2157,13 +2201,14 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); + QualType ReturnTy = C.VoidTy; + FunctionArgList args; - ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + 
CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. @@ -2173,18 +2218,19 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); - IdentifierInfo *II - = &CGM.getContext().Idents.get(FuncName); + IdentifierInfo *II = &C.Idents.get(FuncName); - FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, false); + SmallVector<QualType, 1> ArgTys; + ArgTys.push_back(C.VoidPtrTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); markAsIgnoreThreadCheckingAtRuntime(Fn); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; @@ -2403,19 +2449,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, BlockByrefHelpers &generator) { ASTContext &Context = CGF.getContext(); - QualType R = Context.VoidTy; + QualType ReturnTy = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Dst); - ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Src); const CGFunctionInfo &FI = - CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); + 
CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -2428,16 +2472,18 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, IdentifierInfo *II = &Context.Idents.get("__Block_byref_object_copy_"); - FunctionDecl *FD = FunctionDecl::Create(Context, - Context.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, R, nullptr, - SC_Static, - false, false); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(Context.VoidPtrTy); + ArgTys.push_back(Context.VoidPtrTy); + QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + Context, Context.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - CGF.StartFunction(FD, R, Fn, FI, args); + CGF.StartFunction(FD, ReturnTy, Fn, FI, args); if (generator.needsCopy()) { llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); @@ -2502,12 +2548,13 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, IdentifierInfo *II = &Context.Idents.get("__Block_byref_object_dispose_"); - FunctionDecl *FD = FunctionDecl::Create(Context, - Context.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, R, nullptr, - SC_Static, - false, false); + SmallVector<QualType, 1> ArgTys; + ArgTys.push_back(Context.VoidPtrTy); + QualType FunctionTy = Context.getFunctionType(R, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + Context, Context.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); @@ -2564,6 +2611,9 @@ BlockByrefHelpers * CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, const AutoVarEmission &emission) { const VarDecl &var = *emission.Variable; + assert(var.isEscapingByref() 
&& + "only escaping __block variables need byref helpers"); + QualType type = var.getType(); auto &byrefInfo = getBlockByrefInfo(&var); diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index df71dbb4b4..eea9207a34 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -21,10 +21,11 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/OSLog.h" +#include "clang/AST/OSLog.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, return V; } -/// Utility to insert an atomic instruction based on Instrinsic::ID +/// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. -static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, - llvm::AtomicRMWInst::BinOp Kind, - const CallExpr *E) { +static Value *MakeBinaryAtomicValue( + CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); + Kind, Args[0], Args[1], Ordering); return EmitFromInt(CGF, Result, T, ValueType); } @@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF, return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); } -/// Utility to insert an atomic instruction based Instrinsic::ID and +/// Utility to insert an atomic 
instruction based Intrinsic::ID and /// the expression node, where the return value is the result of the /// operation. static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, @@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, /// cmpxchg result or the old value. /// /// @returns result of cmpxchg, according to ReturnBool +/// +/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics +/// invoke the function EmitAtomicCmpXchgForMSIntrin. static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); @@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +/// This function should be invoked to emit atomic cmpxchg for Microsoft's +/// _InterlockedCompareExchange* intrinsics which have the following signature: +/// T _InterlockedCompareExchange(T volatile *Destination, +/// T Exchange, +/// T Comparand); +/// +/// Whereas the llvm 'cmpxchg' instruction has the following syntax: +/// cmpxchg *Destination, Comparand, Exchange. +/// So we need to swap Comparand and Exchange when invoking +/// CreateAtomicCmpXchg. That is the reason we could not use the above utility +/// function MakeAtomicCmpXchgValue since it expects the arguments to be +/// already swapped. 
+ +static +Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + assert(CGF.getContext().hasSameUnqualifiedType( + E->getType(), E->getArg(0)->getType()->getPointeeType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(1)->getType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(2)->getType())); + + auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); + auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); + auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); + + // For Release ordering, the failure ordering should be Monotonic. + auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? + AtomicOrdering::Monotonic : + SuccessOrdering; + + auto *Result = CGF.Builder.CreateAtomicCmpXchg( + Destination, Comparand, Exchange, + SuccessOrdering, FailureOrdering); + Result->setVolatile(true); + return CGF.Builder.CreateExtractValue(Result, 0); +} + +static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); +} + +static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Sub, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateSub(Result, 
ConstantInt::get(IntTy, 1)); +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { - CGCallee callee = CGCallee::forDirect(calleeValue, FD); + CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); } @@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } namespace { -/// A struct to generically desribe a bit test intrinsic. +/// A struct to generically describe a bit test intrinsic. struct BitTest { enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; enum InterlockingKind : uint8_t { @@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, } else { Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), - llvm::ConstantInt::get(CGF.Int32Ty, 0)); + if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); + } else + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. 
@@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _InterlockedExchangeAdd_acq, + _InterlockedExchangeAdd_rel, + _InterlockedExchangeAdd_nf, + _InterlockedExchange_acq, + _InterlockedExchange_rel, + _InterlockedExchange_nf, + _InterlockedCompareExchange_acq, + _InterlockedCompareExchange_rel, + _InterlockedCompareExchange_nf, + _InterlockedOr_acq, + _InterlockedOr_rel, + _InterlockedOr_nf, + _InterlockedXor_acq, + _InterlockedXor_rel, + _InterlockedXor_nf, + _InterlockedAnd_acq, + _InterlockedAnd_rel, + _InterlockedAnd_nf, + _InterlockedIncrement_acq, + _InterlockedIncrement_rel, + _InterlockedIncrement_nf, + _InterlockedDecrement_acq, + _InterlockedDecrement_rel, + _InterlockedDecrement_nf, __fastfail, }; @@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); - - case MSVCIntrin::_InterlockedDecrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Sub, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); - } - case MSVCIntrin::_InterlockedIncrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); - } + case MSVCIntrin::_InterlockedExchangeAdd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchangeAdd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Release); + case 
MSVCIntrin::_InterlockedExchangeAdd_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedExchange_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchange_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedExchange_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedCompareExchange_acq: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedCompareExchange_rel: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedCompareExchange_nf: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedOr_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedOr_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedOr_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedXor_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedXor_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedXor_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedAnd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedAnd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedAnd_nf: + return 
MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedIncrement_acq: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedIncrement_rel: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedIncrement_nf: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedDecrement_acq: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedDecrement_rel: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedDecrement_nf: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); + + case MSVCIntrin::_InterlockedDecrement: + return EmitAtomicDecrementValue(*this, E); + case MSVCIntrin::_InterlockedIncrement: + return EmitAtomicIncrementValue(*this, E); case MSVCIntrin::__fastfail: { // Request immediate process termination from the kernel. 
The instruction @@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( if (llvm::Function *F = CGM.getModule().getFunction(Name)) return F; + llvm::SmallVector<QualType, 4> ArgTys; llvm::SmallVector<ImplicitParamDecl, 4> Params; Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, ImplicitParamDecl::Other); + ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { char Size = Layout.Items[I].getSizeByte(); if (!Size) continue; + QualType ArgTy = getOSLogArgType(Ctx, Size); Params.emplace_back( Ctx, nullptr, SourceLocation(), - &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), - getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); + &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, + ImplicitParamDecl::Other); + ArgTys.emplace_back(ArgTy); } FunctionArgList Args; for (auto &P : Params) Args.push_back(&P); + QualType ReturnTy = Ctx.VoidTy; + QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); + // The helper function has linkonce_odr linkage to enable the linker to merge // identical functions. To ensure the merging always happens, 'noinline' is // attached to the function when compiling with -Oz. const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create( FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); // Attach 'noinline' at -Oz. 
@@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( IdentifierInfo *II = &Ctx.Idents.get(Name); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + FuncionTy, nullptr, SC_PrivateExtern, false, false); - StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); + StartFunction(FD, ReturnTy, Fn, FI, Args); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); @@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { llvm::Value *ArgVal; - if (const Expr *TheExpr = Item.getExpr()) { + if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { + uint64_t Val = 0; + for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) + Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; + ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); + } else if (const Expr *TheExpr = Item.getExpr()) { ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); // Check if this is a retainable type. @@ -1252,6 +1410,42 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, return Res; } +/* Recursive worker for TypeRequiresBuiltinLaunder. Array types are first reduced to their base element type. Returns false when the type is not a C++ record, or when the record is already present in Seen (which also stops recursion through types that are currently being examined). Returns true when the record is a dynamic class or when any of its fields, checked recursively, requires laundering. NOTE(review): only Record->fields() is walked here, so fields declared in base classes are not visited by this loop - confirm whether that is intended. */ +static bool +TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, + llvm::SmallPtrSetImpl<const Decl *> &Seen) { + if (const auto *Arr = Ctx.getAsArrayType(Ty)) + Ty = Ctx.getBaseElementType(Arr); + + const auto *Record = Ty->getAsCXXRecordDecl(); + if (!Record) + return false; + + // We've already checked this type, or are in the process of checking it. 
+ if (!Seen.insert(Record).second) + return false; + + assert(Record->hasDefinition() && + "Incomplete types should already be diagnosed"); + + if (Record->isDynamicClass()) + return true; + + for (FieldDecl *F : Record->fields()) { + if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) + return true; + } + return false; +} + +/// Determine if the specified type requires laundering by checking if it is a +/// dynamic class type or contains a subobject which is a dynamic class type. +static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { + /* When the StrictVTablePointers codegen option is off, this never reports that laundering is required. */ + if (!CGM.getCodeGenOpts().StrictVTablePointers) + return false; + /* Fresh visited set for this query; the recursive worker uses it to avoid re-checking a record. */ + llvm::SmallPtrSet<const Decl *, 16> Seen; + return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen); +} + RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { llvm::Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); @@ -1267,9 +1461,10 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } -RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E, +RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, ReturnValueSlot ReturnValue) { + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); // See if we can constant fold this builtin. If so, don't emit it at all. 
Expr::EvalResult Result; if (E->EvaluateAsRValue(Result, CGM.getContext()) && @@ -1644,6 +1839,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI__lzcnt16: + case Builtin::BI__lzcnt: + case Builtin::BI__lzcnt64: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } case Builtin::BI__popcnt16: case Builtin::BI__popcnt: case Builtin::BI__popcnt64: @@ -1662,46 +1872,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } - case Builtin::BI_rotr8: - case Builtin::BI_rotr16: - case Builtin::BI_rotr: - case Builtin::BI_lrotr: - case Builtin::BI_rotr64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); - Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *Result = Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } - case Builtin::BI_rotl8: - case Builtin::BI_rotl16: - case Builtin::BI_rotl: - case Builtin::BI_lrotl: - case Builtin::BI_rotl64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); 
- Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *Result = Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } case Builtin::BI__builtin_unpredictable: { // Always return the argument of __builtin_unpredictable. LLVM does not // handle this builtin. Metadata for this builtin should be added directly @@ -1760,14 +1930,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_rotateleft16: case Builtin::BI__builtin_rotateleft32: case Builtin::BI__builtin_rotateleft64: + case Builtin::BI_rotl8: // Microsoft variants of rotate left + case Builtin::BI_rotl16: + case Builtin::BI_rotl: + case Builtin::BI_lrotl: + case Builtin::BI_rotl64: return emitRotate(E, false); case Builtin::BI__builtin_rotateright8: case Builtin::BI__builtin_rotateright16: case Builtin::BI__builtin_rotateright32: case Builtin::BI__builtin_rotateright64: + case Builtin::BI_rotr8: // Microsoft variants of rotate right + case Builtin::BI_rotr16: + case Builtin::BI_rotr: + case Builtin::BI_lrotr: + case Builtin::BI_rotr64: return emitRotate(E, true); + case Builtin::BI__builtin_constant_p: { + llvm::Type *ResultType = ConvertType(E->getType()); + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + // At -O0, we don't perform inlining, so we don't need to delay the + // processing. + return RValue::get(ConstantInt::get(ResultType, 0)); + + const Expr *Arg = E->getArg(0); + QualType ArgType = Arg->getType(); + if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) + // We can only reason about scalar types. 
+ return RValue::get(ConstantInt::get(ResultType, 0)); + + Value *ArgValue = EmitScalarExpr(Arg); + Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + Value *Result = Builder.CreateCall(F, ArgValue); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); + return RValue::get(Result); + } case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -2032,10 +2232,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memcpy_chk: { // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2056,10 +2258,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memmove_chk: { // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
- llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2094,10 +2298,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2305,6 +2511,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_launder: { + const Expr *Arg = E->getArg(0); + QualType ArgTy = Arg->getType()->getPointeeType(); + Value *Ptr = EmitScalarExpr(Arg); + if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) + Ptr = Builder.CreateLaunderInvariantGroup(Ptr); + + return RValue::get(Ptr); + } case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_sub: case Builtin::BI__sync_fetch_and_or: @@ -2999,7 +3214,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedExchangePointer: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); - case 
Builtin::BI_InterlockedCompareExchangePointer: { + case Builtin::BI_InterlockedCompareExchangePointer: + case Builtin::BI_InterlockedCompareExchangePointer_nf: { llvm::Type *RTy; llvm::IntegerType *IntType = IntegerType::get(getLLVMContext(), @@ -3016,10 +3232,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = - Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); + auto Ordering = + BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? + AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; + + auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + Ordering, Ordering); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -3029,16 +3247,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedCompareExchange8: case Builtin::BI_InterlockedCompareExchange16: case Builtin::BI_InterlockedCompareExchange: - case Builtin::BI_InterlockedCompareExchange64: { - AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(1)), - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); - CXI->setVolatile(true); - return RValue::get(Builder.CreateExtractValue(CXI, 0)); - } + case Builtin::BI_InterlockedCompareExchange64: + return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); case Builtin::BI_InterlockedIncrement16: case Builtin::BI_InterlockedIncrement: return RValue::get( @@ -3457,7 +3667,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); // Convert to generic address space. 
EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); + ClkEvent = ClkEvent->getType()->isIntegerTy() + ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) + : Builder.CreatePointerCast(ClkEvent, EventPtrTy); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3591,13 +3803,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_os_log_format: return emitBuiltinOSLogFormat(*E); - case Builtin::BI__builtin_os_log_format_buffer_size: { - analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); - return RValue::get(ConstantInt::get(ConvertType(E->getType()), - Layout.size().getQuantity())); - } - case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); @@ -4365,6 +4570,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0), NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), @@ -5338,6 +5551,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } + case NEON::BI__builtin_neon_vfmlal_low_v: + case NEON::BI__builtin_neon_vfmlalq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); + } + case NEON::BI__builtin_neon_vfmlsl_low_v: + case NEON::BI__builtin_neon_vfmlslq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); + } + case NEON::BI__builtin_neon_vfmlal_high_v: + case NEON::BI__builtin_neon_vfmlalq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); + } + case NEON::BI__builtin_neon_vfmlsl_high_v: + case NEON::BI__builtin_neon_vfmlslq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); + } } assert(Int && "Expected valid intrinsic number"); @@ -5585,10 +5826,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic=*/false); - APSInt Value; - if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); + llvm::APSInt Value = Result.Val.getInt(); uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 
16 : 32).getZExtValue(); llvm::InlineAsm *Emit = @@ -6070,6 +6312,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case ARM::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case ARM::BI_InterlockedExchangeAdd8_acq: + case ARM::BI_InterlockedExchangeAdd16_acq: + case ARM::BI_InterlockedExchangeAdd_acq: + case ARM::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case ARM::BI_InterlockedExchangeAdd8_rel: + case ARM::BI_InterlockedExchangeAdd16_rel: + case ARM::BI_InterlockedExchangeAdd_rel: + case ARM::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case ARM::BI_InterlockedExchangeAdd8_nf: + case ARM::BI_InterlockedExchangeAdd16_nf: + case ARM::BI_InterlockedExchangeAdd_nf: + case ARM::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case ARM::BI_InterlockedExchange8_acq: + case ARM::BI_InterlockedExchange16_acq: + case ARM::BI_InterlockedExchange_acq: + case ARM::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case ARM::BI_InterlockedExchange8_rel: + case ARM::BI_InterlockedExchange16_rel: + case ARM::BI_InterlockedExchange_rel: + case ARM::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case ARM::BI_InterlockedExchange8_nf: + case ARM::BI_InterlockedExchange16_nf: + case ARM::BI_InterlockedExchange_nf: + case ARM::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case ARM::BI_InterlockedCompareExchange8_acq: + case ARM::BI_InterlockedCompareExchange16_acq: + case ARM::BI_InterlockedCompareExchange_acq: + case ARM::BI_InterlockedCompareExchange64_acq: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case ARM::BI_InterlockedCompareExchange8_rel: + case ARM::BI_InterlockedCompareExchange16_rel: + case ARM::BI_InterlockedCompareExchange_rel: + case ARM::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case ARM::BI_InterlockedCompareExchange8_nf: + case ARM::BI_InterlockedCompareExchange16_nf: + case ARM::BI_InterlockedCompareExchange_nf: + case ARM::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case ARM::BI_InterlockedOr8_acq: + case ARM::BI_InterlockedOr16_acq: + case ARM::BI_InterlockedOr_acq: + case ARM::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case ARM::BI_InterlockedOr8_rel: + case ARM::BI_InterlockedOr16_rel: + case ARM::BI_InterlockedOr_rel: + case ARM::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case ARM::BI_InterlockedOr8_nf: + case ARM::BI_InterlockedOr16_nf: + case ARM::BI_InterlockedOr_nf: + case ARM::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case ARM::BI_InterlockedXor8_acq: + case ARM::BI_InterlockedXor16_acq: + case ARM::BI_InterlockedXor_acq: + case ARM::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case ARM::BI_InterlockedXor8_rel: + case ARM::BI_InterlockedXor16_rel: + case ARM::BI_InterlockedXor_rel: + case ARM::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case ARM::BI_InterlockedXor8_nf: + case ARM::BI_InterlockedXor16_nf: + case ARM::BI_InterlockedXor_nf: + case ARM::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case ARM::BI_InterlockedAnd8_acq: + case ARM::BI_InterlockedAnd16_acq: + case ARM::BI_InterlockedAnd_acq: + case ARM::BI_InterlockedAnd64_acq: + 
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case ARM::BI_InterlockedAnd8_rel: + case ARM::BI_InterlockedAnd16_rel: + case ARM::BI_InterlockedAnd_rel: + case ARM::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case ARM::BI_InterlockedAnd8_nf: + case ARM::BI_InterlockedAnd16_nf: + case ARM::BI_InterlockedAnd_nf: + case ARM::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case ARM::BI_InterlockedIncrement16_acq: + case ARM::BI_InterlockedIncrement_acq: + case ARM::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case ARM::BI_InterlockedIncrement16_rel: + case ARM::BI_InterlockedIncrement_rel: + case ARM::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case ARM::BI_InterlockedIncrement16_nf: + case ARM::BI_InterlockedIncrement_nf: + case ARM::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case ARM::BI_InterlockedDecrement16_acq: + case ARM::BI_InterlockedDecrement_acq: + case ARM::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case ARM::BI_InterlockedDecrement16_rel: + case ARM::BI_InterlockedDecrement_rel: + case ARM::BI_InterlockedDecrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case ARM::BI_InterlockedDecrement16_nf: + case ARM::BI_InterlockedDecrement_nf: + case ARM::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); } // Get the last argument, which specifies the vector type. 
@@ -6576,11 +6932,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); } + if (BuiltinID == AArch64::BI__getReg) { + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + llvm::APSInt Value = Result.Val.getInt(); + LLVMContext &Context = CGM.getLLVMContext(); + std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10); + + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Value *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + return Builder.CreateCall(F, Metadata); + } + if (BuiltinID == AArch64::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI_ReadWriteBarrier) + return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + llvm::SyncScope::SingleThread); + // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -6643,6 +7021,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); } + if (BuiltinID == AArch64::BI_ReadStatusReg || + BuiltinID == AArch64::BI_WriteStatusReg) { + LLVMContext &Context = CGM.getLLVMContext(); + + unsigned SysReg = + E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); + + std::string SysRegStr; + llvm::raw_string_ostream(SysRegStr) << + ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << + ((SysReg >> 11) & 7) << ":" << + ((SysReg >> 7) & 15) << ":" << + ((SysReg >> 3) & 15) << ":" << + ( SysReg & 7); + + llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + 
llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Type *RegisterType = Int64Ty; + llvm::Type *ValueType = Int32Ty; + llvm::Type *Types[] = { RegisterType }; + + if (BuiltinID == AArch64::BI_ReadStatusReg) { + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Value *Call = Builder.CreateCall(F, Metadata); + + return Builder.CreateTrunc(Call, ValueType); + } + + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); + ArgValue = Builder.CreateZExt(ArgValue, RegisterType); + + return Builder.CreateCall(F, { Metadata, ArgValue }); + } + + if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { + llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -6738,7 +7158,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvth_f16_u32: case NEON::BI__builtin_neon_vcvth_f16_u64: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_f16_s16: case NEON::BI__builtin_neon_vcvth_f16_s32: case NEON::BI__builtin_neon_vcvth_f16_s64: { @@ -6758,7 +7178,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u16_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s16_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6768,7 +7188,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u32_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s32_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6778,7 +7198,7 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u64_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s64_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -8493,6 +8913,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case AArch64::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case AArch64::BI_InterlockedExchangeAdd8_acq: + case AArch64::BI_InterlockedExchangeAdd16_acq: + case AArch64::BI_InterlockedExchangeAdd_acq: + case AArch64::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case AArch64::BI_InterlockedExchangeAdd8_rel: + case AArch64::BI_InterlockedExchangeAdd16_rel: + case AArch64::BI_InterlockedExchangeAdd_rel: + case AArch64::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case AArch64::BI_InterlockedExchangeAdd8_nf: + case AArch64::BI_InterlockedExchangeAdd16_nf: + case AArch64::BI_InterlockedExchangeAdd_nf: + case AArch64::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case AArch64::BI_InterlockedExchange8_acq: + case AArch64::BI_InterlockedExchange16_acq: + case AArch64::BI_InterlockedExchange_acq: + case AArch64::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case AArch64::BI_InterlockedExchange8_rel: + case AArch64::BI_InterlockedExchange16_rel: + case AArch64::BI_InterlockedExchange_rel: + case AArch64::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case AArch64::BI_InterlockedExchange8_nf: + case AArch64::BI_InterlockedExchange16_nf: + case AArch64::BI_InterlockedExchange_nf: + case 
AArch64::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case AArch64::BI_InterlockedCompareExchange8_acq: + case AArch64::BI_InterlockedCompareExchange16_acq: + case AArch64::BI_InterlockedCompareExchange_acq: + case AArch64::BI_InterlockedCompareExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case AArch64::BI_InterlockedCompareExchange8_rel: + case AArch64::BI_InterlockedCompareExchange16_rel: + case AArch64::BI_InterlockedCompareExchange_rel: + case AArch64::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case AArch64::BI_InterlockedCompareExchange8_nf: + case AArch64::BI_InterlockedCompareExchange16_nf: + case AArch64::BI_InterlockedCompareExchange_nf: + case AArch64::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case AArch64::BI_InterlockedOr8_acq: + case AArch64::BI_InterlockedOr16_acq: + case AArch64::BI_InterlockedOr_acq: + case AArch64::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case AArch64::BI_InterlockedOr8_rel: + case AArch64::BI_InterlockedOr16_rel: + case AArch64::BI_InterlockedOr_rel: + case AArch64::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case AArch64::BI_InterlockedOr8_nf: + case AArch64::BI_InterlockedOr16_nf: + case AArch64::BI_InterlockedOr_nf: + case AArch64::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case AArch64::BI_InterlockedXor8_acq: + case AArch64::BI_InterlockedXor16_acq: + case AArch64::BI_InterlockedXor_acq: + case AArch64::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case AArch64::BI_InterlockedXor8_rel: + case AArch64::BI_InterlockedXor16_rel: + case AArch64::BI_InterlockedXor_rel: + case 
AArch64::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case AArch64::BI_InterlockedXor8_nf: + case AArch64::BI_InterlockedXor16_nf: + case AArch64::BI_InterlockedXor_nf: + case AArch64::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case AArch64::BI_InterlockedAnd8_acq: + case AArch64::BI_InterlockedAnd16_acq: + case AArch64::BI_InterlockedAnd_acq: + case AArch64::BI_InterlockedAnd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case AArch64::BI_InterlockedAnd8_rel: + case AArch64::BI_InterlockedAnd16_rel: + case AArch64::BI_InterlockedAnd_rel: + case AArch64::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case AArch64::BI_InterlockedAnd8_nf: + case AArch64::BI_InterlockedAnd16_nf: + case AArch64::BI_InterlockedAnd_nf: + case AArch64::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case AArch64::BI_InterlockedIncrement16_acq: + case AArch64::BI_InterlockedIncrement_acq: + case AArch64::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case AArch64::BI_InterlockedIncrement16_rel: + case AArch64::BI_InterlockedIncrement_rel: + case AArch64::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case AArch64::BI_InterlockedIncrement16_nf: + case AArch64::BI_InterlockedIncrement_nf: + case AArch64::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case AArch64::BI_InterlockedDecrement16_acq: + case AArch64::BI_InterlockedDecrement_acq: + case AArch64::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case AArch64::BI_InterlockedDecrement16_rel: + case AArch64::BI_InterlockedDecrement_rel: + case AArch64::BI_InterlockedDecrement64_rel: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case AArch64::BI_InterlockedDecrement16_nf: + case AArch64::BI_InterlockedDecrement_nf: + case AArch64::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); + + case AArch64::BI_InterlockedAdd: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + Value *Arg1 = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, Arg0, Arg1, + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, Arg1); + } } } @@ -8947,7 +9490,7 @@ static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E, Value *Res; if (IsAddition) { // ADDUS: a > (a+b) ? ~0 : (a+b) - // If Ops[0] > Add, overflow occured. + // If Ops[0] > Add, overflow occurred. Value *Add = CGF.Builder.CreateAdd(Ops[0], Ops[1]); Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Add); Value *Max = llvm::Constant::getAllOnesValue(ResultType); @@ -9018,17 +9561,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { return EmitX86CpuSupports(FeatureStr); } -uint32_t +uint64_t CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { // Processor features and mapping to processor feature value. 
- uint32_t FeaturesMask = 0; + uint64_t FeaturesMask = 0; for (const StringRef &FeatureStr : FeatureStrs) { unsigned Feature = StringSwitch<unsigned>(FeatureStr) #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) #include "llvm/Support/X86TargetParser.def" ; - FeaturesMask |= (1U << Feature); + FeaturesMask |= (1ULL << Feature); } return FeaturesMask; } @@ -9037,31 +9580,54 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs)); } -llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) { - // Matching the struct layout from the compiler-rt/libgcc structure that is - // filled in: - // unsigned int __cpu_vendor; - // unsigned int __cpu_type; - // unsigned int __cpu_subtype; - // unsigned int __cpu_features[1]; - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, - llvm::ArrayType::get(Int32Ty, 1)); - - // Grab the global __cpu_model. - llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); +llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { + uint32_t Features1 = Lo_32(FeaturesMask); + uint32_t Features2 = Hi_32(FeaturesMask); + + Value *Result = Builder.getTrue(); + + if (Features1 != 0) { + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Grab the first (0th) element from the field __cpu_features off of the + // global in the struct STy. 
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), + Builder.getInt32(0)}; + Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. + Value *Mask = Builder.getInt32(Features1); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } + + if (Features2 != 0) { + llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty, + "__cpu_features2"); + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. + Value *Mask = Builder.getInt32(Features2); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } - // Grab the first (0th) element from the field __cpu_features off of the - // global in the struct STy. - Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), - ConstantInt::get(Int32Ty, 0)}; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); - Value *Features = - Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); - - // Check the value of the bit corresponding to the feature requested. 
- Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); - return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); + return Result; } Value *CodeGenFunction::EmitX86CpuInit() { @@ -10425,30 +10991,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__builtin_ia32_addcarryx_u32: case X86::BI__builtin_ia32_addcarryx_u64: - case X86::BI__builtin_ia32_addcarry_u32: - case X86::BI__builtin_ia32_addcarry_u64: case X86::BI__builtin_ia32_subborrow_u32: case X86::BI__builtin_ia32_subborrow_u64: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_addcarryx_u32: - IID = Intrinsic::x86_addcarryx_u32; + IID = Intrinsic::x86_addcarry_32; break; case X86::BI__builtin_ia32_addcarryx_u64: - IID = Intrinsic::x86_addcarryx_u64; - break; - case X86::BI__builtin_ia32_addcarry_u32: - IID = Intrinsic::x86_addcarry_u32; - break; - case X86::BI__builtin_ia32_addcarry_u64: - IID = Intrinsic::x86_addcarry_u64; + IID = Intrinsic::x86_addcarry_64; break; case X86::BI__builtin_ia32_subborrow_u32: - IID = Intrinsic::x86_subborrow_u32; + IID = Intrinsic::x86_subborrow_32; break; case X86::BI__builtin_ia32_subborrow_u64: - IID = Intrinsic::x86_subborrow_u64; + IID = Intrinsic::x86_subborrow_64; break; } @@ -11314,12 +11872,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); - case AMDGPU::BI__builtin_amdgcn_mov_dpp: { - llvm::SmallVector<llvm::Value *, 5> Args; - for (unsigned I = 0; I != 5; ++I) + case AMDGPU::BI__builtin_amdgcn_mov_dpp: + case AMDGPU::BI__builtin_amdgcn_update_dpp: { + llvm::SmallVector<llvm::Value *, 6> Args; + for (unsigned I = 0; I != E->getNumArgs(); ++I) Args.push_back(EmitScalarExpr(E->getArg(I))); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, - Args[0]->getType()); + 
assert(Args.size() == 5 || Args.size() == 6); + if (Args.size() == 5) + Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); + Value *F = + CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_div_fixup: @@ -11685,7 +12247,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Y, M4Value}); } - // Vector intrisincs that output the post-instruction CC value. + // Vector intrinsics that output the post-instruction CC value. #define INTRINSIC_WITH_CC(NAME) \ case SystemZ::BI__builtin_##NAME: \ @@ -12145,7 +12707,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, bool isColMajor = isColMajorArg.getSExtValue(); unsigned IID; unsigned NumResults = 8; - // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet + // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet // for some reason nvcc builtins use _c_. 
switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_st_c_f16: @@ -12423,6 +12985,191 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } + case WebAssembly::BI__builtin_wasm_min_f32: + case WebAssembly::BI__builtin_wasm_min_f64: + case WebAssembly::BI__builtin_wasm_min_f32x4: + case WebAssembly::BI__builtin_wasm_min_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case 
WebAssembly::BI__builtin_wasm_max_f32: + case WebAssembly::BI__builtin_wasm_max_f64: + case WebAssembly::BI__builtin_wasm_max_f32x4: + case WebAssembly::BI__builtin_wasm_max_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8: + case WebAssembly::BI__builtin_wasm_extract_lane_i32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: + case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: { + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + Value *Extract = Builder.CreateExtractElement(Vec, Lane); + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8: + return Builder.CreateSExt(Extract, ConvertType(E->getType())); + case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8: + return Builder.CreateZExt(Extract, ConvertType(E->getType())); + case WebAssembly::BI__builtin_wasm_extract_lane_i32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: + case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: + return Extract; + default: + llvm_unreachable("unexpected builtin ID"); + } + } + case WebAssembly::BI__builtin_wasm_replace_lane_i8x16: + case 
WebAssembly::BI__builtin_wasm_replace_lane_i16x8: + case WebAssembly::BI__builtin_wasm_replace_lane_i32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: + case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: { + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + Value *Val = EmitScalarExpr(E->getArg(2)); + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_replace_lane_i8x16: + case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: { + llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType(); + Value *Trunc = Builder.CreateTrunc(Val, ElemType); + return Builder.CreateInsertElement(Vec, Trunc, Lane); + } + case WebAssembly::BI__builtin_wasm_replace_lane_i32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: + case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: + return Builder.CreateInsertElement(Vec, Val, Lane); + default: + llvm_unreachable("unexpected builtin ID"); + } + } + case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: { + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8: + IntNo = Intrinsic::sadd_sat; + break; + case 
WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8: + IntNo = Intrinsic::uadd_sat; + break; + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8: + IntNo = Intrinsic::wasm_sub_saturate_signed; + break; + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: + IntNo = Intrinsic::wasm_sub_saturate_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_bitselect: { + Value *V1 = EmitScalarExpr(E->getArg(0)); + Value *V2 = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {V1, V2, C}); + } + case WebAssembly::BI__builtin_wasm_any_true_i8x16: + case WebAssembly::BI__builtin_wasm_any_true_i16x8: + case WebAssembly::BI__builtin_wasm_any_true_i32x4: + case WebAssembly::BI__builtin_wasm_any_true_i64x2: + case WebAssembly::BI__builtin_wasm_all_true_i8x16: + case WebAssembly::BI__builtin_wasm_all_true_i16x8: + case WebAssembly::BI__builtin_wasm_all_true_i32x4: + case WebAssembly::BI__builtin_wasm_all_true_i64x2: { + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_any_true_i8x16: + case WebAssembly::BI__builtin_wasm_any_true_i16x8: + case WebAssembly::BI__builtin_wasm_any_true_i32x4: + case WebAssembly::BI__builtin_wasm_any_true_i64x2: + IntNo = Intrinsic::wasm_anytrue; + break; + case WebAssembly::BI__builtin_wasm_all_true_i8x16: + case WebAssembly::BI__builtin_wasm_all_true_i16x8: + case WebAssembly::BI__builtin_wasm_all_true_i32x4: + case 
WebAssembly::BI__builtin_wasm_all_true_i64x2: + IntNo = Intrinsic::wasm_alltrue; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } + case WebAssembly::BI__builtin_wasm_abs_f32x4: + case WebAssembly::BI__builtin_wasm_abs_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } + case WebAssembly::BI__builtin_wasm_sqrt_f32x4: + case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } default: return nullptr; diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 3f3f2c5e43..1c578bd151 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -137,7 +137,7 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -353,8 +353,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // global variable and save a reference in GpuBinaryHandle to be cleaned up // in destructor on exit. Then associate all known kernels with the GPU binary // handle so CUDA runtime can figure out what to call on the GPU side. 
- std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary; - if (!IsHIP) { + std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr; + if (!CudaGpuBinaryFileName.empty()) { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr = llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName); if (std::error_code EC = CudaGpuBinaryOrErr.getError()) { @@ -388,15 +388,23 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { ModuleIDSectionName = "__hip_module_id"; ModuleIDPrefix = "__hip_"; - // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin. - // The external symbol is supposed to contain the fat binary but will be - // populated somewhere else, e.g. by lld through link script. - FatBinStr = new llvm::GlobalVariable( + if (CudaGpuBinary) { + // If fatbin is available from early finalization, create a string + // literal containing the fat binary loaded from the given file. + FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "", + FatbinConstantName, 8); + } else { + // If fatbin is not available, create an external symbol + // __hip_fatbin in section .hip_fatbin. The external symbol is supposed + // to contain the fat binary but will be populated somewhere else, + // e.g. by lld through link script. + FatBinStr = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, "__hip_fatbin", nullptr, llvm::GlobalVariable::NotThreadLocal); - cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName); + cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName); + } FatMagic = HIPFatMagic; } else { @@ -447,6 +455,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // thread safety of the loaded program. Therefore we can assume sequential // execution of constructor functions here. if (IsHIP) { + auto Linkage = CudaGpuBinary ? 
llvm::GlobalValue::InternalLinkage : + llvm::GlobalValue::LinkOnceAnyLinkage; llvm::BasicBlock *IfBlock = llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc); llvm::BasicBlock *ExitBlock = @@ -455,12 +465,13 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // of HIP ABI. GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, /*isConstant=*/false, - llvm::GlobalValue::LinkOnceAnyLinkage, + Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); // Prevent the weak symbol in different shared libraries being merged. - GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (Linkage != llvm::GlobalValue::InternalLinkage) + GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); Address GpuBinaryAddr( GpuBinaryHandle, CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); @@ -509,7 +520,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Generate a unique module ID. 
SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); - OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID()); + OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); llvm::Constant *ModuleIDConstant = makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32); diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index d5945be434..8b0733fbec 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -23,7 +23,7 @@ #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" using namespace clang; using namespace CodeGen; @@ -276,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt"); llvm::Value *VFunc = CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes); - CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 8857ffdde4..64e18e171e 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -23,11 +23,11 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" @@ -59,6 +59,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86Pascal: return llvm::CallingConv::C; // TODO: Add support for __vectorcall to LLVM. 
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; + case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); case CC_PreserveMost: return llvm::CallingConv::PreserveMost; @@ -67,11 +68,13 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { } } -/// Derives the 'this' type for codegen purposes, i.e. ignoring method +/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR /// qualification. -/// FIXME: address space qualification? -static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) { +static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); + if (MD) + RecTy = Context.getAddrSpaceQualType(RecTy, MD->getType().getAddressSpace()); return Context.getPointerType(CanQualType::CreateUnsafe(RecTy)); } @@ -214,6 +217,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { if (PcsAttr *PCS = D->getAttr<PcsAttr>()) return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP); + if (D->hasAttr<AArch64VectorPcsAttr>()) + return CC_AArch64VectorCall; + if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; @@ -246,7 +252,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, // Add the 'this' pointer. 
if (RD) - argTypes.push_back(GetThisType(Context, RD)); + argTypes.push_back(GetThisType(Context, RD, MD)); else argTypes.push_back(Context.VoidPtrTy); @@ -302,7 +308,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(GetThisType(Context, MD->getParent())); + argTypes.push_back(GetThisType(Context, MD->getParent(), MD)); bool PassParams = true; @@ -529,7 +535,7 @@ const CGFunctionInfo & CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); - CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; + CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); @@ -543,7 +549,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, CanQual<FunctionProtoType> FTP = GetFormalType(CD); SmallVector<CanQualType, 2> ArgTys; const CXXRecordDecl *RD = CD->getParent(); - ArgTys.push_back(GetThisType(Context, RD)); + ArgTys.push_back(GetThisType(Context, RD, CD)); if (CT == Ctor_CopyingClosure) ArgTys.push_back(*FTP->param_type_begin()); if (RD->getNumVBases() > 0) @@ -741,8 +747,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, FunctionType::ExtInfo info, ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { - assert(std::all_of(argTypes.begin(), argTypes.end(), - [](CanQualType T) { return T.isCanonicalAsParam(); })); + assert(llvm::all_of(argTypes, + [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. 
llvm::FoldingSetNodeID ID; @@ -1253,8 +1259,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // Otherwise do coercion through memory. This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); - Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy); + Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); + Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty); CGF.Builder.CreateMemCpy(Casted, SrcCasted, llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), false); @@ -1335,8 +1341,8 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); - Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy); + Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); + Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty); CGF.Builder.CreateMemCpy(DstCasted, Casted, llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), false); @@ -1709,6 +1715,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.DisableRedZone) FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); + if (CodeGenOpts.IndirectTlsSegRefs) + FuncAttrs.addAttribute("indirect-tls-seg-refs"); if (CodeGenOpts.NoImplicitFloat) FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); @@ -1785,6 +1793,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); + // FIXME: The interaction of this attribute with the SLH command line flag + // has not been determined. 
if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); } @@ -1831,7 +1841,7 @@ void CodeGenModule::ConstructAttributeList( AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); - const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); + const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); bool HasOptnone = false; // FIXME: handle sseregparm someday... @@ -1848,6 +1858,8 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); + if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -1939,7 +1951,7 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute("disable-tail-calls", llvm::toStringRef(DisableTailCalls)); - GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs); + GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs); } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); @@ -3066,8 +3078,9 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - assert(!isInAllocaArgument(CGM.getCXXABI(), type) && - "cannot emit delegate call arguments for inalloca arguments!"); + if (isInAllocaArgument(CGM.getCXXABI(), type)) { + CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter"); + } // GetAddrOfLocalVar returns a pointer-to-pointer for references, // but the argument needs to be the original pointer. 
@@ -3948,15 +3961,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); + if ((!ArgInfo.getIndirectByVal() && (LV.getAlignment() >= - getContext().getTypeAlignInChars(I->Ty))) || - (ArgInfo.getIndirectByVal() && - ((AS != LangAS::Default && AS != LangAS::opencl_private && - AS != CGM.getASTAllocaAddressSpace())))) { + getContext().getTypeAlignInChars(I->Ty)))) { + NeedCopy = true; + } + if (!getLangOpts().OpenCL) { + if ((ArgInfo.getIndirectByVal() && + (AS != LangAS::Default && + AS != CGM.getASTAllocaAddressSpace()))) { + NeedCopy = true; + } + } + // For OpenCL even if RV is located in default or alloca address space + // we don't want to perform address space cast for it. + else if ((ArgInfo.getIndirectByVal() && + Addr.getType()->getAddressSpace() != IRFuncTy-> + getParamType(FirstIRArg)->getPointerAddressSpace())) { NeedCopy = true; } } + if (NeedCopy) { // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( @@ -4238,6 +4264,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } #endif + // Update the largest vector width if any arguments have vector types. + for (unsigned i = 0; i < IRCallArgs.size(); ++i) { + if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + } + // Compute the calling convention and attributes. unsigned CallingConv; llvm::AttributeList Attrs; @@ -4251,8 +4284,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply always_inline to all calls within flatten functions. // FIXME: should this really take priority over __try, below? 
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !(Callee.getAbstractInfo().getCalleeDecl() && - Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) { + !(Callee.getAbstractInfo().getCalleeDecl().getDecl() && + Callee.getAbstractInfo() + .getCalleeDecl() + .getDecl() + ->hasAttr<NoInlineAttr>())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); @@ -4318,6 +4354,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Update largest vector width from the return type. + if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of // IPVK_IndirectCallTarget in InstrProfData.inc. @@ -4332,7 +4373,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Suppress tail calls if requested. if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) { - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); } @@ -4479,7 +4520,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } (); // Emit the assume_aligned check on the return value. 
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (Ret.isScalar() && TargetDecl) { if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { llvm::Value *OffsetValue = nullptr; diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 99a36e4e12..c300808bea 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -46,21 +46,21 @@ class CGCalleeInfo { /// The function prototype of the callee. const FunctionProtoType *CalleeProtoTy; /// The function declaration of the callee. - const Decl *CalleeDecl; + GlobalDecl CalleeDecl; public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) + explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl) : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} - CGCalleeInfo(const Decl *calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl() {} + CGCalleeInfo(GlobalDecl calleeDecl) : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} const FunctionProtoType *getCalleeFunctionProtoType() const { return CalleeProtoTy; } - const Decl *getCalleeDecl() const { return CalleeDecl; } + const GlobalDecl getCalleeDecl() const { return CalleeDecl; } }; /// All available information about a concrete callee. 
@@ -171,7 +171,7 @@ public: } CGCalleeInfo getAbstractInfo() const { if (isVirtual()) - return VirtualInfo.MD.getDecl(); + return VirtualInfo.MD; assert(isOrdinary()); return AbstractInfo; } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 468d81cbbb..cfc912cc9a 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -16,14 +16,15 @@ #include "CGDebugInfo.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" +#include "TargetInfo.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -2012,8 +2013,19 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool NewPointerIsChecked) { CallArgList Args; + LangAS SlotAS = E->getType().getAddressSpace(); + QualType ThisType = D->getThisType(getContext()); + LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); + llvm::Value *ThisPtr = This.getPointer(); + if (SlotAS != ThisAS) { + unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); + llvm::Type *NewType = + ThisPtr->getType()->getPointerElementType()->getPointerTo(TargetThisAS); + ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), + ThisAS, SlotAS, NewType); + } // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + Args.add(RValue::get(ThisPtr), D->getThisType(getContext())); // If this is a trivial constructor, emit a memcpy now before we lose // the alignment information on the argument. 
@@ -2122,7 +2134,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); - CGCallee Callee = CGCallee::forDirect(CalleePtr, D); + CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); EmitCall(Info, Callee, ReturnValueSlot(), Args); // Generate vtable assumptions if we're constructing a complete object @@ -2808,7 +2820,7 @@ void CodeGenFunction::EmitForwardingCallToLambda( // variadic arguments. // Now emit our call. - auto callee = CGCallee::forDirect(calleePtr, callOperator); + auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator)); RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs); // If necessary, copy the returned value into the slot. @@ -2839,7 +2851,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgList CallArgs; QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda)); - Address ThisPtr = GetAddrOfBlockDecl(variable, false); + Address ThisPtr = GetAddrOfBlockDecl(variable); CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 0a766d1762..3743d24f11 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -366,7 +366,7 @@ static llvm::SwitchInst *TransitionToCleanupSwitch(CodeGenFunction &CGF, llvm::BasicBlock *Block) { // If it's a branch, turn it into a switch whose default // destination is its original target. 
- llvm::TerminatorInst *Term = Block->getTerminator(); + llvm::Instruction *Term = Block->getTerminator(); assert(Term && "can't transition block without terminator"); if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) { @@ -589,7 +589,7 @@ static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit, llvm::BasicBlock *To) { // Exit is the exit block of a cleanup, so it always terminates in // an unconditional branch or a switch. - llvm::TerminatorInst *Term = Exit->getTerminator(); + llvm::Instruction *Term = Exit->getTerminator(); if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) { assert(Br->isUnconditional() && Br->getSuccessor(0) == From); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 7d6eb83f12..f3a07a30eb 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -25,10 +25,10 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/ModuleMap.h" @@ -41,6 +41,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" @@ -180,8 +181,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) { SourceManager &SM = CGM.getContext().getSourceManager(); auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc); - - if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename()) + if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc)) return; if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) { @@ -220,7 +220,7 
@@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl), - getOrCreateMainFile()); + TheCU->getFile()); return Default; } @@ -234,6 +234,9 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { if (CGM.getCodeGenOpts().EmitCodeView) PP.MSVCFormatting = true; + // Apply -fdebug-prefix-map. + PP.RemapFilePaths = true; + PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); }; return PP; } @@ -401,19 +404,18 @@ Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (!Loc.isValid()) // If Location is not valid then use main input file. - return getOrCreateMainFile(); + return TheCU->getFile(); SourceManager &SM = CGM.getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); - if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty()) + StringRef FileName = PLoc.getFilename(); + if (PLoc.isInvalid() || FileName.empty()) // If the location is not valid then use main input file. - return getOrCreateMainFile(); + return TheCU->getFile(); // Cache the results. - const char *fname = PLoc.getFilename(); - auto It = DIFileCache.find(fname); - + auto It = DIFileCache.find(FileName.data()); if (It != DIFileCache.end()) { // Verify that the information still exists. 
if (llvm::Metadata *V = It->second) @@ -426,22 +428,48 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (CSKind) CSInfo.emplace(*CSKind, Checksum); - - llvm::DIFile *F = DBuilder.createFile( - remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo, - getSource(SM, SM.getFileID(Loc))); - - DIFileCache[fname].reset(F); + return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); +} + +llvm::DIFile * +CGDebugInfo::createFile(StringRef FileName, + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + Optional<StringRef> Source) { + StringRef Dir; + StringRef File; + std::string RemappedFile = remapDIPath(FileName); + std::string CurDir = remapDIPath(getCurrentDirname()); + SmallString<128> DirBuf; + SmallString<128> FileBuf; + if (llvm::sys::path::is_absolute(RemappedFile)) { + // Strip the common prefix (if it is more than just "/") from current + // directory and FileName for a more space-efficient encoding. + auto FileIt = llvm::sys::path::begin(RemappedFile); + auto FileE = llvm::sys::path::end(RemappedFile); + auto CurDirIt = llvm::sys::path::begin(CurDir); + auto CurDirE = llvm::sys::path::end(CurDir); + for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt) + llvm::sys::path::append(DirBuf, *CurDirIt); + if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) { + // The common prefix only the root; stripping it would cause + // LLVM diagnostic locations to be more confusing. 
+ Dir = {}; + File = RemappedFile; + } else { + for (; FileIt != FileE; ++FileIt) + llvm::sys::path::append(FileBuf, *FileIt); + Dir = DirBuf; + File = FileBuf; + } + } else { + Dir = CurDir; + File = RemappedFile; + } + llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source); + DIFileCache[FileName.data()].reset(F); return F; } -llvm::DIFile *CGDebugInfo::getOrCreateMainFile() { - return DBuilder.createFile( - remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()), - TheCU->getFile()->getChecksum(), - CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None); -} - std::string CGDebugInfo::remapDIPath(StringRef Path) const { for (const auto &Entry : DebugPrefixMap) if (Path.startswith(Entry.first)) @@ -527,11 +555,11 @@ void CGDebugInfo::CreateCompileUnit() { llvm::dwarf::SourceLanguage LangTag; const LangOptions &LO = CGM.getLangOpts(); if (LO.CPlusPlus) { - if (LO.ObjC1) + if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; else LangTag = llvm::dwarf::DW_LANG_C_plus_plus; - } else if (LO.ObjC1) { + } else if (LO.ObjC) { LangTag = llvm::dwarf::DW_LANG_ObjC; } else if (LO.RenderScript) { LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; @@ -545,7 +573,7 @@ void CGDebugInfo::CreateCompileUnit() { // Figure out which version of the ObjC runtime we have. unsigned RuntimeVers = 0; - if (LO.ObjC1) + if (LO.ObjC) RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; llvm::DICompileUnit::DebugEmissionKind EmissionKind; @@ -566,26 +594,33 @@ void CGDebugInfo::CreateCompileUnit() { break; } + uint64_t DwoId = 0; + auto &CGOpts = CGM.getCodeGenOpts(); + // The DIFile used by the CU is distinct from the main source + // file. Its directory part specifies what becomes the + // DW_AT_comp_dir (the compilation directory), even if the source + // file was specified with an absolute path. 
if (CSKind) CSInfo.emplace(*CSKind, Checksum); + llvm::DIFile *CUFile = DBuilder.createFile( + remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo, + getSource(SM, SM.getMainFileID())); // Create new compile unit. - // FIXME - Eliminate TheCU. - auto &CGOpts = CGM.getCodeGenOpts(); TheCU = DBuilder.createCompileUnit( - LangTag, - DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSInfo, - getSource(SM, SM.getMainFileID())), - CGOpts.EmitVersionIdentMetadata ? Producer : "", + LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, CGOpts.DwarfDebugFlags, RuntimeVers, - CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind, - 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, + (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) + ? "" + : CGOpts.SplitDwarfFile, + EmissionKind, DwoId, CGOpts.SplitDwarfInlining, + CGOpts.DebugInfoForProfiling, CGM.getTarget().getTriple().isNVPTX() ? 
llvm::DICompileUnit::DebugNameTableKind::None : static_cast<llvm::DICompileUnit::DebugNameTableKind>( - CGOpts.DebugNameTable)); + CGOpts.DebugNameTable), + CGOpts.DebugRangesBaseAddress); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -603,9 +638,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return nullptr; case BuiltinType::ObjCClass: if (!ClassTy) - ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", TheCU, - getOrCreateMainFile(), 0); + ClassTy = + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_class", TheCU, TheCU->getFile(), 0); return ClassTy; case BuiltinType::ObjCId: { // typedef struct objc_class *Class; @@ -617,21 +652,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return ObjTy; if (!ClassTy) - ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", TheCU, - getOrCreateMainFile(), 0); + ClassTy = + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_class", TheCU, TheCU->getFile(), 0); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); auto *ISATy = DBuilder.createPointerType(ClassTy, Size); - ObjTy = DBuilder.createStructType( - TheCU, "objc_object", getOrCreateMainFile(), 0, 0, 0, - llvm::DINode::FlagZero, nullptr, llvm::DINodeArray()); + ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0, + 0, 0, llvm::DINode::FlagZero, nullptr, + llvm::DINodeArray()); DBuilder.replaceArrays( ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType( - ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0, + ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0, llvm::DINode::FlagZero, ISATy))); return ObjTy; } @@ -639,7 +674,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { if (!SelTy) SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, "objc_selector", TheCU, - getOrCreateMainFile(), 0); + TheCU->getFile(), 0); 
return SelTy; } @@ -658,6 +693,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy); case BuiltinType::OCLReserveID: return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy); +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty); +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: @@ -956,7 +995,7 @@ llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, if (Cache) return Cache; Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name, - TheCU, getOrCreateMainFile(), 0); + TheCU, TheCU->getFile(), 0); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); Cache = DBuilder.createPointerType(Cache, Size); return Cache; @@ -1093,6 +1132,7 @@ static unsigned getDwarfCC(CallingConv CC) { case CC_X86_64SysV: return llvm::dwarf::DW_CC_LLVM_X86_64SysV; case CC_AAPCS: + case CC_AArch64VectorCall: return llvm::dwarf::DW_CC_LLVM_AAPCS; case CC_AAPCS_VFP: return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP; @@ -1485,16 +1525,16 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Collect virtual method info. 
llvm::DIType *ContainingType = nullptr; - unsigned Virtuality = 0; unsigned VIndex = 0; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; int ThisAdjustment = 0; if (Method->isVirtual()) { if (Method->isPure()) - Virtuality = llvm::dwarf::DW_VIRTUALITY_pure_virtual; + SPFlags |= llvm::DISubprogram::SPFlagPureVirtual; else - Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual; + SPFlags |= llvm::DISubprogram::SPFlagVirtual; if (CGM.getTarget().getCXXABI().isItaniumFamily()) { // It doesn't make sense to give a virtual destructor a vtable index, @@ -1546,12 +1586,13 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( Flags |= llvm::DINode::FlagLValueReference; if (Method->getRefQualifier() == RQ_RValue) Flags |= llvm::DINode::FlagRValueReference; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, - MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality, - VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize, + MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags, TParamsArray.get()); SPCache[Method->getCanonicalDecl()].reset(SP); @@ -1776,6 +1817,29 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, return llvm::DINodeArray(); } +llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, + llvm::DIFile *Unit) { + if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) { + auto T = TS->getSpecializedTemplateOrPartial(); + auto TA = TS->getTemplateArgs().asArray(); + // Collect parameters for a partial specialization + if (T.is<VarTemplatePartialSpecializationDecl *>()) { + const TemplateParameterList *TList = + T.get<VarTemplatePartialSpecializationDecl *>() + 
->getTemplateParameters(); + return CollectTemplateParams(TList, TA, Unit); + } + + // Collect parameters for an explicit specialization + if (T.is<VarTemplateDecl *>()) { + const TemplateParameterList *TList = T.get<VarTemplateDecl *>() + ->getTemplateParameters(); + return CollectTemplateParams(TList, TA, Unit); + } + } + return llvm::DINodeArray(); +} + llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { // Always get the full list of parameters, not just the ones from @@ -1931,8 +1995,17 @@ static bool isDefinedInClangModule(const RecordDecl *RD) { if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { if (!CXXDecl->isCompleteDefinition()) return false; + // Check wether RD is a template. auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); if (TemplateKind != TSK_Undeclared) { + // Unfortunately getOwningModule() isn't accurate enough to find the + // owning module of a ClassTemplateSpecializationDecl that is inside a + // namespace spanning multiple modules. + bool Explicit = false; + if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl)) + Explicit = TD->isExplicitInstantiationOrSpecialization(); + if (!Explicit && CXXDecl->getEnclosingNamespaceContext()) + return false; // This is a template, check the origin of the first member. 
if (CXXDecl->field_begin() == CXXDecl->field_end()) return TemplateKind == TSK_ExplicitInstantiationDeclaration; @@ -2480,9 +2553,9 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { Count = CAT->getSize().getZExtValue(); else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) { if (Expr *Size = VAT->getSizeExpr()) { - llvm::APSInt V; - if (Size->EvaluateAsInt(V, CGM.getContext())) - Count = V.getExtValue(); + Expr::EvalResult Result; + if (Size->EvaluateAsInt(Result, CGM.getContext())) + Count = Result.Val.getInt().getExtValue(); } } @@ -2548,9 +2621,9 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, const FunctionProtoType *FPT = Ty->getPointeeType()->getAs<FunctionProtoType>(); return DBuilder.createMemberPointerType( - getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType( - Ty->getClass(), FPT->getTypeQuals())), - FPT, U), + getOrCreateInstanceMethodType( + CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), + FPT, U), ClassType, Size, /*Align=*/0, Flags); } @@ -3070,6 +3143,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, unsigned &LineNo, QualType &T, StringRef &Name, StringRef &LinkageName, + llvm::MDTuple *&TemplateParameters, llvm::DIScope *&VDContext) { Unit = getOrCreateFile(VD->getLocation()); LineNo = getLineNumber(VD->getLocation()); @@ -3093,6 +3167,13 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, if (LinkageName == Name) LinkageName = StringRef(); + if (isa<VarTemplateSpecializationDecl>(VD)) { + llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit); + TemplateParameters = parameterNodes.get(); + } else { + TemplateParameters = nullptr; + } + // Since we emit declarations (DW_AT_members) for static members, place the // definition of those static members in the namespace they were declared in // in 
the source code (the lexical decl context). @@ -3119,6 +3200,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, llvm::DINodeArray TParamsArray; StringRef Name, LinkageName; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; SourceLocation Loc = GD.getDecl()->getLocation(); llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; @@ -3135,20 +3217,23 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); + if (!FD->isExternallyVisible()) + SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; + if (Stub) { + Flags |= getCallSiteRelatedAttrs(); + SPFlags |= llvm::DISubprogram::SPFlagDefinition; return DBuilder.createFunction( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(GD.getDecl(), FnType, Unit), - !FD->isExternallyVisible(), - /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(FD)); } llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(GD.getDecl(), FnType, Unit), - !FD->isExternallyVisible(), - /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(FD)); const FunctionDecl *CanonDecl = FD->getCanonicalDecl(); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, @@ -3173,12 +3258,14 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { llvm::DIFile *Unit = getOrCreateFile(Loc); 
llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); + llvm::MDTuple *TemplateParameters = nullptr; - collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext); + collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters, + DContext); auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); auto *GV = DBuilder.createTempGlobalVariableFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit), - !VD->isExternallyVisible(), nullptr, Align); + !VD->isExternallyVisible(), nullptr, TemplateParameters, Align); FwdDeclReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())), @@ -3334,6 +3421,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, bool HasDecl = (D != nullptr); llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *FDContext = Unit; llvm::DINodeArray TParamsArray; @@ -3373,6 +3461,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (CurFuncIsThunk) Flags |= llvm::DINode::FlagThunk; + if (Fn->hasLocalLinkage()) + SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; + + llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs(); + llvm::DISubprogram::DISPFlags SPFlagsForDef = + SPFlags | llvm::DISubprogram::SPFlagDefinition; + unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = getLineNumber(ScopeLoc); @@ -3383,9 +3480,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. 
llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), - true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, - TParamsArray.get(), getFunctionDeclaration(D)); + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef, + SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D)); Fn->setSubprogram(SP); // We might get here with a VarDecl in the case we're generating // code for the initialization of globals. Do not record these decls @@ -3405,8 +3501,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, cast<llvm::DICompositeType>(It->second); llvm::DISubprogram *FD = DBuilder.createFunction( InterfaceDecl, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), - false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, TParamsArray.get()); DBuilder.finalizeSubprogram(FD); ObjCMethodCache[ID].push_back(FD); @@ -3455,11 +3550,13 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, } unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = 0; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; DBuilder.retainType(DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/, - false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(D))); } @@ -3488,7 +3585,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); - if (CurLoc.isInvalid() || CurLoc.isMacroID()) + if (CurLoc.isInvalid() || 
CurLoc.isMacroID() || LexicalBlockStack.empty()) return; llvm::MDNode *Scope = LexicalBlockStack.back(); @@ -4089,7 +4186,9 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, unsigned LineNo; StringRef DeclName, LinkageName; QualType T; - collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, DContext); + llvm::MDTuple *TemplateParameters = nullptr; + collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, + TemplateParameters, DContext); // Attempt to store one global variable for the declaration - even if we // emit a lot of fields. @@ -4115,7 +4214,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), Var->hasLocalLinkage(), Expr.empty() ? nullptr : DBuilder.createExpression(Expr), - getOrCreateStaticDataMemberDeclarationOrNull(D), Align); + getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, + Align); Var->addDebugInfo(GVE); } DeclCache[D->getCanonicalDecl()].reset(GVE); @@ -4172,10 +4272,19 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { InitExpr = DBuilder.createConstantValueExpression( Init.getFloat().bitcastToAPInt().getZExtValue()); } + + llvm::MDTuple *TemplateParameters = nullptr; + + if (isa<VarTemplateSpecializationDecl>(VD)) + if (VarD) { + llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit); + TemplateParameters = parameterNodes.get(); + } + GV.reset(DBuilder.createGlobalVariableExpression( DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), - Align)); + TemplateParameters, Align)); } llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { @@ -4364,7 +4473,7 @@ void CGDebugInfo::EmitExplicitCastType(QualType Ty) { if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; - if (auto *DieTy = getOrCreateType(Ty, 
getOrCreateMainFile())) + if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile())) // Don't ignore in case of explicit cast where it is referenced indirectly. DBuilder.retainType(DieTy); } @@ -4376,3 +4485,22 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) { llvm::MDNode *Scope = LexicalBlockStack.back(); return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope); } + +llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { + // Call site-related attributes are only useful in optimized programs, and + // when there's a possibility of debugging backtraces. + if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo || + DebugKind == codegenoptions::LocTrackingOnly) + return llvm::DINode::FlagZero; + + // Call site-related attributes are available in DWARF v5. Some debuggers, + // while not fully DWARF v5-compliant, may accept these attributes as if they + // were part of DWARF v4. + bool SupportsDWARFv4Ext = + CGM.getCodeGenOpts().DwarfVersion == 4 && + CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB; + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) + return llvm::DINode::FlagZero; + + return llvm::DINode::FlagAllCallsDescribed; +} diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 8641c2d896..031e40b9dd 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -20,8 +20,8 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" #include "clang/AST/TypeOrdering.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" @@ -76,6 +76,9 @@ class CGDebugInfo { llvm::DIType *OCLQueueDITy = nullptr; llvm::DIType *OCLNDRangeDITy = nullptr; llvm::DIType *OCLReserveIDDITy = nullptr; +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + llvm::DIType *Id##Ty = 
nullptr; +#include "clang/Basic/OpenCLExtensionTypes.def" /// Cache of previously constructed Types. llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache; @@ -248,6 +251,11 @@ class CGDebugInfo { llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD, llvm::DIFile *Unit); + /// A helper function to collect debug info for function template + /// parameters. + llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD, + llvm::DIFile *Unit); + /// A helper function to collect debug info for template /// parameters. llvm::DINodeArray @@ -333,6 +341,9 @@ public: void finalize(); + /// Remap a given path with the current debug prefix map + std::string remapDIPath(StringRef) const; + /// Register VLA size expression debug node with the qualified type. void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) { SizeExprCache[Ty] = SizeExpr; @@ -520,9 +531,6 @@ private: /// Create new compile unit. void CreateCompileUnit(); - /// Remap a given path with the current debug prefix map - std::string remapDIPath(StringRef) const; - /// Compute the file checksum debug info for input file ID. Optional<llvm::DIFile::ChecksumKind> computeChecksum(FileID FID, SmallString<32> &Checksum) const; @@ -530,11 +538,15 @@ private: /// Get the source of the given file ID. Optional<StringRef> getSource(const SourceManager &SM, FileID FID); - /// Get the file debug info descriptor for the input location. + /// Convenience function to get the file debug info descriptor for the input + /// location. llvm::DIFile *getOrCreateFile(SourceLocation Loc); - /// Get the file info for main compile unit. - llvm::DIFile *getOrCreateMainFile(); + /// Create a file debug info descriptor for a source file. + llvm::DIFile * + createFile(StringRef FileName, + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + Optional<StringRef> Source); /// Get the type from the cache or create a new type if necessary. 
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg); @@ -603,6 +615,11 @@ private: unsigned LineNo, StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext); + + /// Return flags which enable debug info emission for call sites, provided + /// that it is supported and enabled. + llvm::DINode::DIFlags getCallSiteRelatedAttrs() const; + /// Get the printing policy for producing names for debug info. PrintingPolicy getPrintingPolicy() const; @@ -645,7 +662,9 @@ private: /// Collect various properties of a VarDecl. void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, unsigned &LineNo, QualType &T, StringRef &Name, - StringRef &LinkageName, llvm::DIScope *&VDContext); + StringRef &LinkageName, + llvm::MDTuple *&TemplateParameters, + llvm::DIScope *&VDContext); /// Allocate a copy of \p A using the DebugInfoNames allocator /// and return a reference to it. If multiple arguments are given the strings @@ -725,7 +744,7 @@ public: /// function \p InlinedFn. The current debug location becomes the inlined call /// site of the inlined function. ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn); - /// Restore everything back to the orginial state. + /// Restore everything back to the original state. 
~ApplyInlineDebugLocation(); }; diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index e74066ef43..f4fef45a12 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -26,10 +26,10 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" @@ -754,9 +754,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. - if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { - enterFullExpression(ewc); - init = ewc->getSubExpr(); + if (const FullExpr *fe = dyn_cast<FullExpr>(init)) { + enterFullExpression(fe); + init = fe->getSubExpr(); } CodeGenFunction::RunCleanupsScope Scope(*this); @@ -963,6 +963,49 @@ static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, return llvm::isBytewiseValue(Init); } +static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, + CGBuilderTy &Builder, + llvm::Constant *Constant, + CharUnits Align) { + auto FunctionName = [&](const DeclContext *DC) -> std::string { + if (const auto *FD = dyn_cast<FunctionDecl>(DC)) { + if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD)) + return CC->getNameAsString(); + if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD)) + return CD->getNameAsString(); + return CGM.getMangledName(FD); + } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) { + return OM->getNameAsString(); + } else if (isa<BlockDecl>(DC)) { + return "<block>"; + } else if (isa<CapturedDecl>(DC)) { + return "<captured>"; + } else { + llvm::llvm_unreachable_internal("expected a function or 
method"); + } + }; + + auto *Ty = Constant->getType(); + bool isConstant = true; + llvm::GlobalVariable *InsertBefore = nullptr; + unsigned AS = CGM.getContext().getTargetAddressSpace( + CGM.getStringLiteralAddressSpace()); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, + Constant, + "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." + + D.getName(), + InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + Address SrcPtr = Address(GV, Align); + llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS); + if (SrcPtr.getType() != BP) + SrcPtr = Builder.CreateBitCast(SrcPtr, BP); + return SrcPtr; +} + static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder, @@ -1002,25 +1045,10 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } - // Otherwise, create a temporary global with the initializer then memcpy from - // the global to the alloca. 
- std::string Name = getStaticDeclName(CGM, D); - unsigned AS = CGM.getContext().getTargetAddressSpace( - CGM.getStringLiteralAddressSpace()); - llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS); - - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - CGM.getModule(), constant->getType(), true, - llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr, - llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - Address SrcPtr = Address(GV, Loc.getAlignment()); - if (SrcPtr.getType() != BP) - SrcPtr = Builder.CreateBitCast(SrcPtr, BP); - - Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile); + Builder.CreateMemCpy( + Loc, + createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), + SizeVal, isVolatile); } /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a @@ -1066,6 +1094,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( // For each dimension stores its QualType and corresponding // size-expression Value. SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions; + SmallVector<IdentifierInfo *, 4> VLAExprNames; // Break down the array into individual dimensions. QualType Type1D = D.getType(); @@ -1074,8 +1103,14 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) Dimensions.emplace_back(C, Type1D.getUnqualifiedType()); else { - auto SizeExprAddr = CreateDefaultAlignTempAlloca( - VlaSize.NumElts->getType(), "__vla_expr"); + // Generate a locally unique name for the size expression. 
+ Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++); + SmallString<12> Buffer; + StringRef NameRef = Name.toStringRef(Buffer); + auto &Ident = getContext().Idents.getOwn(NameRef); + VLAExprNames.push_back(&Ident); + auto SizeExprAddr = + CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef); Builder.CreateStore(VlaSize.NumElts, SizeExprAddr); Dimensions.emplace_back(SizeExprAddr.getPointer(), Type1D.getUnqualifiedType()); @@ -1089,20 +1124,20 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( // Register each dimension's size-expression with a DILocalVariable, // so that it can be used by CGDebugInfo when instantiating a DISubrange // to describe this array. + unsigned NameIdx = 0; for (auto &VlaSize : Dimensions) { llvm::Metadata *MD; if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) MD = llvm::ConstantAsMetadata::get(C); else { // Create an artificial VarDecl to generate debug info for. - IdentifierInfo &NameIdent = getContext().Idents.getOwn( - cast<llvm::AllocaInst>(VlaSize.NumElts)->getName()); + IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType(); auto QT = getContext().getIntTypeForBitwidth( VlaExprTy->getScalarSizeInBits(), false); auto *ArtificialDecl = VarDecl::Create( getContext(), const_cast<DeclContext *>(D.getDeclContext()), - D.getLocation(), D.getLocation(), &NameIdent, QT, + D.getLocation(), D.getLocation(), NameIdent, QT, getContext().CreateTypeSourceInfo(QT), SC_Auto); ArtificialDecl->setImplicit(); @@ -1125,8 +1160,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { AutoVarEmission emission(D); - bool isByRef = D.hasAttr<BlocksAttr>(); - emission.IsByRef = isByRef; + bool isEscapingByRef = D.isEscapingByref(); + emission.IsEscapingByRef = isEscapingByRef; CharUnits alignment = getContext().getDeclAlign(&D); @@ -1165,8 +1200,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // in OpenCL. 
if ((!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) && - (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && - CGM.isTypeConstant(Ty, true))) { + (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && + !isEscapingByRef && CGM.isTypeConstant(Ty, true))) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); // Signal this condition to later callbacks. @@ -1218,7 +1253,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { } else { CharUnits allocaAlignment; llvm::Type *allocaTy; - if (isByRef) { + if (isEscapingByRef) { auto &byrefInfo = getBlockByrefInfo(&D); allocaTy = byrefInfo.Type; allocaAlignment = byrefInfo.ByrefAlignment; @@ -1418,7 +1453,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } // Initialize the structure of a __block variable. - if (emission.IsByRef) + if (emission.IsEscapingByRef) emitByrefStructureInit(emission); // Initialize the variable here if it doesn't have a initializer and it is a @@ -1428,7 +1463,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { type.isNonTrivialToPrimitiveDefaultInitialize() == QualType::PDIK_Struct) { LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type); - if (emission.IsByRef) + if (emission.IsEscapingByRef) drillIntoBlockVariable(*this, Dst, &D); defaultInitNonTrivialCStructVar(Dst); return; @@ -1440,7 +1475,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { // Check whether this is a byref variable that's potentially // captured and moved by its own initializer. If so, we'll need to // emit the initializer first, then copy into the variable. - bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init); + bool capturedByInit = emission.IsEscapingByRef && isCapturedBy(D, Init); Address Loc = capturedByInit ? 
emission.Addr : emission.getObjectAddress(*this); @@ -1634,7 +1669,8 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { // If this is a block variable, call _Block_object_destroy // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC // mode. - if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) { + if (emission.IsEscapingByRef && + CGM.getLangOpts().getGC() != LangOptions::GCOnly) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (emission.Variable->getType().isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; @@ -2149,5 +2185,5 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - //Do nothing - here to avoid build errors + getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 3f96fea608..9aa31f181e 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -15,7 +15,7 @@ #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" @@ -26,7 +26,10 @@ using namespace CodeGen; static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, ConstantAddress DeclPtr) { - assert(D.hasGlobalStorage() && "VarDecl must have global storage!"); + assert( + (D.hasGlobalStorage() || + (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) && + "VarDecl must have global or local (in the case of OpenCL) storage!"); assert(!D.getType()->isReferenceType() && "Should not call EmitDeclInit on a reference!"); @@ -63,15 +66,24 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, /// Emit code to cause the destruction of the given variable with /// static storage duration. 
static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, - ConstantAddress addr) { + ConstantAddress Addr) { + // Honor __attribute__((no_destroy)) and bail instead of attempting + // to emit a reference to a possibly nonexistent destructor, which + // in turn can cause a crash. This will result in a global constructor + // that isn't balanced out by a destructor call as intended by the + // attribute. This also checks for -fno-c++-static-destructors and + // bails even if the attribute is not present. + if (D.isNoDestroy(CGF.getContext())) + return; + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? - QualType type = D.getType(); - QualType::DestructionKind dtorKind = type.isDestructedType(); + QualType Type = D.getType(); + QualType::DestructionKind DtorKind = Type.isDestructedType(); - switch (dtorKind) { + switch (DtorKind) { case QualType::DK_none: return; @@ -86,13 +98,14 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, return; } - llvm::Constant *function; - llvm::Constant *argument; + llvm::Constant *Func; + llvm::Constant *Argument; // Special-case non-array C++ destructors, if they have the right signature. // Under some ABIs, destructors return this instead of void, and cannot be - // passed directly to __cxa_atexit if the target does not allow this mismatch. - const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + // passed directly to __cxa_atexit if the target does not allow this + // mismatch. 
+ const CXXRecordDecl *Record = Type->getAsCXXRecordDecl(); bool CanRegisterDestructor = Record && (!CGM.getCXXABI().HasThisReturn( GlobalDecl(Record->getDestructor(), Dtor_Complete)) || @@ -103,43 +116,47 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; if (Record && (CanRegisterDestructor || UsingExternalHelper)) { assert(!Record->hasTrivialDestructor()); - CXXDestructorDecl *dtor = Record->getDestructor(); + CXXDestructorDecl *Dtor = Record->getDestructor(); - function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); - argument = llvm::ConstantExpr::getBitCast( - addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo()); + Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete); + Argument = llvm::ConstantExpr::getBitCast( + Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); // Otherwise, the standard logic requires a helper function. } else { - function = CodeGenFunction(CGM) - .generateDestroyHelper(addr, type, CGF.getDestroyer(dtorKind), - CGF.needsEHCleanup(dtorKind), &D); - argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); + Func = CodeGenFunction(CGM) + .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind), + CGF.needsEHCleanup(DtorKind), &D); + Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); } - CGM.getCXXABI().registerGlobalDtor(CGF, D, function, argument); + CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument); } /// Emit code to cause the variable at the given address to be considered as /// constant from this point onwards. static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Addr) { + return CGF.EmitInvariantStart( + Addr, CGF.getContext().getTypeSizeInChars(D.getType())); +} + +void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { // Do not emit the intrinsic if we're not optimizing. 
- if (!CGF.CGM.getCodeGenOpts().OptimizationLevel) + if (!CGM.getCodeGenOpts().OptimizationLevel) return; // Grab the llvm.invariant.start intrinsic. llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. - llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy}; - llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr); + llvm::Type *ObjectPtr[1] = {Int8PtrTy}; + llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. - CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType()); - uint64_t Width = WidthChars.getQuantity(); - llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)}; - CGF.Builder.CreateCall(InvariantStart, Args); + uint64_t Width = Size.getQuantity(); + llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width), + llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)}; + Builder.CreateCall(InvariantStart, Args); } void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, @@ -360,11 +377,21 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( Fn->addFnAttr(llvm::Attribute::ShadowCallStack); auto RASignKind = getCodeGenOpts().getSignReturnAddress(); - if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) + if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) { Fn->addFnAttr("sign-return-address", RASignKind == CodeGenOptions::SignReturnAddressScope::All ? "all" : "non-leaf"); + auto RASignKey = getCodeGenOpts().getSignReturnAddressKey(); + Fn->addFnAttr("sign-return-address-key", + RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey + ? 
"a_key" + : "b_key"); + } + + if (getCodeGenOpts().BranchTargetEnforcement) + Fn->addFnAttr("branch-target-enforcement"); + return Fn; } @@ -597,7 +624,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, void CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef<llvm::Function *> Decls, - Address Guard) { + ConstantAddress Guard) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -621,6 +648,12 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard); + + // The guard variable can't ever change again. + EmitInvariantStart( + Guard.getPointer(), + CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()))); } RunCleanupsScope Scope(*this); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 4ee835259a..f1298d1201 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -66,7 +66,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() { name = "__std_terminate"; else name = "?terminate@@YAXXZ"; - } else if (getLangOpts().ObjC1 && + } else if (getLangOpts().ObjC && getLangOpts().ObjCRuntime.hasTerminate()) name = "objc_terminate"; else @@ -224,7 +224,7 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, if (FD && FD->usesSEHTry()) return getSEHPersonalityMSVC(T); - if (L.ObjC1) + if (L.ObjC) return L.CPlusPlus ? getObjCXXPersonality(Target, L) : getObjCPersonality(Target, L); return L.CPlusPlus ? 
getCXXPersonality(Target, L) @@ -250,7 +250,11 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { llvm::Constant *Fn = getPersonalityFn(CGM, Personality); - return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + llvm::PointerType* Int8PtrTy = llvm::PointerType::get( + llvm::Type::getInt8Ty(CGM.getLLVMContext()), + CGM.getDataLayout().getProgramAddressSpace()); + + return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -315,7 +319,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) { /// when it really needs it. void CodeGenModule::SimplifyPersonality() { // If we're not in ObjC++ -fexceptions, there's nothing to do. - if (!LangOpts.CPlusPlus || !LangOpts.ObjC1 || !LangOpts.Exceptions) + if (!LangOpts.CPlusPlus || !LangOpts.ObjC || !LangOpts.Exceptions) return; // Both the problem this endeavors to fix and the way the logic @@ -1248,7 +1252,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // we follow the false destination for each of the cond branches to reach // the rethrow block. 
llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock; - while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) { + while (llvm::Instruction *TI = RethrowBlock->getTerminator()) { auto *BI = cast<llvm::BranchInst>(TI); assert(BI->isConditional()); RethrowBlock = BI->getSuccessor(1); @@ -1874,7 +1878,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc()); CurSEHParent = ParentCGF.CurSEHParent; - CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index b74937e9ca..6ef1091cc0 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -26,7 +26,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/NSAPI.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" @@ -1260,6 +1260,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitVAArgExprLValue(cast<VAArgExpr>(E)); case Expr::DeclRefExprClass: return EmitDeclRefLValue(cast<DeclRefExpr>(E)); + case Expr::ConstantExprClass: + return EmitLValue(cast<ConstantExpr>(E)->getSubExpr()); case Expr::ParenExprClass: return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); case Expr::GenericSelectionExprClass: @@ -1491,6 +1493,16 @@ CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) { return ConstantEmission(); } +llvm::Value *CodeGenFunction::emitScalarConstant( + const CodeGenFunction::ConstantEmission &Constant, Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(*this, E), + E->getExprLoc()) + .getScalarVal(); + return Constant.getValue(); +} + llvm::Value 
*CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), @@ -2486,7 +2498,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } assert(isa<BlockDecl>(CurCodeDecl)); - Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()); + Address addr = GetAddrOfBlockDecl(VD); return MakeAddrLValue(addr, T, AlignmentSource::Decl); } } @@ -2538,7 +2550,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } // Drill into block byref variables. - bool isBlockByref = VD->hasAttr<BlocksAttr>(); + bool isBlockByref = VD->isEscapingByref(); if (isBlockByref) { addr = emitBlockByrefAddress(addr, VD); } @@ -2601,7 +2613,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { // of a pointer to object; as in void foo (__weak id *param); *param = 0; // But, we continue to generate __strong write barrier on indirect write // into a pointer to object. - if (getLangOpts().ObjC1 && + if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC && LV.isObjCWeak()) LV.setNonGC(!E->isOBJCGCCandidate(getContext())); @@ -2662,7 +2674,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { if (FnName.startswith("\01")) FnName = FnName.substr(1); StringRef NameItems[] = { - PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName}; + PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) { std::string Name = SL->getString(); @@ -2867,6 +2879,11 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, CheckRecoverableKind RecoverKind, bool IsFatal, llvm::BasicBlock *ContBB) { assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable); + Optional<ApplyDebugLocation> DL; + if (!CGF.Builder.getCurrentDebugLocation()) { + // Ensure that the call has at least an artificial debug location. 
+ DL.emplace(CGF, SourceLocation()); + } bool NeedsAbortSuffix = IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable; bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime; @@ -3478,7 +3495,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); - if (getLangOpts().ObjC1 && + if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC) { LV.setNonGC(!E->isOBJCGCCandidate(getContext())); setObjCGCLValueClass(getContext(), E, LV); @@ -3931,7 +3948,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); if (RecordCVR & Qualifiers::Volatile) - RefLVal.getQuals().setVolatile(true); + RefLVal.getQuals().addVolatile(); addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); // Qualifiers on the struct don't apply to the referencee. @@ -4151,8 +4168,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: - case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -4246,6 +4264,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); } + case CK_AddressSpaceConversion: { + LValue LV = EmitLValue(E->getSubExpr()); + QualType DestTy = getContext().getPointerType(E->getType()); + llvm::Value *V = getTargetHooks().performAddrSpaceCast( + *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(), + E->getType().getAddressSpace(), ConvertType(DestTy)); + return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()), + E->getType(), LV.getBaseInfo(), LV.getTBAAInfo()); + } case 
CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); Address V = Builder.CreateElementBitCast(LV.getAddress(), @@ -4253,10 +4280,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); } - case CK_ZeroToOCLQueue: - llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid"); - case CK_ZeroToOCLEvent: - llvm_unreachable("NULL to OpenCL event lvalue cast is not valid"); + case CK_ZeroToOCLOpaqueType: + llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid"); } llvm_unreachable("Unhandled lvalue cast kind?"); @@ -4363,7 +4388,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) { } llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD); - return CGCallee::forDirect(calleePtr, FD); + return CGCallee::forDirect(calleePtr, GlobalDecl(FD)); } CGCallee CodeGenFunction::EmitCallee(const Expr *E) { @@ -4407,8 +4432,13 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { calleePtr = EmitLValue(E).getPointer(); } assert(functionType->isFunctionType()); - CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), - E->getReferencedDeclOfCallee()); + + GlobalDecl GD; + if (const auto *VD = + dyn_cast_or_null<VarDecl>(E->getReferencedDeclOfCallee())) + GD = GlobalDecl(VD); + + CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD); CGCallee callee(calleeInfo, calleePtr); return callee; } @@ -4593,7 +4623,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee assert(CalleeType->isFunctionPointerType() && "Call must have function pointer type!"); - const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = + OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) // We can only guarantee that a function is called from the 
correct diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 6264110286..db49b3f28a 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -125,6 +125,10 @@ public: return Visit(E->getReplacement()); } + void VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } + // l-values. void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); } void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); } @@ -847,10 +851,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_ZeroToOCLOpaqueType: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: llvm_unreachable("cast kind invalid for aggregate types"); } } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 393a4aa787..2e0d4ca767 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -17,8 +17,8 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "ConstantEmitter.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" @@ -177,7 +177,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, if (MD->isStatic()) { // The method is static, emit it as we would a regular call. 
- CGCallee callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD), MD); + CGCallee callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD)); return EmitCall(getContext().getPointerType(MD->getType()), callee, CE, ReturnValue); } @@ -353,13 +354,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( else if (!DevirtualizedMethod) Callee = CGCallee::forDirect( CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty), - Dtor); + GlobalDecl(Dtor, Dtor_Complete)); else { const CXXDestructorDecl *DDtor = cast<CXXDestructorDecl>(DevirtualizedMethod); Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), - DDtor); + CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), + GlobalDecl(DDtor, Dtor_Complete)); } EmitCXXMemberOrOperatorCall( CalleeDecl, Callee, ReturnValue, This.getPointer(), @@ -371,8 +372,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CGCallee Callee; if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), - Ctor); + CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), + GlobalDecl(Ctor, Ctor_Complete)); } else if (UseVirtualCall) { Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { @@ -389,11 +390,12 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty); else if (!DevirtualizedMethod) - Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), MD); + Callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), GlobalDecl(MD)); else { - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(DevirtualizedMethod, Ty), - DevirtualizedMethod); + Callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(DevirtualizedMethod, Ty), + GlobalDecl(DevirtualizedMethod)); } } @@ -1293,7 +1295,7 @@ static 
RValue EmitNewDeleteCall(CodeGenFunction &CGF, const CallArgList &Args) { llvm::Instruction *CallOrInvoke; llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl); - CGCallee Callee = CGCallee::forDirect(CalleePtr, CalleeDecl); + CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); RValue RV = CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall( Args, CalleeType, /*chainCall=*/false), @@ -2252,7 +2254,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, } void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) { - RunCleanupsScope Scope(*this); LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType()); CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index fb176093a7..2db693b44c 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -101,6 +101,9 @@ public: llvm_unreachable("Stmt can't have complex result type!"); } ComplexPairTy VisitExpr(Expr *S); + ComplexPairTy VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());} ComplexPairTy VisitGenericSelectionExpr(GenericSelectionExpr *GE) { return Visit(GE->getResultExpr()); @@ -505,10 +508,11 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_ZeroToOCLOpaqueType: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 651b05a26f..c9475840ae 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ 
-47,7 +47,7 @@ class ConstStructBuilder { public: static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant *Base, InitListExpr *Updater, QualType ValTy); static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, @@ -76,7 +76,7 @@ private: void ConvertStructToPacked(); bool Build(InitListExpr *ILE); - bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base, + bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base, InitListExpr *Updater); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); @@ -566,7 +566,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant *Base, InitListExpr *Updater, QualType ValTy) { ConstStructBuilder Builder(Emitter); @@ -723,6 +723,10 @@ public: return nullptr; } + llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) { + return Visit(CE->getSubExpr(), T); + } + llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) { return Visit(PE->getSubExpr(), T); } @@ -869,8 +873,9 @@ public: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_ZeroToOCLOpaqueType: return nullptr; } llvm_unreachable("Invalid CastKind"); @@ -1026,8 +1031,8 @@ public: } if (destType->isRecordType()) - return ConstStructBuilder::BuildStruct(Emitter, this, - dyn_cast<llvm::ConstantStruct>(Base), Updater, destType); + return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater, + destType); return nullptr; } @@ -1102,7 +1107,7 @@ public: } // end anonymous namespace. 
bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant *Base, InitListExpr *Updater) { assert(Base && "base expression should not be empty"); @@ -1110,7 +1115,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout( - Base->getType()); + cast<llvm::StructType>(Base->getType())); unsigned FieldNo = -1; unsigned ElementNo = 0; @@ -1131,7 +1136,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, if (Field->isUnnamedBitfield()) continue; - llvm::Constant *EltInit = Base->getOperand(ElementNo); + llvm::Constant *EltInit = Base->getAggregateElement(ElementNo); // Bail out if the type of the ConstantStruct does not have the same layout // as the type of the InitListExpr. @@ -1450,6 +1455,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { if (CD->isTrivial() && CD->isDefaultConstructor()) return CGM.EmitNullConstant(D.getType()); } + InConstantContext = true; } QualType destType = D.getType(); @@ -1547,7 +1553,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, if (destType->isReferenceType()) Success = E->EvaluateAsLValue(Result, CGM.getContext()); else - Success = E->EvaluateAsRValue(Result, CGM.getContext()); + Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext); llvm::Constant *C; if (Success && !Result.HasSideEffects) @@ -1600,6 +1606,7 @@ private: ConstantLValue tryEmitBase(const APValue::LValueBase &base); ConstantLValue VisitStmt(const Stmt *S) { return nullptr; } + ConstantLValue VisitConstantExpr(const ConstantExpr *E); ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); ConstantLValue VisitStringLiteral(const StringLiteral *E); ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); @@ 
-1755,6 +1762,11 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { } ConstantLValue +ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) { + return Visit(E->getSubExpr()); +} + +ConstantLValue ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E); } @@ -1782,7 +1794,7 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { return cast<ConstantAddress>(Res.getAddress()); } - auto kind = E->getIdentType(); + auto kind = E->getIdentKind(); if (kind == PredefinedExpr::PrettyFunction) { return CGM.GetAddrOfConstantCString("top level", ".tmp"); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 4995db28ba..f53bb33e46 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" -#include "CGCleanup.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -23,8 +23,9 @@ #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/FixedPoint.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/Optional.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -302,7 +303,11 @@ public: /// Known implicit conversion check kinds. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { - ICCK_IntegerTruncation = 0, + ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. 
+ ICCK_UnsignedIntegerTruncation = 1, + ICCK_SignedIntegerTruncation = 2, + ICCK_IntegerSignChange = 3, + ICCK_SignedIntegerTruncationOrSignChange = 4, }; /// Emit a check that an [implicit] truncation of an integer does not @@ -310,21 +315,39 @@ public: void EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc); + /// Emit a check that an [implicit] conversion of an integer does not change + /// the sign of the value. It is not UB, so we use the value after conversion. + /// NOTE: Src and Dst may be the exact same value! (point to the same thing) + void EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, SourceLocation Loc); + /// Emit a conversion from the specified type to the specified destination /// type, both of which are LLVM scalar types. struct ScalarConversionOpts { bool TreatBooleanAsSigned; bool EmitImplicitIntegerTruncationChecks; + bool EmitImplicitIntegerSignChangeChecks; ScalarConversionOpts() : TreatBooleanAsSigned(false), - EmitImplicitIntegerTruncationChecks(false) {} + EmitImplicitIntegerTruncationChecks(false), + EmitImplicitIntegerSignChangeChecks(false) {} + + ScalarConversionOpts(clang::SanitizerSet SanOpts) + : TreatBooleanAsSigned(false), + EmitImplicitIntegerTruncationChecks( + SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)), + EmitImplicitIntegerSignChangeChecks( + SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {} }; Value * EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); + Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, + SourceLocation Loc); + /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. 
Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, @@ -382,6 +405,9 @@ public: } Value *VisitExpr(Expr *S); + Value *VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } Value *VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr()); } @@ -450,19 +476,10 @@ public: return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal(); } - Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant, - Expr *E) { - assert(Constant && "not a constant"); - if (Constant.isReference()) - return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), - E->getExprLoc()); - return Constant.getValue(); - } - // l-values. Value *VisitDeclRefExpr(DeclRefExpr *E) { if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) - return emitConstant(Constant, E); + return CGF.emitScalarConstant(Constant, E); return EmitLoadOfLValue(E); } @@ -664,7 +681,7 @@ public: case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), Ops)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); @@ -941,48 +958,233 @@ void ScalarExprEmitter::EmitFloatConversionCheck( SanitizerHandler::FloatCastOverflow, StaticArgs, OrigSrc); } +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the truncation Src -> Dst was lossy. +static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> +EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + (void)DstTy; // Only used in assert() + + // This should be truncation of integral types. 
+ assert(Src != Dst); + assert(SrcTy->getScalarSizeInBits() > Dst->getType()->getScalarSizeInBits()); + assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && + "non-integer llvm type"); + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + // If both (src and dst) types are unsigned, then it's an unsigned truncation. + // Else, it is a signed truncation. + ScalarExprEmitter::ImplicitConversionCheckKind Kind; + SanitizerMask Mask; + if (!SrcSigned && !DstSigned) { + Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; + Mask = SanitizerKind::ImplicitUnsignedIntegerTruncation; + } else { + Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; + Mask = SanitizerKind::ImplicitSignedIntegerTruncation; + } + + llvm::Value *Check = nullptr; + // 1. Extend the truncated value back to the same width as the Src. + Check = Builder.CreateIntCast(Dst, SrcTy, DstSigned, "anyext"); + // 2. Equality-compare with the original source value + Check = Builder.CreateICmpEQ(Check, Src, "truncheck"); + // If the comparison result is 'i1 false', then the truncation was lossy. + return std::make_pair(Kind, std::make_pair(Check, Mask)); +} + void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc) { - if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) + if (!CGF.SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)) return; - llvm::Type *SrcTy = Src->getType(); - llvm::Type *DstTy = Dst->getType(); - // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. 
if (!(SrcType->isIntegerType() && DstType->isIntegerType())) return; - assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && - "clang integer type lowered to non-integer llvm type"); - - unsigned SrcBits = SrcTy->getScalarSizeInBits(); - unsigned DstBits = DstTy->getScalarSizeInBits(); + unsigned SrcBits = Src->getType()->getScalarSizeInBits(); + unsigned DstBits = Dst->getType()->getScalarSizeInBits(); // This must be truncation. Else we do not care. if (SrcBits <= DstBits) return; assert(!DstType->isBooleanType() && "we should not get here with booleans."); + // If the integer sign change sanitizer is enabled, + // and we are truncating from larger unsigned type to smaller signed type, + // let that next sanitizer deal with it. + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange) && + (!SrcSigned && DstSigned)) + return; + CodeGenFunction::SanitizerScope SanScope(&CGF); + std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> + Check = + EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + // If the comparison result is 'i1 false', then the truncation was lossy. + + // Do we care about this type of truncation? + if (!CGF.SanOpts.has(Check.second.second)) + return; + + llvm::Constant *StaticArgs[] = { + CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), + CGF.EmitCheckTypeDescriptor(DstType), + llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)}; + CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); +} + +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the conversion Src -> Dst changed the sign. 
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> +EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + + assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && + "non-integer llvm type"); + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + (void)SrcSigned; // Only used in assert() + (void)DstSigned; // Only used in assert() + unsigned SrcBits = SrcTy->getScalarSizeInBits(); + unsigned DstBits = DstTy->getScalarSizeInBits(); + (void)SrcBits; // Only used in assert() + (void)DstBits; // Only used in assert() + + assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && + "either the widths should be different, or the signednesses."); + + // NOTE: zero value is considered to be non-negative. + auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType, + const char *Name) -> Value * { + // Is this value a signed type? + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + // FIXME: can we encounter non-scalar VTy here? + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + // Get the zero of the same type with which we will be comparing. + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + // %V.isnegative = icmp slt %V, 0 + // I.e is %V *strictly* less than zero, does it have negative value? + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); + }; + + // 1. Was the old Value negative? + llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); + // 2. Is the new Value negative? 
+ llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); + // 3. Now, was the 'negativity status' preserved during the conversion? + // NOTE: conversion from negative to zero is considered to change the sign. + // (We want to get 'false' when the conversion changed the sign) + // So we should just equality-compare the negativity statuses. llvm::Value *Check = nullptr; + Check = Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "signchangecheck"); + // If the comparison result is 'false', then the conversion changed the sign. + return std::make_pair( + ScalarExprEmitter::ICCK_IntegerSignChange, + std::make_pair(Check, SanitizerKind::ImplicitIntegerSignChange)); +} - // 1. Extend the truncated value back to the same width as the Src. - bool InputSigned = DstType->isSignedIntegerOrEnumerationType(); - Check = Builder.CreateIntCast(Dst, SrcTy, InputSigned, "anyext"); - // 2. Equality-compare with the original source value - Check = Builder.CreateICmpEQ(Check, Src, "truncheck"); - // If the comparison result is 'i1 false', then the truncation was lossy. +void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, + Value *Dst, QualType DstType, + SourceLocation Loc) { + if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) + return; + + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + + // We only care about int->int conversions here. + // We ignore conversions to/from pointer and/or bool. + if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + return; + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + unsigned SrcBits = SrcTy->getScalarSizeInBits(); + unsigned DstBits = DstTy->getScalarSizeInBits(); + + // Now, we do not need to emit the check in *all* of the cases. + // We can avoid emitting it in some obvious cases where it would have been + // dropped by the opt passes (instcombine) always anyways. 
+ // If it's a cast between effectively the same type, no check. + // NOTE: this is *not* equivalent to checking the canonical types. + if (SrcSigned == DstSigned && SrcBits == DstBits) + return; + // At least one of the values needs to have signed type. + // If both are unsigned, then obviously, neither of them can be negative. + if (!SrcSigned && !DstSigned) + return; + // If the conversion is to *larger* *signed* type, then no check is needed. + // Because either sign-extension happens (so the sign will remain), + // or zero-extension will happen (the sign bit will be zero.) + if ((DstBits > SrcBits) && DstSigned) + return; + if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) && + (SrcBits > DstBits) && SrcSigned) { + // If the signed integer truncation sanitizer is enabled, + // and this is a truncation from signed type, then no check is needed. + // Because here sign change check is interchangeable with truncation check. + return; + } + // That's it. We can't rule out any more cases with the data we have. + + CodeGenFunction::SanitizerScope SanScope(&CGF); + + std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> + Check; + + // Each of these checks needs to return 'false' when an issue was detected. + ImplicitConversionCheckKind CheckKind; + llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks; + // So we can 'and' all the checks together, and still get 'false', + // if at least one of the checks detected an issue. + + Check = EmitIntegerSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); + CheckKind = Check.first; + Checks.emplace_back(Check.second); + + if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) && + (SrcBits > DstBits) && !SrcSigned && DstSigned) { + // If the signed integer truncation sanitizer was enabled, + // and we are truncating from larger unsigned type to smaller signed type, + // let's handle the case we skipped in that check. 
+ Check = + EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + CheckKind = ICCK_SignedIntegerTruncationOrSignChange; + Checks.emplace_back(Check.second); + // If the comparison result is 'i1 false', then the truncation was lossy. + } llvm::Constant *StaticArgs[] = { CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), CGF.EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), ICCK_IntegerTruncation)}; - CGF.EmitCheck(std::make_pair(Check, SanitizerKind::ImplicitIntegerTruncation), - SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst}); + llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)}; + // EmitCheck() will 'and' all the checks together. + CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); } /// Emit a conversion from the specified type to the specified destination type, @@ -991,6 +1193,27 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, QualType DstType, SourceLocation Loc, ScalarConversionOpts Opts) { + // All conversions involving fixed point types should be handled by the + // EmitFixedPoint family functions. This is done to prevent bloating up this + // function more, and although fixed point numbers are represented by + // integers, we do not want to follow any logic that assumes they should be + // treated as integers. + // TODO(leonardchan): When necessary, add another if statement checking for + // conversions to fixed point types from other types. + if (SrcType->isFixedPointType()) { + if (DstType->isFixedPointType()) { + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); + } else if (DstType->isBooleanType()) { + // We do not need to check the padding bit on unsigned types if unsigned + // padding is enabled because overflow into this bit is undefined + // behavior. 
+ return Builder.CreateIsNotNull(Src, "tobool"); + } + + llvm_unreachable( + "Unhandled scalar conversion involving a fixed point type."); + } + QualType NoncanonicalSrcType = SrcType; QualType NoncanonicalDstType = DstType; @@ -1036,8 +1259,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, } // Ignore conversions like int -> uint. - if (SrcTy == DstTy) + if (SrcTy == DstTy) { + if (Opts.EmitImplicitIntegerSignChangeChecks) + EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Src, + NoncanonicalDstType, Loc); + return Src; + } // Handle pointer conversions next: pointers can only be converted to/from // other pointers and integers. Check for pointer types in terms of LLVM, as @@ -1181,9 +1409,91 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, EmitIntegerTruncationCheck(Src, NoncanonicalSrcType, Res, NoncanonicalDstType, Loc); + if (Opts.EmitImplicitIntegerSignChangeChecks) + EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Res, + NoncanonicalDstType, Loc); + return Res; } +Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, + QualType DstTy, + SourceLocation Loc) { + using llvm::APInt; + using llvm::ConstantInt; + using llvm::Value; + + assert(SrcTy->isFixedPointType()); + assert(DstTy->isFixedPointType()); + + FixedPointSemantics SrcFPSema = + CGF.getContext().getFixedPointSemantics(SrcTy); + FixedPointSemantics DstFPSema = + CGF.getContext().getFixedPointSemantics(DstTy); + unsigned SrcWidth = SrcFPSema.getWidth(); + unsigned DstWidth = DstFPSema.getWidth(); + unsigned SrcScale = SrcFPSema.getScale(); + unsigned DstScale = DstFPSema.getScale(); + bool SrcIsSigned = SrcFPSema.isSigned(); + bool DstIsSigned = DstFPSema.isSigned(); + + llvm::Type *DstIntTy = Builder.getIntNTy(DstWidth); + + Value *Result = Src; + unsigned ResultWidth = SrcWidth; + + if (!DstFPSema.isSaturated()) { + // Downscale. + if (DstScale < SrcScale) + Result = SrcIsSigned ? 
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : + Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + + // Resize. + Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + + // Upscale. + if (DstScale > SrcScale) + Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); + } else { + // Adjust the number of fractional bits. + if (DstScale > SrcScale) { + ResultWidth = SrcWidth + DstScale - SrcScale; + llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth); + Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); + Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); + } else if (DstScale < SrcScale) { + Result = SrcIsSigned ? + Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : + Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + + // Handle saturation. + bool LessIntBits = DstFPSema.getIntegralBits() < SrcFPSema.getIntegralBits(); + if (LessIntBits) { + Value *Max = ConstantInt::get( + CGF.getLLVMContext(), + APFixedPoint::getMax(DstFPSema).getValue().extOrTrunc(ResultWidth)); + Value *TooHigh = SrcIsSigned ? Builder.CreateICmpSGT(Result, Max) + : Builder.CreateICmpUGT(Result, Max); + Result = Builder.CreateSelect(TooHigh, Max, Result, "satmax"); + } + // Cannot overflow min to dest type if src is unsigned since all fixed + // point types can cover the unsigned min of 0. + if (SrcIsSigned && (LessIntBits || !DstIsSigned)) { + Value *Min = ConstantInt::get( + CGF.getLLVMContext(), + APFixedPoint::getMin(DstFPSema).getValue().extOrTrunc(ResultWidth)); + Value *TooLow = Builder.CreateICmpSLT(Result, Min); + Result = Builder.CreateSelect(TooLow, Min, Result, "satmin"); + } + + // Resize the integer part to get the final destination size. 
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + } + return Result; +} + /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. Value *ScalarExprEmitter::EmitComplexToScalarConversion( @@ -1405,10 +1715,11 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) { CGF.EmitIgnoredExpr(E->getBase()); - return emitConstant(Constant, E); + return CGF.emitScalarConstant(Constant, E); } else { - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { + Expr::EvalResult Result; + if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) { + llvm::APSInt Value = Result.Val.getInt(); CGF.EmitIgnoredExpr(E->getBase()); return Builder.getInt(Value); } @@ -1874,11 +2185,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } + case CK_FixedPointCast: + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + + case CK_FixedPointToBoolean: + assert(E->getType()->isFixedPointType() && + "Expected src type to be fixed point type"); + assert(DestTy->isBooleanType() && "Expected dest type to be boolean type"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + case CK_IntegralCast: { ScalarConversionOpts Opts; - if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) { - if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) - Opts.EmitImplicitIntegerTruncationChecks = !ICE->isPartOfExplicitCast(); + if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) { + if (!ICE->isPartOfExplicitCast()) + Opts = ScalarConversionOpts(CGF.SanOpts); } return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc(), Opts); @@ -1919,13 
+2241,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { CE->getExprLoc()); } - case CK_ZeroToOCLEvent: { - assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type"); - return llvm::Constant::getNullValue(ConvertType(DestTy)); - } - - case CK_ZeroToOCLQueue: { - assert(DestTy->isQueueT() && "CK_ZeroToOCLQueue cast on non queue_t type"); + case CK_ZeroToOCLOpaqueType: { + assert((DestTy->isEventT() || DestTy->isQueueT() || + DestTy->isOCLIntelSubgroupAVCType()) && + "CK_ZeroToOCLEvent cast on non-event type"); return llvm::Constant::getNullValue(ConvertType(DestTy)); } @@ -1984,7 +2303,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(InVal, Amount, Name); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (!E->canOverflow()) return Builder.CreateNSWAdd(InVal, Amount, Name); @@ -2279,9 +2598,11 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { // Try folding the offsetof to a constant. - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext())) + Expr::EvalResult EVResult; + if (E->EvaluateAsInt(EVResult, CGF.getContext())) { + llvm::APSInt Value = EVResult.Val.getInt(); return Builder.getInt(Value); + } // Loop over the components of the offsetof to compute the value. unsigned n = E->getNumComponents(); @@ -2550,9 +2871,10 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Expand the binary operator. Result = (this->*Func)(OpInfo); - // Convert the result back to the LHS type. - Result = - EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, Loc); + // Convert the result back to the LHS type, + // potentially with Implicit Conversion sanitizer check. 
+ Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, + Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *opBB = Builder.GetInsertBlock(); @@ -2990,7 +3312,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); @@ -3025,7 +3347,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index 8f9a9b9607..169ae4fcde 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -10,8 +10,8 @@ #include "CGLoopInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" -#include "clang/Sema/LoopHint.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -335,10 +335,10 @@ void LoopInfoStack::InsertHelper(Instruction *I) const { if (!L.getLoopID()) return; - if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { - for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) - if (TI->getSuccessor(i) == L.getHeader()) { - TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); + if (I->isTerminator()) { + for (BasicBlock *Succ : successors(I)) + if (Succ == L.getHeader()) { + I->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); break; } return; diff --git 
a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index e9f60a9113..c6a96a9126 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -187,6 +187,7 @@ template <class Derived> struct GenFuncNameBase { if (!FK) return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset); + asDerived().flushTrivialFields(); CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); ASTContext &Ctx = asDerived().getContext(); const ConstantArrayType *CAT = cast<ConstantArrayType>(AT); @@ -336,6 +337,7 @@ template <class Derived> struct GenFuncBase { return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + asDerived().flushTrivialFields(Addrs); CodeGenFunction &CGF = *this->CGF; ASTContext &Ctx = CGF.getContext(); @@ -456,12 +458,13 @@ template <class Derived> struct GenFuncBase { llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); F->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.SetLLVMFunctionAttributes(nullptr, FI, F); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F); IdentifierInfo *II = &Ctx.Idents.get(FuncName); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr, + SC_PrivateExtern, false, false); CodeGenFunction NewCGF(CGM); setCGF(&NewCGF); CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index c62f40b790..cc582b926b 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -352,6 +352,56 @@ static const Expr *findWeakLValue(const Expr *E) { return nullptr; } +/// The ObjC runtime may provide entrypoints that are likely to be faster +/// than an ordinary message send of the appropriate selector. 
+/// +/// The entrypoints are guaranteed to be equivalent to just sending the +/// corresponding message. If the entrypoint is implemented naively as just a +/// message send, using it is a trade-off: it sacrifices a few cycles of +/// overhead to save a small amount of code. However, it's possible for +/// runtimes to detect and special-case classes that use "standard" +/// behavior; if that's dynamically a large proportion of all objects, using +/// the entrypoint will also be faster than using a message send. +/// +/// If the runtime does support a required entrypoint, then this method will +/// generate a call and return the resulting value. Otherwise it will return +/// None and the caller can generate a msgSend instead. +static Optional<llvm::Value *> +tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, + llvm::Value *Receiver, + const CallArgList& Args, Selector Sel, + const ObjCMethodDecl *method) { + auto &CGM = CGF.CGM; + if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls) + return None; + + auto &Runtime = CGM.getLangOpts().ObjCRuntime; + switch (Sel.getMethodFamily()) { + case OMF_alloc: + if (Runtime.shouldUseRuntimeFunctionsForAlloc() && + ResultType->isObjCObjectPointerType()) { + // [Foo alloc] -> objc_alloc(Foo) + if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc") + return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType)); + // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) + if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 && + Args.size() == 1 && Args.front().getType()->isPointerType() && + Sel.getNameForSlot(0) == "allocWithZone") { + const llvm::Value* arg = Args.front().getKnownRValue().getScalarVal(); + if (isa<llvm::ConstantPointerNull>(arg)) + return CGF.EmitObjCAllocWithZone(Receiver, + CGF.ConvertType(ResultType)); + return None; + } + } + break; + + default: + break; + } + return None; +} + RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot 
Return) { // Only the lookup mechanism and first two arguments of the method @@ -474,10 +524,16 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, Args, method); } else { - result = Runtime.GenerateMessageSend(*this, Return, ResultType, - E->getSelector(), - Receiver, Args, OID, - method); + // Call runtime methods directly if we can. + if (Optional<llvm::Value *> SpecializedResult = + tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args, + E->getSelector(), method)) { + result = RValue::get(SpecializedResult.getValue()); + } else { + result = Runtime.GenerateMessageSend(*this, Return, ResultType, + E->getSelector(), Receiver, Args, + OID, method); + } } // For delegate init calls in ARC, implicitly store the result of @@ -568,7 +624,7 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, LValue lvalue, QualType type); /// Generate an Objective-C method. An Objective-C method is a C function with -/// its pointer, name, and types registered in the class struture. +/// its pointer, name, and types registered in the class structure. void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) { StartObjCMethod(OMD, OMD->getClassInterface()); PGO.assignRegionCounters(GlobalDecl(OMD), CurFn); @@ -883,9 +939,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // If there's a non-trivial 'get' expression, we just have to emit that. if (!hasTrivialGetExpr(propImpl)) { if (!AtomicHelperFn) { - ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(), - /*nrvo*/ nullptr); - EmitReturnStmt(ret); + auto *ret = ReturnStmt::Create(getContext(), SourceLocation(), + propImpl->getGetterCXXConstructor(), + /* NRVOCandidate=*/nullptr); + EmitReturnStmt(*ret); } else { ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); @@ -1844,6 +1901,7 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, /// where a null input causes a no-op and returns null. 
static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value, + llvm::Type *returnType, llvm::Constant *&fn, StringRef fnName, bool isTailCall = false) { @@ -1857,7 +1915,7 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, } // Cast the argument to 'id'. - llvm::Type *origType = value->getType(); + llvm::Type *origType = returnType ? returnType : value->getType(); value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. @@ -1963,7 +2021,7 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) { /// Retain the given object, with normal retain semantics. /// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retain, "objc_retain"); } @@ -1977,7 +2035,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, bool mandatory) { llvm::Value *result - = emitARCValueOperation(*this, value, + = emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainBlock, "objc_retainBlock"); @@ -2047,7 +2105,7 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, "objc_retainAutoreleasedReturnValue"); } @@ -2062,7 +2120,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { llvm::Value * CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, + return 
emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, "objc_unsafeClaimAutoreleasedReturnValue"); } @@ -2177,7 +2235,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, /// Autorelease the given object. /// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autorelease, "objc_autorelease"); } @@ -2186,7 +2244,7 @@ llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) { /// call i8* \@objc_autoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, "objc_autoreleaseReturnValue", /*isTailCall*/ true); @@ -2196,7 +2254,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { /// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, "objc_retainAutoreleaseReturnValue", /*isTailCall*/ true); @@ -2225,7 +2283,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type, /// call i8* \@objc_retainAutorelease(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutorelease, "objc_retainAutorelease"); } @@ -2384,6 +2442,24 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() { return InitRV.getScalarVal(); } +/// Allocate the given 
objc object. +/// call i8* \@objc_alloc(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value, + llvm::Type *resultType) { + return emitARCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_alloc, + "objc_alloc"); +} + +/// Allocate the given objc object. +/// call i8* \@objc_allocWithZone(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value, + llvm::Type *resultType) { + return emitARCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_allocWithZone, + "objc_allocWithZone"); +} + /// Produce the code to do a primitive release. /// [tmp drain]; void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { @@ -2446,27 +2522,36 @@ void CodeGenFunction::EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr) { EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, Ptr); } -static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, - LValue lvalue, - QualType type) { - switch (type.getObjCLifetime()) { +static bool shouldRetainObjCLifetime(Qualifiers::ObjCLifetime lifetime) { + switch (lifetime) { case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: case Qualifiers::OCL_Strong: case Qualifiers::OCL_Autoreleasing: - return TryEmitResult(CGF.EmitLoadOfLValue(lvalue, - SourceLocation()).getScalarVal(), - false); + return true; case Qualifiers::OCL_Weak: - return TryEmitResult(CGF.EmitARCLoadWeakRetained(lvalue.getAddress()), - true); + return false; } llvm_unreachable("impossible lifetime!"); } static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, + LValue lvalue, + QualType type) { + llvm::Value *result; + bool shouldRetain = shouldRetainObjCLifetime(type.getObjCLifetime()); + if (shouldRetain) { + result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal(); + } else { + assert(type.getObjCLifetime() == Qualifiers::OCL_Weak); + result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress()); + } + return 
TryEmitResult(result, !shouldRetain); +} + +static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, const Expr *e) { e = e->IgnoreParens(); QualType type = e->getType(); @@ -2500,6 +2585,16 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, cast<BinaryOperator>(e)->getOpcode() == BO_Assign) return TryEmitResult(CGF.EmitScalarExpr(e), false); + // Try to emit code for scalar constant instead of emitting LValue and + // loading it because we are not guaranteed to have an l-value. One of such + // cases is DeclRefExpr referencing non-odr-used constant-evaluated variable. + if (const auto *decl_expr = dyn_cast<DeclRefExpr>(e)) { + auto *DRE = const_cast<DeclRefExpr *>(decl_expr); + if (CodeGenFunction::ConstantEmission constant = CGF.tryEmitAsConstant(DRE)) + return TryEmitResult(CGF.emitScalarConstant(constant, DRE), + !shouldRetainObjCLifetime(type.getObjCLifetime())); + } + return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type); } @@ -3229,29 +3324,32 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( ASTContext &C = getContext(); IdentifierInfo *II = &CGM.getContext().Idents.get("__assign_helper_atomic_property_"); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + QualType ReturnTy = C.VoidTy; QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(DestTy); + ArgTys.push_back(SrcTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - DestTy, ImplicitParamDecl::Other); + ImplicitParamDecl 
DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, + ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - SrcTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, + ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); @@ -3262,7 +3360,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); @@ -3301,50 +3399,51 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) return nullptr; llvm::Constant *HelperFn = nullptr; - if (hasTrivialGetExpr(PID)) return nullptr; assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null"); if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty))) return HelperFn; - ASTContext &C = getContext(); - IdentifierInfo *II - = &CGM.getContext().Idents.get("__copy_helper_atomic_property_"); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + IdentifierInfo *II = + &CGM.getContext().Idents.get("__copy_helper_atomic_property_"); + QualType ReturnTy = C.VoidTy; QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(DestTy); + ArgTys.push_back(SrcTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = 
FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - DestTy, ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, + ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - SrcTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, + ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = - llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, - "__copy_helper_atomic_property_", &CGM.getModule()); + llvm::Function *Fn = llvm::Function::Create( + LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_", + &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 2b6d895f93..d91eb43ca3 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -23,9 +23,9 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LangOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" @@ -7188,15 +7188,21 @@ 
CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, Weak ? llvm::GlobalValue::ExternalWeakLinkage : llvm::GlobalValue::ExternalLinkage; - - llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); - if (!GV) { - GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy, - false, L, nullptr, Name); + if (!GV || GV->getType() != ObjCTypes.ClassnfABITy->getPointerTo()) { + auto *NewGV = new llvm::GlobalVariable(ObjCTypes.ClassnfABITy, false, L, + nullptr, Name); if (DLLImport) - GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + + if (GV) { + GV->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(NewGV, GV->getType())); + GV->eraseFromParent(); + } + GV = NewGV; + CGM.getModule().getGlobalList().push_back(GV); } assert(GV->getLinkage() == L); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 8390bca737..4b6f24a03f 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -296,7 +296,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF, switch (paramDecl->getType().getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: exn = CGF.EmitARCRetainNonBlock(exn); - // fallthrough + LLVM_FALLTHROUGH; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 1da19a90c3..7f6f595dd5 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -62,6 +62,11 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::OCLReserveID: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc); +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: \ + return llvm::PointerType::get( \ + llvm::StructType::create(Ctx, "opencl." 
#ExtType), AddrSpc); +#include "clang/Basic/OpenCLExtensionTypes.def" } } @@ -118,25 +123,6 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } -// Get the block literal from an expression derived from the block expression. -// OpenCL v2.0 s6.12.5: -// Block variable declarations are implicitly qualified with const. Therefore -// all block variables must be initialized at declaration time and may not be -// reassigned. -static const BlockExpr *getBlockExpr(const Expr *E) { - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - if (auto DR = dyn_cast<DeclRefExpr>(E)) { - E = cast<VarDecl>(DR->getDecl())->getInit(); - } - E = E->IgnoreImplicit(); - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - return cast<BlockExpr>(E); -} - /// Record emitted llvm invoke function and llvm block literal for the /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, @@ -151,15 +137,21 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, EnqueuedBlockMap[E].Kernel = nullptr; } -llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { - return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; -} - CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF.EmitScalarExpr(E); - const BlockExpr *Block = getBlockExpr(E); + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. 
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + E = E->IgnoreImplicit(); + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } + auto *Block = cast<BlockExpr>(E); + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && "Block expression not emitted"); diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index a513340827..3da55af065 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -91,10 +91,6 @@ public: /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); - - /// \return LLVM block invoke function emitted for an expression derived from - /// the block expression. - llvm::Function *getInvokeFunction(const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index d421729faa..66f0783e27 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1223,6 +1223,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, void CGOpenMPRuntime::clear() { InternalVars.clear(); + // Clean non-target variable declarations possibly used only in debug info. 
+ for (const auto &Data : EmittedNonTargetVariables) { + if (!Data.getValue().pointsToAliveValue()) + continue; + auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); + if (!GV) + continue; + if (!GV->isDeclaration() || GV->getNumUses() > 0) + continue; + GV->eraseFromParent(); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1456,7 +1467,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); - llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); + unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); + FlagsTy FlagsKey(Flags, Reserved2Flags); + llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); if (!Entry) { if (!DefaultOpenMPPSource) { // Initialize default location for psource field of ident_t structure of @@ -1469,22 +1482,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - DefaultOpenMPPSource}; + llvm::Constant *Data[] = { + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; llvm::GlobalValue *DefaultOpenMPLocation = - createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "", + createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", llvm::GlobalValue::PrivateLinkage); DefaultOpenMPLocation->setUnnamedAddr( llvm::GlobalValue::UnnamedAddr::Global); - OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; + OpenMPDefaultLocMap[FlagsKey] = Entry = 
DefaultOpenMPLocation; } return Address(Entry, Align); } +void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, + bool AtCurrentPoint) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); + + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + if (AtCurrentPoint) { + Elem.second.ServiceInsertPt = new llvm::BitCastInst( + Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); + } else { + Elem.second.ServiceInsertPt = + new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); + Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); + } +} + +void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (Elem.second.ServiceInsertPt) { + llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; + Elem.second.ServiceInsertPt = nullptr; + Ptr->eraseFromParent(); + } +} + llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { @@ -1511,8 +1549,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), CGF.getTypeSize(IdentQTy)); } @@ -1582,21 +1622,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // kmpc_global_thread_num(ident_t *loc). // Generate thread id value and cache this value for use across the // function. 
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); llvm::CallInst *Call = CGF.Builder.CreateCall( createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = Call; return Call; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPLocThreadIDMap.count(CGF.CurFn)) + if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { + clearLocThreadIdInsertPt(CGF); OpenMPLocThreadIDMap.erase(CGF.CurFn); + } if (FunctionUDRMap.count(CGF.CurFn) > 0) { for(auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); @@ -2470,8 +2514,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; VD = VD->getDefinition(CGM.getContext()); - if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { - ThreadPrivateWithDefinition.insert(VD); + if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; @@ -2617,7 +2660,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); - if (VD && !DeclareTargetWithDefinition.insert(VD).second) + if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) return CGM.getLangOpts().OpenMPIsDevice; QualType ASTTy = VD->getType(); @@ -3171,13 +3214,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -void 
CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { - if (!CGF.HaveInsertPoint()) - return; - // Build call __kmpc_cancel_barrier(loc, thread_id); - // Build call __kmpc_barrier(loc, thread_id); +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -3189,6 +3226,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; + return Flags; +} + +void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks, + bool ForceSimpleCall) { + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Build call __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -3261,6 +3309,18 @@ bool CGOpenMPRuntime::isStaticNonchunked( return Schedule == OMP_dist_sch_static; } +bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const { + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); + return Schedule == OMP_sch_static_chunked; +} + +bool CGOpenMPRuntime::isStaticChunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static_chunked; +} bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { OpenMPSchedType Schedule = @@ -3881,6 +3941,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { llvm::LLVMContext &C = M.getContext(); SmallVector<const 
OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> OrderedEntries(OffloadEntriesInfoManager.size()); + llvm::SmallVector<StringRef, 16> ParentFunctions( + OffloadEntriesInfoManager.size()); // Auxiliary methods to create metadata values and strings. auto &&GetMDInt = [this](unsigned V) { @@ -3895,7 +3957,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -3915,6 +3977,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Save this entry in the right position of the ordered entries array. OrderedEntries[E.getOrder()] = &E; + ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -3956,6 +4019,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( E)) { if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent function is not emitted. 
+ StringRef FnName = ParentFunctions[CE->getOrder()]; + if (!CGM.GetGlobalValue(FnName)) + continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for target region is incorrect: either the " @@ -5215,8 +5282,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, LBLVal.getPointer(), UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, Loc), - llvm::ConstantInt::getNullValue( - CGF.IntTy), // Always 0 because taskgroup emitted by the compiler + llvm::ConstantInt::getSigned( + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -6735,10 +6802,11 @@ private: } // Check if the length evaluates to 1. - llvm::APSInt ConstLength; - if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) + Expr::EvalResult Result; + if (!Length->EvaluateAsInt(Result, CGF.getContext())) return true; // Can have more that size 1. + llvm::APSInt ConstLength = Result.Val.getInt(); return ConstLength.getSExtValue() != 1; } @@ -7489,6 +7557,82 @@ public: } } + /// Emit capture info for lambdas for variables captured by reference. 
+ void generateInfoForLambdaCaptures( + const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types, + llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + return; + Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); + LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(ThisLVal.getPointer()); + Pointers.push_back(ThisLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); + LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(VarLVal.getPointer()); + Pointers.push_back(VarLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType())); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + } + 
+ /// Set correct indices for lambdas captures. + void adjustMemberOfForLambdaCaptures( + const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapFlagsArrayTy &Types) const { + for (unsigned I = 0, E = Types.size(); I < E; ++I) { + // Set correct member_of idx for all implicit lambda captures. + if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + continue; + llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + assert(BasePtr && "Unable to find base lambda address."); + int TgtIdx = -1; + for (unsigned J = I; J > 0; --J) { + unsigned Idx = J - 1; + if (Pointers[Idx] != BasePtr) + continue; + TgtIdx = Idx; + break; + } + assert(TgtIdx != -1 && "Unable to find parent lambda."); + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); + setCorrectMemberOfFlag(Types[I], MemberOfFlag); + } + } + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, @@ -8061,6 +8205,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto CV = CapturedVars.begin(); @@ -8090,6 +8235,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); + // Generate correct mapping for variables captured by reference in + // lambdas. 
+ if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures( + CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, + CurMapTypes, LambdaPointers); } // We expect to have at least an element of information for this capture. assert(!CurBasePointers.empty() && @@ -8111,6 +8262,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Sizes.append(CurSizes.begin(), CurSizes.end()); MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, + Pointers, MapTypes); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, @@ -8304,14 +8458,15 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice) return false; - // Try to detect target regions in the function. const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); + StringRef Name = CGM.getMangledName(GD); + // Try to detect target regions in the function. if (const auto *FD = dyn_cast<FunctionDecl>(VD)) - scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); + scanForTargetRegionsFunctions(FD->getBody(), Name); // Do not to emit function if it is not marked as declare target. 
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && - AlreadyEmittedTargetFunctions.count(VD->getCanonicalDecl()) == 0; + AlreadyEmittedTargetFunctions.count(Name) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -8348,54 +8503,62 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; - StringRef VarName; - CharUnits VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - switch (*Res) { - case OMPDeclareTargetDeclAttr::MT_To: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; - VarName = CGM.getMangledName(VD); - if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { - VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); - assert(!VarSize.isZero() && "Expected non-zero size of the variable"); - } else { - VarSize = CharUnits::Zero(); - } - Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); - // Temp solution to prevent optimizations of the internal variables. 
- if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { - std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { - llvm::Constant *AddrRef = - getOrCreateInternalVariable(Addr->getType(), RefName); - auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); - GVAddrRef->setConstant(/*Val=*/true); - GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); - GVAddrRef->setInitializer(Addr); - CGM.addCompilerUsedGlobal(GVAddrRef); - } - } - break; - case OMPDeclareTargetDeclAttr::MT_Link: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; - if (CGM.getLangOpts().OpenMPIsDevice) { - VarName = Addr->getName(); - Addr = nullptr; - } else { - VarName = getAddrOfDeclareTargetLink(VD).getName(); - Addr = - cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res) { + if (CGM.getLangOpts().OpenMPIsDevice) { + // Register non-target variables being emitted in device code (debug info + // may cause this). + StringRef VarName = CGM.getMangledName(VD); + EmittedNonTargetVariables.try_emplace(VarName, Addr); + } + return; + } + // Register declare target variables. + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + StringRef VarName; + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + switch (*Res) { + case OMPDeclareTargetDeclAttr::MT_To: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + VarName = CGM.getMangledName(VD); + if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { + VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); + assert(!VarSize.isZero() && "Expected non-zero size of the variable"); + } else { + VarSize = CharUnits::Zero(); + } + Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + // Temp solution to prevent optimizations of the internal variables. 
+ if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { + std::string RefName = getName({VarName, "ref"}); + if (!CGM.GetGlobalValue(RefName)) { + llvm::Constant *AddrRef = + getOrCreateInternalVariable(Addr->getType(), RefName); + auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); + GVAddrRef->setConstant(/*Val=*/true); + GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); + GVAddrRef->setInitializer(Addr); + CGM.addCompilerUsedGlobal(GVAddrRef); } - VarSize = CGM.getPointerSize(); - Linkage = llvm::GlobalValue::WeakAnyLinkage; - break; } - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, Flags, Linkage); + break; + case OMPDeclareTargetDeclAttr::MT_Link: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetLink(VD).getName(); + Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, Flags, Linkage); } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { @@ -8422,6 +8585,12 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const { } } +void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { @@ -8440,21 +8609,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; + StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); - const FunctionDecl *FD = 
D->getCanonicalDecl(); // Do not to emit function if it is marked as declare target as it was already // emitted. if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>( - CGM.GetGlobalValue(CGM.getMangledName(GD)))) + if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(FD).second; + return !AlreadyEmittedTargetFunctions.insert(Name).second; } llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { @@ -9002,8 +9170,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamAttrTy &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; if (*SI) { - if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, - Expr::SE_AllowSideEffects)) { + Expr::EvalResult Result; + if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { if (const auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { @@ -9012,6 +9180,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamPositions[StridePVD->getCanonicalDecl()]); } } + } else { + ParamAttr.StrideOrArg = Result.Val.getInt(); } } ++SI; diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 35f75a9ec0..d9ac5df36b 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -19,8 +19,8 @@ #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" @@ -278,12 +278,39 @@ protected: /// stored. 
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); + void setLocThreadIdInsertPt(CodeGenFunction &CGF, + bool AtCurrentPoint = false); + void clearLocThreadIdInsertPt(CodeGenFunction &CGF); + + /// Check if the default location must be constant. + /// Default is false to support OMPT/OMPD. + virtual bool isDefaultLocationConstant() const { return false; } + + /// Returns additional flags that can be stored in reserved_2 field of the + /// default location. + virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + + /// Returns default flags for the barriers depending on the directive, for + /// which this barrier is going to be emitted. + static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); + + /// Get the LLVM type for the critical name. + llvm::ArrayType *getKmpCriticalNameTy() const {return KmpCriticalNameTy;} + + /// Returns corresponding lock object for the specified critical region + /// name. If the lock object does not exist it is created, otherwise the + /// reference to the existing copy is returned. + /// \param CriticalName Name of the critical region. + /// + llvm::Value *getCriticalRegionLock(StringRef CriticalName); + private: /// Default const ident_t object used for initialization of all other /// ident_t objects. llvm::Constant *DefaultOpenMPPSource = nullptr; + using FlagsTy = std::pair<unsigned, unsigned>; /// Map of flags and corresponding default locations. - typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; + using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>; OpenMPDefaultLocMapTy OpenMPDefaultLocMap; Address getOrCreateDefaultLocation(unsigned Flags); @@ -300,6 +327,8 @@ private: struct DebugLocThreadIdTy { llvm::Value *DebugLoc; llvm::Value *ThreadID; + /// Insert point for the service instructions. + llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr; }; /// Map of local debug location, ThreadId and functions. 
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy> @@ -596,7 +625,11 @@ private: OffloadEntriesInfoManagerTy OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; - llvm::SmallDenseSet<const Decl *> AlreadyEmittedTargetFunctions; + /// List of the emitted functions. + llvm::StringSet<> AlreadyEmittedTargetFunctions; + /// List of the global variables with their addresses that should not be + /// emitted for the target. + llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables; /// List of variables that can become declare target implicitly and, thus, /// must be emitted. @@ -673,10 +706,10 @@ private: const llvm::Twine &Name); /// Set of threadprivate variables with the generated initializer. - llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition; + llvm::StringSet<> ThreadPrivateWithDefinition; /// Set of declare target variables with the generated initializer. - llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition; + llvm::StringSet<> DeclareTargetWithDefinition; /// Emits initialization code for the threadprivate variables. /// \param VDAddr Address of the global variable \a VD. @@ -688,13 +721,6 @@ private: llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc); - /// Returns corresponding lock object for the specified critical region - /// name. If the lock object does not exist it is created, otherwise the - /// reference to the existing copy is returned. - /// \param CriticalName Name of the critical region. - /// - llvm::Value *getCriticalRegionLock(StringRef CriticalName); - struct TaskResultTy { llvm::Value *NewTask = nullptr; llvm::Value *TaskEntry = nullptr; @@ -884,6 +910,20 @@ public: virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const; + /// Check if the specified \a ScheduleKind is static chunked. + /// \param ScheduleKind Schedule kind specified in the 'schedule' clause. + /// \param Chunked True if chunk is specified in the clause. 
+ /// + virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const; + + /// Check if the specified \a ScheduleKind is static chunked. + /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause. + /// \param Chunked True if chunk is specified in the clause. + /// + virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind, + bool Chunked) const; + /// Check if the specified \a ScheduleKind is dynamic. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause. @@ -1500,7 +1540,7 @@ public: /// schedule clause. virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, - llvm::Value *&Chunk) const {} + const Expr *&ChunkExpr) const {} /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. @@ -1517,12 +1557,23 @@ public: virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD); - /// Marks the declaration as alread emitted for the device code and returns + /// Marks the declaration as already emitted for the device code and returns /// true, if it was marked already, and false, otherwise. bool markAsGlobalTarget(GlobalDecl GD); /// Emit deferred declare target variables marked for deferred emission. void emitDeferredTargetDecls() const; + + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. + virtual void + adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, + const OMPExecutableDirective &D) const; + + /// Perform check on requires decl to ensure that target architecture + /// supports unified addressing + virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const {} }; /// Class supports emissionof SIMD-only code. 
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 14fd4a3113..b055132ef0 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" using namespace clang; @@ -32,8 +33,8 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit(); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, + /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void /// *outlined_function, int16_t /// IsOMPRuntimeInitialized); @@ -61,31 +62,21 @@ enum OpenMPRTLFunctionNVPTX { /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, - /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32 - /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_simd_reduce_nowait, - /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, - /// int32_t num_vars, size_t reduce_size, void *reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), - /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, - /// int32_t index, int32_t width), - /// void (*kmp_LoadReduceFctPtr)(void 
*reduce_data, void * scratchpad, int32_t - /// index, int32_t width, int32_t reduce)) - OMPRTL_NVPTX__kmpc_teams_reduce_nowait, + /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 + /// global_tid, kmp_critical_name *lck) + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple, + /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, + /// kmp_int32 global_tid, kmp_critical_name *lck) + OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple, /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); OMPRTL_NVPTX__kmpc_end_reduce_nowait, /// Call to void __kmpc_data_sharing_init_stack(); OMPRTL_NVPTX__kmpc_data_sharing_init_stack, /// Call to void __kmpc_data_sharing_init_stack_spmd(); OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, + /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, /// Call to void __kmpc_data_sharing_pop_stack(void *a); OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, /// Call to void __kmpc_begin_sharing_variables(void ***args, @@ -100,6 +91,13 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_parallel_level, /// Call to int8_t __kmpc_is_spmd_exec_mode(); OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, + /// Call to void __kmpc_get_team_static_memory(const void *buf, size_t size, + /// int16_t is_shared, const void **res); + OMPRTL_NVPTX__kmpc_get_team_static_memory, + /// Call to void __kmpc_restore_team_static_memory(int16_t is_shared); + OMPRTL_NVPTX__kmpc_restore_team_static_memory, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -142,19 +140,35 @@ public: /// a target region. 
The appropriate mode (SPMD|NON-SPMD) is set on entry /// to the target region and used by containing directives such as 'parallel' /// to emit optimized code. -class ExecutionModeRAII { +class ExecutionRuntimeModesRAII { private: - CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; - CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; + CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode = + CGOpenMPRuntimeNVPTX::EM_Unknown; + CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode; + bool SavedRuntimeMode = false; + bool *RuntimeMode = nullptr; public: - ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD) - : Mode(Mode) { - SavedMode = Mode; - Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD - : CGOpenMPRuntimeNVPTX::EM_NonSPMD; + /// Constructor for Non-SPMD mode. + ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode) + : ExecMode(ExecMode) { + SavedExecMode = ExecMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD; + } + /// Constructor for SPMD mode. + ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode, + bool &RuntimeMode, bool FullRuntimeMode) + : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { + SavedExecMode = ExecMode; + SavedRuntimeMode = RuntimeMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD; + RuntimeMode = FullRuntimeMode; + } + ~ExecutionRuntimeModesRAII() { + ExecMode = SavedExecMode; + if (RuntimeMode) + *RuntimeMode = SavedRuntimeMode; } - ~ExecutionModeRAII() { Mode = SavedMode; } }; /// GPU Configuration: This information can be derived from cuda registers, @@ -169,16 +183,34 @@ enum MachineConfiguration : unsigned { LaneIDMask = WarpSize - 1, /// Global memory alignment for performance. - GlobalMemoryAlignment = 256, -}; + GlobalMemoryAlignment = 128, -enum NamedBarrier : unsigned { - /// Synchronize on this barrier #ID using a named barrier primitive. - /// Only the subset of active threads in a parallel region arrive at the - /// barrier. - NB_Parallel = 1, + /// Maximal size of the shared memory buffer. 
+ SharedMemorySize = 128, }; +static const ValueDecl *getPrivateItem(const Expr *RefExpr) { + RefExpr = RefExpr->IgnoreParens(); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) { + const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + RefExpr = Base; + } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) { + const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + RefExpr = Base; + } + RefExpr = RefExpr->IgnoreParenImpCasts(); + if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr)) + return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()); + const auto *ME = cast<MemberExpr>(RefExpr); + return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); +} + typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { return P1.first > P2.first; @@ -186,20 +218,31 @@ static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { static RecordDecl *buildRecordForGlobalizedVars( ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, + ArrayRef<const ValueDecl *> EscapedDeclsForTeams, llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> &MappedDeclsFields) { - if (EscapedDecls.empty()) + if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) return nullptr; SmallVector<VarsDataTy, 4> GlobalizedVars; for (const ValueDecl *D : EscapedDecls) + GlobalizedVars.emplace_back( + CharUnits::fromQuantity(std::max( + C.getDeclAlign(D).getQuantity(), + static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))), + D); + for (const ValueDecl *D : EscapedDeclsForTeams) 
GlobalizedVars.emplace_back(C.getDeclAlign(D), D); std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), stable_sort_comparator); // Build struct _globalized_locals_ty { - // /* globalized vars */ + // /* globalized vars */[WarpSize] align (max(decl_align, + // GlobalMemoryAlignment)) + // /* globalized vars */ for EscapedDeclsForTeams // }; RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); GlobalizedRD->startDefinition(); + llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped( + EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end()); for (const auto &Pair : GlobalizedVars) { const ValueDecl *VD = Pair.second; QualType Type = VD->getType(); @@ -208,19 +251,39 @@ static RecordDecl *buildRecordForGlobalizedVars( else Type = Type.getNonReferenceType(); SourceLocation Loc = VD->getLocation(); - auto *Field = - FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, - C.getTrivialTypeSourceInfo(Type, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - GlobalizedRD->addDecl(Field); - if (VD->hasAttrs()) { - for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), - E(VD->getAttrs().end()); - I != E; ++I) - Field->addAttr(*I); + FieldDecl *Field; + if (SingleEscaped.count(VD)) { + Field = FieldDecl::Create( + C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + if (VD->hasAttrs()) { + for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), + E(VD->getAttrs().end()); + I != E; ++I) + Field->addAttr(*I); + } + } else { + llvm::APInt ArraySize(32, WarpSize); + Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); + Field = FieldDecl::Create( + C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + 
/*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(), + static_cast<CharUnits::QuantityType>( + GlobalMemoryAlignment))); + Field->addAttr(AlignedAttr::CreateImplicit( + C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true, + IntegerLiteral::Create(C, Align, + C.getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()))); } + GlobalizedRD->addDecl(Field); MappedDeclsFields.try_emplace(VD, Field); } GlobalizedRD->completeDefinition(); @@ -256,9 +319,11 @@ class CheckVarsEscapingDeclContext final const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); if (!Attr) return; - if (!isOpenMPPrivate( - static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) || - Attr->getCaptureKind() == OMPC_map) + if (((Attr->getCaptureKind() != OMPC_map) && + !isOpenMPPrivate( + static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) || + ((Attr->getCaptureKind() == OMPC_map) && + !FD->getType()->isAnyPointerType())) return; } if (!FD->getType()->isReferenceType()) { @@ -340,15 +405,24 @@ class CheckVarsEscapingDeclContext final } } - void buildRecordForGlobalizedVars() { + void buildRecordForGlobalizedVars(bool IsInTTDRegion) { assert(!GlobalizedRD && "Record for globalized variables is built already."); + ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams; + if (IsInTTDRegion) + EscapedDeclsForTeams = EscapedDecls.getArrayRef(); + else + EscapedDeclsForParallel = EscapedDecls.getArrayRef(); GlobalizedRD = ::buildRecordForGlobalizedVars( - CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields); + CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams, + MappedDeclsFields); } public: - CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {} + CheckVarsEscapingDeclContext(CodeGenFunction &CGF, + ArrayRef<const ValueDecl *> TeamsReductions) + : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) { + } virtual 
~CheckVarsEscapingDeclContext() = default; void VisitDeclStmt(const DeclStmt *S) { if (!S) @@ -490,9 +564,9 @@ public: /// Returns the record that handles all the escaped local variables and used /// instead of their original storage. - const RecordDecl *getGlobalizedRecord() { + const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) { if (!GlobalizedRD) - buildRecordForGlobalizedVars(); + buildRecordForGlobalizedVars(IsInTTDRegion); return GlobalizedRD; } @@ -568,29 +642,15 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { /// Get barrier to synchronize all threads in a block. static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); -} - -/// Get barrier #ID to synchronize selected (multiple of warp size) threads in -/// a CTA. -static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, - llvm::Value *NumThreads) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; - CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier), - Args); + llvm::Function *F = llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0); + F->addFnAttr(llvm::Attribute::Convergent); + CGF.EmitRuntimeCall(F); } /// Synchronize all GPU threads in a block. static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } -/// Synchronize worker threads in a parallel region. -static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { - return getNVPTXBarrier(CGF, NB_Parallel, NumThreads); -} - /// Get the value of the thread_limit clause in the teams directive. 
/// For the 'generic' execution mode, the runtime encodes thread_limit in /// the launch parameters, always starting thread_limit+warpSize threads per @@ -652,12 +712,58 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } +// Checks if the expression is constant or does not have non-trivial function +// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. + return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} + /// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one. -static const Stmt *getSingleCompoundChild(const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) - if (C->size() == 1) - return C->body_front(); +/// iff there is only one that is not evaluatable at the compile time. +static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { + if (const auto *C = dyn_cast<CompoundStmt>(Body)) { + const Stmt *Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast<Expr>(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || + isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) + continue; + // Analyze declarations. 
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || + isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || + isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || + isa<UsingDirectiveDecl>(D) || + isa<OMPDeclareReductionDecl>(D) || + isa<OMPThreadPrivateDecl>(D)) + return true; + const auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. + if (Child) + return Body; + Child = S; + } + if (Child) + return Child; + } return Body; } @@ -686,7 +792,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Body); + const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); @@ -700,7 +806,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && @@ -781,10 +887,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: - return !hasParallelIfNumThreadsClause(Ctx, D); case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: - // Distribute with lastprivates requires non-SPMD execution mode. 
return !hasParallelIfNumThreadsClause(Ctx, D); case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -861,7 +965,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Body); + const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); @@ -876,7 +980,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && @@ -888,7 +992,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && @@ -900,7 +1004,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && @@ -921,7 +1025,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if 
(isOpenMPWorksharingDirective(DKind) && @@ -1069,7 +1173,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); EntryFunctionState EST; WorkerFunctionState WST(CGM, D.getBeginLoc()); Work.clear(); @@ -1085,17 +1189,35 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) : EST(EST), WST(WST) {} void Enter(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryHeader(CGF, EST, WST); + auto &RT = + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); + RT.emitNonSPMDEntryHeader(CGF, EST, WST); + // Skip target region initialization. + RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryFooter(CGF, EST); + auto &RT = + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); + RT.clearLocThreadIdInsertPt(CGF); + RT.emitNonSPMDEntryFooter(CGF, EST); } } Action(EST, WST); CodeGen.setAction(Action); + IsInTTDRegion = true; + // Reserve place for the globalized memory. 
+ GlobalizedRecords.emplace_back(); + if (!KernelStaticGlobalized) { + KernelStaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + IsInTTDRegion = false; // Now change the name of the worker function to correspond to this target // region's entry function. @@ -1183,7 +1305,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true); + ExecutionRuntimeModesRAII ModeRAII( + CurrentExecutionMode, RequiresFullRuntime, + CGM.getLangOpts().OpenMPCUDAForceFullRuntime || + !supportsLightweightRuntime(CGM.getContext(), D)); EntryFunctionState EST; // Emit target region as a standalone region. @@ -1199,14 +1324,30 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, : RT(RT), EST(EST), D(D) {} void Enter(CodeGenFunction &CGF) override { RT.emitSPMDEntryHeader(CGF, EST, D); + // Skip target region initialization. + RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { + RT.clearLocThreadIdInsertPt(CGF); RT.emitSPMDEntryFooter(CGF, EST); } } Action(*this, EST, D); CodeGen.setAction(Action); + IsInTTDRegion = true; + // Reserve place for the globalized memory. 
+ GlobalizedRecords.emplace_back(); + if (!KernelStaticGlobalized) { + KernelStaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + IsInTTDRegion = false; } void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( @@ -1218,14 +1359,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); EST.ExitBB = CGF.createBasicBlock(".exit"); - // Initialize the OMP state in the runtime; called by all active threads. - bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime || - !supportsLightweightRuntime(CGF.getContext(), D); - llvm::Value *Args[] = { - getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), - /*RequiresOMPRuntime=*/ - Bld.getInt16(RequiresFullRuntime ? 1 : 0), - /*RequiresDataSharing=*/Bld.getInt16(RequiresFullRuntime ? 1 : 0)}; + llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), + /*RequiresOMPRuntime=*/ + Bld.getInt16(RequiresFullRuntime ? 1 : 0), + /*RequiresDataSharing=*/Bld.getInt16(0)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); @@ -1256,8 +1393,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF, CGF.EmitBlock(OMPDeInitBB); // DeInitialize the OMP state in the runtime; called by all active threads. + llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ + CGF.Builder.getInt16(RequiresFullRuntime ? 
1 : 0)}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None); + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1344,6 +1484,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Signal start of parallel region. CGF.EmitBlock(ExecuteBB); + // Skip initialization. + setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); // Process work items: outlined parallel functions. for (llvm::Function *W : Work) { @@ -1404,6 +1546,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Exit target region. CGF.EmitBlock(ExitBB); + // Skip initialization. + clearLocThreadIdInsertPt(CGF); } /// Returns specified OpenMP runtime function for the current OpenMP @@ -1440,11 +1584,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); break; } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { - // Build void __kmpc_spmd_kernel_deinit(); + case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { + // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); break; } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { @@ -1536,83 +1681,37 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); break; } - case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: { - // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void 
*rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t Algorithm Version), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; + case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); break; } - case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, - // int32_t num_vars, size_t reduce_size, void *reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), - // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, - // int32_t index, int32_t width), - // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, - // int32_t index, int32_t width, int32_t reduce)) - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type 
*InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, - CGM.Int32Ty, CGM.Int32Ty}; - auto *CopyToScratchpadFnTy = - llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams, - /*isVarArg=*/false); - llvm::Type *LoadReduceTypeParams[] = { - CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; - auto *LoadReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - CopyToScratchpadFnTy->getPointerTo(), - LoadReduceFnTy->getPointerTo()}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: { + // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *lck) + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple"); break; } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: { + // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *lck) + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, 
/*Name=*/"__kmpc_nvptx_end_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { @@ -1630,14 +1729,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); break; } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, + case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { + // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, // int16_t UseSharedMemory); llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); + FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { @@ -1687,6 +1786,33 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); break; } + case OMPRTL_NVPTX__kmpc_get_team_static_memory: { + // Build void __kmpc_get_team_static_memory(const void *buf, size_t size, + // int16_t is_shared, const void **res); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.SizeTy, CGM.Int16Ty, + CGM.VoidPtrPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); + break; + } + case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { + // Build void __kmpc_restore_team_static_memory(int16_t is_shared); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int16Ty, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); + break; + } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); + cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } } return RTLFn; } @@ -1733,6 +1859,37 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Enum for accessing the reserved_2 field of the ident_t struct. +enum ModeFlagsTy : unsigned { + /// Bit set to 1 when in SPMD mode. + KMP_IDENT_SPMD_MODE = 0x01, + /// Bit set to 1 when a simplified runtime is used. + KMP_IDENT_SIMPLE_RT_MODE = 0x02, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE) +}; + +/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime. +static const ModeFlagsTy UndefinedMode = + (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; +} // anonymous namespace + +unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { + switch (getExecutionMode()) { + case EM_SPMD: + if (requiresFullRuntime()) + return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); + return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; + case EM_NonSPMD: + assert(requiresFullRuntime() && "Expected full runtime."); + return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); + case EM_Unknown: + return UndefinedMode; + } + llvm_unreachable("Unknown flags are requested."); +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -1784,12 +1941,15 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( } } Action(IsInParallelRegion); CodeGen.setAction(Action); + bool PrevIsInTTDRegion = IsInTTDRegion; + IsInTTDRegion = false; bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; 
IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; + IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD && !IsInParallelRegion) { llvm::Function *WrapperFun = @@ -1803,13 +1963,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( /// Get list of lastprivate variables from the teams distribute ... or /// teams {distribute ...} directives. static void -getDistributeLastprivateVars(const OMPExecutableDirective &D, +getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl<const ValueDecl *> &Vars) { assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && "expected teams directive."); const OMPExecutableDirective *Dir = &D; if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { if (const Stmt *S = getSingleCompoundChild( + Ctx, D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true))) { Dir = dyn_cast<OMPExecutableDirective>(S); @@ -1819,12 +1980,21 @@ getDistributeLastprivateVars(const OMPExecutableDirective &D, } if (!Dir) return; - for (const OMPLastprivateClause *C : - Dir->getClausesOfKind<OMPLastprivateClause>()) { - for (const Expr *E : C->getVarRefs()) { - const auto *DE = cast<DeclRefExpr>(E->IgnoreParens()); - Vars.push_back(cast<ValueDecl>(DE->getDecl()->getCanonicalDecl())); - } + for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *E : C->getVarRefs()) + Vars.push_back(getPrivateItem(E)); + } +} + +/// Get list of reduction variables from the teams ... directives. 
+static void +getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, + llvm::SmallVectorImpl<const ValueDecl *> &Vars) { + assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && + "expected teams directive."); + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *E : C->privates()) + Vars.push_back(getPrivateItem(E)); } } @@ -1834,13 +2004,22 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( SourceLocation Loc = D.getBeginLoc(); const RecordDecl *GlobalizedRD = nullptr; - llvm::SmallVector<const ValueDecl *, 4> LastPrivates; + llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; + // Globalize team reductions variable unconditionally in all modes. + getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { - getDistributeLastprivateVars(D, LastPrivates); - if (!LastPrivates.empty()) - GlobalizedRD = buildRecordForGlobalizedVars( - CGM.getContext(), LastPrivates, MappedDeclsFields); + getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); + if (!LastPrivatesReductions.empty()) { + GlobalizedRD = ::buildRecordForGlobalizedVars( + CGM.getContext(), llvm::None, LastPrivatesReductions, + MappedDeclsFields); + } + } else if (!LastPrivatesReductions.empty()) { + assert(!TeamAndReductions.first && + "Previous team declaration is not expected."); + TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl(); + std::swap(TeamAndReductions.second, LastPrivatesReductions); } // Emit target region as a standalone region. 
@@ -1869,9 +2048,9 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( for (const auto &Pair : MappedDeclsFields) { assert(Pair.getFirst()->isCanonicalDecl() && "Expected canonical declaration"); - Data.insert(std::make_pair( - Pair.getFirst(), - std::make_pair(Pair.getSecond(), Address::invalid()))); + Data.insert(std::make_pair(Pair.getFirst(), + MappedVarData(Pair.getSecond(), + /*IsOnePerTeam=*/true))); } } Rt.emitGenericVarsProlog(CGF, Loc); @@ -1905,74 +2084,184 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, if (I == FunctionGlobalizedDecls.end()) return; if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { - QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + QualType SecGlobalRecTy; // Recover pointer to this function's global record. The runtime will // handle the specifics of the allocation of the memory. // Use actual memory size of the record including the padding // for alignment purposes. 
unsigned Alignment = - CGM.getContext().getTypeAlignInChars(RecTy).getQuantity(); + CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); unsigned GlobalRecordSize = - CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); + CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity(); GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); + llvm::PointerType *GlobalRecPtrTy = + CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo(); llvm::Value *GlobalRecCastAddr; - if (WithSPMDCheck || - getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) { + llvm::Value *IsTTD = nullptr; + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd"); llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); + if (I->getSecond().SecondaryGlobalRecord.hasValue()) { + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *PL = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + {RTLoc, ThreadID}); + IsTTD = Bld.CreateIsNull(PL); + } llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(SPMDBB); - Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack"); + Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy), + CharUnits::fromQuantity(Alignment)); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. 
(void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); + llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize); + if (const RecordDecl *SecGlobalizedVarsRecord = + I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) { + SecGlobalRecTy = + CGM.getContext().getRecordType(SecGlobalizedVarsRecord); + + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity(); + unsigned GlobalRecordSize = + CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity(); + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); + Size = Bld.CreateSelect( + IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size); + } // TODO: allow the usage of shared memory to be controlled by // the user, for now, default to global. 
llvm::Value *GlobalRecordSizeArg[] = { - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), - CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_push_stack), - GlobalRecordSizeArg); + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); + GlobalRecValue, GlobalRecPtrTy); CGF.EmitBlock(ExitBB); - auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(), + auto *Phi = Bld.CreatePHI(GlobalRecPtrTy, /*NumReservedValues=*/2, "_select_stack"); Phi->addIncoming(RecPtr.getPointer(), SPMDBB); Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB); GlobalRecCastAddr = Phi; I->getSecond().GlobalRecordAddr = Phi; I->getSecond().IsInSPMDModeFlag = IsSPMD; + } else if (IsInTTDRegion) { + assert(GlobalizedRecords.back().Records.size() < 2 && + "Expected less than 2 globalized records: one for target and one " + "for teams."); + unsigned Offset = 0; + for (const RecordDecl *RD : GlobalizedRecords.back().Records) { + QualType RDTy = CGM.getContext().getRecordType(RD); + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(RDTy).getQuantity(); + unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity(); + Offset = + llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment); + } + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); + Offset = llvm::alignTo(Offset, Alignment); + GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord); + ++GlobalizedRecords.back().RegionCounter; + if (GlobalizedRecords.back().Records.size() == 1) { + assert(KernelStaticGlobalized && + "Kernel static pointer must be initialized already."); + 
auto *UseSharedMemory = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_static_kernel$is_shared"); + UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, Loc); + auto *StaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, nullptr); + auto *RecSize = new llvm::GlobalVariable( + CGM.getModule(), CGM.SizeTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_static_kernel$size"); + RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + llvm::Value *Ld = CGF.EmitLoadOfScalar( + Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); + llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + KernelStaticGlobalized, CGM.VoidPtrPtrTy); + llvm::Value *GlobalRecordSizeArg[] = {StaticGlobalized, Ld, + IsInSharedMemory, ResAddr}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_get_team_static_memory), + GlobalRecordSizeArg); + GlobalizedRecords.back().Buffer = StaticGlobalized; + GlobalizedRecords.back().RecSize = RecSize; + GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; + GlobalizedRecords.back().Loc = Loc; + } + assert(KernelStaticGlobalized && "Global address must be set already."); + Address FrameAddr = CGF.EmitLoadOfPointer( + Address(KernelStaticGlobalized, CGM.getPointerAlign()), + CGM.getContext() + .getPointerType(CGM.getContext().VoidPtrTy) + .castAs<PointerType>()); + llvm::Value *GlobalRecValue = + Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One()) + .getPointer(); + 
I->getSecond().GlobalRecordAddr = GlobalRecValue; + I->getSecond().IsInSPMDModeFlag = nullptr; + GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo()); } else { // TODO: allow the usage of shared memory to be controlled by // the user, for now, default to global. llvm::Value *GlobalRecordSizeArg[] = { llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_push_stack), - GlobalRecordSizeArg); + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); + GlobalRecValue, GlobalRecPtrTy); I->getSecond().GlobalRecordAddr = GlobalRecValue; I->getSecond().IsInSPMDModeFlag = nullptr; } LValue Base = - CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); + CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy); // Emit the "global alloca" which is a GEP from the global declaration // record using the pointer returned by the runtime. 
+ LValue SecBase; + decltype(I->getSecond().LocalVarData)::const_iterator SecIt; + if (IsTTD) { + SecIt = I->getSecond().SecondaryLocalVarData->begin(); + llvm::PointerType *SecGlobalRecPtrTy = + CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo(); + SecBase = CGF.MakeNaturalAlignPointeeAddrLValue( + Bld.CreatePointerBitCastOrAddrSpaceCast( + I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy), + SecGlobalRecTy); + } for (auto &Rec : I->getSecond().LocalVarData) { bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); llvm::Value *ParValue; @@ -1982,14 +2271,51 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); } - const FieldDecl *FD = Rec.second.first; - LValue VarAddr = CGF.EmitLValueForField(Base, FD); - Rec.second.second = VarAddr.getAddress(); + LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD); + // Emit VarAddr basing on lane-id if required. 
+ QualType VarTy; + if (Rec.second.IsOnePerTeam) { + VarTy = Rec.second.FD->getType(); + } else { + llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( + VarAddr.getAddress().getPointer(), + {Bld.getInt32(0), getNVPTXLaneID(CGF)}); + VarTy = + Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); + VarAddr = CGF.MakeAddrLValue( + Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, + AlignmentSource::Decl); + } + Rec.second.PrivateAddr = VarAddr.getAddress(); + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { + assert(I->getSecond().IsInSPMDModeFlag && + "Expected unknown execution mode or required SPMD check."); + if (IsTTD) { + assert(SecIt->second.IsOnePerTeam && + "Secondary glob data must be one per team."); + LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); + VarAddr.setAddress( + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), + VarAddr.getPointer()), + VarAddr.getAlignment())); + Rec.second.PrivateAddr = VarAddr.getAddress(); + } + Address GlobalPtr = Rec.second.PrivateAddr; + Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); + Rec.second.PrivateAddr = Address( + Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag, + LocalAddr.getPointer(), GlobalPtr.getPointer()), + LocalAddr.getAlignment()); + } if (EscapedParam) { const auto *VD = cast<VarDecl>(Rec.first); CGF.EmitStoreOfScalar(ParValue, VarAddr); I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); } + if (IsTTD) + ++SecIt; } } for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { @@ -2011,7 +2337,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + createNVPTXRuntimeFunction( + 
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2043,8 +2370,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, Addr); } if (I->getSecond().GlobalRecordAddr) { - if (WithSPMDCheck || - getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) { + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { CGBuilderTy &Bld = CGF.Builder; llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); @@ -2057,6 +2385,23 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); + } else if (IsInTTDRegion) { + assert(GlobalizedRecords.back().RegionCounter > 0 && + "region counter must be > 0."); + --GlobalizedRecords.back().RegionCounter; + // Emit the restore function only in the target region. + if (GlobalizedRecords.back().RegionCounter == 0) { + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(GlobalizedRecords.back().UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_restore_team_static_memory), + IsInSharedMemory); + } } else { CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), @@ -2155,7 +2500,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( // passed from the outside of the target region. CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); - // There's somehting to share. + // There's something to share. 
if (!CapturedVars.empty()) { // Prepare for parallel region. Indicate the outlined function. Address SharedArgs = @@ -2209,30 +2554,24 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( Work.emplace_back(WFn); }; - auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { - RegionCodeGenTy RCG(CodeGen); + auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen]( + CodeGenFunction &CGF, PrePostActionTy &Action) { if (IsInParallelRegion) { SeqGen(CGF, Action); } else if (IsInTargetMasterThreadRegion) { L0ParallelGen(CGF, Action); - } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) { - RCG(CGF); } else { // Check for master and then parallelism: // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) { - // Serialized execution. - // } else if (master) { - // Worker call. + // Serialized execution. // } else { - // Outlined function call. + // Worker call. // } CGBuilderTy &Bld = CGF.Builder; llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); - llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); @@ -2245,29 +2584,17 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); - Bld.CreateCondBr(Res, SeqBB, MasterCheckBB); + Bld.CreateCondBr(Res, SeqBB, MasterBB); CGF.EmitBlock(SeqBB); SeqGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. 
(void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(MasterCheckBB); - llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then"); - llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); - llvm::Value *IsMaster = - Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); - Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock); - CGF.EmitBlock(MasterThenBB); + CGF.EmitBlock(MasterBB); L0ParallelGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - // In the worker need to use the real thread id. - ThreadIDAddr = emitThreadIDAddress(CGF, Loc); - RCG(CGF); - // There is no need to emit line number for unconditional branch. - (void)ApplyDebugLocation::CreateEmpty(CGF); // Emit the continuation block for code after the if. CGF.EmitBlock(ExitBB, /*IsFinished=*/true); } @@ -2338,6 +2665,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( } } +void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, bool, + bool) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), + getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); +} + void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, @@ -2380,14 +2721,16 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CGF.EmitBlock(BodyBB); // Output the critical statement. 
- CriticalOpGen(CGF); + CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc, + Hint); // After the body surrounded by the critical region, the single executing // thread will jump to the synchronisation point. // Block waits for all threads in current team to finish then increments the // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); - getNVPTXCTABarrier(CGF); + emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2509,11 +2852,12 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), IntType, Offset, Loc); CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); - Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); - ElemPtr = + Address LocalPtr = + Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); + Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); - PhiSrc->addIncoming(Ptr.getPointer(), ThenBB); - PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB); + PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB); + PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB); CGF.EmitBranch(PreCondBB); CGF.EmitBlock(ExitBB); } else { @@ -2739,223 +3083,6 @@ static void emitReductionListCopy( } } -/// This function emits a helper that loads data from the scratchpad array -/// and (optionally) reduces it with the input operand. 
-/// -/// load_and_reduce(local, scratchpad, index, width, should_reduce) -/// reduce_data remote; -/// for elem in remote: -/// remote.elem = Scratchpad[elem_id][index] -/// if (should_reduce) -/// local = local @ remote -/// else -/// local = remote -static llvm::Value *emitReduceScratchpadFunction( - CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { - ASTContext &C = CGM.getContext(); - QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); - - // Destination of the copy. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // Base address of the scratchpad array, with each element storing a - // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // A source index into the scratchpad array. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // Row width of an element in the scratchpad array, typically - // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // If should_reduce == 1, then it's load AND reduce, - // If should_reduce == 0 (or otherwise), then it only loads (+ copy). - // The latter case is used for initialization. 
- ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - Int32Ty, ImplicitParamDecl::Other); - - FunctionArgList Args; - Args.push_back(&ReduceListArg); - Args.push_back(&ScratchPadArg); - Args.push_back(&IndexArg); - Args.push_back(&WidthArg); - Args.push_back(&ShouldReduceArg); - - const CGFunctionInfo &CGFI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_load_and_reduce", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); - Fn->setDoesNotRecurse(); - CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - - CGBuilderTy &Bld = CGF.Builder; - - // Get local Reduce list pointer. - Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); - Address ReduceListAddr( - Bld.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, Loc), - CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), - CGF.getPointerAlign()); - - Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); - llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); - - Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = Bld.CreateIntCast( - CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), - CGM.SizeTy, /*isSigned=*/true); - - Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); - llvm::Value *WidthVal = Bld.CreateIntCast( - CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc), - CGM.SizeTy, /*isSigned=*/true); - - Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg); - llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar( - AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc); - - // The absolute ptr address to the base addr of the next 
element to copy. - llvm::Value *CumulativeElemBasePtr = - Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); - Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); - - // Create a Remote Reduce list to store the elements read from the - // scratchpad array. - Address RemoteReduceList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list"); - - // Assemble remote Reduce list from scratchpad array. - emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates, - SrcDataAddr, RemoteReduceList, - {/*RemoteLaneOffset=*/nullptr, - /*ScratchpadIndex=*/IndexVal, - /*ScratchpadWidth=*/WidthVal}); - - llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - - llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal); - Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); - - CGF.EmitBlock(ThenBB); - // We should reduce with the local Reduce list. - // reduce_function(LocalReduceList, RemoteReduceList) - llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( - ReduceListAddr.getPointer(), CGF.VoidPtrTy); - llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( - RemoteReduceList.getPointer(), CGF.VoidPtrTy); - CGM.getOpenMPRuntime().emitOutlinedFunctionCall( - CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr}); - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(ElseBB); - // No reduction; just copy: - // Local Reduce list = Remote Reduce list. - emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, - RemoteReduceList, ReduceListAddr); - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(MergeBB); - - CGF.FinishFunction(); - return Fn; -} - -/// This function emits a helper that stores reduced data from the team -/// master to a scratchpad array in global memory. 
-/// -/// for elem in Reduce List: -/// scratchpad[elem_id][index] = elem -/// -static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, - SourceLocation Loc) { - - ASTContext &C = CGM.getContext(); - QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); - - // Source of the copy. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // Base address of the scratchpad array, with each element storing a - // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // A destination index into the scratchpad array, typically the team - // identifier. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // Row width of an element in the scratchpad array, typically - // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - - FunctionArgList Args; - Args.push_back(&ReduceListArg); - Args.push_back(&ScratchPadArg); - Args.push_back(&IndexArg); - Args.push_back(&WidthArg); - - const CGFunctionInfo &CGFI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); - Fn->setDoesNotRecurse(); - CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - - CGBuilderTy &Bld = CGF.Builder; - - Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); - Address SrcDataAddr( - Bld.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, Loc), - 
CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), - CGF.getPointerAlign()); - - Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); - llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); - - Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = Bld.CreateIntCast( - CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), - CGF.SizeTy, /*isSigned=*/true); - - Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); - llvm::Value *WidthVal = - Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, - Int32Ty, SourceLocation()), - CGF.SizeTy, /*isSigned=*/true); - - // The absolute ptr address to the base addr of the next element to copy. - llvm::Value *CumulativeElemBasePtr = - Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); - Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); - - emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates, - SrcDataAddr, DestDataAddr, - {/*RemoteLaneOffset=*/nullptr, - /*ScratchpadIndex=*/IndexVal, - /*ScratchpadWidth=*/WidthVal}); - - CGF.FinishFunction(); - return Fn; -} - /// This function emits a helper that gathers Reduce lists from the first /// lane of every active warp to lanes in the first warp. 
/// @@ -3013,11 +3140,10 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); if (!TransferMedium) { - auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize); + auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); TransferMedium = new llvm::GlobalVariable( - M, Ty, - /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, + M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, llvm::Constant::getNullValue(Ty), TransferMediumName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, SharedAddressSpace); @@ -3035,7 +3161,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Address LocalReduceList( Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()), + C.VoidPtrTy, Loc), CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), CGF.getPointerAlign()); @@ -3045,121 +3171,153 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // Warp master copies reduce element to transfer medium in __shared__ // memory. 
// - llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - - // if (lane_id == 0) - llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); - Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); - CGF.EmitBlock(ThenBB); - - // Reduce element = LocalReduceList[i] - Address ElemPtrPtrAddr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); - llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( - ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - // elemptr = (type[i]*)(elemptrptr) - Address ElemPtr = - Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); - ElemPtr = Bld.CreateElementBitCast( - ElemPtr, CGF.ConvertTypeForMem(Private->getType())); - - // Get pointer to location in transfer medium. - // MediumPtr = &medium[warp_id] - llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( - TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); - Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType())); - // Casting to actual data type. - // MediumPtr = (type[i]*)MediumPtrAddr; - MediumPtr = Bld.CreateElementBitCast( - MediumPtr, CGF.ConvertTypeForMem(Private->getType())); - - // elem = *elemptr - //*MediumPtr = elem - if (Private->getType()->isScalarType()) { - llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, - Private->getType(), Loc); - // Store the source element value to the dest element address. 
- CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false, - Private->getType()); - } else { - CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), - CGF.MakeAddrLValue(MediumPtr, Private->getType()), - Private->getType(), AggValueSlot::DoesNotOverlap); - } - - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(ElseBB); - Bld.CreateBr(MergeBB); + unsigned RealTySize = + C.getTypeSizeInChars(Private->getType()) + .alignTo(C.getTypeAlignInChars(Private->getType())) + .getQuantity(); + for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) { + unsigned NumIters = RealTySize / TySize; + if (NumIters == 0) + continue; + QualType CType = C.getIntTypeForBitwidth( + C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1); + llvm::Type *CopyType = CGF.ConvertTypeForMem(CType); + CharUnits Align = CharUnits::fromQuantity(TySize); + llvm::Value *Cnt = nullptr; + Address CntAddr = Address::invalid(); + llvm::BasicBlock *PrecondBB = nullptr; + llvm::BasicBlock *ExitBB = nullptr; + if (NumIters > 1) { + CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr"); + CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr, + /*Volatile=*/false, C.IntTy); + PrecondBB = CGF.createBasicBlock("precond"); + ExitBB = CGF.createBasicBlock("exit"); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body"); + // There is no need to emit line number for unconditional branch. 
+ (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(PrecondBB); + Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc); + llvm::Value *Cmp = + Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters)); + Bld.CreateCondBr(Cmp, BodyBB, ExitBB); + CGF.EmitBlock(BodyBB); + } + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - CGF.EmitBlock(MergeBB); + // if (lane_id == 0) + llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); + Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); + CGF.EmitBlock(ThenBB); - Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); - llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( - AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + Address ElemPtr = Address(ElemPtrPtr, Align); + ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); + if (NumIters > 1) { + ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt), + ElemPtr.getAlignment()); + } - llvm::Value *NumActiveThreads = Bld.CreateNSWMul( - NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); - // named_barrier_sync(ParallelBarrierID, num_active_threads) - syncParallelThreads(CGF, NumActiveThreads); + // Get pointer to location in transfer medium. + // MediumPtr = &medium[warp_id] + llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); + Address MediumPtr(MediumPtrVal, Align); + // Casting to actual data type. 
+ // MediumPtr = (CopyType*)MediumPtrAddr; + MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType); + + // elem = *elemptr + //*MediumPtr = elem + llvm::Value *Elem = + CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc); + // Store the source element value to the dest element address. + CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType); + + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + // kmpc_barrier. + CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + + // + // Warp 0 copies reduce element from transfer medium. + // + llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); + + Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); + llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( + AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc); + + // Up to 32 threads in warp 0 are active. 
+ llvm::Value *IsActiveThread = + Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); + Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); + + CGF.EmitBlock(W0ThenBB); + + // SrcMediumPtr = &medium[tid] + llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, + {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); + Address SrcMediumPtr(SrcMediumPtrVal, Align); + // SrcMediumVal = *SrcMediumPtr; + SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType); + + // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I + Address TargetElemPtrPtr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( + TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); + Address TargetElemPtr = Address(TargetElemPtrVal, Align); + TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); + if (NumIters > 1) { + TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt), + TargetElemPtr.getAlignment()); + } - // - // Warp 0 copies reduce element from transfer medium. - // - llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); - - // Up to 32 threads in warp 0 are active. 
- llvm::Value *IsActiveThread = - Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); - Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); - - CGF.EmitBlock(W0ThenBB); - - // SrcMediumPtr = &medium[tid] - llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( - TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); - Address SrcMediumPtr(SrcMediumPtrVal, - C.getTypeAlignInChars(Private->getType())); - // SrcMediumVal = *SrcMediumPtr; - SrcMediumPtr = Bld.CreateElementBitCast( - SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType())); - - // TargetElemPtr = (type[i]*)(SrcDataAddr[i]) - Address TargetElemPtrPtr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); - llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( - TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - Address TargetElemPtr = - Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType())); - TargetElemPtr = Bld.CreateElementBitCast( - TargetElemPtr, CGF.ConvertTypeForMem(Private->getType())); - - // *TargetElemPtr = SrcMediumVal; - if (Private->getType()->isScalarType()) { - llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( - SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc); + // *TargetElemPtr = SrcMediumVal; + llvm::Value *SrcMediumValue = + CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc); CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, - Private->getType()); - } else { - CGF.EmitAggregateCopy( - CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()), - CGF.MakeAddrLValue(TargetElemPtr, Private->getType()), - Private->getType(), AggValueSlot::DoesNotOverlap); + CType); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0ElseBB); + Bld.CreateBr(W0MergeBB); + + CGF.EmitBlock(W0MergeBB); + + // While warp 0 copies values from transfer medium, all other warps must + // wait. + // kmpc_barrier. 
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + if (NumIters > 1) { + Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1)); + CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy); + CGF.EmitBranch(PrecondBB); + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB); + } + RealTySize %= TySize; } - Bld.CreateBr(W0MergeBB); - - CGF.EmitBlock(W0ElseBB); - Bld.CreateBr(W0MergeBB); - - CGF.EmitBlock(W0MergeBB); - - // While warp 0 copies values from transfer medium, all other warps must - // wait. - syncParallelThreads(CGF, NumActiveThreads); ++Idx; } @@ -3633,125 +3791,115 @@ void CGOpenMPRuntimeNVPTX::emitReduction( return; bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); +#ifndef NDEBUG bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind); - assert((TeamsReduction || ParallelReduction || SimdReduction) && - "Invalid reduction selection in emitReduction."); +#endif if (Options.SimpleReduction) { + assert(!TeamsReduction && !ParallelReduction && + "Invalid reduction selection in emitReduction."); CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps, Options); return; } - ASTContext &C = CGM.getContext(); - - // 1. Build a list of reduction variables. - // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; - auto Size = RHSExprs.size(); - for (const Expr *E : Privates) { - if (E->getType()->isVariablyModifiedType()) - // Reserve place for array size. 
- ++Size; - } - llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address ReductionList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - auto IPriv = Privates.begin(); - unsigned Idx = 0; - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), - Elem); - if ((*IPriv)->getType()->isVariablyModifiedType()) { - // Store array size. - ++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - llvm::Value *Size = CGF.Builder.CreateIntCast( - CGF.getVLASize( - CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .NumElts, - CGF.SizeTy, /*isSigned=*/false); - CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), - Elem); - } - } - - // 2. Emit reduce_func(). - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); + assert((TeamsReduction || ParallelReduction) && + "Invalid reduction selection in emitReduction."); - // 4. 
Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), + // Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), // RedList, shuffle_reduce_func, interwarp_copy_func); + // or + // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - ReductionList.getPointer(), CGF.VoidPtrTy); - - llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( - CGM, Privates, ReductionArrayTy, ReductionFn, Loc); - llvm::Value *InterWarpCopyFn = - emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - - llvm::Value *Args[] = {ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, - InterWarpCopyFn}; - - llvm::Value *Res = nullptr; - if (ParallelReduction) - Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), - Args); - else if (SimdReduction) - Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait), - Args); - if (TeamsReduction) { - llvm::Value *ScratchPadCopyFn = - emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc); - llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction( + llvm::Value *Res; + if (ParallelReduction) { + ASTContext &C = CGM.getContext(); + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + auto Size = RHSExprs.size(); + for (const Expr *E : Privates) { + if (E->getType()->isVariablyModifiedType()) + // Reserve place for array size. 
+ ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. + ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + llvm::Value *ReductionFn = emitReductionFunction( + CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), + Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); + llvm::Value *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); llvm::Value *Args[] = {ThreadId, CGF.Builder.getInt32(RHSExprs.size()), ReductionArrayTySize, RL, ShuffleAndReduceFn, - InterWarpCopyFn, - ScratchPadCopyFn, - LoadAndReduceFn}; + InterWarpCopyFn}; + + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), + Args); + } else { + 
assert(TeamsReduction && "expected teams reduction."); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); + llvm::Value *Args[] = {RTLoc, ThreadId, Lock}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple), Args); } - // 5. Build switch(res) - llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - llvm::SwitchInst *SwInst = - CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + // 5. Build if (res == 1) + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done"); + llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then"); + llvm::Value *Cond = CGF.Builder.CreateICmpEQ( + Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1)); + CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB); - // 6. Build case 1: where we have reduced values in the master + // 6. Build then branch: where we have reduced values in the master // thread in each team. 
// __kmpc_end_reduce{_nowait}(<gtid>); // break; - llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); - SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); - CGF.EmitBlock(Case1BB); + CGF.EmitBlock(ThenBB); // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); - llvm::Value *EndArgs[] = {ThreadId}; auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps, this](CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); @@ -3765,15 +3913,33 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ++IRHS; } }; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - CGF.EmitBranch(DefaultBB); - CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); + if (ParallelReduction) { + llvm::Value *EndArgs[] = {ThreadId}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + } else { + assert(TeamsReduction && "expected teams reduction."); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); + llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + } + // There is no need to emit line number for unconditional branch. 
+ (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); } const VarDecl * @@ -4000,6 +4166,8 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, assert(D && "Expected function or captured|block decl."); assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && "Function is registered already."); + assert((!TeamAndReductions.first || TeamAndReductions.first == D) && + "Team is set but not processed."); const Stmt *Body = nullptr; bool NeedToDelayGlobalization = false; if (const auto *FD = dyn_cast<FunctionDecl>(D)) { @@ -4015,9 +4183,12 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, } if (!Body) return; - CheckVarsEscapingDeclContext VarChecker(CGF); + CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second); VarChecker.Visit(Body); - const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord(); + const RecordDecl *GlobalizedVarsRecord = + VarChecker.getGlobalizedRecord(IsInTTDRegion); + TeamAndReductions.first = nullptr; + TeamAndReductions.second.clear(); ArrayRef<const ValueDecl *> EscapedVariableLengthDecls = VarChecker.getEscapedVariableLengthDecls(); if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) @@ -4035,7 +4206,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { assert(VD->isCanonicalDecl() && "Expected canonical declaration"); const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); - Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid()))); + Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion))); + } + if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { + CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); + VarChecker.Visit(Body); + I->getSecond().SecondaryGlobalRecord = + VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true); + I->getSecond().SecondaryLocalVarData.emplace(); + DeclToAddrMapTy 
&Data = I->getSecond().SecondaryLocalVarData.getValue(); + for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { + assert(VD->isCanonicalDecl() && "Expected canonical declaration"); + const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); + Data.insert( + std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true))); + } } if (!NeedToDelayGlobalization) { emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); @@ -4062,7 +4247,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address::invalid(); auto VDI = I->getSecond().LocalVarData.find(VD); if (VDI != I->getSecond().LocalVarData.end()) - return VDI->second.second; + return VDI->second.PrivateAddr; if (VD->hasAttrs()) { for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()), E(VD->attr_end()); @@ -4071,7 +4256,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl()) ->getCanonicalDecl()); if (VDI != I->getSecond().LocalVarData.end()) - return VDI->second.second; + return VDI->second.PrivateAddr; } } return Address::invalid(); @@ -4091,16 +4276,285 @@ void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), S.getIterationVariable()->getType(), S.getBeginLoc()); + return; } + CGOpenMPRuntime::getDefaultDistScheduleAndChunk( + CGF, S, ScheduleKind, Chunk); } void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk( CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, - llvm::Value *&Chunk) const { - if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { - ScheduleKind = OMPC_SCHEDULE_static; - Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize( - S.getIterationVariable()->getType()), 1); + const Expr *&ChunkExpr) const { + ScheduleKind = OMPC_SCHEDULE_static; + // Chunk size is 1 in this case. 
+ llvm::APInt ChunkSize(32, 1); + ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize, + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); +} + +void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); + const CapturedStmt *CS = D.getCapturedStmt(OMPD_target); + for (const CapturedStmt::Capture &C : CS->captures()) { + // Capture variables captured by reference in lambdas for target-based + // directives. + if (!C.capturesVariable()) + continue; + const VarDecl *VD = C.getCapturedVar(); + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + continue; + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + LValue VDLVal; + if (VD->getType().getCanonicalType()->isReferenceType()) + VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType()); + else + VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + llvm::Value *CXXThis = CGF.LoadCXXThis(); + CGF.EmitStoreOfScalar(CXXThis, ThisLVal); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + if (!CS->capturesVariable(VD)) + continue; + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + if 
(VD->getType().getCanonicalType()->isReferenceType()) + VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, + VD->getType().getCanonicalType()) + .getAddress(); + CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); + } + } +} + +// Get current CudaArch and ignore any unknown values +static CudaArch getCudaArch(CodeGenModule &CGM) { + if (!CGM.getTarget().hasFeature("ptx")) + return CudaArch::UNKNOWN; + llvm::StringMap<bool> Features; + CGM.getTarget().initFeatureMap(Features, CGM.getDiags(), + CGM.getTarget().getTargetOpts().CPU, + CGM.getTarget().getTargetOpts().Features); + for (const auto &Feature : Features) { + if (Feature.getValue()) { + CudaArch Arch = StringToCudaArch(Feature.getKey()); + if (Arch != CudaArch::UNKNOWN) + return Arch; + } + } + return CudaArch::UNKNOWN; +} + +/// Check to see if target architecture supports unified addressing which is +/// a restriction for OpenMP requires clause "unified_shared_memory". +void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + switch (getCudaArch(CGM)) { + case CudaArch::SM_20: + case CudaArch::SM_21: + case CudaArch::SM_30: + case CudaArch::SM_32: + case CudaArch::SM_35: + case CudaArch::SM_37: + case CudaArch::SM_50: + case CudaArch::SM_52: + case CudaArch::SM_53: + case CudaArch::SM_60: + case CudaArch::SM_61: + case CudaArch::SM_62: + CGM.Error(Clause->getBeginLoc(), + "Target architecture does not support unified addressing"); + return; + case CudaArch::SM_70: + case CudaArch::SM_72: + case CudaArch::SM_75: + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + case 
CudaArch::GFX904: + case CudaArch::GFX906: + case CudaArch::GFX909: + case CudaArch::UNKNOWN: + break; + case CudaArch::LAST: + llvm_unreachable("Unexpected Cuda arch."); + } + } + } +} + +/// Get number of SMs and number of blocks per SM. +static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { + std::pair<unsigned, unsigned> Data; + if (CGM.getLangOpts().OpenMPCUDANumSMs) + Data.first = CGM.getLangOpts().OpenMPCUDANumSMs; + if (CGM.getLangOpts().OpenMPCUDABlocksPerSM) + Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM; + if (Data.first && Data.second) + return Data; + switch (getCudaArch(CGM)) { + case CudaArch::SM_20: + case CudaArch::SM_21: + case CudaArch::SM_30: + case CudaArch::SM_32: + case CudaArch::SM_35: + case CudaArch::SM_37: + case CudaArch::SM_50: + case CudaArch::SM_52: + case CudaArch::SM_53: + return {16, 16}; + case CudaArch::SM_60: + case CudaArch::SM_61: + case CudaArch::SM_62: + return {56, 32}; + case CudaArch::SM_70: + case CudaArch::SM_72: + case CudaArch::SM_75: + return {84, 32}; + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: + case CudaArch::GFX909: + case CudaArch::UNKNOWN: + break; + case CudaArch::LAST: + llvm_unreachable("Unexpected Cuda arch."); + } + llvm_unreachable("Unexpected NVPTX target without ptx feature."); +} + +void CGOpenMPRuntimeNVPTX::clear() { + if (!GlobalizedRecords.empty()) { + ASTContext &C = CGM.getContext(); + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs; + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs; + RecordDecl *StaticRD = C.buildImplicitRecord( + "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); + 
StaticRD->startDefinition(); + RecordDecl *SharedStaticRD = C.buildImplicitRecord( + "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); + SharedStaticRD->startDefinition(); + for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) { + if (Records.Records.empty()) + continue; + unsigned Size = 0; + unsigned RecAlignment = 0; + for (const RecordDecl *RD : Records.Records) { + QualType RDTy = C.getRecordType(RD); + unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity(); + RecAlignment = std::max(RecAlignment, Alignment); + unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity(); + Size = + llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment); + } + Size = llvm::alignTo(Size, RecAlignment); + llvm::APInt ArySize(/*numBits=*/64, Size); + QualType SubTy = C.getConstantArrayType( + C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + const bool UseSharedMemory = Size <= SharedMemorySize; + auto *Field = + FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, + SourceLocation(), SourceLocation(), nullptr, SubTy, + C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + if (UseSharedMemory) { + SharedStaticRD->addDecl(Field); + SharedRecs.push_back(&Records); + } else { + StaticRD->addDecl(Field); + GlobalRecs.push_back(&Records); + } + Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size)); + Records.UseSharedMemory->setInitializer( + llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 
1 : 0)); + } + SharedStaticRD->completeDefinition(); + if (!SharedStaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(SharedStaticRD); + llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMStaticTy, + /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(LLVMStaticTy), + "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + C.getTargetAddressSpace(LangAS::cuda_shared)); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } + } + StaticRD->completeDefinition(); + if (!StaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(StaticRD); + std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); + llvm::APInt Size1(32, SMsBlockPerSM.second); + QualType Arr1Ty = + C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::APInt Size2(32, SMsBlockPerSM.first); + QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMArr2Ty, + /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(LLVMArr2Ty), + "_openmp_static_glob_rd_$_"); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } + } } + CGOpenMPRuntime::clear(); } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index fc8cd2467b..8fb3b0a061 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ 
b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -56,6 +56,8 @@ private: ExecutionMode getExecutionMode() const; + bool requiresFullRuntime() const { return RequiresFullRuntime; } + /// Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); @@ -180,8 +182,19 @@ protected: return "__omp_outlined__"; } + /// Check if the default location must be constant. + /// Constant for NVPTX for better optimization. + bool isDefaultLocationConstant() const override { return true; } + + /// Returns additional flags that can be stored in reserved_2 field of the + /// default location. + /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + + /// Full/Lightweight runtime mode. Used for better optimization. + unsigned getDefaultLocationReserved2Flags() const override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + void clear() override; /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. @@ -261,6 +274,18 @@ public: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + /// Emit an implicit/explicit barrier for OpenMP threads. + /// \param Kind Directive for which this implicit barrier call must be + /// generated. Must be OMPD_barrier for explicit barrier generation. + /// \param EmitChecks true if need to emit checks for cancellation barriers. + /// \param ForceSimpleCall true simple barrier call must be emitted, false if + /// runtime class decides which one to emit (simple or with cancellation + /// checks). + /// + void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks = true, + bool ForceSimpleCall = false) override; + /// Emits a critical region. /// \param CriticalName Name of the critical region. 
/// \param CriticalOpGen Generator for the statement associated with the given @@ -348,7 +373,17 @@ public: /// Choose a default value for the schedule clause. void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, - llvm::Value *&Chunk) const override; + const Expr *&ChunkExpr) const override; + + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. + void adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const override; + + /// Perform check on requires decl to ensure that target architecture + /// supports unified addressing + void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const override; private: /// Track the execution mode when codegening directives within a target @@ -357,9 +392,15 @@ private: /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; + /// Check if the full runtime is required (default - yes). + bool RequiresFullRuntime = true; + /// true if we're emitting the code for the target region and next parallel /// region is L0 for sure. bool IsInTargetMasterThreadRegion = false; + /// true if currently emitting code for target/teams/distribute region, false + /// - otherwise. + bool IsInTTDRegion = false; /// true if we're definitely in the parallel region. bool IsInParallelRegion = false; @@ -373,17 +414,31 @@ private: llvm::Function *createParallelDataSharingWrapper( llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); + /// The data for the single globalized variable. + struct MappedVarData { + /// Corresponding field in the global record. + const FieldDecl *FD = nullptr; + /// Corresponding address. + Address PrivateAddr = Address::invalid(); + /// true, if only one element is required (for latprivates in SPMD mode), + /// false, if need to create based on the warp-size. 
+ bool IsOnePerTeam = false; + MappedVarData() = delete; + MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false) + : FD(FD), IsOnePerTeam(IsOnePerTeam) {} + }; /// The map of local variables to their addresses in the global memory. - using DeclToAddrMapTy = llvm::MapVector<const Decl *, - std::pair<const FieldDecl *, Address>>; + using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>; /// Set of the parameters passed by value escaping OpenMP context. using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; struct FunctionData { DeclToAddrMapTy LocalVarData; + llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None; EscapedParamsTy EscapedParameters; llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; const RecordDecl *GlobalRecord = nullptr; + llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None; llvm::Value *GlobalRecordAddr = nullptr; llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; @@ -391,6 +446,27 @@ private: /// Maps the function to the list of the globalized variables with their /// addresses. llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; + /// List of records for the globalized variables in target/teams/distribute + /// contexts. Inner records are going to be joined into the single record, + /// while those resulting records are going to be joined into the single + /// union. This resulting union (one per CU) is the entry point for the static + /// memory management runtime functions. 
+ struct GlobalPtrSizeRecsTy { + llvm::GlobalVariable *UseSharedMemory = nullptr; + llvm::GlobalVariable *RecSize = nullptr; + llvm::GlobalVariable *Buffer = nullptr; + SourceLocation Loc; + llvm::SmallVector<const RecordDecl *, 2> Records; + unsigned RegionCounter = 0; + }; + llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords; + /// Shared pointer for the global memory in the global memory buffer used for + /// the given kernel. + llvm::GlobalVariable *KernelStaticGlobalized = nullptr; + /// Pair of the Non-SPMD team and all reductions variables in this team + /// region. + std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>> + TeamAndReductions; }; } // CodeGen namespace. diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 58aaae6925..c754541ac1 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -20,7 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 39a2cc145f..bc7a18af1e 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -19,8 +19,6 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Sema/LoopHint.h" -#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -1047,10 +1045,9 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // exception to our over-conservative rules about not jumping to // statements following block literals with non-trivial cleanups. 
RunCleanupsScope cleanupScope(*this); - if (const ExprWithCleanups *cleanups = - dyn_cast_or_null<ExprWithCleanups>(RV)) { - enterFullExpression(cleanups); - RV = cleanups->getSubExpr(); + if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) { + enterFullExpression(fe); + RV = fe->getSubExpr(); } // FIXME: Clean this up by using an LValue for ReturnTemp, @@ -1823,9 +1820,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput( // If this can't be a register or memory, i.e., has to be a constant // (immediate or symbolic), try to emit it as such. if (!Info.allowsRegister() && !Info.allowsMemory()) { - llvm::APSInt Result; + Expr::EvalResult Result; if (InputExpr->EvaluateAsInt(Result, getContext())) - return llvm::ConstantInt::get(getLLVMContext(), Result); + return llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()); assert(!Info.requiresImmediateConstant() && "Required-immediate inlineasm arg isn't constant?"); } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 79ffa7c8e9..4f635efe71 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -385,12 +385,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( FunctionDecl *DebugFunctionDecl = nullptr; if (!FO.UIntPtrCastRequired) { FunctionProtoType::ExtProtoInfo EPI; + QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); DebugFunctionDecl = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), - SourceLocation(), DeclarationName(), Ctx.VoidTy, - Ctx.getTrivialTypeSourceInfo( - Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), - SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + SourceLocation(), DeclarationName(), FunctionTy, + Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, + /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); } for (const FieldDecl *FD : RD->fields()) { QualType ArgType = FD->getType(); @@ -1738,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction 
&CGF, const OMPLoopDirective &S, CGF.EmitOMPReductionClauseInit(S, LoopScope); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { @@ -2006,7 +2008,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); // for combined 'distribute' and 'for' the increment expression of distribute - // is store in DistInc. For 'distribute' alone, it is in Inc. + // is stored in DistInc. For 'distribute' alone, it is in Inc. Expr *IncExpr; if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) IncExpr = S.getDistInc(); @@ -2296,24 +2298,34 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitOMPPrivateLoopCounters(S, LoopScope); EmitOMPLinearClause(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the loop schedule kind and chunk. - llvm::Value *Chunk = nullptr; + const Expr *ChunkExpr = nullptr; OpenMPScheduleTy ScheduleKind; if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { ScheduleKind.Schedule = C->getScheduleKind(); ScheduleKind.M1 = C->getFirstScheduleModifier(); ScheduleKind.M2 = C->getSecondScheduleModifier(); - if (const Expr *Ch = C->getChunkSize()) { - Chunk = EmitScalarExpr(Ch); - Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getBeginLoc()); - } + ChunkExpr = C->getChunkSize(); } else { // Default behaviour for schedule clause. 
CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( - *this, S, ScheduleKind.Schedule, Chunk); + *this, S, ScheduleKind.Schedule, ChunkExpr); + } + bool HasChunkSizeOne = false; + llvm::Value *Chunk = nullptr; + if (ChunkExpr) { + Chunk = EmitScalarExpr(ChunkExpr); + Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), + S.getIterationVariable()->getType(), + S.getBeginLoc()); + Expr::EvalResult Result; + if (ChunkExpr->EvaluateAsInt(Result, getContext())) { + llvm::APSInt EvaluatedChunk = Result.Val.getInt(); + HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); + } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); @@ -2321,8 +2333,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. - if (RT.isStaticNonchunked(ScheduleKind.Schedule, - /* Chunked */ Chunk != nullptr) && + bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + if ((RT.isStaticNonchunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) || + StaticChunkedOne) && !Ordered) { if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); @@ -2333,23 +2349,38 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // unspecified in this case. CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + UB.getAddress(), ST.getAddress(), + StaticChunkedOne ? 
Chunk : nullptr); RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + if (!StaticChunkedOne) + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; EmitIgnoredExpr(S.getInit()); - // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); + // For unchunked static schedule generate: + // + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // For static schedule with chunk one: + // + // while (IV <= PrevUB) { + // BODY; + // IV += ST; + // } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(), + StaticChunkedOne ? S.getDistInc() : S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { @@ -2564,6 +2595,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // Emit static non-chunked loop. OpenMPScheduleTy ScheduleKind; @@ -2922,7 +2955,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( RedCG.emitAggregateType(CGF, Cnt); // FIXME: This must removed once the runtime library is fixed. // Emit required threadprivate variables for - // initilizer/combiner/finalizer. + // initializer/combiner/finalizer. 
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( @@ -2968,10 +3001,10 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( RedCG.emitSharedLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // The taskgroup descriptor variable is always implicit firstprivate and - // privatized already during procoessing of the firstprivates. + // privatized already during processing of the firstprivates. // FIXME: This must removed once the runtime library is fixed. // Emit required threadprivate variables for - // initilizer/combiner/finalizer. + // initializer/combiner/finalizer. CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); llvm::Value *ReductionsPtr = @@ -3317,6 +3350,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the distribute schedule kind and chunk. llvm::Value *Chunk = nullptr; @@ -3345,13 +3380,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // iteration space is divided into chunks that are approximately equal // in size, and at most one chunk is distributed to each team of the // league. The size of the chunks is unspecified in this case. 
+ bool StaticChunked = RT.isStaticChunked( + ScheduleKind, /* Chunked */ Chunk != nullptr) && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); if (RT.isStaticNonchunked(ScheduleKind, - /* Chunked */ Chunk != nullptr)) { + /* Chunked */ Chunk != nullptr) || + StaticChunked) { if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress()); + LB.getAddress(), UB.getAddress(), ST.getAddress(), + StaticChunked ? Chunk : nullptr); RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); JumpDest LoopExit = @@ -3370,15 +3410,45 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, ? S.getCombinedCond() : S.getCond(); - // for distribute alone, codegen - // while (idx <= UB) { BODY; ++idx; } - // when combined with 'for' (e.g. as in 'distribute parallel for') - // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + if (StaticChunked) + Cond = S.getCombinedDistCond(); + + // For static unchunked schedules generate: + // + // 1. For distribute alone, codegen + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // 2. When combined with 'for' (e.g. 
as in 'distribute parallel for') + // while (idx <= UB) { + // <CodeGen rest of pragma>(LB, UB); + // idx += ST; + // } + // + // For static chunk one schedule generate: + // + // while (IV <= GlobalUB) { + // <CodeGen rest of pragma>(LB, UB); + // LB += ST; + // UB += ST; + // UB = min(UB, GlobalUB); + // IV = LB; + // } + // EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { CodeGenLoop(CGF, S, LoopExit); }, - [](CodeGenFunction &) {}); + [&S, StaticChunked](CodeGenFunction &CGF) { + if (StaticChunked) { + CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); + CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedInit()); + } + }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); @@ -3400,20 +3470,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (isOpenMPSimdDirective(S.getDirectiveKind()) && !isOpenMPParallelDirective(S.getDirectiveKind()) && !isOpenMPTeamsDirective(S.getDirectiveKind())) { - OpenMPDirectiveKind ReductionKind = OMPD_unknown; - if (isOpenMPParallelDirective(S.getDirectiveKind()) && - isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for_simd; - } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for; - } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_simd; - } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && - S.hasClausesOfKind<OMPReductionClause>()) { - llvm_unreachable( - "No reduction clauses is allowed in distribute directive."); - } - EmitOMPReductionClauseFinal(S, ReductionKind); + EmitOMPReductionClauseFinal(S, OMPD_simd); // Emit post-update of the reduction variables if IsLastIter != 0. 
emitPostUpdateForReductionClause( *this, S, [IL, &S](CodeGenFunction &CGF) { @@ -3912,6 +3969,10 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_use_device_ptr: case OMPC_is_device_ptr: case OMPC_unified_address: + case OMPC_unified_shared_memory: + case OMPC_reverse_offload: + case OMPC_dynamic_allocators: + case OMPC_atomic_default_mem_order: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -3928,13 +3989,13 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); - if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) - enterFullExpression(EWC); + if (const auto *FE = dyn_cast<FullExpr>(CS)) + enterFullExpression(FE); // Processing for statements under 'atomic capture'. if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { for (const Stmt *C : Compound->body()) { - if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) - enterFullExpression(EWC); + if (const auto *FE = dyn_cast<FullExpr>(C)) + enterFullExpression(FE); } } @@ -4021,6 +4082,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } @@ -4101,6 +4164,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; @@ 
-4659,6 +4724,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // TODO: Add support for clauses. CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); @@ -4959,10 +5026,16 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( if (isOpenMPSimdDirective(D.getDirectiveKind())) { emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); } else { + OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { for (const Expr *E : LD->counters()) { - if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( - cast<DeclRefExpr>(E)->getDecl())) { + const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { + LValue GlobLVal = CGF.EmitLValue(E); + LoopGlobals.addPrivate( + VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); + } + if (isa<OMPCapturedExprDecl>(VD)) { // Emit only those that were not explicitly referenced in clauses. 
if (!CGF.LocalDeclMap.count(VD)) CGF.EmitVarDecl(*VD); @@ -4983,6 +5056,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( } } } + LoopGlobals.Privatize(); CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); } }; diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index e29a035e31..09535900b5 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -16,9 +16,9 @@ #include "CodeGenModule.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/ConstantInitBuilder.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -304,7 +304,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, CGM.ErrorUnsupported( MD, "non-trivial argument copy for return-adjusting thunk"); } - EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr); + EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr); return; } @@ -350,13 +350,12 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, : FPT->getReturnType(); ReturnValueSlot Slot; if (!ResultType->isVoidType() && - CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(CurFnInfo->getReturnType())) + CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. llvm::Instruction *CallOrInvoke; - CGCallee Callee = CGCallee::forDirect(CalleePtr, MD); + CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD); RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke); // Consider return adjustment if we have ThunkInfo. 
@@ -375,7 +374,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, FinishThunk(); } -void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, +void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, llvm::Value *CalleePtr) { // Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery @@ -412,7 +411,7 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, // Apply the standard set of call attributes. unsigned CallingConv; llvm::AttributeList Attrs; - CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs, + CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs, CallingConv, /*AttrOnCallSite=*/true); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index 0dcbea423a..da8a8efb84 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -562,7 +562,10 @@ public: } void setVolatile(bool flag) { - Quals.setVolatile(flag); + if (flag) + Quals.addVolatile(); + else + Quals.removeVolatile(); } Qualifiers::ObjCLifetime getObjCLifetime() const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2a0f4f0e83..29c6793c60 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -102,4 +102,5 @@ add_clang_library(clangCodeGen clangBasic clangFrontend clangLex + clangSerialization ) diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index c152291b15..27f5d53ffe 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -20,7 +20,6 @@ #include "CGRecordLayout.h" #include "CodeGenModule.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 
1a2b0616dc..fd4506f2d1 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -549,12 +549,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( SourceLocation DILoc; if (D.isLocationAvailable()) { - D.getLocation(&Filename, &Line, &Column); - const FileEntry *FE = FileMgr.getFile(Filename); - if (FE && Line > 0) { - // If -gcolumn-info was not used, Column will be 0. This upsets the - // source manager, so pass 1 if Column is not set. - DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + D.getLocation(Filename, Line, Column); + if (Line > 0) { + const FileEntry *FE = FileMgr.getFile(Filename); + if (!FE) + FE = FileMgr.getFile(D.getAbsolutePath()); + if (FE) { + // If -gcolumn-info was not used, Column will be 0. This upsets the + // source manager, so pass 1 if Column is not set. + DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + } } BadDebugInfo = DILoc.isInvalid(); } diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 77f978f687..f012384f3d 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -28,10 +28,10 @@ #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Intrinsics.h" @@ -430,10 +430,25 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { NormalCleanupDest = Address::invalid(); } - // Add the required-vector-width attribute. - if (LargestVectorWidth != 0) - CurFn->addFnAttr("min-legal-vector-width", - llvm::utostr(LargestVectorWidth)); + // Scan function arguments for vector width. 
+ for (llvm::Argument &A : CurFn->args()) + if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + + // Update vector width based on return type. + if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + + // Add the required-vector-width attribute. This contains the max width from: + // 1. min-vector-width attribute used in the source program. + // 2. Any builtins used that have a vector width specified. + // 3. Values passed in and out of inline assembly. + // 4. Width of vector arguments and return types for this function. + // 5. Width of vector aguments and return types for functions called by this + // function. + CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); } /// ShouldInstrumentFunction - Return true if the current function should be @@ -1058,9 +1073,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // Count the implicit return. if (!endsWithReturn(D)) ++NumReturnExprs; - } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { - // Indirect aggregate return; emit returned value directly into sret slot. + } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) { + // Indirect return; emit returned value directly into sret slot. // This reduces code size, and affects correctness in C++. 
auto AI = CurFn->arg_begin(); if (CurFnInfo->getReturnInfo().isSRetAfterThis()) @@ -1188,8 +1202,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, LargestVectorWidth = VecWidth->getVectorWidth(); } -void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, - const Stmt *Body) { +void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) EmitCompoundStmtWithoutScope(*S); @@ -1357,7 +1370,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // copy-constructors. emitImplicitAssignmentOperatorBody(Args); } else if (Body) { - EmitFunctionBody(Args, Body); + EmitFunctionBody(Body); } else llvm_unreachable("no definition for emitted function"); @@ -1498,10 +1511,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, bool AllowLabels) { // FIXME: Rename and handle conversion of other evaluatable things // to bool. - llvm::APSInt Int; - if (!Cond->EvaluateAsInt(Int, getContext())) + Expr::EvalResult Result; + if (!Cond->EvaluateAsInt(Result, getContext())) return false; // Not foldable, not integer or not fully evaluatable. + llvm::APSInt Int = Result.Val.getInt(); if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond)) return false; // Contains a label. @@ -1686,7 +1700,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // create metadata that specifies that the branch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. 
llvm::MDNode *Unpredictable = nullptr; - auto *Call = dyn_cast<CallExpr>(Cond); + auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { @@ -2276,7 +2290,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, // Now build up the set of caller features and verify that all the required // features are there. llvm::StringMap<bool> CallerFeatureMap; - CGM.getFunctionFeatureMap(CallerFeatureMap, FD); + CGM.getFunctionFeatureMap(CallerFeatureMap, GlobalDecl().getWithDecl(FD)); // If we have at least one of the features in the feature list return // true, otherwise return false. @@ -2284,14 +2298,13 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) { SmallVector<StringRef, 1> OrFeatures; Feature.split(OrFeatures, '|'); - return std::any_of(OrFeatures.begin(), OrFeatures.end(), - [&](StringRef Feature) { - if (!CallerFeatureMap.lookup(Feature)) { - FirstMissing = Feature.str(); - return false; - } - return true; - }); + return llvm::any_of(OrFeatures, [&](StringRef Feature) { + if (!CallerFeatureMap.lookup(Feature)) { + FirstMissing = Feature.str(); + return false; + } + return true; + }); }); } @@ -2378,6 +2391,29 @@ CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { return Condition; } +static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, + llvm::Function *Resolver, + CGBuilderTy &Builder, + llvm::Function *FuncToReturn, + bool SupportsIFunc) { + if (SupportsIFunc) { + Builder.CreateRet(FuncToReturn); + return; + } + + llvm::SmallVector<llvm::Value *, 10> Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); 
+ Result->setTailCallKind(llvm::CallInst::TCK_MustTail); + + if (Resolver->getReturnType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(Result); +} + void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { assert((getContext().getTargetInfo().getTriple().getArch() == @@ -2385,6 +2421,9 @@ void CodeGenFunction::EmitMultiVersionResolver( getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) && "Only implemented for x86 targets"); + + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + // Main function's basic block. llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); Builder.SetInsertPoint(CurBlock); @@ -2398,13 +2437,15 @@ void CodeGenFunction::EmitMultiVersionResolver( if (!Condition) { assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); - Builder.CreateRet(RO.Function); + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); return; } llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); CurBlock = createBasicBlock("resolver_else", Resolver); Builder.CreateCondBr(Condition, RetBlock, CurBlock); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 6ea2d75b31..8971accdcd 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -29,9 +29,9 @@ #include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "clang/Basic/CapturedStmt.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include 
"llvm/ADT/MapVector.h" @@ -1197,6 +1197,8 @@ public: private: CGDebugInfo *DebugInfo; + /// Used to create unique names for artificial VLA size debug info variables. + unsigned VLAExprCounter = 0; bool DisableDebugInfo = false; /// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid @@ -1787,7 +1789,7 @@ public: llvm::Value *ptr); Address LoadBlockStruct(); - Address GetAddrOfBlockDecl(const VarDecl *var, bool ByRef); + Address GetAddrOfBlockDecl(const VarDecl *var); /// BuildBlockByrefAddress - Computes the location of the /// data in a variable which is declared as __block. @@ -1825,7 +1827,7 @@ public: void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); - void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body); + void EmitFunctionBody(const Stmt *Body); void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, @@ -1854,7 +1856,7 @@ public: void FinishThunk(); /// Emit a musttail call for a thunk with a potentially adjusted this pointer. - void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr, + void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, llvm::Value *Callee); /// Generate a thunk for the given method. @@ -2683,8 +2685,9 @@ public: llvm::Value *NRVOFlag; - /// True if the variable is a __block variable. - bool IsByRef; + /// True if the variable is a __block variable that is captured by an + /// escaping block. + bool IsEscapingByRef; /// True if the variable is of aggregate type and has a constant /// initializer. 
@@ -2704,7 +2707,7 @@ public: AutoVarEmission(const VarDecl &variable) : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), - IsByRef(false), IsConstantAggregate(false), + IsEscapingByRef(false), IsConstantAggregate(false), SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } @@ -2734,7 +2737,7 @@ public: /// Note that this does not chase the forwarding pointer for /// __block decls. Address getObjectAddress(CodeGenFunction &CGF) const { - if (!IsByRef) return Addr; + if (!IsEscapingByRef) return Addr; return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false); } @@ -3523,6 +3526,7 @@ public: ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr); ConstantEmission tryEmitAsConstant(const MemberExpr *ME); + llvm::Value *emitScalarConstant(const ConstantEmission &Constant, Expr *E); RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e, AggValueSlot slot = AggValueSlot::ignored()); @@ -3677,9 +3681,8 @@ public: RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); - RValue EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E, - ReturnValueSlot ReturnValue); + RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, ReturnValueSlot ReturnValue); RValue emitRotate(const CallExpr *E, bool IsRotateRight); @@ -3802,6 +3805,10 @@ public: std::pair<LValue,llvm::Value*> EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored); + llvm::Value *EmitObjCAlloc(llvm::Value *value, + llvm::Type *returnType); + llvm::Value *EmitObjCAllocWithZone(llvm::Value *value, + llvm::Type *returnType); llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); @@ -3891,6 +3898,8 @@ public: AddInitializerToStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *GV); + 
// Emit an @llvm.invariant.start call for the given memory region. + void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size); /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++ /// variable with global storage. @@ -3926,9 +3935,10 @@ public: /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. - void GenerateCXXGlobalInitFunc(llvm::Function *Fn, - ArrayRef<llvm::Function *> CXXThreadLocals, - Address Guard = Address::invalid()); + void + GenerateCXXGlobalInitFunc(llvm::Function *Fn, + ArrayRef<llvm::Function *> CXXThreadLocals, + ConstantAddress Guard = ConstantAddress::invalid()); /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. @@ -3946,11 +3956,13 @@ public: void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp); - void enterFullExpression(const ExprWithCleanups *E) { - if (E->getNumObjects() == 0) return; + void enterFullExpression(const FullExpr *E) { + if (const auto *EWC = dyn_cast<ExprWithCleanups>(E)) + if (EWC->getNumObjects() == 0) + return; enterNonTrivialFullExpression(E); } - void enterNonTrivialFullExpression(const ExprWithCleanups *E); + void enterNonTrivialFullExpression(const FullExpr *E); void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); @@ -4273,6 +4285,7 @@ public: struct MultiVersionResolverOption { llvm::Function *Function; + FunctionDecl *FD; struct Conds { StringRef Architecture; llvm::SmallVector<StringRef, 8> Features; @@ -4292,22 +4305,7 @@ public: void EmitMultiVersionResolver(llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options); - struct CPUDispatchMultiVersionResolverOption { - llvm::Function *Function; - // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask - // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports, - // this can be extended to 64 bits. 
- uint32_t FeatureMask; - CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask) - : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {} - bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const { - return FeatureMask > Other.FeatureMask; - } - }; - void EmitCPUDispatchMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef<CPUDispatchMultiVersionResolverOption> Options); - static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); + static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); private: QualType getVarArgType(const Expr *Arg); @@ -4324,7 +4322,7 @@ private: llvm::Value *EmitX86CpuIs(StringRef CPUStr); llvm::Value *EmitX86CpuSupports(const CallExpr *E); llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); - llvm::Value *EmitX86CpuSupports(uint32_t Mask); + llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); }; diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index c2fb4797cc..df814d6386 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -36,14 +36,14 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" #include "clang/CodeGen/ConstantInitBuilder.h" -#include "clang/Frontend/CodeGenOptions.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -126,7 +126,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); - 
if (LangOpts.ObjC1) + if (LangOpts.ObjC) createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); @@ -149,12 +149,12 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, Block.GlobalUniqueCount = 0; - if (C.getLangOpts().ObjC1) + if (C.getLangOpts().ObjC) ObjCData.reset(new ObjCEntrypoints()); if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( - CodeGenOpts.ProfileInstrumentUsePath); + CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Could not read profile %0: %1"); @@ -322,8 +322,6 @@ void CodeGenModule::checkAliases() { assert(FTy); if (!FTy->getReturnType()->isPointerTy()) Diags.Report(Location, diag::err_ifunc_resolver_return); - if (FTy->getNumParams()) - Diags.Report(Location, diag::err_ifunc_resolver_params); } llvm::Constant *Aliasee = Alias->getIndirectSymbol(); @@ -460,6 +458,9 @@ void CodeGenModule::Release() { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } + if (CodeGenOpts.CodeViewGHash) { + getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); + } if (CodeGenOpts.ControlFlowGuard) { // We want function ID tables for Control Flow Guard. getModule().addModuleFlag(llvm::Module::Warning, "cfguardtable", 1); @@ -589,6 +590,9 @@ void CodeGenModule::Release() { if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); + if (!getCodeGenOpts().RecordCommandLine.empty()) + EmitCommandLineMetadata(); + EmitTargetMetadata(); } @@ -893,11 +897,13 @@ static std::string getCPUSpecificMangling(const CodeGenModule &CGM, static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, + unsigned CPUIndex, raw_ostream &Out) { - // cpu_specific gets the current name, dispatch gets the resolver. 
+ // cpu_specific gets the current name, dispatch gets the resolver if IFunc is + // supported. if (Attr) - Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); - else + Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName()); + else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } @@ -963,11 +969,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, if (const auto *FD = dyn_cast<FunctionDecl>(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { - if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) - AppendCPUSpecificCPUDispatchMangling( - CGM, FD->getAttr<CPUSpecificAttr>(), Out); - else + switch (FD->getMultiVersionKind()) { + case MultiVersionKind::CPUDispatch: + case MultiVersionKind::CPUSpecific: + AppendCPUSpecificCPUDispatchMangling(CGM, + FD->getAttr<CPUSpecificAttr>(), + GD.getMultiVersionIndex(), Out); + break; + case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); + break; + case MultiVersionKind::None: + llvm_unreachable("None multiversion type isn't valid here"); + } } return Out.str(); @@ -992,8 +1006,10 @@ void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, "Other GD should now be a multiversioned function"); // OtherFD is the version of this function that was mangled BEFORE // becoming a MultiVersion function. It potentially needs to be updated. - const FunctionDecl *OtherFD = - OtherGD.getCanonicalDecl().getDecl()->getAsFunction(); + const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl() + .getDecl() + ->getAsFunction() + ->getMostRecentDecl(); std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD); // This is so that if the initial version was already the 'default' // version, we don't try to update it. 
@@ -1025,26 +1041,6 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { } } - const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); - // Since CPUSpecific can require multiple emits per decl, store the manglings - // separately. - if (FD && - (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { - const auto *SD = FD->getAttr<CPUSpecificAttr>(); - - std::pair<GlobalDecl, unsigned> SpecCanonicalGD{ - CanonicalGD, - SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()}; - - auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); - if (FoundName != CPUSpecificMangledDeclNames.end()) - return FoundName->second; - - auto Result = CPUSpecificManglings.insert( - std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD)); - return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first(); - } - auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -1106,11 +1102,12 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // Ctor function type is void()*. llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); - llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy); + llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy, + TheModule.getDataLayout().getProgramAddressSpace()); // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( - Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy); + Int32Ty, CtorPFTy, VoidPtrTy); // Construct the constructor and destructor arrays. 
ConstantInitBuilder builder(*this); @@ -1166,12 +1163,12 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } -void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, +void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F) { unsigned CallingConv; llvm::AttributeList PAL; - ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false); + ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, false); F->setAttributes(PAL); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -1301,9 +1298,19 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // Otherwise, propagate the inline hint attribute and potentially use its // absence to mark things as noinline. if (auto *FD = dyn_cast<FunctionDecl>(D)) { - if (any_of(FD->redecls(), [&](const FunctionDecl *Redecl) { - return Redecl->isInlineSpecified(); - })) { + // Search function and template pattern redeclarations for inline. 
+ auto CheckForInline = [](const FunctionDecl *FD) { + auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { + return Redecl->isInlineSpecified(); + }; + if (any_of(FD->redecls(), CheckRedeclForInline)) + return true; + const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(); + if (!Pattern) + return false; + return any_of(Pattern->redecls(), CheckRedeclForInline); + }; + if (CheckForInline(FD)) { B.addAttribute(llvm::Attribute::InlineHint); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && @@ -1377,26 +1384,27 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { if (CodeGenOpts.KeepStaticConsts && D && isa<VarDecl>(D)) { const auto *VD = cast<VarDecl>(D); - if (VD->getType().isConstQualified() && VD->getStorageClass() == SC_Static) + if (VD->getType().isConstQualified() && + VD->getStorageDuration() == SD_Static) addUsedGlobal(GV); } } -bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D, +bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &Attrs) { // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; std::vector<std::string> Features; - const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; bool AddedAttr = false; if (TD || SD) { llvm::StringMap<bool> FeatureMap; - getFunctionFeatureMap(FeatureMap, FD); + getFunctionFeatureMap(FeatureMap, GD); // Produce the canonical string for this set of features. 
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) @@ -1452,7 +1460,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, F->addFnAttr("implicit-section-name", SA->getName()); llvm::AttrBuilder Attrs; - if (GetCPUAndFeaturesAttributes(D, Attrs)) { + if (GetCPUAndFeaturesAttributes(GD, Attrs)) { // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. @@ -1475,7 +1483,7 @@ void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { const Decl *D = GD.getDecl(); - SetLLVMFunctionAttributes(D, FI, F); + SetLLVMFunctionAttributes(GD, FI, F); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); @@ -1537,7 +1545,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, const auto *FD = cast<FunctionDecl>(GD.getDecl()); if (!IsIncompleteFunction) { - SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F); + SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F); // Setup target-specific attributes. 
if (F->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); @@ -2015,7 +2023,7 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { if (CodeGenOpts.KeepStaticConsts) { const auto *VD = dyn_cast<VarDecl>(Global); if (VD && VD->getType().isConstQualified() && - VD->getStorageClass() == SC_Static) + VD->getStorageDuration() == SD_Static) return true; } @@ -2413,6 +2421,19 @@ bool CodeGenModule::shouldOpportunisticallyEmitVTables() { return CodeGenOpts.OptimizationLevel > 0; } +void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, + llvm::GlobalValue *GV) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + + if (FD->isCPUSpecificMultiVersion()) { + auto *Spec = FD->getAttr<CPUSpecificAttr>(); + for (unsigned I = 0; I < Spec->cpus_size(); ++I) + EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); + // Requires multiple emits. + } else + EmitGlobalFunctionDefinition(GD, GV); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -2420,7 +2441,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { Context.getSourceManager(), "Generating code for declaration"); - if (isa<FunctionDecl>(D)) { + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. 
if (!shouldEmitFunction(GD)) @@ -2433,6 +2454,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType())); else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method)) ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType())); + else if (FD->isMultiVersion()) + EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); @@ -2442,6 +2465,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { return; } + if (FD->isMultiVersion()) + return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } @@ -2499,13 +2524,19 @@ void CodeGenModule::emitMultiVersionFunctions() { TA->getArchitecture(), Feats); }); - llvm::Function *ResolverFunc = cast<llvm::Function>( - GetGlobalValue((getMangledName(GD) + ".resolver").str())); + llvm::Function *ResolverFunc; + const TargetInfo &TI = getTarget(); + + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + ResolverFunc = cast<llvm::Function>( + GetGlobalValue((getMangledName(GD) + ".resolver").str())); + else + ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); + if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); - const TargetInfo &TI = getTarget(); std::stable_sort( Options.begin(), Options.end(), [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, @@ -2522,26 +2553,58 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr<CPUDispatchAttr>(); assert(DD && "Not a cpu_dispatch Function?"); - llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + QualType CanonTy = Context.getCanonicalType(FD->getType()); + llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD); + + if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) { + const CGFunctionInfo &FInfo = 
getTypes().arrangeCXXMethodDeclaration(CXXFD); + DeclTy = getTypes().GetFunctionType(FInfo); + } StringRef ResolverName = getMangledName(GD); - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - auto *ResolverFunc = cast<llvm::Function>( - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false)); + + llvm::Type *ResolverType; + GlobalDecl ResolverGD; + if (getTarget().supportsIFunc()) + ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + else { + ResolverType = DeclTy; + ResolverGD = GD; + } + + auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( + ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const TargetInfo &Target = getTarget(); + unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. 
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); - llvm::Constant *Func = GetOrCreateLLVMFunction( - MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, - /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + + llvm::Constant *Func = GetGlobalValue(MangledName); + + if (!Func) { + GlobalDecl ExistingDecl = Manglings.lookup(MangledName); + if (ExistingDecl.getDecl() && + ExistingDecl.getDecl()->getAsFunction()->isDefined()) { + EmitGlobalFunctionDefinition(ExistingDecl, nullptr); + Func = GetGlobalValue(MangledName); + } else { + if (!ExistingDecl.getDecl()) + ExistingDecl = GD.getWithMultiVersionIndex(Index); + + Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, ExistingDecl, + /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + } + } + llvm::SmallVector<StringRef, 32> Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), @@ -2551,29 +2614,53 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { return !Target.validateCpuSupports(Feat); }), Features.end()); Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features); + ++Index; } llvm::sort( - Options.begin(), Options.end(), - [](const CodeGenFunction::MultiVersionResolverOption &LHS, - const CodeGenFunction::MultiVersionResolverOption &RHS) { + Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) > CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features); }); + + // If the list contains multiple 'default' versions, such as when it contains + // 'pentium' and 'generic', don't emit the call to the generic one (since we + // always run on at least a 'pentium'). 
We do this by deleting the 'least + // advanced' (read, lowest mangling letter). + while (Options.size() > 1 && + CodeGenFunction::GetX86CpuSupportsMask( + (Options.end() - 2)->Conditions.Features) == 0) { + StringRef LHSName = (Options.end() - 2)->Function->getName(); + StringRef RHSName = (Options.end() - 1)->Function->getName(); + if (LHSName.compare(RHSName) < 0) + Options.erase(Options.end() - 2); + else + Options.erase(Options.end() - 1); + } + CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); } -/// If an ifunc for the specified mangled name is not in the module, create and -/// return an llvm IFunc Function with the specified type. -llvm::Constant * -CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, - const FunctionDecl *FD) { +/// If a dispatcher for the specified mangled name is not in the module, create +/// and return an llvm Function with the specified type. +llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( + GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); - std::string IFuncName = MangledName + ".ifunc"; - if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName)) - return IFuncGV; + + // Holds the name of the resolver, in ifunc mode this is the ifunc (which has + // a separate resolver). + std::string ResolverName = MangledName; + if (getTarget().supportsIFunc()) + ResolverName += ".ifunc"; + else if (FD->isTargetMultiVersion()) + ResolverName += ".resolver"; + + // If this already exists, just return that one. 
+ if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) + return ResolverGV; // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be @@ -2581,20 +2668,28 @@ CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) MultiVersionFuncs.push_back(GD); - std::string ResolverName = MangledName + ".resolver"; - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - llvm::Constant *Resolver = - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false); - llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); - GIF->setName(IFuncName); - SetCommonAttributes(FD, GIF); + if (getTarget().supportsIFunc()) { + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get( + DeclTy, getContext().getTargetAddressSpace(FD->getType())), + false); + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + MangledName + ".resolver", ResolverType, GlobalDecl{}, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( + DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + GIF->setName(ResolverName); + SetCommonAttributes(FD, GIF); - return GIF; + return GIF; + } + + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); + assert(isa<llvm::GlobalValue>(Resolver) && + "Resolver should be created for the first time"); + SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver)); + return Resolver; } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the @@ -2634,7 +2729,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( if (TA && TA->isDefaultVersion()) UpdateMultiVersionNames(GD, 
FD); if (!IsForDefinition) - return GetOrCreateMultiVersionIFunc(GD, Ty, FD); + return GetOrCreateMultiVersionResolver(GD, Ty, FD); } } @@ -3393,8 +3488,15 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // CUDA E.2.4.1 "__shared__ variables cannot have an initialization // as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && - D->hasAttr<CUDASharedAttr>()) + bool IsCUDASharedVar = + getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>(); + // Shadows of initialized device-side global variables are also left + // undefined. + bool IsCUDAShadowVar = + !getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() || + D->hasAttr<CUDASharedAttr>()); + if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3672,6 +3774,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( return llvm::GlobalVariable::WeakAnyLinkage; } + if (const auto *FD = D->getAsFunction()) + if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) + return llvm::GlobalVariable::LinkOnceAnyLinkage; + // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) @@ -3908,15 +4014,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, Fn); - - if (D->isCPUSpecificMultiVersion()) { - auto *Spec = D->getAttr<CPUSpecificAttr>(); - // If there is another specific version we need to emit, do so here. 
- if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) { - ++Spec->ActiveArgIndex; - EmitGlobalFunctionDefinition(GD, nullptr); - } - } } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { @@ -4109,51 +4206,82 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; - + + const ASTContext &Context = getContext(); + const llvm::Triple &Triple = getTriple(); + + const auto CFRuntime = getLangOpts().CFRuntime; + const bool IsSwiftABI = + static_cast<unsigned>(CFRuntime) >= + static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift); + const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1; + // If we don't already have it, get __CFConstantStringClassReference. if (!CFConstantStringClassRef) { + const char *CFConstantStringClassName = "__CFConstantStringClassReference"; llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); - llvm::Constant *C = - CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"); - - if (getTriple().isOSBinFormatELF() || getTriple().isOSBinFormatCOFF()) { + + switch (CFRuntime) { + default: break; + case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH; + case LangOptions::CoreFoundationABI::Swift5_0: + CFConstantStringClassName = + Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN" + : "$s10Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + case LangOptions::CoreFoundationABI::Swift4_2: + CFConstantStringClassName = + Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN" + : "$S10Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + case LangOptions::CoreFoundationABI::Swift4_1: + CFConstantStringClassName = + Triple.isOSDarwin() ? 
"__T015SwiftFoundation19_NSCFConstantStringCN" + : "__T010Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + } + + llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName); + + if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) { llvm::GlobalValue *GV = nullptr; - + if ((GV = dyn_cast<llvm::GlobalValue>(C))) { - IdentifierInfo &II = getContext().Idents.get(GV->getName()); - TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl(); + IdentifierInfo &II = Context.Idents.get(GV->getName()); + TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); const VarDecl *VD = nullptr; for (const auto &Result : DC->lookup(&II)) if ((VD = dyn_cast<VarDecl>(Result))) break; - - if (getTriple().isOSBinFormatELF()) { + + if (Triple.isOSBinFormatELF()) { if (!VD) GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } - else { - if (!VD || !VD->hasAttr<DLLExportAttr>()) { + } else { + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + if (!VD || !VD->hasAttr<DLLExportAttr>()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else { + else GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } } - + setDSOLocal(GV); } } - + // Decay array -> ptr CFConstantStringClassRef = - llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros); + IsSwiftABI ? llvm::ConstantExpr::getPtrToInt(C, Ty) + : llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros); } - QualType CFTy = getContext().getCFConstantStringType(); + QualType CFTy = Context.getCFConstantStringType(); auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy)); @@ -4164,7 +4292,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { Fields.add(cast<llvm::ConstantExpr>(CFConstantStringClassRef)); // Flags. - Fields.addInt(IntTy, isUTF16 ? 
0x07d0 : 0x07C8); + if (IsSwiftABI) { + Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01); + Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8); + } else { + Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); + } // String pointer. llvm::Constant *C = nullptr; @@ -4185,22 +4318,21 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - CharUnits Align = isUTF16 - ? getContext().getTypeAlignInChars(getContext().ShortTy) - : getContext().getTypeAlignInChars(getContext().CharTy); + CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) + : Context.getTypeAlignInChars(Context.CharTy); GV->setAlignment(Align.getQuantity()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during // LTO. Doing that changes the section it ends in, which surprises ld64. - if (getTriple().isOSBinFormatMachO()) + if (Triple.isOSBinFormatMachO()) GV->setSection(isUTF16 ? "__TEXT,__ustring" : "__TEXT,__cstring,cstring_literals"); // Make sure the literal ends up in .rodata to allow for safe ICF and for // the static linker to adjust permissions to read-only later on. - else if (getTriple().isOSBinFormatELF()) + else if (Triple.isOSBinFormatELF()) GV->setSection(".rodata"); - + // String. llvm::Constant *Str = llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); @@ -4211,8 +4343,17 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { Fields.add(Str); // String length. 
- auto Ty = getTypes().ConvertType(getContext().LongTy); - Fields.addInt(cast<llvm::IntegerType>(Ty), StringLength); + llvm::IntegerType *LengthTy = + llvm::IntegerType::get(getModule().getContext(), + Context.getTargetInfo().getLongWidth()); + if (IsSwiftABI) { + if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 || + CFRuntime == LangOptions::CoreFoundationABI::Swift4_2) + LengthTy = Int32Ty; + else + LengthTy = IntPtrTy; + } + Fields.addInt(LengthTy, StringLength); CharUnits Alignment = getPointerAlign(); @@ -4220,7 +4361,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); - switch (getTriple().getObjectFormat()) { + switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); case llvm::Triple::COFF: @@ -4713,6 +4854,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::TypeAliasTemplate: case Decl::Block: case Decl::Empty: + case Decl::Binding: break; case Decl::Using: // using X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) @@ -4879,7 +5021,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); break; - + case Decl::OMPRequires: EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D)); break; @@ -5078,6 +5220,16 @@ void CodeGenModule::EmitVersionIdentMetadata() { IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } +void CodeGenModule::EmitCommandLineMetadata() { + llvm::NamedMDNode *CommandLineMetadata = + TheModule.getOrInsertNamedMetadata("llvm.commandline"); + std::string CommandLine = getCodeGenOpts().RecordCommandLine; + llvm::LLVMContext &Ctx = TheModule.getContext(); + + llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)}; + CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode)); +} + void 
CodeGenModule::EmitTargetMetadata() { // Warning, new MangledDeclNames may be appended within this loop. // We rely on MapVector insertions adding new elements to the end @@ -5293,8 +5445,9 @@ TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const Targ // Fills in the supplied string map with the set of target features for the // passed in function. void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, - const FunctionDecl *FD) { + GlobalDecl GD) { StringRef TargetCPU = Target.getTargetOpts().CPU; + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); if (const auto *TD = FD->getAttr<TargetAttr>()) { TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD); @@ -5316,8 +5469,8 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, ParsedAttr.Features); } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) { llvm::SmallVector<StringRef, 32> FeaturesTmp; - Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(), - FeaturesTmp); + Target.getCPUSpecificCPUDispatchFeatures( + SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp); std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end()); Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); } else { diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index baf3619ca8..0f6c3bec9e 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -119,7 +119,13 @@ struct OrderGlobalInits { struct ObjCEntrypoints { ObjCEntrypoints() { memset(this, 0, sizeof(*this)); } - /// void objc_autoreleasePoolPop(void*); + /// void objc_alloc(id); + llvm::Constant *objc_alloc; + + /// void objc_allocWithZone(id); + llvm::Constant *objc_allocWithZone; + + /// void objc_autoreleasePoolPop(void*); llvm::Constant *objc_autoreleasePoolPop; /// void *objc_autoreleasePoolPush(void); @@ -1043,8 +1049,7 @@ public: const CGFunctionInfo &FI); /// Set the LLVM 
function attributes (sext, zext, etc). - void SetLLVMFunctionAttributes(const Decl *D, - const CGFunctionInfo &Info, + void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F); /// Set the LLVM function attributes which only apply to a function @@ -1104,8 +1109,7 @@ public: // Fills in the supplied string map with the set of target features for the // passed in function. - void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, - const FunctionDecl *FD); + void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, GlobalDecl GD); StringRef getMangledName(GlobalDecl GD); StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); @@ -1293,9 +1297,9 @@ private: llvm::AttributeList ExtraAttrs = llvm::AttributeList(), ForDefinition_t IsForDefinition = NotForDefinition); - llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD, - llvm::Type *DeclTy, - const FunctionDecl *FD); + llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD, + llvm::Type *DeclTy, + const FunctionDecl *FD); void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, @@ -1304,7 +1308,7 @@ private: ForDefinition_t IsForDefinition = NotForDefinition); - bool GetCPUAndFeaturesAttributes(const Decl *D, + bool GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &AttrBuilder); void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO); @@ -1315,6 +1319,8 @@ private: void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); @@ -1402,6 +1408,9 @@ private: /// Emit the Clang version as llvm.ident metadata. 
void EmitVersionIdentMetadata(); + /// Emit the Clang commandline as llvm.commandline metadata. + void EmitCommandLineMetadata(); + /// Emits target specific Metadata for global declarations. void EmitTargetMetadata(); diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 0759e65388..120ab651a4 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -17,7 +17,6 @@ #include "CGBuilder.h" #include "CodeGenModule.h" #include "CodeGenTypes.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ProfileData/InstrProfReader.h" #include <array> #include <memory> diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index ec48231e52..27d39716d2 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -20,7 +20,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 1a1395e6ae..2acf1ac161 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -503,6 +503,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 626869f000..8e344e91b8 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -17,7 +17,6 @@ #include "CGCall.h" #include "clang/Basic/ABI.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Sema/Sema.h" #include "llvm/ADT/DenseMap.h" 
#include "llvm/IR/Module.h" diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h index b4d1b65743..7ad8e5d37c 100644 --- a/lib/CodeGen/ConstantEmitter.h +++ b/lib/CodeGen/ConstantEmitter.h @@ -38,6 +38,9 @@ private: /// Whether the constant-emission failed. bool Failed = false; + /// Whether we're in a constant context. + bool InConstantContext = false; + /// The AST address space where this (non-abstract) initializer is going. /// Used for generating appropriate placeholders. LangAS DestAddressSpace; diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index e75f2c8091..35962c73d9 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -552,6 +552,15 @@ struct CounterCoverageMappingBuilder completeDeferred(Count, DeferredEndLoc); } + size_t locationDepth(SourceLocation Loc) { + size_t Depth = 0; + while (Loc.isValid()) { + Loc = getIncludeOrExpansionLoc(Loc); + Depth++; + } + return Depth; + } + /// Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the @@ -566,19 +575,41 @@ struct CounterCoverageMappingBuilder SourceLocation EndLoc = Region.hasEndLoc() ? Region.getEndLoc() : RegionStack[ParentIndex].getEndLoc(); + size_t StartDepth = locationDepth(StartLoc); + size_t EndDepth = locationDepth(EndLoc); while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) { - // The region ends in a nested file or macro expansion. Create a - // separate region for each expansion. 
- SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); - assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); - - if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) - SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); - - EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); - if (EndLoc.isInvalid()) - llvm::report_fatal_error("File exit not handled before popRegions"); + bool UnnestStart = StartDepth >= EndDepth; + bool UnnestEnd = EndDepth >= StartDepth; + if (UnnestEnd) { + // The region ends in a nested file or macro expansion. Create a + // separate region for each expansion. + SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); + assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); + + if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) + SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); + + EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); + if (EndLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + EndDepth--; + } + if (UnnestStart) { + // The region begins in a nested file or macro expansion. Create a + // separate region for each expansion. + SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc); + assert(SM.isWrittenInSameFile(StartLoc, NestedLoc)); + + if (!isRegionAlreadyAdded(StartLoc, NestedLoc)) + SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc); + + StartLoc = getIncludeOrExpansionLoc(StartLoc); + if (StartLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + StartDepth--; + } } + Region.setStartLoc(StartLoc); Region.setEndLoc(EndLoc); MostRecentLocation = EndLoc; @@ -625,12 +656,15 @@ struct CounterCoverageMappingBuilder return RegionStack.back(); } - /// Propagate counts through the children of \c S. - Counter propagateCounts(Counter TopCount, const Stmt *S) { + /// Propagate counts through the children of \p S if \p VisitChildren is true. + /// Otherwise, only emit a count for \p S itself. 
+ Counter propagateCounts(Counter TopCount, const Stmt *S, + bool VisitChildren = true) { SourceLocation StartLoc = getStart(S); SourceLocation EndLoc = getEnd(S); size_t Index = pushRegion(TopCount, StartLoc, EndLoc); - Visit(S); + if (VisitChildren) + Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); @@ -843,7 +877,16 @@ struct CounterCoverageMappingBuilder if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) return; - propagateCounts(getRegionCounter(Body), Body); + // Do not visit the artificial children nodes of defaulted methods. The + // lexer may not be able to report back precise token end locations for + // these children nodes (llvm.org/PR39822), and moreover users will not be + // able to see coverage for them. + bool Defaulted = false; + if (auto *Method = dyn_cast<CXXMethodDecl>(D)) + Defaulted = Method->isDefaulted(); + + propagateCounts(getRegionCounter(Body), Body, + /*VisitChildren=*/!Defaulted); assert(RegionStack.empty() && "Regions entered but never exited"); // Discard the last uncompleted deferred region in a decl, if one exists. 
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index b08ad896d7..c62db09695 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -16,7 +16,6 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Lex/PPCallbacks.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/GlobalValue.h" diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index a7e3c8d58b..b53304528c 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -287,6 +287,7 @@ public: void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override; + bool canSpeculativelyEmitVTableAsBaseClass(const CXXRecordDecl *RD) const; void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD, bool ReturnAdjustment) override { @@ -1562,9 +1563,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, Type != Dtor_Base && DD->isVirtual()) Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent()); else - Callee = - CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), - DD); + Callee = CGCallee::forDirect( + CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD); CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(), This.getPointer(), VTT, VTTTy, @@ -1750,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = VFuncLoad; } - CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } @@ -1778,7 +1778,8 @@ void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) { VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD); } -bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { +bool ItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass( + const CXXRecordDecl *RD) 
const { // We don't emit available_externally vtables if we are in -fapple-kext mode // because kext mode does not permit devirtualization. if (CGM.getLangOpts().AppleKext) @@ -1796,7 +1797,43 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { // to emit an available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. - return !hasAnyUnusedVirtualInlineFunction(RD); + if (hasAnyUnusedVirtualInlineFunction(RD)) + return false; + + // For a class with virtual bases, we must also be able to speculatively + // emit the VTT, because CodeGen doesn't have separate notions of "can emit + // the vtable" and "can emit the VTT". For a base subobject, this means we + // need to be able to emit non-virtual base vtables. + if (RD->getNumVBases()) { + for (const auto &B : RD->bases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (B.isVirtual() || !BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + } + + return true; +} + +bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { + if (!canSpeculativelyEmitVTableAsBaseClass(RD)) + return false; + + // For a complete-object vtable (or more specifically, for the VTT), we need + // to be able to speculatively emit the vtables of all dynamic virtual bases. + for (const auto &B : RD->vbases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (!BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + + return true; } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -1916,7 +1953,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, // Handle the array cookie specially in ASan. 
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 && (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() || - CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) { + CGM.getCodeGenOpts().SanitizeAddressPoisonCustomArrayCookie)) { // The store to the CookiePtr does not need to be instrumented. CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); llvm::FunctionType *FTy = @@ -2315,11 +2352,13 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(), SourceLocation()); ASTContext &Ctx = getContext(); + QualType ReturnTy = Ctx.VoidTy; + QualType FunctionTy = Ctx.getFunctionType(ReturnTy, llvm::None, {}); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static, + &Ctx.Idents.get(GlobalInitFnName), FunctionTy, nullptr, SC_Static, false, false); - CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn, + CGF.StartFunction(GlobalDecl(FD), ReturnTy, GlobalInitFn, getTypes().arrangeNullaryFunction(), FunctionArgList(), SourceLocation(), SourceLocation()); @@ -2418,7 +2457,7 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), WrapperName.str(), &CGM.getModule()); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper); if (VD->hasDefinition()) CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); @@ -2472,8 +2511,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getQuantity()); - CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits, - Address(Guard, GuardAlign)); + CodeGenFunction(CGM).GenerateCXXGlobalInitFunc( + InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign)); // On Darwin 
platforms, use CXX_FAST_TLS calling convention. if (CGM.getTarget().getTriple().isOSDarwin()) { InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); @@ -2525,7 +2564,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, + cast<llvm::Function>(Init)); } if (Init) { @@ -2812,6 +2852,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: @@ -3088,7 +3131,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { } assert(isa<ObjCInterfaceType>(Ty)); - // Fall through. 
+ LLVM_FALLTHROUGH; case Type::ObjCInterface: if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) { @@ -4025,7 +4068,7 @@ static void InitCatchParam(CodeGenFunction &CGF, switch (CatchType.getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: CastExn = CGF.EmitARCRetainNonBlock(CastExn); - // fallthrough + LLVM_FALLTHROUGH; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index 48dea7d54b..013ca15e23 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -14,7 +14,8 @@ #include "MacroPPCallbacks.h" #include "CGDebugInfo.h" #include "clang/CodeGen/ModuleBuilder.h" -#include "clang/Parse/Parser.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" using namespace clang; @@ -88,16 +89,6 @@ SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) { return SourceLocation(); } -static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) { - StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); - return Filename.equals("<built-in>"); -} - -static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) { - StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); - return Filename.equals("<command line>"); -} - void MacroPPCallbacks::updateStatusToNextScope() { switch (Status) { case NoScope: @@ -127,7 +118,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) { updateStatusToNextScope(); return; case BuiltinScope: - if (isCommandLineFile(PP.getSourceManager(), Loc)) + if (PP.getSourceManager().isWrittenInCommandLineFile(Loc)) return; updateStatusToNextScope(); LLVM_FALLTHROUGH; @@ -147,7 +138,7 @@ void MacroPPCallbacks::FileExited(SourceLocation Loc) { default: llvm_unreachable("Do not expect to exit a file from current scope"); case BuiltinScope: - if (!isBuiltinFile(PP.getSourceManager(), Loc)) + if (!PP.getSourceManager().isWrittenInBuiltinFile(Loc)) // Skip next scope 
and change status to MainFileScope. Status = MainFileScope; return; diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 3b894fcdc1..0ad19ad5ab 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1552,9 +1552,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0) Type = Dtor_Base; - CGCallee Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), - DD); + CGCallee Callee = + CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), + GlobalDecl(DD, Type)); if (DD->isVirtual()) { assert(Type != CXXDtorType::Dtor_Deleting && @@ -1872,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); } - CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } @@ -3956,7 +3956,8 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, // Call the destructor with our arguments. 
llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(CD, StructorType::Complete); - CGCallee Callee = CGCallee::forDirect(CalleePtr, CD); + CGCallee Callee = + CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete)); const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall( Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix); CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args); diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index 511cf75d6a..1264893ec1 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -17,9 +17,9 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index c164cec5d9..6f00c836f9 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -14,14 +14,13 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Serialization/ASTWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -156,6 +155,8 @@ public: LangOpts.CurrentModule.empty() ? 
MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); + CodeGenOpts.DebugPrefixMap = + CI.getInvocation().getCodeGenOpts().DebugPrefixMap; } ~PCHContainerGenerator() override = default; diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp index b411a501ea..75a0fa5ce1 100644 --- a/lib/CodeGen/SwiftCallingConv.cpp +++ b/lib/CodeGen/SwiftCallingConv.cpp @@ -415,6 +415,40 @@ static bool areBytesInSameUnit(CharUnits first, CharUnits second, == getOffsetAtStartOfUnit(second, chunkSize); } +static bool isMergeableEntryType(llvm::Type *type) { + // Opaquely-typed memory is always mergeable. + if (type == nullptr) return true; + + // Pointers and integers are always mergeable. In theory we should not + // merge pointers, but (1) it doesn't currently matter in practice because + // the chunk size is never greater than the size of a pointer and (2) + // Swift IRGen uses integer types for a lot of things that are "really" + // just storing pointers (like Optional<SomePointer>). If we ever have a + // target that would otherwise combine pointers, we should put some effort + // into fixing those cases in Swift IRGen and then call out pointer types + // here. + + // Floating-point and vector types should never be merged. + // Most such types are too large and highly-aligned to ever trigger merging + // in practice, but it's important for the rule to cover at least 'half' + // and 'float', as well as things like small vectors of 'i1' or 'i8'. + return (!type->isFloatingPointTy() && !type->isVectorTy()); +} + +bool SwiftAggLowering::shouldMergeEntries(const StorageEntry &first, + const StorageEntry &second, + CharUnits chunkSize) { + // Only merge entries that overlap the same chunk. We test this first + // despite being a bit more expensive because this is the condition that + // tends to prevent merging. 
+ if (!areBytesInSameUnit(first.End - CharUnits::One(), second.Begin, + chunkSize)) + return false; + + return (isMergeableEntryType(first.Type) && + isMergeableEntryType(second.Type)); +} + void SwiftAggLowering::finish() { if (Entries.empty()) { Finished = true; @@ -425,12 +459,12 @@ void SwiftAggLowering::finish() { // which is generally the size of a pointer. const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM); - // First pass: if two entries share a chunk, make them both opaque + // First pass: if two entries should be merged, make them both opaque // and stretch one to meet the next. + // Also, remember if there are any opaque entries. bool hasOpaqueEntries = (Entries[0].Type == nullptr); for (size_t i = 1, e = Entries.size(); i != e; ++i) { - if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(), - Entries[i].Begin, chunkSize)) { + if (shouldMergeEntries(Entries[i - 1], Entries[i], chunkSize)) { Entries[i - 1].Type = nullptr; Entries[i].Type = nullptr; Entries[i - 1].End = Entries[i].Begin; diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index ada42fd2ae..ae080f5bbd 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -19,9 +19,9 @@ #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -2337,7 +2337,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { bool Quote = (Lib.find(" ") != StringRef::npos); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; - if (!Lib.endswith_lower(".lib")) + if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a")) ArgStr += ".lib"; ArgStr += Quote ? 
"\"" : ""; return ArgStr; @@ -3944,18 +3944,39 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } - // Bool type is always extended to the ABI, other builtin types are not - // extended. - const BuiltinType *BT = Ty->getAs<BuiltinType>(); - if (BT && BT->getKind() == BuiltinType::Bool) - return ABIArgInfo::getExtend(Ty); + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + switch (BT->getKind()) { + case BuiltinType::Bool: + // Bool type is always extended to the ABI, other builtin types are not + // extended. + return ABIArgInfo::getExtend(Ty); - // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It - // passes them indirectly through memory. - if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::x87DoubleExtended()) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + case BuiltinType::LongDouble: + // Mingw64 GCC uses the old 80 bit extended precision floating point + // unit. It passes them indirectly through memory. + if (IsMingw64) { + const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); + if (LDF == &llvm::APFloat::x87DoubleExtended()) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + break; + + case BuiltinType::Int128: + case BuiltinType::UInt128: + // If it's a parameter type, the normal ABI rule is that arguments larger + // than 8 bytes are passed indirectly. GCC follows it. We follow it too, + // even though it isn't particularly efficient. + if (!IsReturnType) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + + // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. + // Clang matches them for compatibility. 
+ return ABIArgInfo::getDirect( + llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2)); + + default: + break; + } } return ABIArgInfo::getDirect(); @@ -4978,13 +4999,21 @@ public: llvm::Function *Fn = cast<llvm::Function>(GV); auto Kind = CGM.getCodeGenOpts().getSignReturnAddress(); - if (Kind == CodeGenOptions::SignReturnAddressScope::None) - return; + if (Kind != CodeGenOptions::SignReturnAddressScope::None) { + Fn->addFnAttr("sign-return-address", + Kind == CodeGenOptions::SignReturnAddressScope::All + ? "all" + : "non-leaf"); - Fn->addFnAttr("sign-return-address", - Kind == CodeGenOptions::SignReturnAddressScope::All - ? "all" - : "non-leaf"); + auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); + Fn->addFnAttr("sign-return-address-key", + Key == CodeGenOptions::SignReturnAddressKeyValue::AKey + ? "a_key" + : "b_key"); + } + + if (CGM.getCodeGenOpts().BranchTargetEnforcement) + Fn->addFnAttr("branch-target-enforcement"); } }; @@ -4993,6 +5022,9 @@ public: WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) : AArch64TargetCodeGenInfo(CGT, K) {} + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); @@ -5003,6 +5035,14 @@ public: Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; + +void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { @@ -8209,6 +8249,137 @@ SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, return false; } +// ARC ABI implementation. 
+namespace { + +class ARCABIInfo : public DefaultABIInfo { +public: + using DefaultABIInfo::DefaultABIInfo; + +private: + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { + if (!State.FreeRegs) + return; + if (Info.isIndirect() && Info.getInReg()) + State.FreeRegs--; + else if (Info.isDirect() && Info.getInReg()) { + unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; + if (sz < State.FreeRegs) + State.FreeRegs -= sz; + else + State.FreeRegs = 0; + } + } + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // ARC uses 8 registers to pass arguments. + State.FreeRegs = 8; + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + updateState(FI.getReturnInfo(), FI.getReturnType(), State); + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State.FreeRegs); + updateState(I.info, I.type, State); + } + } + + ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; + ABIArgInfo getIndirectByValue(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; +}; + +class ARCTargetCodeGenInfo : public TargetCodeGenInfo { +public: + ARCTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new ARCABIInfo(CGT)) {} +}; + + +ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { + return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : + getNaturalAlignIndirect(Ty, false); +} + +ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { + // Compute the byval alignment. 
+ const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + TypeAlign > MinABIStackAlignInBytes); +} + +Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), true); +} + +ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, + uint8_t FreeRegs) const { + // Handle the generic C++ ABI. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) + return getIndirectByRef(Ty, FreeRegs > 0); + + if (RAA == CGCXXABI::RAA_DirectInMemory) + return getIndirectByValue(Ty); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32; + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectByValue(Ty); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + + return FreeRegs >= SizeInRegs ? + ABIArgInfo::getDirectInReg(Result) : + ABIArgInfo::getDirect(Result, 0, nullptr, false); + } + + return Ty->isPromotableIntegerType() ? + (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) : + ABIArgInfo::getExtend(Ty)) : + (FreeRegs >= SizeInRegs ? 
ABIArgInfo::getDirectInReg() : + ABIArgInfo::getDirect()); +} + +ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirectInReg(); + + // Return values of size > 4 registers are returned indirectly. + auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32; + if (RetSize > 4) + return getIndirectByRef(RetTy, /*HasFreeRegs*/ true); + + return DefaultABIInfo::classifyReturnType(RetTy); +} + +} // End anonymous namespace. //===----------------------------------------------------------------------===// // XCore ABI Implementation @@ -9230,6 +9401,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); + case llvm::Triple::arc: + return SetCGInfo(new ARCTargetCodeGenInfo(Types)); case llvm::Triple::spir: case llvm::Triple::spir64: return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp index 2f8a591a3e..859cdd4282 100644 --- a/lib/CodeGen/VarBypassDetector.cpp +++ b/lib/CodeGen/VarBypassDetector.cpp @@ -78,7 +78,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S, return false; ++StmtsToSkip; } - // Fall through + LLVM_FALLTHROUGH; case Stmt::GotoStmtClass: FromScopes.push_back({S, ParentScope}); |