diff options
Diffstat (limited to 'lib/CodeGen')
88 files changed, 7245 insertions, 3543 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index feed3833f2..0c3a076da0 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -1,9 +1,8 @@ //===----- ABIInfo.h - ABI information access & encapsulation ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/Address.h b/lib/CodeGen/Address.h index 334308081f..6a8e57f8db 100644 --- a/lib/CodeGen/Address.h +++ b/lib/CodeGen/Address.h @@ -1,9 +1,8 @@ //===-- Address.h - An aligned address -------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index b927acabac..cd2a5f6fa3 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -1,9 +1,8 @@ //===--- BackendUtil.cpp - LLVM Backend Utilities -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,11 +36,13 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -53,8 +54,10 @@ #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" @@ -243,15 +246,15 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, - UseGlobalsGC, UseOdrIndicator)); + PM.add(createModuleAddressSanitizerLegacyPassPass( + /*CompileKernel*/ false, Recover, UseGlobalsGC, UseOdrIndicator)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { PM.add(createAddressSanitizerFunctionPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); - PM.add(createAddressSanitizerModulePass( + 
PM.add(createModuleAddressSanitizerLegacyPassPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true, /*UseOdrIndicator*/ false)); } @@ -279,7 +282,8 @@ static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder, const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins; bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory); - PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, CompileKernel)); + PM.add(createMemorySanitizerLegacyPassPass( + MemorySanitizerOptions{TrackOrigins, Recover, CompileKernel})); // MemorySanitizer inserts complex instrumentation that mostly follows // the logic of the original code, but operates on "shadow" values. @@ -317,19 +321,6 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } -static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); - EfficiencySanitizerOptions Opts; - if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; - else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; - PM.add(createEfficiencySanitizerPass(Opts)); -} - static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -515,6 +506,21 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { return Options; } +static Optional<InstrProfOptions> +getInstrProfOptions(const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts) { + if 
(!CodeGenOpts.hasProfileClangInstr()) + return None; + InstrProfOptions Options; + Options.NoRedZone = CodeGenOpts.DisableRedZone; + Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; + + // TODO: Surface the option to emit atomic profile counter increments at + // the driver level. + Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + return Options; +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -554,6 +560,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. + PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; @@ -579,7 +588,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addObjCARCOptPass); } - if (LangOpts.CoroutinesTS) + if (LangOpts.Coroutines) addCoroutinePassesToExtensionPoints(PMBuilder); if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) { @@ -654,13 +663,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addDataFlowSanitizerPass); } - if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { - PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addEfficiencySanitizerPass); - PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addEfficiencySanitizerPass); - } - // Set up the per-function pass manager. 
FPM.add(new TargetLibraryInfoWrapperPass(*TLII)); if (CodeGenOpts.VerifyModule) @@ -676,26 +678,35 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.hasProfileClangInstr()) { - InstrProfOptions Options; - Options.NoRedZone = CodeGenOpts.DisableRedZone; - Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.add(createInstrProfilingLegacyPass(*Options, false)); - // TODO: Surface the option to emit atomic profile counter increments at - // the driver level. - Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); - - MPM.add(createInstrProfilingLegacyPass(Options)); - } + bool hasIRInstr = false; if (CodeGenOpts.hasProfileIRInstr()) { PMBuilder.EnablePGOInstrGen = true; + hasIRInstr = true; + } + if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at the " + "same time"); + assert(!hasIRInstr && + "Cannot have both ProfileGen pass and CSProfileGen pass at the " + "same time"); + PMBuilder.EnablePGOCSInstrGen = true; + hasIRInstr = true; + } + if (hasIRInstr) { if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else PMBuilder.PGOInstrGen = DefaultProfileGenName; } - if (CodeGenOpts.hasProfileIRUse()) + if (CodeGenOpts.hasProfileIRUse()) { PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + PMBuilder.EnablePGOCSInstrUse = CodeGenOpts.hasProfileCSIRUse(); + } if (!CodeGenOpts.SampleProfileFile.empty()) PMBuilder.PGOSampleUse = CodeGenOpts.SampleProfileFile; @@ -916,6 +927,31 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { } } +static void addSanitizersAtO0(ModulePassManager &MPM, + const Triple &TargetTriple, + const LangOptions &LangOpts, + const CodeGenOptions &CodeGenOpts) { + if 
(LangOpts.Sanitize.has(SanitizerKind::Address)) { + MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address); + MPM.addPass(createModuleToFunctionPassAdaptor( + AddressSanitizerPass(/*CompileKernel=*/false, Recover, + CodeGenOpts.SanitizeAddressUseAfterScope))); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + MPM.addPass(ModuleAddressSanitizerPass( + /*CompileKernel=*/false, Recover, ModuleUseAfterScope, + CodeGenOpts.SanitizeAddressUseOdrIndicator)); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + } +} + /// A clean version of `EmitAssembly` that uses the new pass manager. /// /// Not all features are currently supported in this system, but where @@ -929,13 +965,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); - // The new pass manager always makes a target machine available to passes - // during construction. - CreateTargetMachine(/*MustCreateTM*/ true); - if (!TM) - // This will already be diagnosed, just bail. + bool RequiresCodeGen = (Action != Backend_EmitNothing && + Action != Backend_EmitBC && + Action != Backend_EmitLL); + CreateTargetMachine(RequiresCodeGen); + + if (RequiresCodeGen && !TM) return; - TheModule->setDataLayout(TM->createDataLayout()); + if (TM) + TheModule->setDataLayout(TM->createDataLayout()); Optional<PGOOptions> PGOOpt; @@ -944,23 +982,61 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() ? 
DefaultProfileGenName : CodeGenOpts.InstrProfileOutput, - "", "", "", true, + "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); - else if (CodeGenOpts.hasProfileIRUse()) + else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. - PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); - else if (!CodeGenOpts.SampleProfileFile.empty()) + auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse + : PGOOptions::NoCSAction; + PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse, + CSAction, CodeGenOpts.DebugInfoForProfiling); + } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use - PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions(CodeGenOpts.SampleProfileFile, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::SampleUse, + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", "", false, true); + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); + + // Check to see if we want to generate a CS profile. + if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at " + "the same time"); + if (PGOOpt.hasValue()) { + assert(PGOOpt->Action != PGOOptions::IRInstr && + PGOOpt->Action != PGOOptions::SampleUse && + "Cannot run CSProfileGen pass with ProfileGen or SampleUse " + " pass"); + PGOOpt->CSProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() + ? 
DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput; + PGOOpt->CSAction = PGOOptions::CSIRInstr; + } else + PGOOpt = PGOOptions("", + CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput, + "", PGOOptions::NoAction, PGOOptions::CSIRInstr, + CodeGenOpts.DebugInfoForProfiling); + } - PassBuilder PB(TM.get(), PGOOpt); + PassBuilder PB(TM.get(), PipelineTuningOptions(), PGOOpt); + + // Attempt to load pass plugins and register their callbacks with PB. + for (auto &PluginFN : CodeGenOpts.PassPlugins) { + auto PassPlugin = PassPlugin::Load(PluginFN); + if (PassPlugin) { + PassPlugin->registerPassBuilderCallbacks(PB); + } else { + Diags.Report(diag::err_fe_unable_to_load_plugin) + << PluginFN << toString(PassPlugin.takeError()); + } + } LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager); FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager); @@ -994,6 +1070,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (CodeGenOpts.OptimizationLevel == 0) { if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.addPass(InstrProfiling(*Options, false)); // Build a minimal pipeline based on the semantics required by Clang, // which is just that always inlining occurs. @@ -1015,15 +1094,54 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // Register callbacks to schedule sanitizer passes at the appropriate part of // the pipeline. 
+ // FIXME: either handle asan/the remaining sanitizers or error out if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) PB.registerScalarOptimizerLateEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(MemorySanitizerPass({})); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(ThreadSanitizerPass()); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Address)) { + PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { + MPM.addPass( + RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + }); + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address); + bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + PB.registerOptimizerLastEPCallback( + [Recover, UseAfterScope](FunctionPassManager &FPM, + PassBuilder::OptimizationLevel Level) { + FPM.addPass(AddressSanitizerPass( + /*CompileKernel=*/false, Recover, UseAfterScope)); + }); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + PB.registerPipelineStartEPCallback( + [Recover, ModuleUseAfterScope, + UseOdrIndicator](ModulePassManager &MPM) { + MPM.addPass(ModuleAddressSanitizerPass( + /*CompileKernel=*/false, Recover, ModuleUseAfterScope, + UseOdrIndicator)); + }); + } if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { MPM.addPass(GCOVProfilerPass(*Options)); }); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { + 
MPM.addPass(InstrProfiling(*Options, false)); + }); if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( @@ -1040,6 +1158,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( CodeGenOpts.DebugPassManager); } } + + if (CodeGenOpts.OptimizationLevel == 0) + addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); } // FIXME: We still use the legacy pass manager to do code generation. We @@ -1226,13 +1347,25 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.MAttrs = TOpts.Features; Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); + Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + + // Context sensitive profile. + if (CGOpts.hasProfileCSIRInstr()) { + Conf.RunCSIRInstr = true; + Conf.CSIRProfile = std::move(CGOpts.InstrProfileOutput); + } else if (CGOpts.hasProfileCSIRUse()) { + Conf.RunCSIRInstr = false; + Conf.CSIRProfile = std::move(CGOpts.ProfileInstrumentUsePath); + } + Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; Conf.RemarksFilename = CGOpts.OptRecordFile; + Conf.RemarksPasses = CGOpts.OptRecordPasses; Conf.DwoPath = CGOpts.SplitDwarfFile; switch (Action) { case Backend_EmitNothing: @@ -1273,6 +1406,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { + + llvm::TimeTraceScope TimeScope("Backend", StringRef("")); + std::unique_ptr<llvm::Module> EmptyModule; if (!CGOpts.ThinLTOIndexFile.empty()) { // If we are performing a ThinLTO importing compile, load the function index @@ -1339,6 +1475,9 @@ static const char* getSectionNameForBitcode(const Triple &T) { case Triple::Wasm: case 
Triple::UnknownObjectFormat: return ".llvmbc"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } @@ -1352,6 +1491,9 @@ static const char* getSectionNameForCommandline(const Triple &T) { case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmcmd"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 24056a449d..a95cd12c2d 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1,9 +1,8 @@ //===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -36,7 +35,6 @@ namespace { uint64_t ValueSizeInBits; CharUnits AtomicAlign; CharUnits ValueAlign; - CharUnits LValueAlign; TypeEvaluationKind EvaluationKind; bool UseLibcall; LValue LVal; @@ -133,7 +131,6 @@ namespace { QualType getAtomicType() const { return AtomicTy; } QualType getValueType() const { return ValueTy; } CharUnits getAtomicAlignment() const { return AtomicAlign; } - CharUnits getValueAlignment() const { return ValueAlign; } uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } uint64_t getValueSizeInBits() const { return ValueSizeInBits; } TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } @@ -202,7 +199,7 @@ namespace { assert(LVal.isSimple()); Address addr = getAtomicAddress(); if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); return LValue::MakeAddr(addr, getValueType(), CGF.getContext(), LVal.getBaseInfo(), LVal.getTBAAInfo()); @@ -308,7 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, const CGFunctionInfo &fnInfo = CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); - llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); + llvm::FunctionCallee fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); auto callee = CGCallee::forDirect(fn); return CGF.EmitCall(fnInfo, callee, ReturnValueSlot(), args); } @@ -680,7 +677,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // Handle constant scope. 
if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) { auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( - ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()), + Order, CGF.CGM.getLLVMContext()); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SCID); return; @@ -709,7 +707,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, Builder.SetInsertPoint(B); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, - CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(), + ScopeModel->map(S), + Order, CGF.getLLVMContext())); Builder.CreateBr(ContBB); } @@ -1357,7 +1357,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, // Drill into the padding structure if we have one. if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); // Otherwise, just convert the temporary to an r-value using the // normal conversion routine. @@ -1688,7 +1688,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, UpRVal = OldRVal; DesiredLVal = CGF.MakeAddrLValue(DesiredAddr, AtomicLVal.getType()); } else { - // Build new lvalue for temp address + // Build new lvalue for temp address. Address Ptr = Atomics.materializeRValue(OldRVal); LValue UpdateLVal; if (AtomicLVal.isBitField()) { @@ -1721,7 +1721,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, } UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. 
RValue NewRVal = UpdateOp(UpRVal); if (NewRVal.isScalar()) { CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal); @@ -1786,7 +1786,7 @@ void AtomicInfo::EmitAtomicUpdateOp( SourceLocation(), /*AsValue=*/false); EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); @@ -1797,7 +1797,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue UpdateRVal, Address DesiredAddr) { LValue AtomicLVal = Atomics.getAtomicLValue(); LValue DesiredLVal; - // Build new lvalue for temp address + // Build new lvalue for temp address. if (AtomicLVal.isBitField()) { DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), @@ -1814,7 +1814,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. assert(UpdateRVal.isScalar()); CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal); } @@ -1866,7 +1866,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal, } EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. 
auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index fa3c3ee861..2a317fc956 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -1,9 +1,8 @@ //===--- CGBlocks.cpp - Emit LLVM Code for declarations ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,7 +22,6 @@ #include "clang/AST/DeclObjC.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/Support/ScopedPrinter.h" @@ -276,6 +274,8 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, /*constant*/ true, linkage, AddrSpace); if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) { + if (CGM.supportsCOMDAT()) + global->setComdat(CGM.getModule().getOrInsertComdat(descName)); global->setVisibility(llvm::GlobalValue::HiddenVisibility); global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } @@ -671,7 +671,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // Sort the layout by alignment. We have to use a stable sort here // to get reproducible results. There should probably be an // llvm::array_pod_stable_sort. - std::stable_sort(layout.begin(), layout.end()); + llvm::stable_sort(layout); // Needed for blocks layout info. 
info.BlockHeaderForcedGapOffset = info.BlockSize; @@ -838,9 +838,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { } // GEP down to the address. - Address addr = CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, - capture.getIndex(), - capture.getOffset()); + Address addr = + CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex()); // We can use that GEP as the dominating IP. if (!blockInfo.DominatingIP) @@ -977,27 +976,24 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL; } - auto projectField = - [&](unsigned index, CharUnits offset, const Twine &name) -> Address { - return Builder.CreateStructGEP(blockAddr, index, offset, name); - }; - auto storeField = - [&](llvm::Value *value, unsigned index, CharUnits offset, - const Twine &name) { - Builder.CreateStore(value, projectField(index, offset, name)); - }; + auto projectField = [&](unsigned index, const Twine &name) -> Address { + return Builder.CreateStructGEP(blockAddr, index, name); + }; + auto storeField = [&](llvm::Value *value, unsigned index, const Twine &name) { + Builder.CreateStore(value, projectField(index, name)); + }; // Initialize the block header. { // We assume all the header fields are densely packed. unsigned index = 0; CharUnits offset; - auto addHeaderField = - [&](llvm::Value *value, CharUnits size, const Twine &name) { - storeField(value, index, offset, name); - offset += size; - index++; - }; + auto addHeaderField = [&](llvm::Value *value, CharUnits size, + const Twine &name) { + storeField(value, index, name); + offset += size; + index++; + }; if (!IsOpenCL) { addHeaderField(isa, getPointerSize(), "block.isa"); @@ -1033,8 +1029,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // First, 'this'. 
if (blockDecl->capturesCXXThis()) { - Address addr = projectField(blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, - "block.captured-this.addr"); + Address addr = + projectField(blockInfo.CXXThisIndex, "block.captured-this.addr"); Builder.CreateStore(LoadCXXThis(), addr); } @@ -1050,8 +1046,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This will be a [[type]]*, except that a byref entry will just be // an i8**. - Address blockField = - projectField(capture.getIndex(), capture.getOffset(), "block.captured"); + Address blockField = projectField(capture.getIndex(), "block.captured"); // Compute the address of the thing we're going to move into the // block literal. @@ -1070,7 +1065,6 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This is a [[type]]*, except that a byref entry will just be an i8**. src = Builder.CreateStructGEP(LoadBlockStruct(), enclosingCapture.getIndex(), - enclosingCapture.getOffset(), "block.capture.addr"); } else { auto I = LocalDeclMap.find(variable); @@ -1261,52 +1255,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs<BlockPointerType>(); - llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - - // Get a pointer to the generic block literal. - // For OpenCL we generate generic AS void ptr to be able to reuse the same - // block definition for blocks with captures generated as private AS local - // variables and without captures generated as global AS program scope - // variables. - unsigned AddrSpace = 0; - if (getLangOpts().OpenCL) - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - - // Bitcast the callee to a block literal. 
- BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - - // Get the function pointer from the literal. - llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, - CGM.getLangOpts().OpenCL ? 2 : 3); - - // Add the block literal. + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); + llvm::Value *Func = nullptr; + QualType FnType = BPT->getPointeeType(); + ASTContext &Ctx = getContext(); CallArgList Args; - QualType VoidPtrQualTy = getContext().VoidPtrTy; - llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); - VoidPtrQualTy = - getContext().getPointerType(getContext().getAddrSpaceQualType( - getContext().VoidTy, LangAS::opencl_generic)); - } - - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); - - QualType FnType = BPT->getPointeeType(); + // For OpenCL, BlockPtr is already casted to generic block literal. + + // First argument of a block call is a generic block literal casted to + // generic void pointer, i.e. i8 addrspace(4)* + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); + QualType VoidPtrQualTy = Ctx.getPointerType( + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + + // We *can* call the block directly unless it is a function argument. + if (!isa<ParmVarDecl>(E->getCalleeDecl())) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); + else { + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } + } else { + // Bitcast the block literal to a generic block literal. 
+ BlockPtr = Builder.CreatePointerCast( + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); + // Get pointer to the block invoke function + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); - // And the rest of the arguments. - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + // First argument is a block literal casted to a void pointer + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); - // Load the function. - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + // Load the function. + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1332,9 +1323,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { // Handle constant captures. if (capture.isConstant()) return LocalDeclMap.find(variable)->second; - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), - capture.getOffset(), "block.capture.addr"); + Address addr = Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), + "block.capture.addr"); if (variable->isEscapingByref()) { // addr should be a void** right now. Load, then cast the result @@ -1617,9 +1607,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // If we have a C++ 'this' reference, go ahead and force it into // existence now. 
if (blockDecl->capturesCXXThis()) { - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, - blockInfo.CXXThisOffset, "block.captured-this"); + Address addr = Builder.CreateStructGEP( + LoadBlockStruct(), blockInfo.CXXThisIndex, "block.captured-this"); CXXThisValue = Builder.CreateLoad(addr, "this"); } @@ -2029,6 +2018,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2062,8 +2053,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { BlockFieldFlags flags = CopiedCapture.CopyFlags; unsigned index = capture.getIndex(); - Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); - Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, index); + Address dstField = Builder.CreateStructGEP(dst, index); switch (CopiedCapture.CopyKind) { case BlockCaptureEntityKind::CXXRecord: @@ -2220,6 +2211,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2251,8 +2244,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture; BlockFieldFlags flags = DestroyedCapture.DisposeFlags; - Address srcField = - Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, capture.getIndex()); 
pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField, CI.getVariable()->getType(), flags, @@ -2286,7 +2278,7 @@ public: unsigned flags = (Flags | BLOCK_BYREF_CALLER).getBitMask(); llvm::Value *flagsVal = llvm::ConstantInt::get(CGF.Int32Ty, flags); - llvm::Value *fn = CGF.CGM.getBlockObjectAssign(); + llvm::FunctionCallee fn = CGF.CGM.getBlockObjectAssign(); llvm::Value *args[] = { destField.getPointer(), srcValue, flagsVal }; CGF.EmitNounwindRuntimeCall(fn, args); @@ -2712,13 +2704,11 @@ Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr, const llvm::Twine &name) { // Chase the forwarding address if requested. if (followForward) { - Address forwardingAddr = - Builder.CreateStructGEP(baseAddr, 1, getPointerSize(), "forwarding"); + Address forwardingAddr = Builder.CreateStructGEP(baseAddr, 1, "forwarding"); baseAddr = Address(Builder.CreateLoad(forwardingAddr), info.ByrefAlignment); } - return Builder.CreateStructGEP(baseAddr, info.FieldIndex, - info.FieldOffset, name); + return Builder.CreateStructGEP(baseAddr, info.FieldIndex, name); } /// BuildByrefInfo - This routine changes a __block variable declared as T x @@ -2836,8 +2826,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { CharUnits nextHeaderOffset; auto storeHeaderField = [&](llvm::Value *value, CharUnits fieldSize, const Twine &name) { - auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, - nextHeaderOffset, name); + auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, name); Builder.CreateStore(value, fieldAddr); nextHeaderIndex++; @@ -2933,7 +2922,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags, bool CanThrow) { - llvm::Value *F = CGM.getBlockObjectDispose(); + llvm::FunctionCallee F = CGM.getBlockObjectDispose(); llvm::Value *args[] = { Builder.CreateBitCast(V, Int8PtrTy), llvm::ConstantInt::get(Int32Ty, 
flags.getBitMask()) @@ -2989,7 +2978,7 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, CGM.setDSOLocal(GV); } -llvm::Constant *CodeGenModule::getBlockObjectDispose() { +llvm::FunctionCallee CodeGenModule::getBlockObjectDispose() { if (BlockObjectDispose) return BlockObjectDispose; @@ -2997,11 +2986,12 @@ llvm::Constant *CodeGenModule::getBlockObjectDispose() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectDispose = CreateRuntimeFunction(fty, "_Block_object_dispose"); - configureBlocksRuntimeObject(*this, BlockObjectDispose); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectDispose.getCallee())); return BlockObjectDispose; } -llvm::Constant *CodeGenModule::getBlockObjectAssign() { +llvm::FunctionCallee CodeGenModule::getBlockObjectAssign() { if (BlockObjectAssign) return BlockObjectAssign; @@ -3009,7 +2999,8 @@ llvm::Constant *CodeGenModule::getBlockObjectAssign() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectAssign = CreateRuntimeFunction(fty, "_Block_object_assign"); - configureBlocksRuntimeObject(*this, BlockObjectAssign); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectAssign.getCallee())); return BlockObjectAssign; } diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 3f9fc16d9b..c4bfde6661 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -1,9 +1,8 @@ //===-- CGBlocks.h - state for LLVM CodeGen for blocks ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 654ef72060..50ef853b83 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -1,9 +1,8 @@ //===-- CGBuilder.h - Choose IRBuilder implementation ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -168,19 +167,25 @@ public: return Address(Ptr, Addr.getAlignment()); } + /// Given + /// %addr = {T1, T2...}* ... + /// produce + /// %name = getelementptr inbounds %addr, i32 0, i32 index + /// + /// This API assumes that drilling into a struct like this is always an + /// inbounds operation. 
using CGBuilderBaseTy::CreateStructGEP; - Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, + Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name = "") { + llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return Address(CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } - Address CreateStructGEP(Address Addr, unsigned Index, - const llvm::StructLayout *Layout, - const llvm::Twine &Name = "") { - auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return CreateStructGEP(Addr, Index, Offset, Name); - } /// Given /// %addr = [n x T]* ... @@ -190,15 +195,17 @@ public: /// /// This API assumes that drilling into an array like this is always /// an inbounds operation. 
- /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { - return Address(CreateInBoundsGEP(Addr.getPointer(), - {getSize(CharUnits::Zero()), - getSize(Index)}, - Name), - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + llvm::ArrayType *ElTy = cast<llvm::ArrayType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy->getElementType())); + + return Address( + CreateInBoundsGEP(Addr.getPointer(), + {getSize(CharUnits::Zero()), getSize(Index)}, Name), + Addr.getAlignment().alignmentAtOffset(Index * EltSize)); } /// Given @@ -206,11 +213,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. - /// - /// \param EltSize - the size of the type T in bytes Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, - CharUnits EltSize, const llvm::Twine &Name = "") { + llvm::Type *ElTy = Addr.getElementType(); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy)); + return Address(CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -221,10 +229,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. 
- /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); + return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -245,9 +255,10 @@ public: } using CGBuilderBaseTy::CreateConstInBoundsGEP2_32; - Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, - unsigned Idx1, const llvm::DataLayout &DL, - const llvm::Twine &Name = "") { + Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, + const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32( Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name)); llvm::APInt Offset( @@ -259,17 +270,6 @@ public: CharUnits::fromQuantity(Offset.getSExtValue()))); } - llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateInBoundsGEP(Ptr, getSize(Offset), Name); - } - llvm::Value *CreateConstByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateGEP(Ptr, getSize(Offset), Name); - } - using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a718f2f19a..048103275d 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1,9 +1,8 @@ 
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -27,7 +27,6 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -46,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { + ConstantInt *Byte; + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + Byte = CGF.Builder.getInt8(0x00); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); + Byte = llvm::dyn_cast<llvm::ConstantInt>( + initializationPatternFor(CGF.CGM, Int8)); + break; + } + } + CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". 
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -300,6 +318,34 @@ static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } +// Build a plain volatile load. +static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +// Build a plain volatile store. +static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + Value *Value = CGF.EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); + Store->setVolatile(true); + return Store; +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. 
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -307,7 +353,7 @@ static Value *emitUnaryBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); } @@ -318,7 +364,7 @@ static Value *emitBinaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } @@ -330,7 +376,7 @@ static Value *emitTernaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -341,13 +387,13 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, {Src0, Src1}); } /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; @@ -408,7 +454,7 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, "Arguments must be the same type. 
(Did you forget to make sure both " "arguments have the same integer width?)"); - llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); + Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); Carry = CGF.Builder.CreateExtractValue(Tmp, 1); return CGF.Builder.CreateExtractValue(Tmp, 0); @@ -419,7 +465,7 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, int low, int high) { llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::Instruction *Call = CGF.Builder.CreateCall(F); Call->setMetadata(llvm::LLVMContext::MD_range, RNode); return Call; @@ -496,10 +542,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, + bool IsDynamic) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType, EmittedE); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -515,7 +562,7 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, bool IsDynamic) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. 
if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -545,13 +592,15 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + Function *F = + CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); // For GCC compatibility, __builtin_object_size treat NULL as unknown size. Value *NullIsUnknown = Builder.getTrue(); - return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); + Value *Dynamic = Builder.getInt1(IsDynamic); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); } namespace { @@ -793,16 +842,16 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::ReturnsTwice); - llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); llvm::Value *Args[] = {Buf, Arg1}; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); + CB->setAttributes(ReturnsTwiceAttr); + return RValue::get(CB); } // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, @@ -876,7 +925,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); if (BuiltinID == 
MSVCIntrin::_BitScanForward) { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Builder.CreateStore(ZeroCount, IndexAddress, false); @@ -884,7 +933,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); @@ -996,6 +1045,9 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Asm = "udf #251"; Constraints = "{r0}"; break; + case llvm::Triple::aarch64: + Asm = "brk #0xF003"; + Constraints = "{w0}"; } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = @@ -1003,9 +1055,9 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CI->setAttributes(NoReturnAttr); + return CI; } } llvm_unreachable("Incorrect MSVC intrinsic!"); @@ -1106,6 +1158,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + 
Fn->setDoesNotThrow(); // Attach 'noinline' at -Oz. if (CGM.getCodeGenOpts().OptimizeSize == 2) @@ -1330,8 +1383,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, } static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, - Value *&RecordPtr, CharUnits Align, Value *Func, - int Lvl) { + Value *&RecordPtr, CharUnits Align, + llvm::FunctionCallee Func, int Lvl) { const auto *RT = RType->getAs<RecordType>(); ASTContext &Context = CGF.getContext(); RecordDecl *RD = RT->getDecl()->getDefinition(); @@ -1466,7 +1519,7 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGM.getIntrinsic(IID, Ty); + Function *F = CGM.getIntrinsic(IID, Ty); return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } @@ -1735,6 +1788,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_dump_struct: { + llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy); + llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get( + LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true); + Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); @@ -1742,7 +1799,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QualType Arg0Type = Arg0->getType()->getPointeeType(); Value *RecordPtr = EmitScalarExpr(Arg0); - Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0); + Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, + {LLVMFuncType, Func}, 0); return RValue::get(Res); } @@ -1763,7 +1821,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = 
ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Zero = llvm::Constant::getNullValue(ArgType); @@ -1783,7 +1841,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1800,7 +1858,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1817,7 +1875,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = @@ -1838,7 +1896,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = Builder.CreateCall(F, ArgValue); @@ -1854,7 +1912,7 @@ RValue 
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); @@ -1872,7 +1930,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, ArgValue); @@ -1898,7 +1956,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); + Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *Result = Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); @@ -1913,7 +1971,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + EmitAlignmentAssumption(PtrValue, Ptr, + /*The expr loc is sufficient.*/ SourceLocation(), Alignment, OffsetValue); return RValue::get(PtrValue); } @@ -1923,7 +1982,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); Value *ArgValue = EmitScalarExpr(E->getArg(0)); - Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); + Function *FnAssume = 
CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } case Builtin::BI__builtin_bswap16: @@ -1968,17 +2027,34 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); - if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) - // We can only reason about scalar types. + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && + !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get(ConstantInt::get(ResultType, 0)); + + if (Arg->HasSideEffects(getContext())) + // The argument is unevaluated, so be conservative if it might have + // side-effects. return RValue::get(ConstantInt::get(ResultType, 0)); Value *ArgValue = EmitScalarExpr(Arg); - Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + if (ArgType->isObjCObjectPointerType()) { + // Convert Objective-C objects to id because we cannot distinguish between + // LLVM types for Obj-C classes as they are opaque. 
+ ArgType = CGM.getContext().getObjCIdType(); + ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); + } + Function *F = + CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); return RValue::get(Result); } + case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1986,8 +2062,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. + bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, - /*EmittedE=*/nullptr)); + /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -1997,17 +2074,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { - Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); + Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: @@ -2029,7 +2106,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Base = EmitScalarExpr(E->getArg(0)); Value *Exponent = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = Base->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); return RValue::get(Builder.CreateCall(F, {Base, Exponent})); } @@ -2130,6 +2207,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_flt_rounds: { + Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } + case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -2200,6 +2288,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID, .getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); + initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); return RValue::get(AI); } @@ -2212,6 +2301,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); + initializeAlloca(*this, AI, Size, AlignmentInBytes); return RValue::get(AI); } @@ -2392,24 +2482,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // this instead of hard-coding 0, which is correct for most targets. int32_t Offset = 0; - Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); + Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); return RValue::get(Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); + Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2445,9 +2535,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::IntegerType 
*IntTy = cast<llvm::IntegerType>(Int->getType()); assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); - Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 - ? Intrinsic::eh_return_i32 - : Intrinsic::eh_return_i64); + Function *F = + CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 + : Intrinsic::eh_return_i64); Builder.CreateCall(F, {Int, Ptr}); Builder.CreateUnreachable(); @@ -2457,7 +2547,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { - Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); + Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_extend_pointer: { @@ -2498,12 +2588,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); - Address StackSaveSlot = - Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); + Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. 
- Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); + Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); Buf = Builder.CreateBitCast(Buf, Int8PtrTy); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } @@ -2719,7 +2808,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } @@ -2959,14 +3048,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Build and MDTuple of MDStrings and emit the intrinsic call. - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); return RValue::getIgnored(); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, AnnVal->getType()); // Get the annotation string, go through casts. Sema requires this to be a @@ -3311,6 +3401,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_interlockedbittestandreset_nf: return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); + // These builtins exist to emit regular volatile loads and stores not + // affected by the -fms-volatile setting. 
+ case Builtin::BI__iso_volatile_load8: + case Builtin::BI__iso_volatile_load16: + case Builtin::BI__iso_volatile_load32: + case Builtin::BI__iso_volatile_load64: + return RValue::get(EmitISOVolatileLoad(*this, E)); + case Builtin::BI__iso_volatile_store8: + case Builtin::BI__iso_volatile_store16: + case Builtin::BI__iso_volatile_store32: + case Builtin::BI__iso_volatile_store64: + return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); @@ -3348,7 +3451,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto & Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); return RValue::get(Builder.CreateCall(F)); } @@ -3666,21 +3769,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - llvm::Type *EventPtrTy = EventTy->getPointerTo( + llvm::PointerType *EventPtrTy = EventTy->getPointerTo( CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); - llvm::Value *EventList = - E->getArg(4)->getType()->isArrayType() - ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() - : EmitScalarExpr(E->getArg(4)); - llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); - // Convert to generic address space. - EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = ClkEvent->getType()->isIntegerTy() - ? 
Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) - : Builder.CreatePointerCast(ClkEvent, EventPtrTy); + + // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments + // to be a null pointer constant (including `0` literal), we can take it + // into account and emit null pointer directly. + llvm::Value *EventWaitList = nullptr; + if (E->getArg(4)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventWaitList = E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + // Convert to generic address space. + EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); + } + llvm::Value *EventRet = nullptr; + if (E->getArg(5)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventRet = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventRet = + Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); + } + auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3692,8 +3809,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QueueTy, Int32Ty, RangeTy, Int32Ty, EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Kernel, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, + NumEvents, EventWaitList, EventRet, + Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -5065,6 +5183,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. 
+ if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) @@ -5262,7 +5387,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); @@ -5731,7 +5856,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, && "Can't fit 64-bit value in 32-bit register"); if (IsRead) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) @@ -5745,7 +5870,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, return Call; } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); if (MixedTypes) { // Extend 32 bit write value to 64 bit to pass to write. 
@@ -5798,34 +5923,6 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { return true; } -Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); - Load->setVolatile(true); - return Load; -} - -Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Value = EmitScalarExpr(E->getArg(1)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::StoreInst *Store = - Builder.CreateAlignedStore(Value, Ptr, - StoreSize); - Store->setVolatile(true); - return Store; -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5866,7 +5963,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6065,19 +6162,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); } - switch (BuiltinID) { - case ARM::BI__iso_volatile_load8: - case ARM::BI__iso_volatile_load16: - case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case 
ARM::BI__iso_volatile_store8: - case ARM::BI__iso_volatile_store16: - case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); - } - if (BuiltinID == ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); @@ -6818,7 +6902,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify // PLDL3STRM or PLDL2STRM. - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6956,7 +7040,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Value *F = + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } @@ -7002,6 +7086,84 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1}); } + // Memory Tagging Extensions (MTE) Intrinsics + Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + case AArch64::BI__builtin_arm_irg: + MTEIntrinsicID = Intrinsic::aarch64_irg; break; + case AArch64::BI__builtin_arm_addg: + MTEIntrinsicID = Intrinsic::aarch64_addg; break; + case AArch64::BI__builtin_arm_gmi: + MTEIntrinsicID = Intrinsic::aarch64_gmi; break; + case AArch64::BI__builtin_arm_ldg: + MTEIntrinsicID = Intrinsic::aarch64_ldg; break; + case AArch64::BI__builtin_arm_stg: + MTEIntrinsicID = Intrinsic::aarch64_stg; break; + case AArch64::BI__builtin_arm_subp: + MTEIntrinsicID = Intrinsic::aarch64_subp; break; + } + + if (MTEIntrinsicID != Intrinsic::not_intrinsic) { + llvm::Type *T = ConvertType(E->getType()); + + if (MTEIntrinsicID == Intrinsic::aarch64_irg) { + Value *Pointer 
= EmitScalarExpr(E->getArg(0)); + Value *Mask = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + Mask = Builder.CreateZExt(Mask, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_addg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *TagOffset = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); + + ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); + } + // Although it is possible to supply a different return + // address (first arg) to this intrinsic, for now we set + // return address same as input address. + if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + return Builder.CreatePointerCast(RV, T); + } + // Although it is possible to supply a different tag (to set) + // to this intrinsic (as first arg), for now we supply + // the tag that is in input address arg (common use case). 
+ if (MTEIntrinsicID == Intrinsic::aarch64_stg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + } + if (MTEIntrinsicID == Intrinsic::aarch64_subp) { + Value *PointerA = EmitScalarExpr(E->getArg(0)); + Value *PointerB = EmitScalarExpr(E->getArg(1)); + PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); + PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); + } + } + if (BuiltinID == AArch64::BI__builtin_arm_rsr || BuiltinID == AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_rsrp || @@ -7052,25 +7214,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; - llvm::Type *ValueType = Int32Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == AArch64::BI_ReadStatusReg) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - llvm::Value *Call = Builder.CreateCall(F, Metadata); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - return Builder.CreateTrunc(Call, ValueType); + return Builder.CreateCall(F, Metadata); } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); - ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { - llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + + if (BuiltinID == 
AArch64::BI__builtin_sponentry) { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); return Builder.CreateCall(F); } @@ -7608,13 +7772,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); case NEON::BI__builtin_neon_vfmah_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vfmsh_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. @@ -7775,6 +7939,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } + case NEON::BI__builtin_neon_vduph_lane_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + } + case NEON::BI__builtin_neon_vduph_laneq_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + } } llvm::VectorType *VTy = GetNeonType(this, Type); @@ -7845,11 +8017,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); return 
Builder.CreateBitCast(Result, Ty); } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7863,7 +8035,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7879,7 +8051,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } @@ -8892,16 +9064,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case AArch64::BI__iso_volatile_load8: - case AArch64::BI__iso_volatile_load16: - case AArch64::BI__iso_volatile_load32: - case AArch64::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case AArch64::BI__iso_volatile_store8: - case AArch64::BI__iso_volatile_store16: - case AArch64::BI__iso_volatile_store32: - case AArch64::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); case AArch64::BI_BitScanForward: case AArch64::BI_BitScanForward64: return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); @@ -9139,6 +9301,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); } +static Value 
*EmitX86CompressExpand(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, + bool IsCompress) { + llvm::Type *ResultTy = Ops[1]->getType(); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress + : Intrinsic::x86_avx512_mask_expand; + llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); + return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); +} + static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *ResultTy = Ops[1]->getType(); @@ -9184,10 +9360,50 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, } unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGF.CGM.getIntrinsic(IID, Ty); + Function *F = CGF.CGM.getIntrinsic(IID, Ty); return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } +static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + bool IsSigned) { + Value *Op0 = Ops[0]; + Value *Op1 = Ops[1]; + llvm::Type *Ty = Op0->getType(); + uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + + CmpInst::Predicate Pred; + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; + break; + case 0x5: + Pred = ICmpInst::ICMP_NE; + break; + case 0x6: + return llvm::Constant::getNullValue(Ty); // FALSE + case 0x7: + return llvm::Constant::getAllOnesValue(Ty); // TRUE + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); + Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); + return Res; +} + static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -9278,6 +9494,25 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); } +static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, bool IsSigned) { + unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); + llvm::Type *Ty = Ops[1]->getType(); + + Value *Res; + if (Rnd != 4) { + Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round + : Intrinsic::x86_avx512_uitofp_round; + Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); + Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); + } else { + Res = IsSigned ? 
CGF.Builder.CreateSIToFP(Ops[0], Ty) + : CGF.Builder.CreateUIToFP(Ops[0], Ty); + } + + return EmitX86Select(CGF, Ops[2], Res, Ops[1]); +} + static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *Ty = Ops[0]->getType(); @@ -9650,10 +9885,11 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); - cast<llvm::GlobalValue>(Func)->setDSOLocal(true); - cast<llvm::GlobalValue>(Func)->setDLLStorageClass( - llvm::GlobalValue::DefaultStorageClass); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); + cast<llvm::GlobalValue>(Func.getCallee()) + ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } @@ -9722,7 +9958,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } case X86::BI_mm_clflush: { @@ -9753,13 +9989,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: case X86::BI__builtin_ia32_lzcnt_u64: { - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: case X86::BI__builtin_ia32_tzcnt_u64: { - Value *F = 
CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_undef128: @@ -9833,7 +10069,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_xsavec: case X86::BI__builtin_ia32_xsavec64: case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: { + case X86::BI__builtin_ia32_xsaves64: + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: { Intrinsic::ID ID; #define INTRINSIC_X86_XSAVE_ID(NAME) \ case X86::BI__builtin_ia32_##NAME: \ @@ -9853,6 +10091,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, INTRINSIC_X86_XSAVE_ID(xsavec64); INTRINSIC_X86_XSAVE_ID(xsaves); INTRINSIC_X86_XSAVE_ID(xsaves64); + INTRINSIC_X86_XSAVE_ID(xsetbv); + case X86::BI_xsetbv: + ID = Intrinsic::x86_xsetbv; + break; } #undef INTRINSIC_X86_XSAVE_ID Value *Mhi = Builder.CreateTrunc( @@ -9862,6 +10104,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_xgetbv: + case X86::BI_xgetbv: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: @@ -9930,6 +10175,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtq2mask512: return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, 
Ops, /*IsSigned*/false); + case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddss3_mask: @@ -10073,6 +10327,262 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); + + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case 
X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); + + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + IID = Intrinsic::x86_avx512_mask_gather3div2_df; + break; + case X86::BI__builtin_ia32_gather3div2di: + IID = Intrinsic::x86_avx512_mask_gather3div2_di; + break; + case X86::BI__builtin_ia32_gather3div4df: + IID = Intrinsic::x86_avx512_mask_gather3div4_df; + break; + case X86::BI__builtin_ia32_gather3div4di: + IID = Intrinsic::x86_avx512_mask_gather3div4_di; + break; + case X86::BI__builtin_ia32_gather3div4sf: + IID = Intrinsic::x86_avx512_mask_gather3div4_sf; + break; + case X86::BI__builtin_ia32_gather3div4si: + IID = Intrinsic::x86_avx512_mask_gather3div4_si; + break; + case 
X86::BI__builtin_ia32_gather3div8sf: + IID = Intrinsic::x86_avx512_mask_gather3div8_sf; + break; + case X86::BI__builtin_ia32_gather3div8si: + IID = Intrinsic::x86_avx512_mask_gather3div8_si; + break; + case X86::BI__builtin_ia32_gather3siv2df: + IID = Intrinsic::x86_avx512_mask_gather3siv2_df; + break; + case X86::BI__builtin_ia32_gather3siv2di: + IID = Intrinsic::x86_avx512_mask_gather3siv2_di; + break; + case X86::BI__builtin_ia32_gather3siv4df: + IID = Intrinsic::x86_avx512_mask_gather3siv4_df; + break; + case X86::BI__builtin_ia32_gather3siv4di: + IID = Intrinsic::x86_avx512_mask_gather3siv4_di; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; + break; + case X86::BI__builtin_ia32_gather3siv4si: + IID = Intrinsic::x86_avx512_mask_gather3siv4_si; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; + break; + case X86::BI__builtin_ia32_gather3siv8si: + IID = Intrinsic::x86_avx512_mask_gather3siv8_si; + break; + case X86::BI__builtin_ia32_gathersiv8df: + IID = Intrinsic::x86_avx512_mask_gather_dpd_512; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_dps_512; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + IID = Intrinsic::x86_avx512_mask_gather_qpd_512; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_qps_512; + break; + case X86::BI__builtin_ia32_gathersiv8di: + IID = Intrinsic::x86_avx512_mask_gather_dpq_512; + break; + case X86::BI__builtin_ia32_gathersiv16si: + IID = Intrinsic::x86_avx512_mask_gather_dpi_512; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + IID = Intrinsic::x86_avx512_mask_gather_qpq_512; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + IID = Intrinsic::x86_avx512_mask_gather_qpi_512; + break; + } + + unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), + Ops[2]->getType()->getVectorNumElements()); + Ops[3] = 
getMaskVecValue(*this, Ops[3], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_dps_512; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_qps_512; + break; + case X86::BI__builtin_ia32_scattersiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; + break; + case X86::BI__builtin_ia32_scattersiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + IID = 
Intrinsic::x86_avx512_mask_scatter_qpq_512; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; + break; + case X86::BI__builtin_ia32_scattersiv2df: + IID = Intrinsic::x86_avx512_mask_scattersiv2_df; + break; + case X86::BI__builtin_ia32_scattersiv2di: + IID = Intrinsic::x86_avx512_mask_scattersiv2_di; + break; + case X86::BI__builtin_ia32_scattersiv4df: + IID = Intrinsic::x86_avx512_mask_scattersiv4_df; + break; + case X86::BI__builtin_ia32_scattersiv4di: + IID = Intrinsic::x86_avx512_mask_scattersiv4_di; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; + break; + case X86::BI__builtin_ia32_scattersiv4si: + IID = Intrinsic::x86_avx512_mask_scattersiv4_si; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; + break; + case X86::BI__builtin_ia32_scattersiv8si: + IID = Intrinsic::x86_avx512_mask_scattersiv8_si; + break; + } + + unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), + Ops[3]->getType()->getVectorNumElements()); + Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); 
+ Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -10693,6 +11203,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return EmitX86vpcom(*this, Ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: @@ -11336,6 +11856,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); +// AVX512 bf16 intrinsics + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { + Ops[2] = getMaskVecValue(*this, Ops[2], + Ops[0]->getType()->getVectorNumElements()); + Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); @@ -11386,9 +11914,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); // return Builder.CreateCall(F, Ops); llvm::Type *Int128Ty = Builder.getInt128Ty(); - Value *Val = Builder.CreateOr( - Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64), - Builder.CreateZExt(Ops[0], Int128Ty)); + Value *HighPart128 = + Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); + Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); + 
Value *Val = Builder.CreateOr(HighPart128, LowPart128); Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), llvm::ConstantInt::get(Int128Ty, 0x3f)); Value *Res; @@ -11465,7 +11994,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_AddressOfReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } case X86::BI__stosb: { @@ -11484,9 +12013,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA); + CI->setAttributes(NoReturnAttr); + return CI; } case X86::BI__readfsbyte: case X86::BI__readfsword: @@ -12001,7 +12530,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, + llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -12023,7 +12552,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, + llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); @@ -12039,7 +12568,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) 
Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); - Value *F = + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } @@ -12080,13 +12609,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt32Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_frexp_exph: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt16Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } @@ -12111,6 +12640,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 
+ Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); @@ -12160,7 +12697,7 @@ static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); CGF.Builder.CreateStore(CC, CCPtr); @@ -12173,30 +12710,30 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_tbegin: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbegin_nofloat: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbeginc: { Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tabort: { Value *Data = EmitScalarExpr(E->getArg(0)); - Value *F = 
CGM.getIntrinsic(Intrinsic::s390_tabort); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); } case SystemZ::BI__builtin_non_tx_store: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Data = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); + Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); return Builder.CreateCall(F, {Data, Address}); } @@ -12488,8 +13025,252 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, } } -Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +namespace { +// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. +struct NVPTXMmaLdstInfo { + unsigned NumResults; // Number of elements to load/store + // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported. + unsigned IID_col; + unsigned IID_row; +}; + +#define MMA_INTR(geom_op_type, layout) \ + Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride +#define MMA_LDST(n, geom_op_type) \ + { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } + +static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { + switch (BuiltinID) { + // FP MMA loads + case NVPTX::BI__hmma_m16n16k16_ld_a: + return MMA_LDST(8, m16n16k16_load_a_f16); + case NVPTX::BI__hmma_m16n16k16_ld_b: + return MMA_LDST(8, m16n16k16_load_b_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + return MMA_LDST(4, m16n16k16_load_c_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + return MMA_LDST(8, m16n16k16_load_c_f32); + case NVPTX::BI__hmma_m32n8k16_ld_a: + return MMA_LDST(8, m32n8k16_load_a_f16); + case NVPTX::BI__hmma_m32n8k16_ld_b: + return MMA_LDST(8, m32n8k16_load_b_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + return MMA_LDST(4, m32n8k16_load_c_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + return MMA_LDST(8, m32n8k16_load_c_f32); + case NVPTX::BI__hmma_m8n32k16_ld_a: + return 
MMA_LDST(8, m8n32k16_load_a_f16); + case NVPTX::BI__hmma_m8n32k16_ld_b: + return MMA_LDST(8, m8n32k16_load_b_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + return MMA_LDST(4, m8n32k16_load_c_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + return MMA_LDST(8, m8n32k16_load_c_f32); + + // Integer MMA loads + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + return MMA_LDST(2, m16n16k16_load_a_s8); + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + return MMA_LDST(2, m16n16k16_load_a_u8); + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + return MMA_LDST(2, m16n16k16_load_b_s8); + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + return MMA_LDST(2, m16n16k16_load_b_u8); + case NVPTX::BI__imma_m16n16k16_ld_c: + return MMA_LDST(8, m16n16k16_load_c_s32); + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + return MMA_LDST(4, m32n8k16_load_a_s8); + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + return MMA_LDST(4, m32n8k16_load_a_u8); + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + return MMA_LDST(1, m32n8k16_load_b_s8); + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + return MMA_LDST(1, m32n8k16_load_b_u8); + case NVPTX::BI__imma_m32n8k16_ld_c: + return MMA_LDST(8, m32n8k16_load_c_s32); + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + return MMA_LDST(1, m8n32k16_load_a_s8); + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + return MMA_LDST(1, m8n32k16_load_a_u8); + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + return MMA_LDST(4, m8n32k16_load_b_s8); + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + return MMA_LDST(4, m8n32k16_load_b_u8); + case NVPTX::BI__imma_m8n32k16_ld_c: + return MMA_LDST(8, m8n32k16_load_c_s32); + + // Sub-integer MMA loads. + // Only row/col layout is supported by A/B fragments. 
+ case NVPTX::BI__imma_m8n8k32_ld_a_s4: + return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_c: + return MMA_LDST(2, m8n8k32_load_c_s32); + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; + case NVPTX::BI__bmma_m8n8k128_ld_c: + return MMA_LDST(2, m8n8k128_load_c_s32); + + // NOTE: We need to follow inconsitent naming scheme used by NVCC. Unlike + // PTX and LLVM IR where stores always use fragment D, NVCC builtins always + // use fragment C for both loads and stores. + // FP MMA stores. + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + return MMA_LDST(4, m16n16k16_store_d_f16); + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + return MMA_LDST(8, m16n16k16_store_d_f32); + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + return MMA_LDST(4, m32n8k16_store_d_f16); + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + return MMA_LDST(8, m32n8k16_store_d_f32); + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + return MMA_LDST(4, m8n32k16_store_d_f16); + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + return MMA_LDST(8, m8n32k16_store_d_f32); + + // Integer and sub-integer MMA stores. + // Another naming quirk. Unlike other MMA builtins that use PTX types in the + // name, integer loads/stores use LLVM's i32. 
+ case NVPTX::BI__imma_m16n16k16_st_c_i32: + return MMA_LDST(8, m16n16k16_store_d_s32); + case NVPTX::BI__imma_m32n8k16_st_c_i32: + return MMA_LDST(8, m32n8k16_store_d_s32); + case NVPTX::BI__imma_m8n32k16_st_c_i32: + return MMA_LDST(8, m8n32k16_store_d_s32); + case NVPTX::BI__imma_m8n8k32_st_c_i32: + return MMA_LDST(2, m8n8k32_store_d_s32); + case NVPTX::BI__bmma_m8n8k128_st_c_i32: + return MMA_LDST(2, m8n8k128_store_d_s32); + + default: + llvm_unreachable("Unknown MMA builtin"); + } +} +#undef MMA_LDST +#undef MMA_INTR + + +struct NVPTXMmaInfo { + unsigned NumEltsA; + unsigned NumEltsB; + unsigned NumEltsC; + unsigned NumEltsD; + std::array<unsigned, 8> Variants; + + unsigned getMMAIntrinsic(int Layout, bool Satf) { + unsigned Index = Layout * 2 + Satf; + if (Index >= Variants.size()) + return 0; + return Variants[Index]; + } +}; + + // Returns an intrinsic that matches Layout and Satf for valid combinations of + // Layout and Satf, 0 otherwise. +static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { + // clang-format off +#define MMA_VARIANTS(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ + }} +// Sub-integer MMA only supports row.col layout. +#define MMA_VARIANTS_I4(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} +// b1 MMA does not support .satfinite. 
+#define MMA_VARIANTS_B1(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + 0, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} + // clang-format on + switch (BuiltinID) { + // FP MMA + // Note that 'type' argument of MMA_VARIANT uses D_C notation, while + // NumEltsN of return value are ordered as A,B,C,D. + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)}; + + // Integer MMA + case NVPTX::BI__imma_m16n16k16_mma_s8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)}; + case NVPTX::BI__imma_m16n16k16_mma_u8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)}; + case NVPTX::BI__imma_m32n8k16_mma_s8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)}; + case NVPTX::BI__imma_m32n8k16_mma_u8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)}; + case NVPTX::BI__imma_m8n32k16_mma_s8: + return {1, 4, 8, 8, 
MMA_VARIANTS(m8n32k16, s8)}; + case NVPTX::BI__imma_m8n32k16_mma_u8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)}; + + // Sub-integer MMA + case NVPTX::BI__imma_m8n8k32_mma_s4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)}; + case NVPTX::BI__imma_m8n8k32_mma_u4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)}; + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)}; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS +#undef MMA_VARIANTS_I4 +#undef MMA_VARIANTS_B1 +} + +} // namespace + +Value * +CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); clang::CharUnits Align = @@ -12569,7 +13350,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(1)); // atomicrmw only deals with integer arguments so we need to use // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. - Value *FnALAF32 = + Function *FnALAF32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); return Builder.CreateCall(FnALAF32, {Ptr, Val}); } @@ -12579,7 +13360,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(1)); // atomicrmw only deals with integer arguments, so we need to use // LLVM's nvvm_atomic_load_add_f64 intrinsic. 
- Value *FnALAF64 = + Function *FnALAF64 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); return Builder.CreateCall(FnALAF64, {Ptr, Val}); } @@ -12587,7 +13368,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALI32 = + Function *FnALI32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); return Builder.CreateCall(FnALI32, {Ptr, Val}); } @@ -12595,7 +13376,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_dec_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALD32 = + Function *FnALD32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); return Builder.CreateCall(FnALD32, {Ptr, Val}); } @@ -12752,6 +13533,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(Pred, PredOutPtr); return Builder.CreateExtractValue(ResultPair, 0); } + + // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: @@ -12763,7 +13546,33 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_ld_a: case NVPTX::BI__hmma_m8n32k16_ld_b: case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + // Integer MMA loads. 
+ case NVPTX::BI__imma_m16n16k16_ld_a_s8: + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + case NVPTX::BI__imma_m16n16k16_ld_c: + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + case NVPTX::BI__imma_m32n8k16_ld_c: + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + case NVPTX::BI__imma_m8n32k16_ld_c: + // Sub-integer MMA loads. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + case NVPTX::BI__imma_m8n8k32_ld_c: + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + case NVPTX::BI__bmma_m8n8k128_ld_c: + { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12771,82 +13580,28 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? 
Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: - IID = isColMajor ? 
Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; - NumResults = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Value *Result = Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. - for (unsigned i = 0; i < NumResults; ++i) { - Builder.CreateAlignedStore( - Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), - Dst.getElementType()), - Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), - CharUnits::fromQuantity(4)); + assert(II.NumResults); + if (II.NumResults == 1) { + Builder.CreateAlignedStore(Result, Dst.getPointer(), + CharUnits::fromQuantity(4)); + } else { + for (unsigned i = 0; i < II.NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } } return Result; } @@ -12856,7 +13611,12 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m32n8k16_st_c_f16: case NVPTX::BI__hmma_m32n8k16_st_c_f32: case NVPTX::BI__hmma_m8n32k16_st_c_f16: - case NVPTX::BI__hmma_m8n32k16_st_c_f32: { + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + case NVPTX::BI__imma_m16n16k16_st_c_i32: + case NVPTX::BI__imma_m32n8k16_st_c_i32: + case NVPTX::BI__imma_m8n32k16_st_c_i32: + case NVPTX::BI__imma_m8n8k32_st_c_i32: + case NVPTX::BI__bmma_m8n8k128_st_c_i32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12864,45 +13624,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; 
bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults = 8; - // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet - // for some reason nvcc builtins use _c_. - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } - Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Function *Intrinsic = + CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); SmallVector<Value *, 10> Values = {Dst}; - for (unsigned i = 0; i < NumResults; ++i) { + for (unsigned i = 0; i < II.NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); @@ -12926,7 +13656,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_mma_f16f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + case NVPTX::BI__imma_m16n16k16_mma_s8: + case NVPTX::BI__imma_m16n16k16_mma_u8: + case NVPTX::BI__imma_m32n8k16_mma_s8: + case NVPTX::BI__imma_m32n8k16_mma_u8: + case NVPTX::BI__imma_m8n32k16_mma_s8: + case NVPTX::BI__imma_m8n32k16_mma_u8: + case NVPTX::BI__imma_m8n8k32_mma_s4: + case NVPTX::BI__imma_m8n8k32_mma_u4: + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); @@ -12938,119 +13677,40 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; - if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) + SatfArg = 0; // .b1 does not have satf argument. 
+ else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) return nullptr; bool Satf = SatfArg.getSExtValue(); - - // clang-format off -#define MMA_VARIANTS(geom, type) {{ \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ - }} - // clang-format on - - auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { - unsigned Index = Layout * 2 + Satf; - assert(Index < 8); - return Variants[Index]; - }; - unsigned IID; - unsigned NumEltsC; - unsigned NumEltsD; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f32: - IID 
= getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } -#undef MMA_VARIANTS + NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); + unsigned IID = MI.getMMAIntrinsic(Layout, Satf); + if (IID == 0) // Unsupported combination of Layout/Satf. + return nullptr; SmallVector<Value *, 24> Values; Function *Intrinsic = CGM.getIntrinsic(IID); - llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); // Load A - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < MI.NumEltsA; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcA.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, AType)); } // Load B - for (unsigned i = 0; i < 8; ++i) { + llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); + for (unsigned i = 0; i < MI.NumEltsB; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcB.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, BType)); } // Load C - llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); - for (unsigned 
i = 0; i < NumEltsC; ++i) { + llvm::Type *CType = + Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); + for (unsigned i = 0; i < MI.NumEltsC; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcC.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13059,7 +13719,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } Value *Result = Builder.CreateCall(Intrinsic, Values); llvm::Type *DType = Dst.getElementType(); - for (unsigned i = 0; i < NumEltsD; ++i) + for (unsigned i = 0; i < MI.NumEltsD; ++i) Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13077,7 +13737,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { @@ -13086,37 +13746,61 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)) }; - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } + case WebAssembly::BI__builtin_wasm_memory_init: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt MemConst; + if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + if (!MemConst.isNullValue()) + ErrorUnsupported(E, "non-zero memory 
index"); + Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst), + llvm::ConstantInt::get(getLLVMContext(), MemConst), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), + EmitScalarExpr(E->getArg(4))}; + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init); + return Builder.CreateCall(Callee, Args); + } + case WebAssembly::BI__builtin_wasm_data_drop: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); + return Builder.CreateCall(Callee, {Arg}); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return Builder.CreateCall(Callee, {Tag, Obj}); } - case WebAssembly::BI__builtin_wasm_rethrow: { - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + case WebAssembly::BI__builtin_wasm_rethrow_in_catch: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + Function *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: @@ -13127,7 +13811,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13139,7 +13823,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13149,7 +13833,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13159,7 +13843,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case 
WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13252,14 +13936,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } @@ -13289,19 +13973,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, llvm_unreachable("unexpected builtin ID"); } Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + Function *Callee = 
CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 1c578bd151..42d2b15a4e 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -1,9 +1,8 @@ //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,9 +15,10 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/Decl.h" +#include "clang/Basic/Cuda.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Format.h" @@ -42,17 +42,28 @@ private: /// Convenience reference to the current module llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module - llvm::SmallVector<llvm::Function *, 16> EmittedKernels; - llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; + struct KernelInfo { + llvm::Function *Kernel; + const Decl *D; + }; + llvm::SmallVector<KernelInfo, 16> EmittedKernels; + struct VarInfo { + llvm::GlobalVariable *Var; + const VarDecl *D; + unsigned Flag; + }; + llvm::SmallVector<VarInfo, 16> DeviceVars; /// Keeps track of variable containing handle of GPU binary. 
Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() llvm::GlobalVariable *GpuBinaryHandle = nullptr; /// Whether we generate relocatable device code. bool RelocatableDeviceCode; + /// Mangle context for device. + std::unique_ptr<MangleContext> DeviceMC; - llvm::Constant *getSetupArgumentFn() const; - llvm::Constant *getLaunchFn() const; + llvm::FunctionCallee getSetupArgumentFn() const; + llvm::FunctionCallee getLaunchFn() const; llvm::FunctionType *getRegisterGlobalsFnTy() const; llvm::FunctionType *getCallbackFnTy() const; @@ -104,14 +115,17 @@ private: return DummyFunc; } - void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); + std::string getDeviceSideName(const Decl *ND); public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; - void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { - DeviceVars.push_back(std::make_pair(&Var, Flags)); + void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) override { + DeviceVars.push_back({&Var, VD, Flags}); } /// Creates module constructor function @@ -137,7 +151,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), + DeviceMC(CGM.getContext().createMangleContext( + CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -150,7 +166,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) VoidPtrPtrTy = 
VoidPtrTy->getPointerTo(); } -llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction( @@ -158,7 +174,7 @@ llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { addPrefixToName("SetupArgument")); } -llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { if (CGM.getLangOpts().HIP) { // hipError_t hipLaunchByPtr(char *); return CGM.CreateRuntimeFunction( @@ -186,16 +202,133 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { return llvm::FunctionType::get(VoidTy, Params, false); } +std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { + auto *ND = cast<const NamedDecl>(D); + std::string DeviceSideName; + if (DeviceMC->shouldMangleDeclName(ND)) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + DeviceMC->mangleName(ND, Out); + DeviceSideName = Out.str(); + } else + DeviceSideName = ND->getIdentifier()->getName(); + return DeviceSideName; +} + void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - EmittedKernels.push_back(CGF.CurFn); - emitDeviceStubBody(CGF, Args); + assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || + getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() || + CGF.CGM.getContext().getTargetInfo().getCXXABI() != + CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); + + EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_NEW_LAUNCH)) + emitDeviceStubBodyNew(CGF, Args); + else + emitDeviceStubBodyLegacy(CGF, Args); } -void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, - FunctionArgList &Args) { +// CUDA 9.0+ uses new way to launch kernels. 
Parameters are packed in a local +// array and kernels are launched using cudaLaunchKernel(). +void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, + FunctionArgList &Args) { + // Build the shadow stack entry at the very start of the function. + + // Calculate amount of space we will need for all arguments. If we have no + // args, allocate a single pointer so we still have a valid pointer to the + // argument array that we can pass to runtime, even if it will be unused. + Address KernelArgs = CGF.CreateTempAlloca( + VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", + llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); + // Store pointers to the arguments in a locally allocated launch_args. + for (unsigned i = 0; i < Args.size(); ++i) { + llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); + llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); + CGF.Builder.CreateDefaultAlignedStore( + VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); + } + + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); + + // Lookup cudaLaunchKernel function. + // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // cudaStream_t stream); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + IdentifierInfo &cudaLaunchKernelII = + CGM.getContext().Idents.get("cudaLaunchKernel"); + FunctionDecl *cudaLaunchKernelFD = nullptr; + for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) + cudaLaunchKernelFD = FD; + } + + if (cudaLaunchKernelFD == nullptr) { + CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); + return; + } + // Create temporary dim3 grid_dim, block_dim. 
+ ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); + QualType Dim3Ty = GridDimParam->getType(); + Address GridDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); + Address BlockDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); + Address ShmemSize = + CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); + Address Stream = + CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); + llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, + {/*gridDim=*/GridDim.getType(), + /*blockDim=*/BlockDim.getType(), + /*ShmemSize=*/ShmemSize.getType(), + /*Stream=*/Stream.getType()}, + /*isVarArg=*/false), + "__cudaPopCallConfiguration"); + + CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, + {GridDim.getPointer(), BlockDim.getPointer(), + ShmemSize.getPointer(), Stream.getPointer()}); + + // Emit the call to cudaLaunch + llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); + CallArgList LaunchKernelArgs; + LaunchKernelArgs.add(RValue::get(Kernel), + cudaLaunchKernelFD->getParamDecl(0)->getType()); + LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); + LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); + LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), + cudaLaunchKernelFD->getParamDecl(3)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), + cudaLaunchKernelFD->getParamDecl(4)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), + cudaLaunchKernelFD->getParamDecl(5)->getType()); + + QualType QT = cudaLaunchKernelFD->getType(); + QualType CQT = QT.getCanonicalType(); + llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); + llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); + llvm::FunctionCallee cudaLaunchKernelFn = + 
CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), + LaunchKernelArgs); + CGF.EmitBranch(EndBlock); + + CGF.EmitBlock(EndBlock); +} + +void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, + FunctionArgList &Args) { // Emit a call to cudaSetupArgument for each arg in Args. - llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); + llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn(); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); CharUnits Offset = CharUnits::Zero(); for (const VarDecl *A : Args) { @@ -209,17 +342,17 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()), llvm::ConstantInt::get(SizeTy, Offset.getQuantity()), }; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); - llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); + llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero); llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); - CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); + CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); Offset += TyWidth; } // Emit the call to cudaLaunch - llvm::Constant *cudaLaunchFn = getLaunchFn(); + llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -259,7 +392,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; - llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFunc 
= CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterFuncParams, false), addUnderscoredPrefixToName("RegisterFunction")); @@ -267,13 +400,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); - for (llvm::Function *Kernel : EmittedKernels) { - llvm::Constant *KernelName = makeConstantString(Kernel->getName()); + for (auto &&I : EmittedKernels) { + llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D)); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { - &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), - KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, - NullPtr, NullPtr, NullPtr, + &GpuBinaryHandlePtr, + Builder.CreateBitCast(I.Kernel, VoidPtrTy), + KernelName, + KernelName, + llvm::ConstantInt::get(IntTy, -1), + NullPtr, + NullPtr, + NullPtr, + NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; Builder.CreateCall(RegisterFunc, Args); } @@ -283,13 +422,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy, IntTy}; - llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); - for (auto &Pair : DeviceVars) { - llvm::GlobalVariable *Var = Pair.first; - unsigned Flags = Pair.second; - llvm::Constant *VarName = makeConstantString(Var->getName()); + for (auto &&Info : DeviceVars) { + llvm::GlobalVariable *Var = Info.Var; + unsigned Flags = Info.Flag; + llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); uint64_t VarSize = 
CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); llvm::Value *Args[] = { @@ -329,10 +468,14 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { bool IsHIP = CGM.getLangOpts().HIP; + bool IsCUDA = CGM.getLangOpts().CUDA; // No need to generate ctors/dtors if there is no GPU binary. StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; + if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() && + DeviceVars.empty()) + return nullptr; // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); @@ -342,7 +485,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); // void ** __{cuda|hip}RegisterFatBinary(void *); - llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), addUnderscoredPrefixToName("RegisterFatBinary")); // struct { int magic, int version, void * gpu_binary, void * dont_care }; @@ -516,6 +659,16 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Call __cuda_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + + // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it. + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) { + // void __cudaRegisterFatBinaryEnd(void **); + llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), + "__cudaRegisterFatBinaryEnd"); + CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall); + } } else { // Generate a unique module ID. 
SmallString<64> ModuleID; @@ -532,7 +685,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // void *, void (*)(void **)) SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary"); RegisterLinkedBinaryName += ModuleID; - llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName); assert(RegisterGlobalsFunc && "Expecting at least dummy function!"); @@ -550,7 +703,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // extern "C" int atexit(void (*f)(void)); llvm::FunctionType *AtExitTy = llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); - llvm::Constant *AtExitFunc = + llvm::FunctionCallee AtExitFunc = CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), /*Local=*/true); CtorBuilder.CreateCall(AtExitFunc, CleanupFn); @@ -585,7 +738,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return nullptr; // void __cudaUnregisterFatBinary(void ** handle); - llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), addUnderscoredPrefixToName("UnregisterFatBinary")); diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index 1936f9f136..c14a9d3f2b 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.cpp - Interface to CUDA Runtimes -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCUDARuntime.h b/lib/CodeGen/CGCUDARuntime.h index 0168f4f9e9..ada6734a56 100644 --- a/lib/CodeGen/CGCUDARuntime.h +++ b/lib/CodeGen/CGCUDARuntime.h @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.h - Interface to CUDA Runtimes ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,6 +23,7 @@ class GlobalVariable; namespace clang { class CUDAKernelCallExpr; +class VarDecl; namespace CodeGen { @@ -53,7 +53,8 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; - virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; + virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 8b0733fbec..adaeacfe86 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -1,9 +1,8 @@ //===--- CGCXX.cpp - Emit LLVM Code for declarations ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -204,55 +203,44 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return false; } -llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { - const CGFunctionInfo &FnInfo = - getTypes().arrangeCXXStructorDeclaration(MD, Type); +llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) { + const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); auto *Fn = cast<llvm::Function>( - getAddrOfCXXStructor(MD, Type, &FnInfo, /*FnType=*/nullptr, + getAddrOfCXXStructor(GD, &FnInfo, /*FnType=*/nullptr, /*DontDefer=*/true, ForDefinition)); - GlobalDecl GD; - if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) { - GD = GlobalDecl(DD, toCXXDtorType(Type)); - } else { - const auto *CD = cast<CXXConstructorDecl>(MD); - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } - setFunctionLinkage(GD, Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); setNonAliasAttributes(GD, Fn); - SetLLVMFunctionAttributesForDefinition(MD, Fn); + SetLLVMFunctionAttributesForDefinition(cast<CXXMethodDecl>(GD.getDecl()), Fn); return Fn; } -llvm::Constant *CodeGenModule::getAddrOfCXXStructor( - const CXXMethodDecl *MD, StructorType Type, const CGFunctionInfo *FnInfo, - llvm::FunctionType *FnType, bool DontDefer, - ForDefinition_t IsForDefinition) { - GlobalDecl GD; - if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } else { +llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor( + GlobalDecl GD, const CGFunctionInfo *FnInfo, llvm::FunctionType *FnType, + bool DontDefer, ForDefinition_t IsForDefinition) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); + + if (isa<CXXDestructorDecl>(MD)) { // Always alias equivalent complete destructors 
to base destructors in the // MS ABI. if (getTarget().getCXXABI().isMicrosoft() && - Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0) - Type = StructorType::Base; - GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type)); + GD.getDtorType() == Dtor_Complete && + MD->getParent()->getNumVBases() == 0) + GD = GD.getWithDtorType(Dtor_Base); } if (!FnType) { if (!FnInfo) - FnInfo = &getTypes().arrangeCXXStructorDeclaration(MD, Type); + FnInfo = &getTypes().arrangeCXXStructorDeclaration(GD); FnType = getTypes().GetFunctionType(*FnInfo); } - return GetOrCreateLLVMFunction( + llvm::Constant *Ptr = GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + return {FnType, Ptr}; } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, @@ -312,7 +300,7 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall( assert(DD->isVirtual() && Type != Dtor_Base); // Compute the function type we're calling. const CGFunctionInfo &FInfo = CGM.getTypes().arrangeCXXStructorDeclaration( - DD, StructorType::Complete); + GlobalDecl(DD, Dtor_Complete)); llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo); return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD); } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index ed168b1ce7..9c3973fb9f 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -1,9 +1,8 @@ //===----- CGCXXABI.cpp - Interface to C++ ABIs ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,12 +28,6 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { << S; } -bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // We can only copy the argument if there exists at least one trivial, - // non-deleted copy or move constructor. - return RD->canPassInRegisters(); -} - llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T)); } diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 65b50e14f4..511bcd00d4 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -1,9 +1,8 @@ //===----- CGCXXABI.h - Interface to C++ ABIs -------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -137,10 +136,6 @@ public: RAA_Indirect }; - /// Returns true if C++ allows us to copy the memory of an object of type RD - /// when it is passed as an argument. - bool canCopyArgument(const CXXRecordDecl *RD) const; - /// Returns how an argument of the given record type should be passed. virtual RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const = 0; @@ -310,7 +305,7 @@ public: /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. 
virtual AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) = 0; /// Returns true if the given destructor type should be emitted as a linkonce @@ -557,7 +552,7 @@ public: /// \param Dtor - a function taking a single pointer argument /// \param Addr - a pointer to pass to the destructor function. virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *Dtor, + llvm::FunctionCallee Dtor, llvm::Constant *Addr) = 0; /*************************** thread_local initialization ********************/ @@ -589,7 +584,7 @@ public: /// Emit a single constructor/destructor with the given type from a C++ /// constructor Decl. - virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0; + virtual void emitCXXStructor(GlobalDecl GD) = 0; /// Load a vtable from This, an object of polymorphic type RD, or from one of /// its virtual bases if it does not have its own vtable. Returns the vtable diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 7d494bb1f1..bc9be14ede 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1,9 +1,8 @@ //===--- CGCall.cpp - Encapsulate calling convention details --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,7 +31,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -69,12 +67,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { } /// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR -/// qualification. -static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD, - const CXXMethodDecl *MD) { - QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); +/// qualification. Either or both of RD and MD may be null. A null RD indicates +/// that there is no meaningful 'this' type, and a null MD can occur when +/// calling a method pointer. +CanQualType CodeGenTypes::DeriveThisType(const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { + QualType RecTy; + if (RD) + RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); + else + RecTy = Context.VoidTy; + if (MD) - RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace()); + RecTy = Context.getAddrSpaceQualType(RecTy, MD->getMethodQualifiers().getAddressSpace()); return Context.getPointerType(CanQualType::CreateUnsafe(RecTy)); } @@ -169,11 +174,9 @@ static void appendParameterTypes(const CodeGenTypes &CGT, static const CGFunctionInfo & arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, - CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { + CanQual<FunctionProtoType> FTP) { SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); // FIXME: Kill copy. 
appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); @@ -187,11 +190,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, /// Arrange the argument and result information for a value of the /// given freestanding function type. const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { +CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) { SmallVector<CanQualType, 16> argTypes; return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes, - FTP, FD); + FTP); } static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { @@ -240,7 +242,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. -/// (Zero value of RD means we don't have any meaningful "this" argument type, +/// (A null RD means we don't have any meaningful "this" argument type, /// so fall back to a generic pointer type). /// The member function must be an ordinary function, i.e. not a /// constructor or destructor. @@ -251,14 +253,11 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, SmallVector<CanQualType, 16> argTypes; // Add the 'this' pointer. - if (RD) - argTypes.push_back(GetThisType(Context, RD, MD)); - else - argTypes.push_back(Context.VoidPtrTy); + argTypes.push_back(DeriveThisType(RD, MD)); return ::arrangeLLVMFunctionInfo( *this, true, argTypes, - FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD); + FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>()); } /// Set calling convention for CUDA/HIP kernel. 
@@ -290,7 +289,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD); } - return arrangeFreeFunctionType(prototype, MD); + return arrangeFreeFunctionType(prototype); } bool CodeGenTypes::inheritingCtorHasParams( @@ -300,29 +299,23 @@ bool CodeGenTypes::inheritingCtorHasParams( return Type == Ctor_Complete || !Inherited.getShadowDecl()->constructsVirtualBase() || !Target.getCXXABI().hasConstructorVariants(); - } +} const CGFunctionInfo & -CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, - StructorType Type) { +CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(GetThisType(Context, MD->getParent(), MD)); + argTypes.push_back(DeriveThisType(MD->getParent(), MD)); bool PassParams = true; - GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - // A base class inheriting constructor doesn't get forwarded arguments // needed to construct a virtual base (or base class thereof). if (auto Inherited = CD->getInheritedConstructor()) - PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); - } else { - auto *DD = dyn_cast<CXXDestructorDecl>(MD); - GD = GlobalDecl(DD, toCXXDtorType(Type)); + PassParams = inheritingCtorHasParams(Inherited, GD.getCtorType()); } CanQual<FunctionProtoType> FTP = GetFormalType(MD); @@ -332,7 +325,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, appendParameterTypes(*this, argTypes, paramInfos, FTP); CGCXXABI::AddedStructorArgs AddedArgs = - TheCXXABI.buildStructorSignature(MD, Type, argTypes); + TheCXXABI.buildStructorSignature(GD, argTypes); if (!paramInfos.empty()) { // Note: prefix implies after the first param. 
if (AddedArgs.Prefix) @@ -408,8 +401,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D); + RequiredArgs Required = PassProtoArgs + ? RequiredArgs::forPrototypePlus( + FPT, TotalPrefixArgs + ExtraSuffixArgs) + : RequiredArgs::All; + GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() @@ -452,7 +448,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } - return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>(), FD); + return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>()); } /// Arrange the argument and result information for the declaration or @@ -517,11 +513,9 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { // FIXME: Do we need to handle ObjCMethodDecl? 
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) - return arrangeCXXStructorDeclaration(CD, getFromCtorType(GD.getCtorType())); - - if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(FD)) - return arrangeCXXStructorDeclaration(DD, getFromDtorType(GD.getDtorType())); + if (isa<CXXConstructorDecl>(GD.getDecl()) || + isa<CXXDestructorDecl>(GD.getDecl())) + return arrangeCXXStructorDeclaration(GD); return arrangeFunctionDeclaration(FD); } @@ -535,7 +529,7 @@ const CGFunctionInfo & CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); - CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) }; + CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)}; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); @@ -549,7 +543,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, CanQual<FunctionProtoType> FTP = GetFormalType(CD); SmallVector<CanQualType, 2> ArgTys; const CXXRecordDecl *RD = CD->getParent(); - ArgTys.push_back(GetThisType(Context, RD, CD)); + ArgTys.push_back(DeriveThisType(RD, CD)); if (CT == Ctor_CopyingClosure) ArgTys.push_back(*FTP->param_type_begin()); if (RD->getNumVBases() > 0) @@ -582,7 +576,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, // extra prefix plus the arguments in the prototype. 
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { if (proto->isVariadic()) - required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + required = RequiredArgs::forPrototypePlus(proto, numExtraRequiredArgs); if (proto->hasExtParameterInfos()) addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, @@ -635,11 +629,10 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); auto argTypes = getArgTypesForDeclaration(Context, params); - return arrangeLLVMFunctionInfo( - GetReturnType(proto->getReturnType()), - /*instanceMethod*/ false, /*chainCall*/ false, argTypes, - proto->getExtInfo(), paramInfos, - RequiredArgs::forPrototypePlus(proto, 1, nullptr)); + return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, + argTypes, proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1)); } const CGFunctionInfo & @@ -808,6 +801,8 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, ArrayRef<CanQualType> argTypes, RequiredArgs required) { assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + assert(!required.allowsOptionalArgs() || + required.getNumRequiredArgs() <= argTypes.size()); void *buffer = operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( @@ -1148,7 +1143,7 @@ EnterStructPointerForCoercedAccess(Address SrcPtr, return SrcPtr; // GEP into the first element. - SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, CharUnits(), "coerce.dive"); + SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, "coerce.dive"); // If the first element is a struct, recurse. llvm::Type *SrcTy = SrcPtr.getElementType(); @@ -1276,12 +1271,8 @@ static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val, // Prefer scalar stores to first-class aggregate stores. 
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) { - const llvm::StructLayout *Layout = - CGF.CGM.getDataLayout().getStructLayout(STy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto EltOffset = CharUnits::fromQuantity(Layout->getElementOffset(i)); - Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i, EltOffset); + Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i); llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i); CGF.Builder.CreateStore(Elt, EltPtr, DestIsVolatile); } @@ -1682,13 +1673,7 @@ llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { if (!isFuncTypeConvertible(FPT)) return llvm::StructType::get(getLLVMContext()); - const CGFunctionInfo *Info; - if (isa<CXXDestructorDecl>(MD)) - Info = - &arrangeCXXStructorDeclaration(MD, getFromDtorType(GD.getDtorType())); - else - Info = &arrangeCXXMethodDeclaration(MD); - return GetFunctionType(*Info); + return GetFunctionType(GD); } static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, @@ -1793,8 +1778,6 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); - // FIXME: The interaction of this attribute with the SLH command line flag - // has not been determined. 
if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); } @@ -1826,8 +1809,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; - ConstructDefaultFnAttrList(F.getName(), - F.hasFnAttribute(llvm::Attribute::OptimizeNone), + ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(), /* AttrOnCallsite = */ false, FuncAttrs); F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } @@ -1864,8 +1846,6 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); - if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) - FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -1910,6 +1890,16 @@ void CodeGenModule::ConstructAttributeList( ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); + // This must run after constructing the default function attribute list + // to ensure that the speculative load hardening attribute is removed + // in the case where the -mspeculative-load-hardening flag was passed. + if (TargetDecl) { + if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>()) + FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening); + if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); + } + if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); @@ -2009,8 +1999,7 @@ void CodeGenModule::ConstructAttributeList( // Attach attributes to sret. 
if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; - if (!RetAI.getSuppressSRet()) - SRETAttrs.addAttribute(llvm::Attribute::StructRet); + SRETAttrs.addAttribute(llvm::Attribute::StructRet); hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); @@ -2262,9 +2251,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); - const llvm::StructLayout *ArgStructLayout = nullptr; if (IRFunctionArgs.hasInallocaArg()) { - ArgStructLayout = CGM.getDataLayout().getStructLayout(FI.getArgStruct()); ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()], FI.getArgStructAlignment()); @@ -2313,10 +2300,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); auto FieldIndex = ArgI.getInAllocaFieldIndex(); - CharUnits FieldOffset = - CharUnits::fromQuantity(ArgStructLayout->getElementOffset(FieldIndex)); - Address V = Builder.CreateStructGEP(ArgStruct, FieldIndex, FieldOffset, - Arg->getName()); + Address V = + Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName()); ArgVals.push_back(ParamValue::forIndirect(V)); break; } @@ -2476,7 +2461,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = Ptr.getElementType(); uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); @@ -2493,9 +2477,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { auto AI = 
FnArgs[FirstIRArg + i]; AI->setName(Arg->getName() + ".coerce" + Twine(i)); - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = - Builder.CreateStructGEP(AddrToStoreInto, i, Offset); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); Builder.CreateStore(AI, EltPtr); } @@ -2531,7 +2513,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto coercionType = ArgI.getCoerceAndExpandType(); alloca = Builder.CreateElementBitCast(alloca, coercionType); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); unsigned argIndex = FirstIRArg; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { @@ -2539,7 +2520,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto eltAddr = Builder.CreateStructGEP(alloca, i); auto elt = FnArgs[argIndex++]; Builder.CreateStore(elt, eltAddr); } @@ -2891,15 +2872,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, RV = SI->getValueOperand(); SI->eraseFromParent(); - // If that was the only use of the return value, nuke it as well now. - auto returnValueInst = ReturnValue.getPointer(); - if (returnValueInst->use_empty()) { - if (auto alloca = dyn_cast<llvm::AllocaInst>(returnValueInst)) { - alloca->eraseFromParent(); - ReturnValue = Address::invalid(); - } - } - // Otherwise, we have to do a simple load. } else { RV = Builder.CreateLoad(ReturnValue); @@ -2944,7 +2916,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); // Load all of the coerced elements out into results. 
llvm::SmallVector<llvm::Value*, 4> results; @@ -2954,7 +2925,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) continue; - auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto eltAddr = Builder.CreateStructGEP(addr, i); auto elt = Builder.CreateLoad(eltAddr); results.push_back(elt); } @@ -3368,7 +3339,7 @@ void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) { void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { if (StackBase) { // Restore the stack after the call. - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, StackBase); } } @@ -3455,7 +3426,8 @@ void CodeGenFunction::EmitCallArgs( auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T, - EmittedArg.getScalarVal()); + EmittedArg.getScalarVal(), + PS->isDynamic()); Args.add(RValue::get(V), SizeTy); // If we're emitting args in reverse, be sure to do so with // pass_object_size, as well. @@ -3678,15 +3650,15 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) { /// Emits a call to the given no-arguments nounwind runtime function. llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { return EmitNounwindRuntimeCall(callee, None, name); } /// Emits a call to the given nounwind runtime function. 
llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const llvm::Twine &name) { llvm::CallInst *call = EmitRuntimeCall(callee, args, name); call->setDoesNotThrow(); @@ -3695,9 +3667,8 @@ CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, /// Emits a simple call (never an invoke) to the given no-arguments /// runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - const llvm::Twine &name) { +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + const llvm::Twine &name) { return EmitRuntimeCall(callee, None, name); } @@ -3721,21 +3692,20 @@ CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) { } /// Emits a simple call (never an invoke) to the given runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = - Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name); +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, + const llvm::Twine &name) { + llvm::CallInst *call = Builder.CreateCall( + callee, args, getBundlesForFunclet(callee.getCallee()), name); call->setCallingConv(getRuntimeCC()); return call; } /// Emits a call or invoke to the given noreturn runtime function. 
-void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args) { +void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke( + llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) { SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(callee); + getBundlesForFunclet(callee.getCallee()); if (getInvokeDest()) { llvm::InvokeInst *invoke = @@ -3755,33 +3725,32 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } /// Emits a call or invoke instruction to the given nullary runtime function. -llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, const Twine &name) { return EmitRuntimeCallOrInvoke(callee, None, name); } /// Emits a call or invoke instruction to the given runtime function. -llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name) { - llvm::CallSite callSite = EmitCallOrInvoke(callee, args, name); - callSite.setCallingConv(getRuntimeCC()); - return callSite; + llvm::CallBase *call = EmitCallOrInvoke(callee, args, name); + call->setCallingConv(getRuntimeCC()); + return call; } /// Emits a call or invoke instruction to the given function, depending /// on the current state of the EH stack. 
-llvm::CallSite -CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - const Twine &Name) { +llvm::CallBase *CodeGenFunction::EmitCallOrInvoke(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(Callee); + getBundlesForFunclet(Callee.getCallee()); - llvm::Instruction *Inst; + llvm::CallBase *Inst; if (!InvokeDest) Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { @@ -3796,7 +3765,7 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, if (CGM.getLangOpts().ObjCAutoRefCount) AddObjCARCExceptionMetadata(Inst); - return llvm::CallSite(Inst); + return Inst; } void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, @@ -3808,7 +3777,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &CallArgs, - llvm::Instruction **callOrInvoke, + llvm::CallBase **callOrInvoke, SourceLocation Loc) { // FIXME: We no longer need the types from CallArgs; lift up and simplify. @@ -3819,17 +3788,36 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, QualType RetTy = CallInfo.getReturnType(); const ABIArgInfo &RetAI = CallInfo.getReturnInfo(); - llvm::FunctionType *IRFuncTy = Callee.getFunctionType(); + llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo); + + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); + +#ifndef NDEBUG + if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) { + // For an inalloca varargs function, we don't expect CallInfo to match the + // function pointer's type, because the inalloca struct a will have extra + // fields in it for the varargs parameters. Code later in this function + // bitcasts the function pointer to the type derived from CallInfo. 
+ // + // In other cases, we assert that the types match up (until pointers stop + // having pointee types). + llvm::Type *TypeFromVal; + if (Callee.isVirtual()) + TypeFromVal = Callee.getVirtualFunctionType(); + else + TypeFromVal = + Callee.getFunctionPointer()->getType()->getPointerElementType(); + assert(IRFuncTy == TypeFromVal); + } +#endif // 1. Set up the arguments. // If we're using inalloca, insert the allocation after the stack save. // FIXME: Do this earlier rather than hacking it in here! Address ArgMemory = Address::invalid(); - const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { const llvm::DataLayout &DL = CGM.getDataLayout(); - ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { @@ -3846,13 +3834,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, ArgMemory = Address(AI, Align); } - // Helper function to drill into the inalloca allocation. 
- auto createInAllocaStructGEP = [&](unsigned FieldIndex) -> Address { - auto FieldOffset = - CharUnits::fromQuantity(ArgMemoryLayout->getElementOffset(FieldIndex)); - return Builder.CreateStructGEP(ArgMemory, FieldIndex, FieldOffset); - }; - ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo); SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs()); @@ -3875,7 +3856,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); } else if (RetAI.isInAlloca()) { - Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } @@ -3913,12 +3895,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, cast<llvm::Instruction>(Addr.getPointer()); CGBuilderTy::InsertPoint IP = Builder.saveIP(); Builder.SetInsertPoint(Placeholder); - Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); Builder.restoreIP(IP); deferPlaceholderReplacement(Placeholder, Addr.getPointer()); } else { // Store the RValue into the argument struct. - Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); unsigned AS = Addr.getType()->getPointerAddressSpace(); llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS); // There are some cases where a trivial bitcast is not avoidable. 
The @@ -4099,11 +4083,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, STy->getPointerTo(Src.getAddressSpace())); } - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); assert(NumIRArgs == STy->getNumElements()); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = Builder.CreateStructGEP(Src, i, Offset); + Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); IRCallArgs[FirstIRArg + i] = LI; } @@ -4153,7 +4135,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); IRCallArgs[IRArgPos++] = elt; } @@ -4186,8 +4168,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // cases, we can't do any parameter mismatch checks. Give up and bitcast // the callee. unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace(); - auto FnTy = getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS); - CalleePtr = Builder.CreateBitCast(CalleePtr, FnTy); + CalleePtr = + Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS)); } else { llvm::Type *LastParamTy = IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1); @@ -4219,19 +4201,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // // This makes the IR nicer, but more importantly it ensures that we // can inline the function at -O0 if it is marked always_inline. 
- auto simplifyVariadicCallee = [](llvm::Value *Ptr) -> llvm::Value* { - llvm::FunctionType *CalleeFT = - cast<llvm::FunctionType>(Ptr->getType()->getPointerElementType()); + auto simplifyVariadicCallee = [](llvm::FunctionType *CalleeFT, + llvm::Value *Ptr) -> llvm::Function * { if (!CalleeFT->isVarArg()) - return Ptr; + return nullptr; - llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr); - if (!CE || CE->getOpcode() != llvm::Instruction::BitCast) - return Ptr; + // Get underlying value if it's a bitcast + if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr)) { + if (CE->getOpcode() == llvm::Instruction::BitCast) + Ptr = CE->getOperand(0); + } - llvm::Function *OrigFn = dyn_cast<llvm::Function>(CE->getOperand(0)); + llvm::Function *OrigFn = dyn_cast<llvm::Function>(Ptr); if (!OrigFn) - return Ptr; + return nullptr; llvm::FunctionType *OrigFT = OrigFn->getFunctionType(); @@ -4240,15 +4223,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (OrigFT->isVarArg() || OrigFT->getNumParams() != CalleeFT->getNumParams() || OrigFT->getReturnType() != CalleeFT->getReturnType()) - return Ptr; + return nullptr; for (unsigned i = 0, e = OrigFT->getNumParams(); i != e; ++i) if (OrigFT->getParamType(i) != CalleeFT->getParamType(i)) - return Ptr; + return nullptr; return OrigFn; }; - CalleePtr = simplifyVariadicCallee(CalleePtr); + + if (llvm::Function *OrigFn = simplifyVariadicCallee(IRFuncTy, CalleePtr)) { + CalleePtr = OrigFn; + IRFuncTy = OrigFn->getFunctionType(); + } // 3. Perform the actual call. @@ -4293,11 +4280,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply always_inline to all calls within flatten functions. // FIXME: should this really take priority over __try, below? 
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !(Callee.getAbstractInfo().getCalleeDecl().getDecl() && - Callee.getAbstractInfo() - .getCalleeDecl() - .getDecl() - ->hasAttr<NoInlineAttr>())) { + !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); @@ -4341,22 +4324,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, getBundlesForFunclet(CalleePtr); // Emit the actual call/invoke instruction. - llvm::CallSite CS; + llvm::CallBase *CI; if (!InvokeDest) { - CS = Builder.CreateCall(CalleePtr, IRCallArgs, BundleList); + CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList); } else { llvm::BasicBlock *Cont = createBasicBlock("invoke.cont"); - CS = Builder.CreateInvoke(CalleePtr, Cont, InvokeDest, IRCallArgs, + CI = Builder.CreateInvoke(IRFuncTy, CalleePtr, Cont, InvokeDest, IRCallArgs, BundleList); EmitBlock(Cont); } - llvm::Instruction *CI = CS.getInstruction(); if (callOrInvoke) *callOrInvoke = CI; // Apply the attributes and calling convention. - CS.setAttributes(Attrs); - CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + CI->setAttributes(Attrs); + CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); // Apply various metadata. @@ -4371,7 +4353,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of // IPVK_IndirectCallTarget in InstrProfData.inc. - if (!CS.getCalledFunction()) + if (!CI->getCalledFunction()) PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, CI, CalleePtr); @@ -4382,26 +4364,45 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Suppress tail calls if requested. 
if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) { - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); } + // Add metadata for calls to MSAllocator functions + if (getDebugInfo() && TargetDecl && + TargetDecl->hasAttr<MSAllocatorAttr>()) + getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy, Loc); + // 4. Finish the call. // If the call doesn't return, finish the basic block and clear the // insertion point; this allows the rest of IRGen to discard // unreachable code. - if (CS.doesNotReturn()) { + if (CI->doesNotReturn()) { if (UnusedReturnSizePtr) PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. if (SanOpts.has(SanitizerKind::Unreachable)) { - if (auto *F = CS.getCalledFunction()) + // Also remove from function since CallBase::hasFnAttr additionally checks + // attributes of the called function. + if (auto *F = CI->getCalledFunction()) F->removeFnAttr(llvm::Attribute::NoReturn); - CS.removeAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoReturn); + CI->removeAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + + // Avoid incompatibility with ASan which relies on the `noreturn` + // attribute to insert handler calls. 
+ if (SanOpts.hasOneOf(SanitizerKind::Address | + SanitizerKind::KernelAddress)) { + SanitizerScope SanScope(this); + llvm::IRBuilder<>::InsertPointGuard IPGuard(Builder); + Builder.SetInsertPoint(CI); + auto *FnType = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); + llvm::FunctionCallee Fn = + CGM.CreateRuntimeFunction(FnType, "__asan_handle_no_return"); + EmitNounwindRuntimeCall(Fn); + } } EmitUnreachable(Loc); @@ -4436,7 +4437,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, switch (RetAI.getKind()) { case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); Address addr = SRetPtr; addr = Builder.CreateElementBitCast(addr, coercionType); @@ -4448,7 +4448,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = CI; if (requiresExtract) elt = Builder.CreateExtractValue(elt, unpaddedIndex++); @@ -4529,7 +4529,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } (); // Emit the assume_aligned check on the return value. - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (Ret.isScalar() && TargetDecl) { if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { llvm::Value *OffsetValue = nullptr; @@ -4556,7 +4555,7 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const { if (isVirtual()) { const CallExpr *CE = getVirtualCallExpr(); return CGF.CGM.getCXXABI().getVirtualFunctionPointer( - CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(), + CGF, getVirtualMethodDecl(), getThisAddress(), getVirtualFunctionType(), CE ? 
CE->getBeginLoc() : SourceLocation()); } diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index c300808bea..cc11ded704 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -1,9 +1,8 @@ //===----- CGCall.h - Encapsulate calling convention details ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,6 +135,12 @@ public: return CGCallee(abstractInfo, functionPtr); } + static CGCallee + forDirect(llvm::FunctionCallee functionPtr, + const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { + return CGCallee(abstractInfo, functionPtr.getCallee()); + } + static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, llvm::FunctionType *FTy) { CGCallee result(SpecialKind::Virtual); @@ -199,12 +204,9 @@ public: assert(isVirtual()); return VirtualInfo.Addr; } - - llvm::FunctionType *getFunctionType() const { - if (isVirtual()) - return VirtualInfo.FTy; - return cast<llvm::FunctionType>( - getFunctionPointer()->getType()->getPointerElementType()); + llvm::FunctionType *getVirtualFunctionType() const { + assert(isVirtual()); + return VirtualInfo.FTy; } /// If this is a delayed callee computation of some sort, prepare diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index ee150a792b..9e1312b786 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -1,9 +1,8 @@ //===--- CGClass.cpp - Emit LLVM Code for C++ classes -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -303,7 +302,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // Get the base pointer type. llvm::Type *BasePtrTy = - ConvertType((PathEnd[-1])->getType())->getPointerTo(); + ConvertType((PathEnd[-1])->getType()) + ->getPointerTo(Value.getType()->getPointerAddressSpace()); QualType DerivedTy = getContext().getRecordType(Derived); CharUnits DerivedAlign = CGM.getClassPointerAlignment(Derived); @@ -526,8 +526,7 @@ static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) { static void EmitBaseInitializer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, - CXXCtorInitializer *BaseInit, - CXXCtorType CtorType) { + CXXCtorInitializer *BaseInit) { assert(BaseInit->isBaseInitializer() && "Must have base initializer!"); @@ -539,10 +538,6 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, bool isBaseVirtual = BaseInit->isBaseVirtual(); - // The base constructor doesn't construct virtual bases. - if (CtorType == Ctor_Base && isBaseVirtual) - return; - // If the initializer for the base (other than the constructor // itself) accesses 'this' in any way, we need to initialize the // vtables. @@ -793,7 +788,7 @@ void CodeGenFunction::EmitAsanPrologueOrEpilogue(bool Prologue) { llvm::Type *Args[2] = {IntPtrTy, IntPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, false); - llvm::Constant *F = CGM.CreateRuntimeFunction( + llvm::FunctionCallee F = CGM.CreateRuntimeFunction( FTy, Prologue ? 
"__asan_poison_intra_object_redzone" : "__asan_unpoison_intra_object_redzone"); @@ -1013,7 +1008,7 @@ namespace { if (FOffset < FirstFieldOffset) { FirstField = F; FirstFieldOffset = FOffset; - } else if (FOffset > LastFieldOffset) { + } else if (FOffset >= LastFieldOffset) { LastField = F; LastFieldOffset = FOffset; } @@ -1264,24 +1259,37 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CXXConstructorDecl::init_const_iterator B = CD->init_begin(), E = CD->init_end(); + // Virtual base initializers first, if any. They aren't needed if: + // - This is a base ctor variant + // - There are no vbases + // - The class is abstract, so a complete object of it cannot be constructed + // + // The check for an abstract class is necessary because sema may not have + // marked virtual base destructors referenced. + bool ConstructVBases = CtorType != Ctor_Base && + ClassDecl->getNumVBases() != 0 && + !ClassDecl->isAbstract(); + + // In the Microsoft C++ ABI, there are no constructor variants. Instead, the + // constructor of a class with virtual bases takes an additional parameter to + // conditionally construct the virtual bases. Emit that check here. llvm::BasicBlock *BaseCtorContinueBB = nullptr; - if (ClassDecl->getNumVBases() && + if (ConstructVBases && !CGM.getTarget().getCXXABI().hasConstructorVariants()) { - // The ABIs that don't have constructor variants need to put a branch - // before the virtual base initialization code. BaseCtorContinueBB = - CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); + CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); assert(BaseCtorContinueBB); } llvm::Value *const OldThis = CXXThisValue; - // Virtual base initializers first. 
for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { + if (!ConstructVBases) + continue; if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } if (BaseCtorContinueBB) { @@ -1298,7 +1306,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } CXXThisValue = OldThis; @@ -1627,7 +1635,7 @@ namespace { llvm::FunctionType *FnType = llvm::FunctionType::get(CGF.VoidTy, ArgTypes, false); - llvm::Value *Fn = + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FnType, "__sanitizer_dtor_callback"); CGF.EmitNounwindRuntimeCall(Fn, Args); } @@ -1970,10 +1978,14 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, pushRegularPartialArrayCleanup(arrayBegin, cur, type, eltAlignment, *destroyer); } - + auto currAVS = AggValueSlot::forAddr( + curAddr, type.getQualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, AggValueSlot::IsNotZeroed, + NewPointerIsChecked ? AggValueSlot::IsSanitizerChecked + : AggValueSlot::IsNotSanitizerChecked); EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, curAddr, E, - AggValueSlot::DoesNotOverlap, NewPointerIsChecked); + /*Delegating=*/false, currAVS, E); } // Go to the next element. 
@@ -2007,16 +2019,16 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF, void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, - bool Delegating, Address This, - const CXXConstructExpr *E, - AggValueSlot::Overlap_t Overlap, - bool NewPointerIsChecked) { + bool Delegating, + AggValueSlot ThisAVS, + const CXXConstructExpr *E) { CallArgList Args; - - LangAS SlotAS = E->getType().getAddressSpace(); + Address This = ThisAVS.getAddress(); + LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace(); QualType ThisType = D->getThisType(); LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); llvm::Value *ThisPtr = This.getPointer(); + if (SlotAS != ThisAS) { unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); llvm::Type *NewType = @@ -2024,6 +2036,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), ThisAS, SlotAS, NewType); } + // Push the this ptr. Args.add(RValue::get(ThisPtr), D->getThisType()); @@ -2037,7 +2050,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, LValue Src = EmitLValue(Arg); QualType DestTy = getContext().getTypeDeclType(D->getParent()); LValue Dest = MakeAddrLValue(This, DestTy); - EmitAggregateCopyCtor(Dest, Src, Overlap); + EmitAggregateCopyCtor(Dest, Src, ThisAVS.mayOverlap()); return; } @@ -2050,7 +2063,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, /*ParamsToSkip*/ 0, Order); EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, - Overlap, E->getExprLoc(), NewPointerIsChecked); + ThisAVS.mayOverlap(), E->getExprLoc(), + ThisAVS.isSanitizerChecked()); } static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, @@ -2130,8 +2144,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, Delegating, Args); // Emit the call. 
- llvm::Constant *CalleePtr = - CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); + llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(GlobalDecl(D, Type)); const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 3743d24f11..5594f30302 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -1,9 +1,8 @@ //===--- CGCleanup.cpp - Bookkeeping and code emission for cleanups -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -53,12 +52,8 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { llvm::Type *ComplexTy = llvm::StructType::get(V.first->getType(), V.second->getType()); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); - CGF.Builder.CreateStore(V.first, - CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(V.first->getType())); - CGF.Builder.CreateStore(V.second, - CGF.Builder.CreateStructGEP(addr, 1, offset)); + CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0)); + CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1)); return saved_type(addr.getPointer(), ComplexAddress); } @@ -96,12 +91,10 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { } case ComplexAddress: { Address address = 
getSavingAddress(Value); - llvm::Value *real = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(real->getType())); - llvm::Value *imag = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 1, offset)); + llvm::Value *real = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 0)); + llvm::Value *imag = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 1)); return RValue::getComplex(real, imag); } } diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 15d6f46dcb..ffe0f9d9dd 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -1,9 +1,8 @@ //===-- CGCleanup.h - Classes for cleanups IR generation --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 80fa7c8736..9ebd84d308 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -1,9 +1,8 @@ //===----- CGCoroutine.cpp - Emit LLVM Code for C++ coroutines ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -733,10 +732,10 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, Args.push_back(llvm::ConstantTokenNone::get(getLLVMContext())); break; } - for (auto &Arg : E->arguments()) + for (const Expr *Arg : E->arguments()) Args.push_back(EmitScalarExpr(Arg)); - llvm::Value *F = CGM.getIntrinsic(IID); + llvm::Function *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); // Note: The following code is to enable to emit coro.id and coro.begin by diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 41f8721468..3656602c3d 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -1,9 +1,8 @@ //===--- CGDebugInfo.cpp - Emit Debug Information for a Module ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -373,7 +372,7 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { SourceManager &SM = CGM.getContext().getSourceManager(); bool Invalid; - llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); + const llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); if (Invalid) return None; @@ -451,8 +450,8 @@ CGDebugInfo::createFile(StringRef FileName, for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt) llvm::sys::path::append(DirBuf, *CurDirIt); if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) { - // The common prefix only the root; stripping it would cause - // LLVM diagnostic locations to be more confusing. + // Don't strip the common prefix if it is only the root "/" + // since that would make LLVM diagnostic locations confusing. Dir = {}; File = RemappedFile; } else { @@ -916,6 +915,11 @@ static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM, if (!needsTypeIdentifier(TD, CGM, TheCU)) return Identifier; + if (const auto *RD = dyn_cast<CXXRecordDecl>(TD)) + if (RD->getDefinition()) + if (RD->isDynamicClass() && + CGM.getVTableLinkage(RD) == llvm::GlobalValue::ExternalLinkage) + return Identifier; // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? @@ -1726,31 +1730,37 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext()); llvm::DIType *TTy = getOrCreateType(T, Unit); llvm::Constant *V = nullptr; - const CXXMethodDecl *MD; - // Variable pointer template parameters have a value that is the address - // of the variable. 
- if (const auto *VD = dyn_cast<VarDecl>(D)) - V = CGM.GetAddrOfGlobalVar(VD); - // Member function pointers have special support for building them, though - // this is currently unsupported in LLVM CodeGen. - else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) - V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); - else if (const auto *FD = dyn_cast<FunctionDecl>(D)) - V = CGM.GetAddrOfFunction(FD); - // Member data pointers have special handling too to compute the fixed - // offset within the object. - else if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) { - // These five lines (& possibly the above member function pointer - // handling) might be able to be refactored to use similar code in - // CodeGenModule::getMemberPointerConstant - uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); - CharUnits chars = - CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); - V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + // Skip retrieve the value if that template parameter has cuda device + // attribute, i.e. that value is not available at the host side. + if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice || + !D->hasAttr<CUDADeviceAttr>()) { + const CXXMethodDecl *MD; + // Variable pointer template parameters have a value that is the address + // of the variable. + if (const auto *VD = dyn_cast<VarDecl>(D)) + V = CGM.GetAddrOfGlobalVar(VD); + // Member function pointers have special support for building them, + // though this is currently unsupported in LLVM CodeGen. + else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) + V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); + else if (const auto *FD = dyn_cast<FunctionDecl>(D)) + V = CGM.GetAddrOfFunction(FD); + // Member data pointers have special handling too to compute the fixed + // offset within the object. 
+ else if (const auto *MPT = + dyn_cast<MemberPointerType>(T.getTypePtr())) { + // These five lines (& possibly the above member function pointer + // handling) might be able to be refactored to use similar code in + // CodeGenModule::getMemberPointerConstant + uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); + CharUnits chars = + CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); + V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + } + V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, - cast_or_null<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, cast_or_null<llvm::Constant>(V))); } break; case TemplateArgument::NullPtr: { QualType T = TA.getNullPtrType(); @@ -1817,32 +1827,24 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, } llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, - llvm::DIFile *Unit) { - if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) { - auto T = TS->getSpecializedTemplateOrPartial(); - auto TA = TS->getTemplateArgs().asArray(); - // Collect parameters for a partial specialization - if (T.is<VarTemplatePartialSpecializationDecl *>()) { - const TemplateParameterList *TList = - T.get<VarTemplatePartialSpecializationDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - - // Collect parameters for an explicit specialization - if (T.is<VarTemplateDecl *>()) { - const TemplateParameterList *TList = T.get<VarTemplateDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - } - return llvm::DINodeArray(); + llvm::DIFile *Unit) { + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. 
+ auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL); + if (!TS) + return llvm::DINodeArray(); + VarTemplateDecl *T = TS->getSpecializedTemplate(); + const TemplateParameterList *TList = T->getTemplateParameters(); + auto TA = TS->getTemplateArgs().asArray(); + return CollectTemplateParams(TList, TA, Unit); } llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { - // Always get the full list of parameters, not just the ones from - // the specialization. + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. TemplateParameterList *TPList = TSpecial->getSpecializedTemplate()->getTemplateParameters(); const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); @@ -1875,6 +1877,58 @@ StringRef CGDebugInfo::getVTableName(const CXXRecordDecl *RD) { return internString("_vptr$", RD->getNameAsString()); } +StringRef CGDebugInfo::getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn) { + // If we're not emitting codeview, use the mangled name. For Itanium, this is + // arbitrary. + if (!CGM.getCodeGenOpts().EmitCodeView) + return InitFn->getName(); + + // Print the normal qualified name for the variable, then break off the last + // NNS, and add the appropriate other text. Clang always prints the global + // variable name without template arguments, so we can use rsplit("::") and + // then recombine the pieces. 
+ SmallString<128> QualifiedGV; + StringRef Quals; + StringRef GVName; + { + llvm::raw_svector_ostream OS(QualifiedGV); + VD->printQualifiedName(OS, getPrintingPolicy()); + std::tie(Quals, GVName) = OS.str().rsplit("::"); + if (GVName.empty()) + std::swap(Quals, GVName); + } + + SmallString<128> InitName; + llvm::raw_svector_ostream OS(InitName); + if (!Quals.empty()) + OS << Quals << "::"; + + switch (StubKind) { + case DynamicInitKind::NoStub: + llvm_unreachable("not an initializer"); + case DynamicInitKind::Initializer: + OS << "`dynamic initializer for '"; + break; + case DynamicInitKind::AtExit: + OS << "`dynamic atexit destructor for '"; + break; + } + + OS << GVName; + + // Add any template specialization args. + if (const auto *VTpl = dyn_cast<VarTemplateSpecializationDecl>(VD)) { + printTemplateArgumentList(OS, VTpl->getTemplateArgs().asArray(), + getPrintingPolicy()); + } + + OS << '\''; + + return internString(OS.str()); +} + void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DICompositeType *RecordTy) { @@ -1954,6 +2008,20 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, return T; } +void CGDebugInfo::addHeapAllocSiteMetadata(llvm::Instruction *CI, + QualType D, + SourceLocation Loc) { + llvm::MDNode *node; + if (D.getTypePtr()->isVoidPointerType()) { + node = llvm::MDNode::get(CGM.getLLVMContext(), None); + } else { + QualType PointeeTy = D.getTypePtr()->getPointeeType(); + node = getOrCreateType(PointeeTy, getOrCreateFile(Loc)); + } + + CI->setMetadata("heapallocsite", node); +} + void CGDebugInfo::completeType(const EnumDecl *ED) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -2297,7 +2365,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, } bool IsRootModule = M ? 
!M->Parent : true; - if (CreateSkeletonCU && IsRootModule) { + // When a module name is specified as -fmodule-name, that module gets a + // clang::Module object, but it won't actually be built or imported; it will + // be textual. + if (CreateSkeletonCU && IsRootModule && Mod.getASTFile().empty() && M) + assert(StringRef(M->Name).startswith(CGM.getLangOpts().ModuleName) && + "clang module without ASTFile must be specified by -fmodule-name"); + + if (CreateSkeletonCU && IsRootModule && !Mod.getASTFile().empty()) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. // We use the lower 64 bits for debug info. @@ -2314,6 +2389,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, Signature); DIB.finalize(); } + llvm::DIModule *Parent = IsRootModule ? nullptr : getOrCreateModuleRef( @@ -3021,9 +3097,9 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { else Flags |= llvm::DINode::FlagTypePassByValue; - // Record if a C++ record is trivial type. - if (CXXRD->isTrivial()) - Flags |= llvm::DINode::FlagTrivial; + // Record if a C++ record is non-trivial type. + if (!CXXRD->isTrivial()) + Flags |= llvm::DINode::FlagNonTrivial; } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( @@ -3443,6 +3519,11 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; + } else if (isa<VarDecl>(D) && + GD.getDynamicInitKind() != DynamicInitKind::NoStub) { + // This is a global initializer or atexit destructor for a global variable. + Name = getDynamicInitializerName(cast<VarDecl>(D), GD.getDynamicInitKind(), + Fn); } else { // Use llvm function name. 
Name = Fn->getName(); @@ -3863,6 +3944,32 @@ CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, return EmitDeclare(VD, Storage, llvm::None, Builder); } +void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + + if (D->hasAttr<NoDebugAttr>()) + return; + + auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); + llvm::DIFile *Unit = getOrCreateFile(D->getLocation()); + + // Get location information. + unsigned Line = getLineNumber(D->getLocation()); + unsigned Column = getColumnNumber(D->getLocation()); + + StringRef Name = D->getName(); + + // Create the descriptor for the label. + auto *L = + DBuilder.createLabel(Scope, Name, Unit, Line, CGM.getLangOpts().Optimize); + + // Insert an llvm.dbg.label into the current block. + DBuilder.insertLabel(L, + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); +} + llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType *Ty) { llvm::DIType *CachedTy = getTypeOrNull(QualTy); @@ -4207,6 +4314,14 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, SmallVector<int64_t, 4> Expr; unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(D->getType()); + if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { + if (D->hasAttr<CUDASharedAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared); + else if (D->hasAttr<CUDAConstantAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); + } AppendAddressSpaceXDeref(AddressSpace, Expr); GVE = DBuilder.createGlobalVariableExpression( diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 031e40b9dd..054df01d97 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -1,9 +1,8 @@ //===--- CGDebugInfo.h - DebugInfo for LLVM 
CodeGen -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,7 @@ class ObjCInterfaceDecl; class ObjCIvarDecl; class UsingDecl; class VarDecl; +enum class DynamicInitKind : unsigned; namespace CodeGen { class CodeGenModule; @@ -426,6 +426,9 @@ public: llvm::Value *AI, CGBuilderTy &Builder); + /// Emit call to \c llvm.dbg.label for an label. + void EmitLabel(const LabelDecl *D, CGBuilderTy &Builder); + /// Emit call to \c llvm.dbg.declare for an imported variable /// declaration in a block. void EmitDeclareOfBlockDeclRefVariable( @@ -474,6 +477,10 @@ public: /// Emit standalone debug info for a type. llvm::DIType *getOrCreateStandaloneType(QualType Ty, SourceLocation Loc); + /// Add heapallocsite metadata for MSAllocator calls. + void addHeapAllocSiteMetadata(llvm::Instruction *CallSite, QualType Ty, + SourceLocation Loc); + void completeType(const EnumDecl *ED); void completeType(const RecordDecl *RD); void completeRequiredType(const RecordDecl *RD); @@ -642,6 +649,12 @@ private: /// Get the vtable name for the given class. StringRef getVTableName(const CXXRecordDecl *Decl); + /// Get the name to use in the debug info for a dynamic initializer or atexit + /// stub function. + StringRef getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn); + /// Get line number for the location. If location is invalid /// then use current location. 
unsigned getLineNumber(SourceLocation Loc); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 5959d889b4..61f9de9a29 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -1,9 +1,8 @@ //===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,6 +19,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" @@ -104,6 +104,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPAllocate: case Decl::OMPCapturedExpr: case Decl::OMPRequires: case Decl::Empty: @@ -142,6 +143,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPDeclareReduction: return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::OMPDeclareMapper: + return CGM.EmitOMPDeclareMapper(cast<OMPDeclareMapperDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -535,7 +539,7 @@ namespace { CallStackRestore(Address Stack) : Stack(Stack) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *V = CGF.Builder.CreateLoad(Stack); - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, 
V); } }; @@ -915,9 +919,8 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile, + Builder); } return; } @@ -930,10 +933,9 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) - emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + emitStoresForInitAfterBZero(CGM, Elt, + Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), + isVolatile, Builder); } } @@ -969,83 +971,110 @@ static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, return llvm::isBytewiseValue(Init); } -static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { - // The following value is a guaranteed unmappable pointer value and has a - // repeated byte-pattern which makes it easier to synthesize. We use it for - // pointers as well as integers so that aggregates are likely to be - // initialized with this repeated value. - constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; - // For 32-bit platforms it's a bit trickier because, across systems, only the - // zero page can reasonably be expected to be unmapped, and even then we need - // a very low address. We use a smaller value, and that value sadly doesn't - // have a repeated byte-pattern. We don't use it for integers. - constexpr uint32_t SmallValue = 0x000000AA; - // Floating-point values are initialized as NaNs because they propagate. Using - // a repeated byte pattern means that it will be easier to initialize - // all-floating-point aggregates and arrays with memset. 
Further, aggregates - // which mix integral and a few floats might also initialize with memset - // followed by a handful of stores for the floats. Using fairly unique NaNs - // also means they'll be easier to distinguish in a crash. - constexpr bool NegativeNaN = true; - constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; - if (Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = cast<llvm::IntegerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getBitWidth(); - if (BitWidth <= 64) - return llvm::ConstantInt::get(Ty, LargeValue); - return llvm::ConstantInt::get( - Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); - } - if (Ty->isPtrOrPtrVectorTy()) { - auto *PtrTy = cast<llvm::PointerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); - unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( - PtrTy->getAddressSpace()); - llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); - uint64_t IntValue; - switch (PtrWidth) { - default: - llvm_unreachable("pattern initialization of unsupported pointer width"); - case 64: - IntValue = LargeValue; - break; - case 32: - IntValue = SmallValue; - break; +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. + uint64_t ByteSizeLimit = 64; + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + if (GlobalByteSize <= ByteSizeLimit) + return true; + return false; +} + +enum class IsPattern { No, Yes }; + +/// Generate a constant filled with either a pattern or zeroes. 
+static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern, + llvm::Type *Ty) { + if (isPattern == IsPattern::Yes) + return initializationPatternFor(CGM, Ty); + else + return llvm::Constant::getNullValue(Ty); +} + +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant); + +/// Helper function for constWithPadding() to deal with padding in structures. +static llvm::Constant *constStructWithPadding(CodeGenModule &CGM, + IsPattern isPattern, + llvm::StructType *STy, + llvm::Constant *constant) { + const llvm::DataLayout &DL = CGM.getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(STy); + llvm::Type *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); + unsigned SizeSoFar = 0; + SmallVector<llvm::Constant *, 8> Values; + bool NestedIntact = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { + unsigned CurOff = Layout->getElementOffset(i); + if (SizeSoFar < CurOff) { + assert(!STy->isPacked()); + auto *PadTy = llvm::ArrayType::get(Int8Ty, CurOff - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); + } + llvm::Constant *CurOp; + if (constant->isZeroValue()) + CurOp = llvm::Constant::getNullValue(STy->getElementType(i)); + else + CurOp = cast<llvm::Constant>(constant->getAggregateElement(i)); + auto *NewOp = constWithPadding(CGM, isPattern, CurOp); + if (CurOp != NewOp) + NestedIntact = false; + Values.push_back(NewOp); + SizeSoFar = CurOff + DL.getTypeAllocSize(CurOp->getType()); + } + unsigned TotalSize = Layout->getSizeInBytes(); + if (SizeSoFar < TotalSize) { + auto *PadTy = llvm::ArrayType::get(Int8Ty, TotalSize - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); + } + if (NestedIntact && Values.size() == STy->getNumElements()) + return constant; + return llvm::ConstantStruct::getAnon(Values, STy->isPacked()); +} + +/// Replace all padding bytes in a given constant with either a pattern byte or 
+/// 0x00. +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { + llvm::Type *OrigTy = constant->getType(); + if (const auto STy = dyn_cast<llvm::StructType>(OrigTy)) + return constStructWithPadding(CGM, isPattern, STy, constant); + if (auto *STy = dyn_cast<llvm::SequentialType>(OrigTy)) { + llvm::SmallVector<llvm::Constant *, 8> Values; + unsigned Size = STy->getNumElements(); + if (!Size) + return constant; + llvm::Type *ElemTy = STy->getElementType(); + bool ZeroInitializer = constant->isZeroValue(); + llvm::Constant *OpValue, *PaddedOp; + if (ZeroInitializer) { + OpValue = llvm::Constant::getNullValue(ElemTy); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + for (unsigned Op = 0; Op != Size; ++Op) { + if (!ZeroInitializer) { + OpValue = constant->getAggregateElement(Op); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + Values.push_back(PaddedOp); + } + auto *NewElemTy = Values[0]->getType(); + if (NewElemTy == ElemTy) + return constant; + if (OrigTy->isArrayTy()) { + auto *ArrayTy = llvm::ArrayType::get(NewElemTy, Size); + return llvm::ConstantArray::get(ArrayTy, Values); + } else { + return llvm::ConstantVector::get(Values); } - auto *Int = llvm::ConstantInt::get(IntTy, IntValue); - return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); - } - if (Ty->isFPOrFPVectorTy()) { - unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getFltSemantics()); - llvm::APInt Payload(64, NaNPayload); - if (BitWidth >= 64) - Payload = llvm::APInt::getSplat(BitWidth, Payload); - return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); - } - if (Ty->isArrayTy()) { - // Note: this doesn't touch tail padding (at the end of an object, before - // the next array object). It is instead handled by replaceUndef. 
- auto *ArrTy = cast<llvm::ArrayType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Element( - ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); - return llvm::ConstantArray::get(ArrTy, Element); - } - - // Note: this doesn't touch struct padding. It will initialize as much union - // padding as is required for the largest type in the union. Padding is - // instead handled by replaceUndef. Stores to structs with volatile members - // don't have a volatile qualifier when initialized according to C++. This is - // fine because stack-based volatiles don't really have volatile semantics - // anyways, and the initialization shouldn't be observable. - auto *StructTy = cast<llvm::StructType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements()); - for (unsigned El = 0; El != Struct.size(); ++El) - Struct[El] = patternFor(CGM, StructTy->getElementType(El)); - return llvm::ConstantStruct::get(StructTy, Struct); + } + return constant; } static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, @@ -1096,9 +1125,9 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, CGBuilderTy &Builder, llvm::Constant *constant) { auto *Ty = constant->getType(); - bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() || - Ty->isFPOrFPVectorTy(); - if (isScalar) { + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { Builder.CreateStore(constant, Loc, isVolatile); return; } @@ -1106,10 +1135,13 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext()); - // If the initializer is all or mostly the same, codegen with bzero / memset - // then do a few stores afterward. 
uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); + if (!ConstantSize) + return; auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize); + + // If the initializer is all or mostly the same, codegen with bzero / memset + // then do a few stores afterward. if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, isVolatile); @@ -1123,6 +1155,7 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } + // If the initializer is a repeated byte pattern, use memset. llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); if (Pattern) { uint64_t Value = 0x00; @@ -1136,6 +1169,34 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } + // If the initializer is small, use a handful of stores. + if (shouldSplitConstantStore(CGM, ConstantSize)) { + if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { + // FIXME: handle the case when STy != Loc.getElementType(). + if (STy == Loc.getElementType()) { + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + Address EltPtr = Builder.CreateStructGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { + // FIXME: handle the case when ATy != Loc.getElementType(). + if (ATy == Loc.getElementType()) { + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } + } + + // Copy from a global. 
Builder.CreateMemCpy( Loc, createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), @@ -1146,7 +1207,8 @@ static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = llvm::Constant::getNullValue(ElTy); + llvm::Constant *constant = + constWithPadding(CGM, IsPattern::No, llvm::Constant::getNullValue(ElTy)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1154,7 +1216,8 @@ static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = patternFor(CGM, ElTy); + llvm::Constant *constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); assert(!isa<llvm::UndefValue>(constant)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1170,13 +1233,11 @@ static bool containsUndef(llvm::Constant *constant) { return false; } -static llvm::Constant *replaceUndef(llvm::Constant *constant) { - // FIXME: when doing pattern initialization, replace undef with 0xAA instead. - // FIXME: also replace padding between values by creating a new struct type - // which has no padding. 
+static llvm::Constant *replaceUndef(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { auto *Ty = constant->getType(); if (isa<llvm::UndefValue>(constant)) - return llvm::Constant::getNullValue(Ty); + return patternOrZeroFor(CGM, isPattern, Ty); if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())) return constant; if (!containsUndef(constant)) @@ -1184,7 +1245,7 @@ static llvm::Constant *replaceUndef(llvm::Constant *constant) { llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands()); for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) { auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op)); - Values[Op] = replaceUndef(OpValue); + Values[Op] = replaceUndef(CGM, isPattern, OpValue); } if (Ty->isStructTy()) return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values); @@ -1318,7 +1379,13 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); - if (Ty->isConstantSizeType()) { + Address OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { + address = OpenMPLocalAddr; + } else if (Ty->isConstantSizeType()) { bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); @@ -1361,14 +1428,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // unless: // - it's an NRVO variable. // - we are compiling OpenMP and it's an OpenMP local variable. - - Address OpenMPLocalAddr = - getLangOpts().OpenMP - ? 
CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) - : Address::invalid(); - if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { - address = OpenMPLocalAddr; - } else if (NRVO) { + if (NRVO) { // The named return value optimization: allocate this variable in the // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). @@ -1451,7 +1511,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address Stack = CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); llvm::Value *V = Builder.CreateCall(F); Builder.CreateStore(V, Stack); @@ -1485,7 +1545,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); } - if (D.hasAttr<AnnotateAttr>()) + if (D.hasAttr<AnnotateAttr>() && HaveInsertPoint()) EmitVarAnnotations(&D, address.getPointer()); // Make sure we call @llvm.lifetime.end. @@ -1620,8 +1680,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { bool capturedByInit = Init && emission.IsEscapingByRef && isCapturedBy(D, Init); - Address Loc = - capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + bool locIsByrefHeader = !capturedByInit; + const Address Loc = + locIsByrefHeader ? emission.getObjectAddress(*this) : emission.Addr; // Note: constexpr already initializes everything correctly. LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = @@ -1631,11 +1692,15 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { ? 
LangOptions::TrivialAutoVarInitKind::Uninitialized : getContext().getLangOpts().getTrivialAutoVarInit())); - auto initializeWhatIsTechnicallyUninitialized = [&]() { + auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { if (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Uninitialized) return; + // Only initialize a __block's storage: we always initialize the header. + if (emission.IsEscapingByRef && !locIsByrefHeader) + Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false); + CharUnits Size = getContext().getTypeSizeInChars(type); if (!Size.isZero()) { switch (trivialAutoVarInit) { @@ -1656,8 +1721,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { // Technically zero-sized or negative-sized VLAs are undefined, and UBSan // will catch that code, but there exists code which generates zero-sized // VLAs. Be nice and initialize whatever they requested. - const VariableArrayType *VlaType = - dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type)); + const auto *VlaType = getContext().getAsVariableArrayType(type); if (!VlaType) return; auto VlaSize = getVLASize(VlaType); @@ -1676,7 +1740,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { case LangOptions::TrivialAutoVarInitKind::Pattern: { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *Constant = patternFor(CGM, ElTy); + llvm::Constant *Constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); @@ -1713,21 +1778,35 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { }; if (isTrivialInitializer(Init)) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); return; } llvm::Constant *constant = nullptr; - if 
(emission.IsConstantAggregate || D.isConstexpr()) { + if (emission.IsConstantAggregate || D.isConstexpr() || + D.isUsableInConstantExpressions(getContext())) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); - if (constant && trivialAutoVarInit != - LangOptions::TrivialAutoVarInitKind::Uninitialized) - constant = replaceUndef(constant); + if (constant && !constant->isZeroValue() && + (trivialAutoVarInit != + LangOptions::TrivialAutoVarInitKind::Uninitialized)) { + IsPattern isPattern = + (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Pattern) + ? IsPattern::Yes + : IsPattern::No; + // C guarantees that brace-init with fewer initializers than members in + // the aggregate will initialize the rest of the aggregate as-if it were + // static initialization. In turn static initialization guarantees that + // padding is initialized to zero bits. We could instead pattern-init if D + // has any ImplicitValueInitExpr, but that seems to be unintuitive + // behavior. + constant = constWithPadding(CGM, IsPattern::No, + replaceUndef(CGM, isPattern, constant)); + } } if (!constant) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); @@ -1741,10 +1820,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); - if (Loc.getType() != BP) - Loc = Builder.CreateBitCast(Loc, BP); - - emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); + emitStoresForConstant( + CGM, D, (Loc.getType() == BP) ? Loc : Builder.CreateBitCast(Loc, BP), + isVolatile, Builder, constant); } /// Emit an expression as an initializer for an object (variable, field, etc.) 
@@ -2199,7 +2277,7 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, } /// Lazily declare the @llvm.lifetime.start intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeStartFn() { if (LifetimeStartFn) return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2208,7 +2286,7 @@ llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { } /// Lazily declare the @llvm.lifetime.end intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeEndFn() { if (LifetimeEndFn) return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2417,6 +2495,13 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } +void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + // FIXME: need to implement mapper code generation +} + void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); + getOpenMPRuntime().checkArchForUnifiedAddressing(D); } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 9aa31f181e..c7d65f1619 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -1,9 +1,8 @@ //===--- CGDeclCXX.cpp - Emit LLVM Code for C++ declarations --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -98,7 +97,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, return; } - llvm::Constant *Func; + llvm::FunctionCallee Func; llvm::Constant *Argument; // Special-case non-array C++ destructors, if they have the right signature. @@ -118,7 +117,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, assert(!Record->hasTrivialDestructor()); CXXDestructorDecl *Dtor = Record->getDestructor(); - Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete); + Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete)); Argument = llvm::ConstantExpr::getBitCast( Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); @@ -150,7 +149,7 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. llvm::Type *ObjectPtr[1] = {Int8PtrTy}; - llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); + llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); @@ -215,8 +214,8 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, /// Create a stub function, suitable for being passed to atexit, /// which passes the given address to the given destructor function. -llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, - llvm::Constant *dtor, +llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Get the destructor function type, void(*)(void). 
llvm::FunctionType *ty = llvm::FunctionType::get(CGM.VoidTy, false); @@ -227,19 +226,19 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, } const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(ty, FnName.str(), - FI, - VD.getLocation()); + llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( + ty, FnName.str(), FI, VD.getLocation()); CodeGenFunction CGF(CGM); - CGF.StartFunction(&VD, CGM.getContext().VoidTy, fn, FI, FunctionArgList()); + CGF.StartFunction(GlobalDecl(&VD, DynamicInitKind::AtExit), + CGM.getContext().VoidTy, fn, FI, FunctionArgList()); llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. if (llvm::Function *dtorFn = - dyn_cast<llvm::Function>(dtor->stripPointerCasts())) + dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); @@ -249,7 +248,7 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, /// Register a global destructor using the C atexit runtime function. void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, - llvm::Constant *dtor, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Create a function which calls the destructor. 
llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr); @@ -261,10 +260,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { llvm::FunctionType *atexitTy = llvm::FunctionType::get(IntTy, dtorStub->getType(), false); - llvm::Constant *atexit = + llvm::FunctionCallee atexit = CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); - if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) + if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit.getCallee())) atexitFn->setDoesNotThrow(); EmitNounwindRuntimeCall(atexit, dtorStub); @@ -468,7 +467,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) { OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); - } else if (isTemplateInstantiation(D->getTemplateSpecializationKind())) { + } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) || + getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) { // C++ [basic.start.init]p2: // Definitions of explicitly specialized class template static data // members have ordered initialization. Other class template static data @@ -482,6 +482,11 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // minor startup time optimization. In the MS C++ ABI, there are no guard // variables, so this COMDAT key is required for correctness. AddGlobalCtor(Fn, 65535, COMDATKey); + if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) { + // In the MS C++ ABI, MS adds template static data members to the linker + // directive. + addUsedGlobal(COMDATKey); + } } else if (D->hasAttr<SelectAnyAttr>()) { // SelectAny globals will be comdat-folded.
Put the initializer into a // COMDAT group associated with the global, so the initializers get folded @@ -604,8 +609,8 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, CurEHLocation = D->getBeginLoc(); - StartFunction(GlobalDecl(D), getContext().VoidTy, Fn, - getTypes().arrangeNullaryFunction(), + StartFunction(GlobalDecl(D, DynamicInitKind::Initializer), + getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), FunctionArgList(), D->getLocation(), D->getInit()->getExprLoc()); @@ -682,8 +687,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, void CodeGenFunction::GenerateCXXGlobalDtorsFunc( llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> - &DtorsAndObjects) { + const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, + llvm::Constant *>> &DtorsAndObjects) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -693,9 +698,11 @@ void CodeGenFunction::GenerateCXXGlobalDtorsFunc( // Emit the dtors, in reverse order from construction. for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) { - llvm::Value *Callee = DtorsAndObjects[e - i - 1].first; - llvm::CallInst *CI = Builder.CreateCall(Callee, - DtorsAndObjects[e - i - 1].second); + llvm::FunctionType *CalleeTy; + llvm::Value *Callee; + llvm::Constant *Arg; + std::tie(CalleeTy, Callee, Arg) = DtorsAndObjects[e - i - 1]; + llvm::CallInst *CI = Builder.CreateCall(CalleeTy, Callee, Arg); // Make sure the call and the callee agree on calling convention. 
if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) CI->setCallingConv(F->getCallingConv()); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 5756e13d26..748029b860 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -1,9 +1,8 @@ //===--- CGException.cpp - Emit LLVM Code for C++ exceptions ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetBuiltins.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/SaveAndRestore.h" @@ -30,7 +28,7 @@ using namespace clang; using namespace CodeGen; -static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getFreeExceptionFn(CodeGenModule &CGM) { // void __cxa_free_exception(void *thrown_exception); llvm::FunctionType *FTy = @@ -39,7 +37,7 @@ static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); } -static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getUnexpectedFn(CodeGenModule &CGM) { // void __cxa_call_unexpected(void *thrown_exception); llvm::FunctionType *FTy = @@ -48,7 +46,7 @@ static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } -llvm::Constant *CodeGenModule::getTerminateFn() { +llvm::FunctionCallee CodeGenModule::getTerminateFn() { // void __terminate(); 
llvm::FunctionType *FTy = @@ -74,8 +72,8 @@ llvm::Constant *CodeGenModule::getTerminateFn() { return CreateRuntimeFunction(FTy, name); } -static llvm::Constant *getCatchallRethrowFn(CodeGenModule &CGM, - StringRef Name) { +static llvm::FunctionCallee getCatchallRethrowFn(CodeGenModule &CGM, + StringRef Name) { llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); @@ -240,8 +238,8 @@ const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } -static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, - const EHPersonality &Personality) { +static llvm::FunctionCallee getPersonalityFn(CodeGenModule &CGM, + const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, llvm::AttributeList(), /*Local=*/true); @@ -249,12 +247,13 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { - llvm::Constant *Fn = getPersonalityFn(CGM, Personality); + llvm::FunctionCallee Fn = getPersonalityFn(CGM, Personality); llvm::PointerType* Int8PtrTy = llvm::PointerType::get( llvm::Type::getInt8Ty(CGM.getLLVMContext()), CGM.getDataLayout().getProgramAddressSpace()); - return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy); + return llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(Fn.getCallee()), + Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -345,12 +344,13 @@ void CodeGenModule::SimplifyPersonality() { // Create the C++ personality function and kill off the old // function. - llvm::Constant *CXXFn = getPersonalityFn(*this, CXX); + llvm::FunctionCallee CXXFn = getPersonalityFn(*this, CXX); // This can happen if the user is screwing with us. 
- if (Fn->getType() != CXXFn->getType()) return; + if (Fn->getType() != CXXFn.getCallee()->getType()) + return; - Fn->replaceAllUsesWith(CXXFn); + Fn->replaceAllUsesWith(CXXFn.getCallee()); Fn->eraseFromParent(); } @@ -977,15 +977,15 @@ static void emitWasmCatchPadBlock(CodeGenFunction &CGF, // Create calls to wasm.get.exception and wasm.get.ehselector intrinsics. // Before they are lowered appropriately later, they provide values for the // exception and selector. - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); - llvm::Value *GetSelectorFn = + llvm::Function *GetSelectorFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector); llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI); CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot()); llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI); - llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); + llvm::Function *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // If there's only a single catch-all, branch directly to its handler. if (CatchScope.getNumHandlers() == 1 && @@ -1069,7 +1069,7 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF, CGF.EmitBlockAfterUses(dispatchBlock); // Select the right handler. - llvm::Value *llvm_eh_typeid_for = + llvm::Function *llvm_eh_typeid_for = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // Load the selector value. 
@@ -1259,7 +1259,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { } assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty()); Builder.SetInsertPoint(RethrowBlock); - CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true); + llvm::Function *RethrowInCatchFn = + CGM.getIntrinsic(llvm::Intrinsic::wasm_rethrow_in_catch); + EmitNoreturnRuntimeCallOrInvoke(RethrowInCatchFn, {}); } EmitBlock(ContBB); @@ -1269,9 +1271,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { namespace { struct CallEndCatchForFinally final : EHScopeStack::Cleanup { llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - CallEndCatchForFinally(llvm::Value *ForEHVar, llvm::Value *EndCatchFn) - : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} + llvm::FunctionCallee EndCatchFn; + CallEndCatchForFinally(llvm::Value *ForEHVar, + llvm::FunctionCallee EndCatchFn) + : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::BasicBlock *EndCatchBB = CGF.createBasicBlock("finally.endcatch"); @@ -1290,15 +1293,15 @@ namespace { struct PerformFinally final : EHScopeStack::Cleanup { const Stmt *Body; llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - llvm::Value *RethrowFn; + llvm::FunctionCallee EndCatchFn; + llvm::FunctionCallee RethrowFn; llvm::Value *SavedExnVar; PerformFinally(const Stmt *Body, llvm::Value *ForEHVar, - llvm::Value *EndCatchFn, - llvm::Value *RethrowFn, llvm::Value *SavedExnVar) - : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), - RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} + llvm::FunctionCallee EndCatchFn, + llvm::FunctionCallee RethrowFn, llvm::Value *SavedExnVar) + : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), + RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} void Emit(CodeGenFunction &CGF, Flags flags) override { // Enter a cleanup to call the end-catch function if one was provided. 
@@ -1360,12 +1363,11 @@ namespace { /// Enters a finally block for an implementation using zero-cost /// exceptions. This is mostly general, but hard-codes some /// language/ABI-specific behavior in the catch-all sections. -void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, - const Stmt *body, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *rethrowFn) { - assert((beginCatchFn != nullptr) == (endCatchFn != nullptr) && +void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, const Stmt *body, + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee rethrowFn) { + assert((!!beginCatchFn) == (!!endCatchFn) && "begin/end catch functions not paired"); assert(rethrowFn && "rethrow function is required"); @@ -1377,9 +1379,7 @@ void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, // In the latter case we need to pass it the exception object. // But we can't use the exception slot because the @finally might // have a landing pad (which would overwrite the exception slot). - llvm::FunctionType *rethrowFnTy = - cast<llvm::FunctionType>( - cast<llvm::PointerType>(rethrowFn->getType())->getElementType()); + llvm::FunctionType *rethrowFnTy = rethrowFn.getFunctionType(); SavedExnVar = nullptr; if (rethrowFnTy->getNumParams()) SavedExnVar = CGF.CreateTempAlloca(CGF.Int8PtrTy, "finally.exn"); @@ -1545,7 +1545,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() { // __clang_call_terminate function. 
if (getLangOpts().CPlusPlus && EHPersonality::get(*this).isWasmPersonality()) { - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad); } @@ -1632,7 +1632,7 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { if (CGF.IsOutlinedSEHHelper) { FP = &CGF.CurFn->arg_begin()[1]; } else { - llvm::Value *LocalAddrFn = + llvm::Function *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress); FP = CGF.Builder.CreateCall(LocalAddrFn); } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 34a921e2dc..5641d54383 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1,9 +1,8 @@ //===--- CGExpr.cpp - Emit LLVM Code from Expressions ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -331,7 +330,7 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, switch (M->getStorageDuration()) { case SD_Static: case SD_Thread: { - llvm::Constant *CleanupFn; + llvm::FunctionCallee CleanupFn; llvm::Constant *CleanupArg; if (E->getType()->isArrayType()) { CleanupFn = CodeGenFunction(CGF.CGM).generateDestroyHelper( @@ -340,8 +339,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, dyn_cast_or_null<VarDecl>(M->getExtendingDecl())); CleanupArg = llvm::Constant::getNullValue(CGF.Int8PtrTy); } else { - CleanupFn = CGF.CGM.getAddrOfCXXStructor(ReferenceTemporaryDtor, - StructorType::Complete); + CleanupFn = CGF.CGM.getAddrAndTypeOfCXXStructor( + GlobalDecl(ReferenceTemporaryDtor, Dtor_Complete)); CleanupArg = cast<llvm::Constant>(ReferenceTemporary.getPointer()); } CGF.CGM.getCXXABI().registerGlobalDtor( @@ -653,7 +652,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, CharUnits Alignment, - SanitizerSet SkippedChecks) { + SanitizerSet SkippedChecks, + llvm::Value *ArraySize) { if (!sanitizePerformTypeCheck()) return; @@ -711,21 +711,28 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (SanOpts.has(SanitizerKind::ObjectSize) && !SkippedChecks.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { - uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); - - // The glvalue must refer to a large enough storage region. - // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation - // to check this. 
- // FIXME: Get object address space - llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); - llvm::Value *Min = Builder.getFalse(); - llvm::Value *NullIsUnknown = Builder.getFalse(); - llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); - llvm::Value *LargeEnough = Builder.CreateICmpUGE( - Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}), - llvm::ConstantInt::get(IntPtrTy, Size)); - Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); + uint64_t TySize = getContext().getTypeSizeInChars(Ty).getQuantity(); + llvm::Value *Size = llvm::ConstantInt::get(IntPtrTy, TySize); + if (ArraySize) + Size = Builder.CreateMul(Size, ArraySize); + + // Degenerate case: new X[0] does not need an objectsize check. + llvm::Constant *ConstantSize = dyn_cast<llvm::Constant>(Size); + if (!ConstantSize || !ConstantSize->isNullValue()) { + // The glvalue must refer to a large enough storage region. + // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation + // to check this. 
+ // FIXME: Get object address space + llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); + llvm::Value *Min = Builder.getFalse(); + llvm::Value *NullIsUnknown = Builder.getFalse(); + llvm::Value *Dynamic = Builder.getFalse(); + llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); + llvm::Value *LargeEnough = Builder.CreateICmpUGE( + Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown, Dynamic}), Size); + Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); + } } uint64_t AlignVal = 0; @@ -1288,7 +1295,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::CXXUuidofExprClass: return EmitCXXUuidofLValue(cast<CXXUuidofExpr>(E)); case Expr::LambdaExprClass: - return EmitLambdaLValue(cast<LambdaExpr>(E)); + return EmitAggExprToLValue(E); case Expr::ExprWithCleanupsClass: { const auto *cleanups = cast<ExprWithCleanups>(E); @@ -1879,7 +1886,6 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { Address VectorBasePtrPlusIx = Builder.CreateConstInBoundsGEP(CastToPointerElement, ix, - getContext().getTypeSizeInChars(EQT), "vector.elt"); return VectorBasePtrPlusIx; @@ -1899,7 +1905,7 @@ RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall( F, llvm::MetadataAsValue::get(Ty->getContext(), RegName)); if (OrigTy->isPointerTy()) @@ -2160,7 +2166,7 @@ void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, 
Types); llvm::Value *Value = Src.getScalarVal(); if (OrigTy->isPointerTy()) Value = Builder.CreatePtrToInt(Value, Ty); @@ -2851,16 +2857,13 @@ enum class CheckRecoverableKind { } static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) { - assert(llvm::countPopulation(Kind) == 1); - switch (Kind) { - case SanitizerKind::Vptr: + assert(Kind.countPopulation() == 1); + if (Kind == SanitizerKind::Vptr) return CheckRecoverableKind::AlwaysRecoverable; - case SanitizerKind::Return: - case SanitizerKind::Unreachable: + else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable) return CheckRecoverableKind::Unrecoverable; - default: + else return CheckRecoverableKind::Recoverable; - } } namespace { @@ -2910,7 +2913,7 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, } B.addAttribute(llvm::Attribute::UWTable); - llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, llvm::AttributeList::get(CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, B), @@ -3051,7 +3054,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); llvm::CallInst *CheckCall; - llvm::Constant *SlowPathFn; + llvm::FunctionCallee SlowPathFn; if (WithDiag) { llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); auto *InfoPtr = @@ -3073,7 +3076,8 @@ void CodeGenFunction::EmitCfiSlowPathCheck( CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); } - CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts())); + CGM.setDSOLocal( + cast<llvm::GlobalValue>(SlowPathFn.getCallee()->stripPointerCasts())); CheckCall->setDoesNotThrow(); EmitBlock(Cont); @@ -3252,7 +3256,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, if (!E->getType()->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), 
"arraydecay"); + Addr = Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } // The result of this decay conversion points to an array element within the @@ -3529,8 +3533,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, if (!BaseTy->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), - "arraydecay"); + Addr = CGF.Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } return CGF.Builder.CreateElementBitCast(Addr, @@ -3819,20 +3822,7 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - CharUnits offset; - // Adjust the alignment down to the given offset. - // As a special case, if the LLVM field index is 0, we know that this - // is zero. - assert((idx != 0 || CGF.getContext().getASTRecordLayout(rec) - .getFieldOffset(field->getFieldIndex()) == 0) && - "LLVM field at index zero had non-zero offset?"); - if (idx != 0) { - auto &recLayout = CGF.getContext().getASTRecordLayout(rec); - auto offsetInBits = recLayout.getFieldOffset(field->getFieldIndex()); - offset = CGF.getContext().toCharUnitsFromBits(offsetInBits); - } - - return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName()); + return CGF.Builder.CreateStructGEP(base, idx, field->getName()); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -3866,8 +3856,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, unsigned Idx = RL.getLLVMFieldNo(field); if (Idx != 0) // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, Info.StorageOffset, - field->getName()); + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); // Get the access type. 
llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); @@ -4175,6 +4164,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -4548,13 +4539,6 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } -LValue -CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { - AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); - EmitLambdaExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); -} - LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { RValue RV = EmitObjCMessageExpr(E); @@ -4688,7 +4672,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()), EmitCheckTypeDescriptor(CalleeType)}; EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function), - SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr); + SanitizerHandler::FunctionTypeMismatch, StaticData, + {CalleePtr, CalleeRTTI, FTRTTIConst}); Builder.CreateBr(Cont); EmitBlock(Cont); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index db49b3f28a..cd49be4bd4 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -1,9 +1,8 @@ //===--- CGExprAgg.cpp - Emit LLVM Code from Aggregate Expressions --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -760,8 +759,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { // Build a GEP to refer to the subobject. Address valueAddr = - CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0, - CharUnits()); + CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0); valueDest = AggValueSlot::forAddr(valueAddr, valueDest.getQualifiers(), valueDest.isExternallyDestructed(), @@ -781,11 +779,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp"); CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); - Address valueAddr = - Builder.CreateStructGEP(atomicSlot.getAddress(), 0, CharUnits()); + Address valueAddr = Builder.CreateStructGEP(atomicSlot.getAddress(), 0); RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile()); return EmitFinalDestCopy(valueType, rvalue); } + case CK_AddressSpaceConversion: + return Visit(E->getSubExpr()); case CK_LValueToRValue: // If we're loading from a volatile type, force the destination @@ -797,6 +796,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LLVM_FALLTHROUGH; + case CK_NoOp: case CK_UserDefinedConversion: case CK_ConstructorConversion: @@ -852,10 +852,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLOpaqueType: - case CK_AddressSpaceConversion: + case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("cast kind invalid for aggregate types"); } } @@ -1264,7 +1266,52 @@ void AggExprEmitter::VisitCXXInheritedCtorInitExpr( void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); - CGF.EmitLambdaExpr(E, Slot); + LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType()); + + // We'll 
need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> Cleanups; + llvm::Instruction *CleanupDominator = nullptr; + + CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); + for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), + e = E->capture_init_end(); + i != e; ++i, ++CurField) { + // Emit initialization + LValue LV = CGF.EmitLValueForFieldInitialization(SlotLV, *CurField); + if (CurField->hasCapturedVLAType()) { + CGF.EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); + continue; + } + + EmitInitializationToLValue(*i, LV); + + // Push a destructor if necessary. + if (QualType::DestructionKind DtorKind = + CurField->getType().isDestructedType()) { + assert(LV.isSimple()); + if (CGF.needsEHCleanup(DtorKind)) { + if (!CleanupDominator) + CleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, + llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); // placeholder + + CGF.pushDestroy(EHCleanup, LV.getAddress(), CurField->getType(), + CGF.getDestroyer(DtorKind), false); + Cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + + // Deactivate all the partial cleanups in reverse order, which + // generally means popping them. + for (unsigned i = Cleanups.size(); i != 0; --i) + CGF.DeactivateCleanupBlock(Cleanups[i-1], CleanupDominator); + + // Destroy the placeholder if we made one. + if (CleanupDominator) + CleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 884ce96859..25b0abbc03 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -1,9 +1,8 @@ //===--- CGExprCXX.cpp - Emit LLVM Code for C++ expressions ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,7 +18,6 @@ #include "ConstantEmitter.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -42,13 +40,11 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && "Trying to emit a member or operator call expr on a static method!"); - ASTContext &C = CGF.getContext(); // Push the this ptr. const CXXRecordDecl *RD = CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(MD); - Args.add(RValue::get(This), - RD ? C.getPointerType(C.getTypeDeclType(RD)) : C.VoidPtrTy); + Args.add(RValue::get(This), CGF.getTypes().DeriveThisType(RD, MD)); // If there is an implicit parameter (e.g. VTT), emit it. if (ImplicitParam) { @@ -56,7 +52,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); unsigned PrefixSize = Args.size() - 1; // And the rest of the call args. 
@@ -94,14 +90,14 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( } RValue CodeGenFunction::EmitCXXDestructorCall( - const CXXDestructorDecl *DD, const CGCallee &Callee, llvm::Value *This, - llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, - StructorType Type) { + GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) { CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, - ImplicitParamTy, CE, Args, nullptr); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), - Callee, ReturnValueSlot(), Args); + commonEmitCXXMemberOrOperatorCall(*this, cast<CXXMethodDecl>(Dtor.getDecl()), + This, ImplicitParam, ImplicitParamTy, CE, + Args, nullptr); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee, + ReturnValueSlot(), Args); } RValue CodeGenFunction::EmitCXXPseudoDestructorExpr( @@ -253,13 +249,25 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( This = EmitLValue(Base); } + if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { + // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's + // constructing a new complete object of type Ctor. 
+ assert(!RtlArgs); + assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); + CallArgList Args; + commonEmitCXXMemberOrOperatorCall( + *this, Ctor, This.getPointer(), /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); + + EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, This.getAddress(), Args, + AggValueSlot::DoesNotOverlap, CE->getExprLoc(), + /*NewPointerIsChecked=*/false); + return RValue::get(nullptr); + } if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) { if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr); - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isDefaultConstructor()) - return RValue::get(nullptr); - if (!MD->getParent()->mayInsertExtraPadding()) { if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) { // We don't like to generate the trivial copy/move assignment operator @@ -272,20 +280,6 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer()); } - - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) { - // Trivial move and copy ctor are the same. - assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - const Expr *Arg = *CE->arg_begin(); - LValue RHS = EmitLValue(Arg); - LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType()); - // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's - // constructing a new complete object of type Ctor. 
- EmitAggregateCopy(Dest, RHS, Arg->getType(), - AggValueSlot::DoesNotOverlap); - return RValue::get(This.getPointer()); - } llvm_unreachable("unknown trivial member function"); } } @@ -296,10 +290,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const CGFunctionInfo *FInfo = nullptr; if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, StructorType::Complete); - else if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(CalleeDecl)) - FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Ctor, StructorType::Complete); + GlobalDecl(Dtor, Dtor_Complete)); else FInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(CalleeDecl); @@ -322,14 +313,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck( - isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), - /*Alignment=*/CharUnits::Zero(), SkippedChecks); - - // FIXME: Uses of 'MD' past this point need to be audited. We may need to use - // 'CalleeDecl' instead. + EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, This.getPointer(), + C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkippedChecks); // C++ [class.virtual]p12: // Explicit qualification with the scope operator (5.1) suppresses the @@ -339,7 +325,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( // because then we know what the type is. 
bool UseVirtualCall = CanUseVirtualCall && !DevirtualizedMethod; - if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(MD)) { + if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) { assert(CE->arg_begin() == CE->arg_end() && "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); @@ -348,33 +334,29 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( *this, Dtor, Dtor_Complete, This.getAddress(), cast<CXXMemberCallExpr>(CE)); } else { + GlobalDecl GD(Dtor, Dtor_Complete); CGCallee Callee; - if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) - Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty); + if (getLangOpts().AppleKext && Dtor->isVirtual() && HasQualifier) + Callee = BuildAppleKextVirtualCall(Dtor, Qualifier, Ty); else if (!DevirtualizedMethod) - Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty), - GlobalDecl(Dtor, Dtor_Complete)); + Callee = + CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD, FInfo, Ty), GD); else { - const CXXDestructorDecl *DDtor = - cast<CXXDestructorDecl>(DevirtualizedMethod); - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), - GlobalDecl(DDtor, Dtor_Complete)); + Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD); } - EmitCXXMemberOrOperatorCall( - CalleeDecl, Callee, ReturnValue, This.getPointer(), - /*ImplicitParam=*/nullptr, QualType(), CE, nullptr); + + EmitCXXDestructorCall(GD, Callee, This.getPointer(), + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr); } return RValue::get(nullptr); } + // FIXME: Uses of 'MD' past this point need to be audited. We may need to use + // 'CalleeDecl' instead. 
+ CGCallee Callee; - if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), - GlobalDecl(Ctor, Ctor_Complete)); - } else if (UseVirtualCall) { + if (UseVirtualCall) { Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && @@ -454,8 +436,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = - RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); @@ -633,12 +614,10 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, case CXXConstructExpr::CK_NonVirtualBase: Type = Ctor_Base; - } + } - // Call the constructor. - EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, - Dest.getAddress(), E, Dest.mayOverlap(), - Dest.isSanitizerChecked()); + // Call the constructor. + EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, Dest, E); } } @@ -702,9 +681,9 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. 
numElements = - ConstantEmitter(CGF).tryEmitAbstract(e->getArraySize(), e->getType()); + ConstantEmitter(CGF).tryEmitAbstract(*e->getArraySize(), e->getType()); if (!numElements) - numElements = CGF.EmitScalarExpr(e->getArraySize()); + numElements = CGF.EmitScalarExpr(*e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); // The number of elements can be have an arbitrary integer type; @@ -714,7 +693,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // important way: if the count is negative, it's an error even if // the cookie size would bring the total size >= 0. bool isSigned - = e->getArraySize()->getType()->isSignedIntegerOrEnumerationType(); + = (*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType(); llvm::IntegerType *numElementsType = cast<llvm::IntegerType>(numElements->getType()); unsigned numElementsWidth = numElementsType->getBitWidth(); @@ -866,7 +845,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // can be ignored because the result shouldn't be used if // allocation fails. 
if (typeSizeMultiplier != 1) { - llvm::Value *umul_with_overflow + llvm::Function *umul_with_overflow = CGF.CGM.getIntrinsic(llvm::Intrinsic::umul_with_overflow, CGF.SizeTy); llvm::Value *tsmV = @@ -906,7 +885,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, if (cookieSize != 0) { sizeWithoutCookie = size; - llvm::Value *uadd_with_overflow + llvm::Function *uadd_with_overflow = CGF.CGM.getIntrinsic(llvm::Intrinsic::uadd_with_overflow, CGF.SizeTy); llvm::Value *cookieSizeV = llvm::ConstantInt::get(CGF.SizeTy, cookieSize); @@ -1293,7 +1272,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, const FunctionDecl *CalleeDecl, const FunctionProtoType *CalleeType, const CallArgList &Args) { - llvm::Instruction *CallOrInvoke; + llvm::CallBase *CallOrInvoke; llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); RValue RV = @@ -1309,15 +1288,8 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, llvm::Function *Fn = dyn_cast<llvm::Function>(CalleePtr); if (CalleeDecl->isReplaceableGlobalAllocationFunction() && Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { - // FIXME: Add addAttribute to CallSite. - if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke)) - CI->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); - else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke)) - II->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); - else - llvm_unreachable("unexpected kind of call instruction"); + CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::Builtin); } return RV; @@ -1715,10 +1687,16 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { result.getAlignment()); // Emit sanitizer checks for pointer value now, so that in the case of an - // array it was checked only once and not at each constructor call. 
+ // array it was checked only once and not at each constructor call. We may + // have already checked that the pointer is non-null. + // FIXME: If we have an array cookie and a potentially-throwing allocator, + // we'll null check the wrong pointer here. + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, nullCheck); EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, - E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), - result.getPointer(), allocType); + E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), + result.getPointer(), allocType, result.getAlignment(), + SkippedChecks, numElements); EmitNewInitializer(*this, E, allocType, elementTy, result, numElements, allocSizeWithoutCookie); @@ -2253,21 +2231,3 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, return Value; } - -void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) { - LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType()); - - CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); - for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), - e = E->capture_init_end(); - i != e; ++i, ++CurField) { - // Emit initialization - LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); - if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); - } else { - EmitInitializerForField(*CurField, LV, *i); - } - } -} diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 2db693b44c..3ae08edd5a 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -1,9 +1,8 @@ //===--- CGExprComplex.cpp - Emit LLVM Code for Complex Exprs -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -328,15 +327,12 @@ public: Address CodeGenFunction::emitAddrOfRealComponent(Address addr, QualType complexType) { - CharUnits offset = CharUnits::Zero(); - return Builder.CreateStructGEP(addr, 0, offset, addr.getName() + ".realp"); + return Builder.CreateStructGEP(addr, 0, addr.getName() + ".realp"); } Address CodeGenFunction::emitAddrOfImagComponent(Address addr, QualType complexType) { - QualType eltType = complexType->castAs<ComplexType>()->getElementType(); - CharUnits offset = getContext().getTypeSizeInChars(eltType); - return Builder.CreateStructGEP(addr, 1, offset, addr.getName() + ".imagp"); + return Builder.CreateStructGEP(addr, 1, addr.getName() + ".imagp"); } /// EmitLoadOfLValue - Given an RValue reference for a complex, emit code to @@ -513,6 +509,8 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: @@ -628,12 +626,13 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, Args, cast<FunctionType>(FQTy.getTypePtr()), false); llvm::FunctionType *FTy = CGF.CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGF.CGM.CreateBuiltinFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGF.CGM.CreateRuntimeFunction( + FTy, LibCallName, llvm::AttributeList(), true); CGCallee Callee = CGCallee::forDirect(Func, FQTy->getAs<FunctionProtoType>()); - llvm::Instruction *Call; + llvm::CallBase *Call; RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call); - 
cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC()); + Call->setCallingConv(CGF.CGM.getRuntimeCC()); return Res.getComplexVal(); } diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index c9475840ae..4c0256fe38 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -1,9 +1,8 @@ //===--- CGExprConstant.cpp - Emit LLVM Code from Constant Expressions ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -460,7 +459,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, CharUnits BaseOffset = Layout.getBaseClassOffset(BD); Bases.push_back(BaseInfo(BD, BaseOffset, BaseNo)); } - std::stable_sort(Bases.begin(), Bases.end()); + llvm::stable_sort(Bases); for (unsigned I = 0, N = Bases.size(); I != N; ++I) { BaseInfo &Base = Bases[I]; @@ -701,10 +700,12 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, return llvm::ConstantStruct::get(SType, Elements); } -/// This class only needs to handle two cases: -/// 1) Literals (this is used by APValue emission to emit literals). -/// 2) Arrays, structs and unions (outside C++11 mode, we don't currently -/// constant fold these types). +// This class only needs to handle arrays, structs and unions. Outside C++11 +// mode, we don't currently constant fold those types. All other types are +// handled by constant folding. +// +// Constant folding is currently missing support for a few features supported +// here: CK_ToUnion, CK_ReinterpretMemberPointer, and DesignatedInitUpdateExpr. 
class ConstExprEmitter : public StmtVisitor<ConstExprEmitter, llvm::Constant*, QualType> { CodeGenModule &CGM; @@ -875,6 +876,8 @@ public: case CK_FloatingCast: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: case CK_ZeroToOCLOpaqueType: return nullptr; } @@ -1077,6 +1080,7 @@ public: } llvm::Constant *VisitStringLiteral(StringLiteral *E, QualType T) { + // This is a string literal initializing an array in an initializer. return CGM.GetConstantArrayFromStringLiteral(E); } @@ -1609,6 +1613,7 @@ private: ConstantLValue VisitConstantExpr(const ConstantExpr *E); ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCBoxedExpr(const ObjCBoxedExpr *E); ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); @@ -1650,17 +1655,7 @@ private: llvm::Constant *ConstantLValueEmitter::tryEmit() { const APValue::LValueBase &base = Value.getLValueBase(); - // Certain special array initializers are represented in APValue - // as l-values referring to the base expression which generates the - // array. This happens with e.g. string literals. These should - // probably just get their own representation kind in APValue. - if (DestType->isArrayType()) { - assert(!hasNonZeroOffset() && "offset on array initializer"); - auto expr = const_cast<Expr*>(base.get<const Expr*>()); - return ConstExprEmitter(Emitter).Visit(expr, DestType); - } - - // Otherwise, the destination type should be a pointer or reference + // The destination type should be a pointer or reference // type, but it might also be a cast thereof. // // FIXME: the chain of casts required should be reflected in the APValue. 
@@ -1703,31 +1698,20 @@ ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) { auto offset = getOffset(); // If we're producing a pointer, this is easy. - if (auto destPtrTy = cast<llvm::PointerType>(destTy)) { - if (Value.isNullPointer()) { - // FIXME: integer offsets from non-zero null pointers. - return CGM.getNullPointer(destPtrTy, DestType); - } - - // Convert the integer to a pointer-sized integer before converting it - // to a pointer. - // FIXME: signedness depends on the original integer type. - auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); - llvm::Constant *C = offset; - C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, - /*isSigned*/ false); - C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); - return C; - } - - // Otherwise, we're basically returning an integer constant. - - // FIXME: this does the wrong thing with ptrtoint of a null pointer, - // but since we don't know the original pointer type, there's not much - // we can do about it. - - auto C = getOffset(); - C = llvm::ConstantExpr::getIntegerCast(C, destTy, /*isSigned*/ false); + auto destPtrTy = cast<llvm::PointerType>(destTy); + if (Value.isNullPointer()) { + // FIXME: integer offsets from non-zero null pointers. + return CGM.getNullPointer(destPtrTy, DestType); + } + + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + // FIXME: signedness depends on the original integer type. 
+ auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); + llvm::Constant *C = offset; + C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, + /*isSigned*/ false); + C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); return C; } @@ -1781,25 +1765,29 @@ ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return CGM.GetAddrOfConstantStringFromObjCEncode(E); } +static ConstantLValue emitConstantObjCStringLiteral(const StringLiteral *S, + QualType T, + CodeGenModule &CGM) { + auto C = CGM.getObjCRuntime().GenerateConstantString(S); + return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(T)); +} + ConstantLValue ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { - auto C = CGM.getObjCRuntime().GenerateConstantString(E->getString()); - return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(E->getType())); + return emitConstantObjCStringLiteral(E->getString(), E->getType(), CGM); } ConstantLValue -ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { - if (auto CGF = Emitter.CGF) { - LValue Res = CGF->EmitPredefinedLValue(E); - return cast<ConstantAddress>(Res.getAddress()); - } - - auto kind = E->getIdentKind(); - if (kind == PredefinedExpr::PrettyFunction) { - return CGM.GetAddrOfConstantCString("top level", ".tmp"); - } +ConstantLValueEmitter::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { + assert(E->isExpressibleAsConstantInitializer() && + "this boxed expression can't be emitted as a compile-time constant"); + auto *SL = cast<StringLiteral>(E->getSubExpr()->IgnoreParenCasts()); + return emitConstantObjCStringLiteral(SL, E->getType(), CGM); +} - return CGM.GetAddrOfConstantCString("", ".tmp"); +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + return CGM.GetAddrOfConstantStringFromLiteral(E->getFunctionName()); } ConstantLValue @@ -1873,6 +1861,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, return 
ConstantLValueEmitter(*this, Value, DestType).tryEmit(); case APValue::Int: return llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getInt()); + case APValue::FixedPoint: + return llvm::ConstantInt::get(CGM.getLLVMContext(), + Value.getFixedPoint().getValue()); case APValue::ComplexInt: { llvm::Constant *Complex[2]; diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 1c14d4c99a..777e1dc893 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -1,9 +1,8 @@ //===--- CGExprScalar.cpp - Emit LLVM Code for Scalar Exprs ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -125,6 +124,21 @@ struct BinOpInfo { return CFP->isZero(); return true; } + + /// Check if either operand is a fixed point type or integer type, with at + /// least one being a fixed point type. In any case, this + /// operation did not follow usual arithmetic conversion and both operands may + /// not be the same. + bool isFixedPointBinOp() const { + // We cannot simply check the result type since comparison operations return + // an int. + if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) { + QualType LHSType = BinOp->getLHS()->getType(); + QualType RHSType = BinOp->getRHS()->getType(); + return LHSType->isFixedPointType() || RHSType->isFixedPointType(); + } + return false; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -349,8 +363,14 @@ public: SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); + /// Convert between either a fixed point and other fixed point or fixed point + /// and an integer. 
Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc); + Value *EmitFixedPointConversion(Value *Src, FixedPointSemantics &SrcFixedSema, + FixedPointSemantics &DstFixedSema, + SourceLocation Loc, + bool DstIsInteger = false); /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. @@ -729,6 +749,9 @@ public: return Builder.CreateOr(Ops.LHS, Ops.RHS, "or"); } + // Helper functions for fixed point binary operations. + Value *EmitFixedPointBinOp(const BinOpInfo &Ops); + BinOpInfo EmitBinOps(const BinaryOperator *E); LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*F)(const BinOpInfo &), @@ -1205,17 +1228,25 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // TODO(leonardchan): When necessary, add another if statement checking for // conversions to fixed point types from other types. if (SrcType->isFixedPointType()) { - if (DstType->isFixedPointType()) { - return EmitFixedPointConversion(Src, SrcType, DstType, Loc); - } else if (DstType->isBooleanType()) { + if (DstType->isBooleanType()) + // It is important that we check this before checking if the dest type is + // an integer because booleans are technically integer types. // We do not need to check the padding bit on unsigned types if unsigned // padding is enabled because overflow into this bit is undefined // behavior. return Builder.CreateIsNotNull(Src, "tobool"); - } + if (DstType->isFixedPointType() || DstType->isIntegerType()) + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); llvm_unreachable( - "Unhandled scalar conversion involving a fixed point type."); + "Unhandled scalar conversion from a fixed point type to another type."); + } else if (DstType->isFixedPointType()) { + if (SrcType->isIntegerType()) + // This also includes converting booleans and enums to fixed point types. 
+ return EmitFixedPointConversion(Src, SrcType, DstType, Loc); + + llvm_unreachable( + "Unhandled scalar conversion to a fixed point type from another type."); } QualType NoncanonicalSrcType = SrcType; @@ -1423,17 +1454,21 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc) { - using llvm::APInt; - using llvm::ConstantInt; - using llvm::Value; - - assert(SrcTy->isFixedPointType()); - assert(DstTy->isFixedPointType()); - FixedPointSemantics SrcFPSema = CGF.getContext().getFixedPointSemantics(SrcTy); FixedPointSemantics DstFPSema = CGF.getContext().getFixedPointSemantics(DstTy); + return EmitFixedPointConversion(Src, SrcFPSema, DstFPSema, Loc, + DstTy->isIntegerType()); +} + +Value *ScalarExprEmitter::EmitFixedPointConversion( + Value *Src, FixedPointSemantics &SrcFPSema, FixedPointSemantics &DstFPSema, + SourceLocation Loc, bool DstIsInteger) { + using llvm::APInt; + using llvm::ConstantInt; + using llvm::Value; + unsigned SrcWidth = SrcFPSema.getWidth(); unsigned DstWidth = DstFPSema.getWidth(); unsigned SrcScale = SrcFPSema.getScale(); @@ -1446,13 +1481,26 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, Value *Result = Src; unsigned ResultWidth = SrcWidth; - if (!DstFPSema.isSaturated()) { - // Downscale. - if (DstScale < SrcScale) - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + // Downscale. + if (DstScale < SrcScale) { + // When converting to integers, we round towards zero. For negative numbers, + // right shifting rounds towards negative infinity. In this case, we can + // just round up before shifting. 
+ if (DstIsInteger && SrcIsSigned) { + Value *Zero = llvm::Constant::getNullValue(Result->getType()); + Value *IsNegative = Builder.CreateICmpSLT(Result, Zero); + Value *LowBits = ConstantInt::get( + CGF.getLLVMContext(), APInt::getLowBitsSet(ResultWidth, SrcScale)); + Value *Rounded = Builder.CreateAdd(Result, LowBits); + Result = Builder.CreateSelect(IsNegative, Rounded, Result); + } + + Result = SrcIsSigned + ? Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") + : Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + if (!DstFPSema.isSaturated()) { // Resize. Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); @@ -1462,14 +1510,11 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } else { // Adjust the number of fractional bits. if (DstScale > SrcScale) { - ResultWidth = SrcWidth + DstScale - SrcScale; + // Compare to DstWidth to prevent resizing twice. + ResultWidth = std::max(SrcWidth + DstScale - SrcScale, DstWidth); llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth); Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); - } else if (DstScale < SrcScale) { - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); } // Handle saturation. @@ -1493,7 +1538,8 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } // Resize the integer part to get the final destination size. - Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + if (ResultWidth != DstWidth) + Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); } return Result; } @@ -2017,6 +2063,12 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } } + // Update heapallocsite metadata when there is an explicit cast. 
+ if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(Src)) + if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) + CGF.getDebugInfo()-> + addHeapAllocSiteMetadata(CI, CE->getType(), CE->getExprLoc()); + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { @@ -2200,6 +2252,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc()); + case CK_FixedPointToIntegral: + assert(E->getType()->isFixedPointType() && + "Expected src type to be fixed point type"); + assert(DestTy->isIntegerType() && "Expected dest type to be an integer"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + + case CK_IntegralToFixedPoint: + assert(E->getType()->isIntegerType() && + "Expected src type to be an integer"); + assert(DestTy->isFixedPointType() && + "Expected dest type to be fixed point type"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + case CK_IntegralCast: { ScalarConversionOpts Opts; if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) { @@ -2527,14 +2594,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LV, RValue::get(atomicPHI), RValue::get(value), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), type); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return isPre ? 
value : input; } @@ -2881,14 +2948,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LHSLV, RValue::get(atomicPHI), RValue::get(Result), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), LHSTy); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return LHSLV; } @@ -3090,7 +3157,8 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { llvm::Type *argTypes[] = { CGF.Int64Ty, CGF.Int64Ty, Int8Ty, Int8Ty }; llvm::FunctionType *handlerTy = llvm::FunctionType::get(CGF.Int64Ty, argTypes, true); - llvm::Value *handler = CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); + llvm::FunctionCallee handler = + CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); // Sign extend the args to 64-bit, so that we can use the same handler for // all types of overflow. @@ -3338,9 +3406,119 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateAdd(op.LHS, op.RHS, "add"); } +/// The resulting value must be calculated with exact precision, so the operands +/// may not be the same type. +Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { + using llvm::APSInt; + using llvm::ConstantInt; + + const auto *BinOp = cast<BinaryOperator>(op.E); + + // The result is a fixed point type and at least one of the operands is fixed + // point while the other is either fixed point or an int. 
This resulting type + // should be determined by Sema::handleFixedPointConversions(). + QualType ResultTy = op.Ty; + QualType LHSTy = BinOp->getLHS()->getType(); + QualType RHSTy = BinOp->getRHS()->getType(); + ASTContext &Ctx = CGF.getContext(); + Value *LHS = op.LHS; + Value *RHS = op.RHS; + + auto LHSFixedSema = Ctx.getFixedPointSemantics(LHSTy); + auto RHSFixedSema = Ctx.getFixedPointSemantics(RHSTy); + auto ResultFixedSema = Ctx.getFixedPointSemantics(ResultTy); + auto CommonFixedSema = LHSFixedSema.getCommonSemantics(RHSFixedSema); + + // Convert the operands to the full precision type. + Value *FullLHS = EmitFixedPointConversion(LHS, LHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + Value *FullRHS = EmitFixedPointConversion(RHS, RHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + + // Perform the actual addition. + Value *Result; + switch (BinOp->getOpcode()) { + case BO_Add: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? llvm::Intrinsic::sadd_sat + : llvm::Intrinsic::uadd_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateAdd(FullLHS, FullRHS); + } + break; + } + case BO_Sub: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? llvm::Intrinsic::ssub_sat + : llvm::Intrinsic::usub_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateSub(FullLHS, FullRHS); + } + break; + } + case BO_LT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLT(FullLHS, FullRHS) + : Builder.CreateICmpULT(FullLHS, FullRHS); + case BO_GT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSGT(FullLHS, FullRHS) + : Builder.CreateICmpUGT(FullLHS, FullRHS); + case BO_LE: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLE(FullLHS, FullRHS) + : Builder.CreateICmpULE(FullLHS, FullRHS); + case BO_GE: + return CommonFixedSema.isSigned() ? 
Builder.CreateICmpSGE(FullLHS, FullRHS) + : Builder.CreateICmpUGE(FullLHS, FullRHS); + case BO_EQ: + // For equality operations, we assume any padding bits on unsigned types are + // zero'd out. They could be overwritten through non-saturating operations + // that cause overflow, but this leads to undefined behavior. + return Builder.CreateICmpEQ(FullLHS, FullRHS); + case BO_NE: + return Builder.CreateICmpNE(FullLHS, FullRHS); + case BO_Mul: + case BO_Div: + case BO_Shl: + case BO_Shr: + case BO_Cmp: + case BO_LAnd: + case BO_LOr: + case BO_MulAssign: + case BO_DivAssign: + case BO_AddAssign: + case BO_SubAssign: + case BO_ShlAssign: + case BO_ShrAssign: + llvm_unreachable("Found unimplemented fixed point binary operation"); + case BO_PtrMemD: + case BO_PtrMemI: + case BO_Rem: + case BO_Xor: + case BO_And: + case BO_Or: + case BO_Assign: + case BO_RemAssign: + case BO_AndAssign: + case BO_XorAssign: + case BO_OrAssign: + case BO_Comma: + llvm_unreachable("Found unsupported binary operation for fixed point types."); + } + + // Convert to the result type. + return EmitFixedPointConversion(Result, CommonFixedSema, ResultFixedSema, + BinOp->getExprLoc()); +} + Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // The LHS is always a pointer if either side is. 
if (!op.LHS->getType()->isPointerTy()) { @@ -3372,6 +3550,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateSub(op.LHS, op.RHS, "sub"); } @@ -3591,8 +3772,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Result = CGF.CGM.getCXXABI().EmitMemberPointerComparison( CGF, LHS, RHS, MPT, E->getOpcode() == BO_NE); } else if (!LHSTy->isAnyComplexType() && !RHSTy->isAnyComplexType()) { - Value *LHS = Visit(E->getLHS()); - Value *RHS = Visit(E->getRHS()); + BinOpInfo BOInfo = EmitBinOps(E); + Value *LHS = BOInfo.LHS; + Value *RHS = BOInfo.RHS; // If AltiVec, the comparison results in a numeric type, so we use // intrinsics comparing vectors and giving 0 or 1 as a result @@ -3670,7 +3852,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } - if (LHS->getType()->isFPOrFPVectorTy()) { + if (BOInfo.isFixedPointBinOp()) { + Result = EmitFixedPointBinOp(BOInfo); + } else if (LHS->getType()->isFPOrFPVectorTy()) { Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); } else if (LHSTy->hasSignedIntegerRepresentation()) { Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp index b5375ffb8d..d7e2676307 100644 --- a/lib/CodeGen/CGGPUBuiltin.cpp +++ b/lib/CodeGen/CGGPUBuiltin.cpp @@ -1,9 +1,8 @@ //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index fd0a9c773a..b2bc42bfa0 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -1,9 +1,8 @@ //===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -19,138 +18,396 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) { +MDNode * +LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) { + LLVMContext &Ctx = Header->getContext(); + SmallVector<Metadata *, 4> NewLoopProperties; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + NewLoopProperties.push_back(TempNode.get()); + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && - Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && - Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && - Attrs.PipelineInitiationInterval == 0 && - Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified && - Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc) - return nullptr; + MDNode *LoopID = MDNode::getDistinct(Ctx, 
NewLoopProperties); + LoopID->replaceOperandWith(0, LoopID); + return LoopID; +} + +MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.PipelineDisabled) + Enabled = false; + else if (Attrs.PipelineInitiationInterval != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 1))})); + LoopProperties = NewLoopProperties; + } + return createLoopPropertiesMetadata(LoopProperties); + } SmallVector<Metadata *, 4> Args; - // Reserve operand 0 for loop id self reference. - auto TempNode = MDNode::getTemporary(Ctx, None); + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); - // If we have a valid start debug location for the loop, add it. - if (StartLoc) { - Args.push_back(StartLoc.getAsMDNode()); - - // If we also have a valid end debug location for the loop, add it. 
- if (EndLoc) - Args.push_back(EndLoc.getAsMDNode()); - } - - // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.VectorizeWidth))}; + if (Attrs.PipelineInitiationInterval > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting interleave.count - if (Attrs.InterleaveCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.interleave.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.InterleaveCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); + // No follow-up: This is the last transformation. + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = None; + else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || + Attrs.UnrollCount != 0) + Enabled = true; + + if (Enabled != true) { + // createFullUnrollMetadata will already have added llvm.loop.unroll.disable + // if unrolling is disabled. + return createPipeliningMetadata(Attrs, LoopProperties, HasUserTransforms); } + SmallVector<Metadata *, 4> FollowupLoopProperties; + + // Apply all loop properties to the unrolled loop. + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't unroll an already unrolled loop. 
+ FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting unroll.count if (Attrs.UnrollCount > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; + llvm::Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting unroll_and_jam.count - if (Attrs.UnrollAndJamCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))}; + // Setting unroll.full or unroll.disable + if (Attrs.UnrollEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.VectorizeEnable == - LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup})); - // Setting unroll.full or unroll.disable - if (Attrs.UnrollEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll.enable"; - else if (Attrs.UnrollEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll.full"; - else - Name = 
"llvm.loop.unroll.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; - Args.push_back(MDNode::get(Ctx, Vals)); + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollAndJamEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable || + Attrs.UnrollAndJamCount != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back(MDNode::get( + Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + LoopProperties = NewLoopProperties; + } + return createPartialUnrollMetadata(Attrs, LoopProperties, + HasUserTransforms); } - // Setting unroll_and_jam.full or unroll_and_jam.disable - if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll_and_jam.enable"; - else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll_and_jam.full"; - else - Name = "llvm.loop.unroll_and_jam.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = 
MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting unroll_and_jam.count + if (Attrs.UnrollAndJamCount > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.UnrollAndJamCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == - LoopAttributes::Enable)))}; + if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.IsParallel) { - AccGroup = MDNode::getDistinct(Ctx, {}); + if (FollowupHasTransforms) Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"), + Followup})); + + if (UnrollAndJamInnerFollowup) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"), + UnrollAndJamInnerFollowup})); + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.VectorizeEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + 
NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createUnrollAndJamMetadata(Attrs, LoopProperties, HasUserTransforms); + } + + // Apply all loop properties to the vectorized loop. + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't vectorize an already vectorized loop. + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting vectorize.width + if (Attrs.VectorizeWidth > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth))}; + Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineDisabled) { + // Setting interleave.count + if (Attrs.InterleaveCount > 0) { Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.disable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.PipelineDisabled == true)))}; + MDString::get(Ctx, "llvm.loop.interleave.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.InterleaveCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineInitiationInterval > 0) { + // Setting vectorize.enable + if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { Metadata *Vals[] = { - MDString::get(Ctx, 
"llvm.loop.pipeline.initiationinterval"), + MDString::get(Ctx, "llvm.loop.vectorize.enable"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; + llvm::Type::getInt1Ty(Ctx), + (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Set the first operand to itself. + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup})); + MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; return LoopID; } +MDNode * +LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.DistributeEnable == LoopAttributes::Disable) + Enabled = false; + if (Attrs.DistributeEnable == LoopAttributes::Enable) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createLoopVectorizeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + bool FollowupHasTransforms = false; + MDNode *Followup = + createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), + (Attrs.DistributeEnable == 
LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup})); + + MDNode *LoopID = MDNode::get(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + LoopProperties = NewLoopProperties; + } + return createLoopDistributeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))); + + // No follow-up: there is no loop after full unrolling. + // TODO: Warn if there are transformations after full unrolling. + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createMetadata( + const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties, + bool &HasUserTransforms) { + SmallVector<Metadata *, 3> LoopProperties; + + // If we have a valid start debug location for the loop, add it. 
+ if (StartLoc) { + LoopProperties.push_back(StartLoc.getAsMDNode()); + + // If we also have a valid end debug location for the loop, add it. + if (EndLoc) + LoopProperties.push_back(EndLoc.getAsMDNode()); + } + + assert(!!AccGroup == Attrs.IsParallel && + "There must be an access group iff the loop is parallel"); + if (Attrs.IsParallel) { + LLVMContext &Ctx = Header->getContext(); + LoopProperties.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + } + + LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), + AdditionalLoopProperties.end()); + return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); +} + LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), @@ -174,15 +431,114 @@ void LoopAttributes::clear() { } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) - : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = - createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent) + : Header(Header), Attrs(Attrs), StartLoc(StartLoc), EndLoc(EndLoc), + Parent(Parent) { + + if (Attrs.IsParallel) { + // Create an access group for this loop. 
+ LLVMContext &Ctx = Header->getContext(); + AccGroup = MDNode::getDistinct(Ctx, {}); + } + + if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && + Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && + Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && + Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizeEnable == LoopAttributes::Unspecified && + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && + !EndLoc) + return; + + TempLoopID = MDNode::getTemporary(Header->getContext(), None); +} + +void LoopInfo::finish() { + // We did not annotate the loop body instructions because there are no + // attributes for this loop. + if (!TempLoopID) + return; + + MDNode *LoopID; + LoopAttributes CurLoopAttr = Attrs; + LLVMContext &Ctx = Header->getContext(); + + if (Parent && (Parent->Attrs.UnrollAndJamEnable || + Parent->Attrs.UnrollAndJamCount != 0)) { + // Parent unroll-and-jams this loop. + // Split the transformations in those that happens before the unroll-and-jam + // and those after. 
+ + LoopAttributes BeforeJam, AfterJam; + + BeforeJam.IsParallel = AfterJam.IsParallel = Attrs.IsParallel; + + BeforeJam.VectorizeWidth = Attrs.VectorizeWidth; + BeforeJam.InterleaveCount = Attrs.InterleaveCount; + BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; + BeforeJam.DistributeEnable = Attrs.DistributeEnable; + + switch (Attrs.UnrollEnable) { + case LoopAttributes::Unspecified: + case LoopAttributes::Disable: + BeforeJam.UnrollEnable = Attrs.UnrollEnable; + AfterJam.UnrollEnable = Attrs.UnrollEnable; + break; + case LoopAttributes::Full: + BeforeJam.UnrollEnable = LoopAttributes::Full; + break; + case LoopAttributes::Enable: + AfterJam.UnrollEnable = LoopAttributes::Enable; + break; + } + + AfterJam.UnrollCount = Attrs.UnrollCount; + AfterJam.PipelineDisabled = Attrs.PipelineDisabled; + AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; + + // If this loop is subject of an unroll-and-jam by the parent loop, and has + // an unroll-and-jam annotation itself, we have to decide whether to first + // apply the parent's unroll-and-jam or this loop's unroll-and-jam. The + // UnrollAndJam pass processes loops from inner to outer, so we apply the + // inner first. + BeforeJam.UnrollAndJamCount = Attrs.UnrollAndJamCount; + BeforeJam.UnrollAndJamEnable = Attrs.UnrollAndJamEnable; + + // Set the inner followup metadata to process by the outer loop. Only + // consider the first inner loop. + if (!Parent->UnrollAndJamInnerFollowup) { + // Splitting the attributes into a BeforeJam and an AfterJam part will + // stop 'llvm.loop.isvectorized' (generated by vectorization in BeforeJam) + // to be forwarded to the AfterJam part. We detect the situation here and + // add it manually. 
+ SmallVector<Metadata *, 1> BeforeLoopProperties; + if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) + BeforeLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool InnerFollowupHasTransform = false; + MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties, + InnerFollowupHasTransform); + if (InnerFollowupHasTransform) + Parent->UnrollAndJamInnerFollowup = InnerFollowup; + } + + CurLoopAttr = BeforeJam; + } + + bool HasUserTransforms = false; + LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); + TempLoopID->replaceAllUsesWith(LoopID); } void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { - Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc)); + Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Active.empty() ? nullptr : &Active.back())); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } @@ -209,13 +565,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, // Translate opencl_unroll_hint attribute argument to // equivalent LoopHintAttr enums. // OpenCL v2.0 s6.11.5: - // 0 - full unroll (no argument). + // 0 - enable unroll (no argument). // 1 - disable unroll. // other positive integer n - unroll by n. 
if (OpenCLHint) { ValueInt = OpenCLHint->getUnrollHint(); if (ValueInt == 0) { - State = LoopHintAttr::Full; + State = LoopHintAttr::Enable; } else if (ValueInt != 1) { Option = LoopHintAttr::UnrollCount; State = LoopHintAttr::Numeric; @@ -365,6 +721,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); + Active.back().finish(); Active.pop_back(); } diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 84ba03bfb0..35d0e00527 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -1,9 +1,8 @@ //===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -79,10 +78,11 @@ class LoopInfo { public: /// Construct a new LoopInfo for the loop with entry Header. LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent); /// Get the loop id metadata for this loop. - llvm::MDNode *getLoopID() const { return LoopID; } + llvm::MDNode *getLoopID() const { return TempLoopID.get(); } /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } @@ -93,15 +93,92 @@ public: /// Return this loop's access group or nullptr if it does not have one. llvm::MDNode *getAccessGroup() const { return AccGroup; } + /// Create the loop's metadata. Must be called after its nested loops have + /// been processed. 
+ void finish(); + private: /// Loop ID metadata. - llvm::MDNode *LoopID; + llvm::TempMDTuple TempLoopID; /// Header block of this loop. llvm::BasicBlock *Header; /// The attributes for this loop. LoopAttributes Attrs; /// The access group for memory accesses parallel to this loop. llvm::MDNode *AccGroup = nullptr; + /// Start location of this loop. + llvm::DebugLoc StartLoc; + /// End location of this loop. + llvm::DebugLoc EndLoc; + /// The next outer loop, or nullptr if this is the outermost loop. + LoopInfo *Parent; + /// If this loop has unroll-and-jam metadata, this can be set by the inner + /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner + /// metadata. + llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; + + /// Create a LoopID without any transformations. + llvm::MDNode * + createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties); + + /// Create a LoopID for transformations. + /// + /// The methods call each other in case multiple transformations are applied + /// to a loop. The transformation first to be applied will use LoopID of the + /// next transformation in its followup attribute. + /// + /// @param Attrs The loop's transformations. + /// @param LoopProperties Non-transformation properties such as debug + /// location, parallel accesses and disabled + /// transformations. These are added to the returned + /// LoopID. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. + /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation or followup-attribute. 
+ /// @{ + llvm::MDNode * + createPipeliningMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createPartialUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createUnrollAndJamMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopVectorizeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopDistributeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createFullUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + /// @} + + /// Create a LoopID for this loop, including transformation-unspecific + /// metadata such as debug location. + /// + /// @param Attrs This loop's attributes and transformations. + /// @param LoopProperties Additional non-transformation properties to add + /// to the LoopID, such as transformation-specific + /// metadata that are not covered by @p Attrs. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. + /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation. + llvm::MDNode *createMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); }; /// A stack of loop information corresponding to loop nesting levels. 
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index c6a96a9126..caf62d2ac9 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -1,9 +1,8 @@ //===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/NonTrivialTypeVisitor.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/Support/ScopedPrinter.h" #include <array> @@ -84,23 +84,22 @@ struct CopyStructVisitor : StructVisitor<Derived>, template <class... Ts> void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, - Ts &&... Args) { + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... Args) { if (PCK) asDerived().flushTrivialFields(std::forward<Ts>(Args)...); } template <class... Ts> void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... 
Args) { if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) { asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD, - CurStructOffsset, std::forward<Ts>(Args)...); + CurStructOffset, std::forward<Ts>(Args)...); return; } - Super::visitWithKind(PCK, FT, FD, CurStructOffsset, + Super::visitWithKind(PCK, FT, FD, CurStructOffset, std::forward<Ts>(Args)...); } @@ -140,8 +139,8 @@ struct CopyStructVisitor : StructVisitor<Derived>, // <alignment-info> ::= <dst-alignment> ["_" <src-alignment>] // <struct-field-info> ::= <field-info>+ // <field-info> ::= <struct-or-scalar-field-info> | <array-field-info> -// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> | -// <trivial-field-info> +// <struct-or-scalar-field-info> ::= "_S" <struct-field-info> | +// <strong-field-info> | <trivial-field-info> // <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n" // <num-elements> <innermost-element-info> "_AE" // <innermost-element-info> ::= <struct-or-scalar-field-info> @@ -176,6 +175,7 @@ template <class Derived> struct GenFuncNameBase { void visitStruct(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset) { CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + appendStr("_S"); asDerived().visitStructFields(QT, FieldOffset); } @@ -253,11 +253,11 @@ struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>, } void visitVolatileTrivial(QualType FT, const FieldDecl *FD, - CharUnits CurStackOffset) { + CharUnits CurStructOffset) { // Because volatile fields can be bit-fields and are individually copied, // their offset and width are in bits. 
uint64_t OffsetInBits = - this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD); + this->Ctx.toBits(CurStructOffset) + this->getFieldOffsetInBits(FD); this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" + llvm::to_string(getFieldSize(FD, FT, this->Ctx))); } @@ -286,8 +286,7 @@ struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>, using Super = DestructedTypeVisitor<GenDestructorFuncName>; GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment, ASTContext &Ctx) - : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, - Ctx) {} + : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, Ctx) {} void visitWithKind(QualType::DestructionKind DK, QualType FT, const FieldDecl *FD, CharUnits CurStructOffset) { if (const auto *AT = getContext().getAsArrayType(FT)) { @@ -322,19 +321,19 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, // functions. template <class Derived> struct GenFuncBase { template <size_t N> - void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { this->asDerived().callSpecialFunction( - FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs); + FT, CurStructOffset + asDerived().getFieldOffset(FD), Addrs); } template <class FieldKind, size_t N> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { // Non-volatile trivial fields are copied when flushTrivialFields is called. 
if (!FK) - return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, + return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); asDerived().flushTrivialFields(Addrs); @@ -345,7 +344,7 @@ template <class Derived> struct GenFuncBase { QualType BaseEltQT; std::array<Address, N> StartAddrs = Addrs; for (unsigned I = 0; I < N; ++I) - StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD); + StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStructOffset, FD); Address DstAddr = StartAddrs[DstIdx]; llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr); unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity(); @@ -414,8 +413,7 @@ template <class Derived> struct GenFuncBase { if (Offset.getQuantity() == 0) return Addr; Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy); - Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(), - CharUnits::One()); + Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity()); return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy); } @@ -586,15 +584,15 @@ struct GenDestructor : StructVisitor<GenDestructor>, } void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCStrongImprecise( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCWeak( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } void callSpecialFunction(QualType FT, CharUnits Offset, @@ -627,35 +625,35 @@ struct GenDefaultInitialize } void visitARCStrong(QualType QT, const 
FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } template <class FieldKind, size_t... Is> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { if (!FK) - return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + return visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); ASTContext &Ctx = getContext(); CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0)); QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0)); if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) { - GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs); + GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStructOffset, Addrs); return; } llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity()); - Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty); CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal, IsVolatile); @@ -673,24 +671,26 @@ struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> { : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, 
std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal); CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -701,9 +701,9 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], 
CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ -712,15 +712,17 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { /* isInitialization */ true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -731,24 +733,26 @@ struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> { : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), 
SrcVal, false); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -760,9 +764,9 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ -774,15 +778,17 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = 
getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -817,6 +823,29 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, Gen.callFunc(FuncName, QT, Addrs, CGF); } +template <size_t N> std::array<Address, N> createNullAddressArray(); + +template <> std::array<Address, 1> createNullAddressArray() { + return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); +} + +template <> std::array<Address, 2> createNullAddressArray() { + return std::array<Address, 2>({{Address(nullptr, CharUnits::Zero()), + Address(nullptr, CharUnits::Zero())}}); +} + +template <class G, size_t N> +static llvm::Function * +getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, + std::array<CharUnits, N> Alignments, CodeGenModule &CGM) { + QT = IsVolatile ? QT.withVolatile() : QT; + // The following call requires an array of addresses as arguments, but doesn't + // actually use them (it overwrites them with the addresses of the arguments + // of the created function). + return Gen.getFunction(FuncName, QT, createNullAddressArray<N>(), Alignments, + CGM); +} + // Functions to emit calls to the special functions of a non-trivial C struct. 
void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); @@ -828,18 +857,16 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); } -std::string -CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT, - CharUnits Alignment, - bool IsVolatile, - ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialCopyConstructorStr( + QualType QT, CharUnits Alignment, bool IsVolatile, ASTContext &Ctx) { GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx); return GenName.getName(QT, IsVolatile); } -std::string -CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment, - bool IsVolatile, ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialDestructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx) { GenDestructorFuncName GenName("", Alignment, Ctx); return GenName.getName(QT, IsVolatile); } @@ -904,3 +931,69 @@ void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile, *this, std::array<Address, 2>({{DstPtr, SrcPtr}})); } + +llvm::Function *clang::CodeGen::getNonTrivialCStructDefaultConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDefaultInitializeFuncName GenName(DstAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDefaultInitialize(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = 
GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructDestructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDestructorFuncName GenName("__destructor_", DstAlignment, Ctx); + 
std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDestructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 9c66ff0e8f..d5906cf994 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1,9 +1,8 @@ //===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" @@ -22,7 +22,6 @@ #include "clang/Basic/Diagnostic.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" using namespace clang; @@ -62,7 +61,12 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { // Get the method. 
const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod(); const Expr *SubExpr = E->getSubExpr(); - assert(BoxingMethod && "BoxingMethod is null"); + + if (E->isExpressibleAsConstantInitializer()) { + ConstantEmitter ConstEmitter(CGM); + return ConstEmitter.tryEmitAbstract(E, E->getType()); + } + assert(BoxingMethod->isClassMethod() && "BoxingMethod must be a class method"); Selector Sel = BoxingMethod->getSelector(); @@ -160,9 +164,8 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, if (ALE) { // Emit the element and store it to the appropriate array slot. const Expr *Rhs = ALE->getElement(i); - LValue LV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue LV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *value = EmitScalarExpr(Rhs); EmitStoreThroughLValue(RValue::get(value), LV, true); @@ -172,17 +175,15 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, } else { // Emit the key and store it to the appropriate array slot. const Expr *Key = DLE->getKeyValueElement(i).Key; - LValue KeyLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Keys, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue KeyLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Keys, i), + ElementType, AlignmentSource::Decl); llvm::Value *keyValue = EmitScalarExpr(Key); EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true); // Emit the value and store it to the appropriate array slot. 
const Expr *Value = DLE->getKeyValueElement(i).Value; - LValue ValueLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue ValueLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *valueValue = EmitScalarExpr(Value); EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true); if (TrackNeededObjects) { @@ -427,6 +428,41 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, return None; } +/// Instead of '[[MyClass alloc] init]', try to generate +/// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the +/// caller side, as well as the optimized objc_alloc. +static Optional<llvm::Value *> +tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { + auto &Runtime = CGF.getLangOpts().ObjCRuntime; + if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit()) + return None; + + // Match the exact pattern '[[MyClass alloc] init]'. + Selector Sel = OME->getSelector(); + if (OME->getReceiverKind() != ObjCMessageExpr::Instance || + !OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() || + Sel.getNameForSlot(0) != "init") + return None; + + // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]'. 
+ auto *SubOME = + dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParens()); + if (!SubOME) + return None; + Selector SubSel = SubOME->getSelector(); + if (SubOME->getReceiverKind() != ObjCMessageExpr::Class || + !SubOME->getType()->isObjCObjectPointerType() || + !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") + return None; + + QualType ReceiverType = SubOME->getClassReceiver(); + const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>(); + const ObjCInterfaceDecl *ID = ObjTy->getInterface(); + assert(ID && "null interface should be impossible here"); + llvm::Value *Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID); + return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); +} + RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot Return) { // Only the lookup mechanism and first two arguments of the method @@ -448,6 +484,9 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, } } + if (Optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E)) + return AdjustObjCObjectType(*this, E->getType(), RValue::get(*Val)); + // We don't retain the receiver in delegate init calls, and this is // safe because the receiver value is always loaded from 'self', // which we zero out. 
We don't want to Block_copy block receivers, @@ -685,7 +724,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(isAtomic)), Context.BoolTy); args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), callee, ReturnValueSlot(), args); @@ -949,8 +988,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, // Third argument is the helper function. args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *copyCppAtomicObjectFn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); + llvm::FunctionCallee copyCppAtomicObjectFn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); CGCallee callee = CGCallee::forDirect(copyCppAtomicObjectFn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1026,8 +1065,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } case PropertyImplStrategy::GetSetProperty: { - llvm::Constant *getPropertyFn = - CGM.getObjCRuntime().GetPropertyGetFunction(); + llvm::FunctionCallee getPropertyFn = + CGM.getObjCRuntime().GetPropertyGetFunction(); if (!getPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C getter requiring atomic copy"); return; @@ -1052,10 +1091,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // FIXME: We shouldn't need to get the function info here, the // runtime already should have computed it to build the function. 
- llvm::Instruction *CallInstruction; - RValue RV = EmitCall( - getTypes().arrangeBuiltinFunctionCall(propType, args), - callee, ReturnValueSlot(), args, &CallInstruction); + llvm::CallBase *CallInstruction; + RValue RV = EmitCall(getTypes().arrangeBuiltinFunctionCall( + getContext().getObjCIdType(), args), + callee, ReturnValueSlot(), args, &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) call->setTailCall(); @@ -1170,7 +1209,7 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, // FIXME: should this really always be false? args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1207,8 +1246,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // Third argument is the helper function. 
args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *fn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); + llvm::FunctionCallee fn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1302,14 +1341,13 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, case PropertyImplStrategy::GetSetProperty: case PropertyImplStrategy::SetPropertyAndExpressionGet: { - llvm::Constant *setOptimizedPropertyFn = nullptr; - llvm::Constant *setPropertyFn = nullptr; + llvm::FunctionCallee setOptimizedPropertyFn = nullptr; + llvm::FunctionCallee setPropertyFn = nullptr; if (UseOptimizedSetter(CGM)) { // 10.8 and iOS 6.0 code and GC is off setOptimizedPropertyFn = - CGM.getObjCRuntime() - .GetOptimizedPropertySetFunction(strategy.isAtomic(), - strategy.isCopy()); + CGM.getObjCRuntime().GetOptimizedPropertySetFunction( + strategy.isAtomic(), strategy.isCopy()); if (!setOptimizedPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C optimized setter - NYI"); return; @@ -1560,8 +1598,8 @@ QualType CodeGenFunction::TypeOfSelfObject() { } void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ - llvm::Constant *EnumerationMutationFnPtr = - CGM.getObjCRuntime().EnumerationMutationFunction(); + llvm::FunctionCallee EnumerationMutationFnPtr = + CGM.getObjCRuntime().EnumerationMutationFunction(); if (!EnumerationMutationFnPtr) { CGM.ErrorUnsupported(&S, "Obj-C fast enumeration for this runtime"); return; @@ -1669,8 +1707,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Save the initial mutations value. This is the value at an // address that was written into the state object by // countByEnumeratingWithState:objects:count:. 
- Address StateMutationsPtrPtr = Builder.CreateStructGEP( - StatePtr, 2, 2 * getPointerSize(), "mutationsptr.ptr"); + Address StateMutationsPtrPtr = + Builder.CreateStructGEP(StatePtr, 2, "mutationsptr.ptr"); llvm::Value *StateMutationsPtr = Builder.CreateLoad(StateMutationsPtrPtr, "mutationsptr"); @@ -1751,8 +1789,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Fetch the buffer out of the enumeration state. // TODO: this pointer should actually be invariant between // refreshes, which would help us do certain loop optimizations. - Address StateItemsPtr = Builder.CreateStructGEP( - StatePtr, 1, getPointerSize(), "stateitems.ptr"); + Address StateItemsPtr = + Builder.CreateStructGEP(StatePtr, 1, "stateitems.ptr"); llvm::Value *EnumStateItems = Builder.CreateLoad(StateItemsPtr, "stateitems"); @@ -1891,7 +1929,7 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type, /// Given a number of pointers, inform the optimizer that they're /// being intrinsically used up until this point in the program. void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use; + llvm::Function *&fn = CGM.getObjCEntrypoints().clang_arc_use; if (!fn) fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use); @@ -1900,8 +1938,7 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { EmitNounwindRuntimeCall(fn, values); } -static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, - llvm::Constant *RTF) { +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, llvm::Value *RTF) { if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. 
We don't really @@ -1913,15 +1950,18 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, } } +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, + llvm::FunctionCallee RTF) { + setARCRuntimeFunctionLinkage(CGM, RTF.getCallee()); +} + /// Perform an operation having the signature /// i8* (i8*) /// where a null input causes a no-op and returns null. -static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, - llvm::Value *value, - llvm::Type *returnType, - llvm::Constant *&fn, - llvm::Intrinsic::ID IntID, - bool isTailCall = false) { +static llvm::Value *emitARCValueOperation( + CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, + llvm::CallInst::TailCallKind tailKind = llvm::CallInst::TCK_None) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -1936,8 +1976,7 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, // Call the function. llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); - if (isTailCall) - call->setTailCall(); + call->setTailCallKind(tailKind); // Cast the result back to the original type. 
return CGF.Builder.CreateBitCast(call, origType); @@ -1945,9 +1984,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**) -static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, - Address addr, - llvm::Constant *&fn, +static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { if (!fn) { fn = CGF.CGM.getIntrinsic(IntID); @@ -1970,10 +2008,9 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**, i8*) -static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, - Address addr, +static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr, llvm::Value *value, - llvm::Constant *&fn, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, bool ignored) { assert(addr.getElementType() == value->getType()); @@ -1998,10 +2035,8 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// void (i8**, i8**) -static void emitARCCopyOperation(CodeGenFunction &CGF, - Address dst, - Address src, - llvm::Constant *&fn, +static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { assert(dst.getType() == src.getType()); @@ -2023,8 +2058,8 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, - llvm::Constant *&fn, - StringRef fnName) { + llvm::FunctionCallee &fn, + StringRef fnName, bool MayThrow) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -2034,7 +2069,7 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function 
*f = dyn_cast<llvm::Function>(fn)) + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) if (fnName == "objc_retain") f->addFnAttr(llvm::Attribute::NonLazyBind); } @@ -2044,10 +2079,14 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. - llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); + llvm::CallBase *Inst = nullptr; + if (MayThrow) + Inst = CGF.EmitCallOrInvoke(fn, value); + else + Inst = CGF.EmitNounwindRuntimeCall(fn, value); // Cast the result back to the original type. - return CGF.Builder.CreateBitCast(call, origType); + return CGF.Builder.CreateBitCast(Inst, origType); } /// Produce the code to do a retain. Based on the type, calls one of: @@ -2122,14 +2161,10 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // with this marker yet, so leave a breadcrumb for the ARC // optimizer to pick up. } else { - llvm::NamedMDNode *metadata = - CGF.CGM.getModule().getOrInsertNamedMetadata( - "clang.arc.retainAutoreleasedReturnValueMarker"); - assert(metadata->getNumOperands() <= 1); - if (metadata->getNumOperands() == 0) { - auto &ctx = CGF.getLLVMContext(); - metadata->addOperand(llvm::MDNode::get(ctx, - llvm::MDString::get(ctx, assembly))); + const char *markerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; + if (!CGF.CGM.getModule().getModuleFlag(markerKey)) { + auto *str = llvm::MDString::get(CGF.getLLVMContext(), assembly); + CGF.CGM.getModule().addModuleFlag(llvm::Module::Error, markerKey, str); } } } @@ -2147,9 +2182,15 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, nullptr, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, - llvm::Intrinsic::objc_retainAutoreleasedReturnValue); + 
llvm::CallInst::TailCallKind tailKind = + CGM.getTargetCodeGenInfo() + .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() + ? llvm::CallInst::TCK_NoTail + : llvm::CallInst::TCK_None; + return emitARCValueOperation( + *this, value, nullptr, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + llvm::Intrinsic::objc_retainAutoreleasedReturnValue, tailKind); } /// Claim a possibly-autoreleased return value at +0. This is only @@ -2173,7 +2214,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_release; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2219,7 +2260,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr, bool ignored) { assert(addr.getElementType() == value->getType()); - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_storeStrong; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2286,7 +2327,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, llvm::Intrinsic::objc_autoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2296,7 +2337,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, llvm::Intrinsic::objc_retainAutoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. 
@@ -2375,7 +2416,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { /// void \@objc_destroyWeak(i8** %addr) /// Essentially objc_storeWeak(addr, nil). void CodeGenFunction::EmitARCDestroyWeak(Address addr) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2423,7 +2464,7 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, /// Produce the code to do a objc_autoreleasepool_push. /// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2439,8 +2480,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { if (getInvokeDest()) { // Call the runtime method not the intrinsic if we are handling exceptions - llvm::Constant *&fn = - CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; if (!fn) { llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); @@ -2451,7 +2492,7 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { // objc_autoreleasePoolPop can throw. 
EmitRuntimeCallOrInvoke(fn, value); } else { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; + llvm::FunctionCallee &fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2495,7 +2536,7 @@ llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value, llvm::Type *resultType) { return emitObjCValueOperation(*this, value, resultType, CGM.getObjCEntrypoints().objc_alloc, - "objc_alloc"); + "objc_alloc", /*MayThrow=*/true); } /// Allocate the given objc object. @@ -2504,7 +2545,14 @@ llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value, llvm::Type *resultType) { return emitObjCValueOperation(*this, value, resultType, CGM.getObjCEntrypoints().objc_allocWithZone, - "objc_allocWithZone"); + "objc_allocWithZone", /*MayThrow=*/true); +} + +llvm::Value *CodeGenFunction::EmitObjCAllocInit(llvm::Value *value, + llvm::Type *resultType) { + return emitObjCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_alloc_init, + "objc_alloc_init", /*MayThrow=*/true); } /// Produce the code to do a primitive release. @@ -2545,18 +2593,20 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, /// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, - "objc_autorelease"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, + "objc_autorelease", /*MayThrow=*/false); } /// Retain the given object, with normal retain semantics. 
/// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_retainRuntimeFunction, - "objc_retain"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_retainRuntimeFunction, "objc_retain", + /*MayThrow=*/false); } /// Release the given object. @@ -2565,17 +2615,16 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_releaseRuntimeFunction; if (!fn) { - if (!fn) { - llvm::FunctionType *fnType = + llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); - fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); - setARCRuntimeFunctionLinkage(CGM, fn); - // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) - f->addFnAttr(llvm::Attribute::NonLazyBind); - } + fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); + setARCRuntimeFunctionLinkage(CGM, fn); + // We have Native ARC, so set nonlazybind attribute for performance + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) + f->addFnAttr(llvm::Attribute::NonLazyBind); } // Cast the argument to 'id'. 
@@ -2829,6 +2878,7 @@ public: Result visit(const Expr *e); Result visitCastExpr(const CastExpr *e); Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBlockExpr(const BlockExpr *e); Result visitBinaryOperator(const BinaryOperator *e); Result visitBinAssign(const BinaryOperator *e); Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); @@ -2905,6 +2955,12 @@ ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { } template <typename Impl, typename Result> +Result ARCExprEmitter<Impl, Result>::visitBlockExpr(const BlockExpr *e) { + // The default implementation just forwards the expression to visitExpr. + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { switch (e->getCastKind()) { @@ -3047,7 +3103,8 @@ Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // Look through pseudo-object expressions. } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { return asImpl().visitPseudoObjectExpr(pseudo); - } + } else if (auto *be = dyn_cast<BlockExpr>(e)) + return asImpl().visitBlockExpr(be); return asImpl().visitExpr(e); } @@ -3082,6 +3139,15 @@ struct ARCRetainExprEmitter : return TryEmitResult(result, true); } + TryEmitResult visitBlockExpr(const BlockExpr *e) { + TryEmitResult result = visitExpr(e); + // Avoid the block-retain if this is a block literal that doesn't need to be + // copied to the heap. + if (e->getBlockDecl()->canAvoidCopyToHeap()) + result.setInt(true); + return result; + } + /// Block extends are net +0. Naively, we could just recurse on /// the subexpression, but actually we need to ensure that the /// value is copied as a block, so there's a little filter here. @@ -3384,11 +3450,10 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { // We just use an inline assembly. 
llvm::FunctionType *extenderType = llvm::FunctionType::get(VoidTy, VoidPtrTy, RequiredArgs::All); - llvm::Value *extender - = llvm::InlineAsm::get(extenderType, - /* assembly */ "", - /* constraints */ "r", - /* side effects */ true); + llvm::InlineAsm *extender = llvm::InlineAsm::get(extenderType, + /* assembly */ "", + /* constraints */ "r", + /* side effects */ true); object = Builder.CreateBitCast(object, VoidPtrTy); EmitNounwindRuntimeCall(extender, object); @@ -3647,19 +3712,25 @@ void CodeGenModule::emitAtAvailableLinkGuard() { // CoreFoundation is linked into the final binary. llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); - llvm::Constant *CFFunc = + llvm::FunctionCallee CFFunc = CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); - llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( - CheckFTy, "__clang_at_available_requires_core_foundation_framework")); - CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); - CodeGenFunction CGF(*this); - CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); - CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); - CGF.Builder.CreateUnreachable(); - addCompilerUsedGlobal(CFLinkCheckFunc); + llvm::FunctionCallee CFLinkCheckFuncRef = CreateRuntimeFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework", + llvm::AttributeList(), /*IsLocal=*/true); + llvm::Function *CFLinkCheckFunc = + cast<llvm::Function>(CFLinkCheckFuncRef.getCallee()->stripPointerCasts()); + if (CFLinkCheckFunc->empty()) { + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + 
CGF.EmitNounwindRuntimeCall(CFFunc, + llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); + } } CGObjCRuntime::~CGObjCRuntime() {} diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 548bd6b3fd..ee5c12aa35 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -1,9 +1,8 @@ //===------- CGObjCGNU.cpp - Emit LLVM Code from ASTs for a Module --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,7 +28,6 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -60,7 +58,7 @@ class LazyRuntimeFunction { CodeGenModule *CGM; llvm::FunctionType *FTy; const char *FunctionName; - llvm::Constant *Function; + llvm::FunctionCallee Function; public: /// Constructor leaves this class uninitialized, because it is intended to @@ -90,7 +88,7 @@ public: /// Overloaded cast operator, allows the class to be implicitly cast to an /// LLVM constant. 
- operator llvm::Constant *() { + operator llvm::FunctionCallee() { if (!Function) { if (!FunctionName) return nullptr; @@ -98,9 +96,6 @@ public: } return Function; } - operator llvm::Function *() { - return cast<llvm::Function>((llvm::Constant *)*this); - } }; @@ -190,12 +185,16 @@ protected: (R.getVersion() >= VersionTuple(major, minor)); } - std::string SymbolForProtocol(StringRef Name) { - return (StringRef("._OBJC_PROTOCOL_") + Name).str(); + std::string ManglePublicSymbol(StringRef Name) { + return (StringRef(CGM.getTriple().isOSBinFormatCOFF() ? "$_" : "._") + Name).str(); + } + + std::string SymbolForProtocol(Twine Name) { + return (ManglePublicSymbol("OBJC_PROTOCOL_") + Name).str(); } std::string SymbolForProtocolRef(StringRef Name) { - return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_PROTOCOL_") + Name).str(); } @@ -614,15 +613,15 @@ public: const ObjCProtocolDecl *PD) override; void GenerateProtocol(const ObjCProtocolDecl *PD) override; llvm::Function *ModuleInitFunction() override; - llvm::Constant *GetPropertyGetFunction() override; - llvm::Constant *GetPropertySetFunction() override; - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override; - llvm::Constant *GetSetStructFunction() override; - llvm::Constant *GetGetStructFunction() override; - llvm::Constant *GetCppAtomicObjectGetFunction() override; - llvm::Constant *GetCppAtomicObjectSetFunction() override; - llvm::Constant *EnumerationMutationFunction() override; + llvm::FunctionCallee GetPropertyGetFunction() override; + llvm::FunctionCallee GetPropertySetFunction() override; + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::FunctionCallee GetSetStructFunction() override; + llvm::FunctionCallee GetGetStructFunction() override; + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override; + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override; + llvm::FunctionCallee 
EnumerationMutationFunction() override; void EmitTryStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S) override; @@ -691,9 +690,9 @@ protected: llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); + llvm::CallBase *imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); - return imp.getInstruction(); + return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, @@ -750,7 +749,7 @@ class CGObjCGNUstep : public CGObjCGNU { llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; - llvm::Function *LookupFn = SlotLookupFn; + llvm::FunctionCallee LookupFn = SlotLookupFn; // Store the receiver on the stack so that we can reload it later Address ReceiverPtr = @@ -766,20 +765,20 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. - LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); + if (auto *LookupFn2 = dyn_cast<llvm::Function>(LookupFn.getCallee())) + LookupFn2->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), EnforceType(Builder, cmd, SelectorTy), EnforceType(Builder, self, IdTy) }; - llvm::CallSite slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); - slot.setOnlyReadsMemory(); + llvm::CallBase *slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); + slot->setOnlyReadsMemory(); slot->setMetadata(msgSendMDKind, node); // Load the imp from the slot llvm::Value *imp = Builder.CreateAlignedLoad( - Builder.CreateStructGEP(nullptr, slot.getInstruction(), 4), - CGF.getPointerAlign()); + Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); // The lookup function may have changed the receiver, so make sure we use // the new one. 
@@ -859,7 +858,7 @@ class CGObjCGNUstep : public CGObjCGNU { PtrTy, PtrTy); } - llvm::Constant *GetCppAtomicObjectGetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -867,7 +866,7 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectGetFn; } - llvm::Constant *GetCppAtomicObjectSetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -875,8 +874,8 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectSetFn; } - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override { + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { // The optimised property functions omit the GC check, and so are not // safe to use in GC mode. The standard functions are fast in GC mode, // so there is less advantage in using them. @@ -911,12 +910,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantStringSection }; static const char *const SectionsBaseNames[8]; + static const char *const PECOFFSectionsBaseNames[8]; template<SectionKind K> std::string sectionName() { - std::string name(SectionsBaseNames[K]); - if (CGM.getTriple().isOSBinFormatCOFF()) + if (CGM.getTriple().isOSBinFormatCOFF()) { + std::string name(PECOFFSectionsBaseNames[K]); name += "$m"; - return name; + return name; + } + return SectionsBaseNames[K]; } /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as @@ -937,15 +939,19 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { bool EmittedClass = false; /// Generate the name of a symbol for a reference to a class. 
Accesses to /// classes should be indirected via this. + + typedef std::pair<std::string, std::pair<llvm::Constant*, int>> EarlyInitPair; + std::vector<EarlyInitPair> EarlyInitList; + std::string SymbolForClassRef(StringRef Name, bool isWeak) { if (isWeak) - return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_WEAK_REF_CLASS_") + Name).str(); else - return (StringRef("._OBJC_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_CLASS_") + Name).str(); } /// Generate the name of a class symbol. std::string SymbolForClass(StringRef Name) { - return (StringRef("._OBJC_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_CLASS_") + Name).str(); } void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName, ArrayRef<llvm::Value*> Args) { @@ -954,7 +960,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Types.push_back(Arg->getType()); llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types, false); - llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName); + llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FT, FunctionName); B.CreateCall(Fn, Args); } @@ -999,10 +1005,13 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::Constant *isa = TheModule.getNamedGlobal(Sym); - if (!isa) + if (!isa) { isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, llvm::GlobalValue::ExternalLinkage, nullptr, Sym); - else if (isa->getType() != PtrToIdTy) + if (CGM.getTriple().isOSBinFormatCOFF()) { + cast<llvm::GlobalValue>(isa)->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } else if (isa->getType() != PtrToIdTy) isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); // struct @@ -1017,7 +1026,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantInitBuilder Builder(CGM); auto Fields = Builder.beginStruct(); - Fields.add(isa); + if (!CGM.getTriple().isOSBinFormatCOFF()) { + Fields.add(isa); + } else { + Fields.addNullPointer(PtrTy); + } // For now, all 
non-ASCII strings are represented as UTF-16. As such, the // number of bytes is simply double the number of UTF-16 codepoints. In // ASCII strings, the number of bytes is equal to the number of non-ASCII @@ -1088,6 +1101,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName)); ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility); } + if (CGM.getTriple().isOSBinFormatCOFF()) { + std::pair<llvm::Constant*, int> v{ObjCStrGV, 0}; + EarlyInitList.emplace_back(Sym, v); + } llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy); ObjCStrings[Str] = ObjCStr; ConstantStrings.push_back(ObjCStr); @@ -1201,6 +1218,33 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ClassSymbol->setInitializer(new llvm::GlobalVariable(TheModule, Int8Ty, false, llvm::GlobalValue::ExternalWeakLinkage, nullptr, SymbolForClass(Name))); + else { + if (CGM.getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = CGM.getContext().Idents.get(Name); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const ObjCInterfaceDecl *OID = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((OID = dyn_cast<ObjCInterfaceDecl>(Result))) + break; + + // The first Interface we find may be a @class, + // which should only be treated as the source of + // truth in the absence of a true declaration. 
+ const ObjCInterfaceDecl *OIDDef = OID->getDefinition(); + if (OIDDef != nullptr) + OID = OIDDef; + + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + + cast<llvm::GlobalValue>(ClassSymbol)->setDLLStorageClass(Storage); + } + } assert(ClassSymbol->getName() == SymbolName); return ClassSymbol; } @@ -1453,7 +1497,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Sym->setSection((Section + SecSuffix).str()); Sym->setComdat(TheModule.getOrInsertComdat((Prefix + Section).str())); - Sym->setAlignment(1); + Sym->setAlignment(CGM.getPointerAlign().getQuantity()); return Sym; }; return { Sym("__start_", "$a"), Sym("__stop", "$z") }; @@ -1488,11 +1532,12 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantInitBuilder builder(CGM); auto InitStructBuilder = builder.beginStruct(); InitStructBuilder.addInt(Int64Ty, 0); - for (auto *s : SectionsBaseNames) { + auto &sectionVec = CGM.getTriple().isOSBinFormatCOFF() ? 
PECOFFSectionsBaseNames : SectionsBaseNames; + for (auto *s : sectionVec) { auto bounds = GetSectionBounds(s); InitStructBuilder.add(bounds.first); InitStructBuilder.add(bounds.second); - }; + } auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init", CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -1519,7 +1564,12 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { if (CGM.getTriple().isOSBinFormatCOFF()) InitVar->setSection(".CRT$XCLz"); else - InitVar->setSection(".ctors"); + { + if (CGM.getCodeGenOpts().UseInitArray) + InitVar->setSection(".init_array"); + else + InitVar->setSection(".ctors"); + } InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility); InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor")); CGM.addUsedGlobal(InitVar); @@ -1582,6 +1632,29 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantStrings.clear(); Categories.clear(); Classes.clear(); + + if (EarlyInitList.size() > 0) { + auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy, + {}), llvm::GlobalValue::InternalLinkage, ".objc_early_init", + &CGM.getModule()); + llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry", + Init)); + for (const auto &lateInit : EarlyInitList) { + auto *global = TheModule.getGlobalVariable(lateInit.first); + if (global) { + b.CreateAlignedStore(global, + b.CreateStructGEP(lateInit.second.first, lateInit.second.second), CGM.getPointerAlign().getQuantity()); + } + } + b.CreateRetVoid(); + // We can't use the normal LLVM global initialisation array, because we + // need to specify that this runs early in library initialisation. 
+ auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + /*isConstant*/true, llvm::GlobalValue::InternalLinkage, + Init, ".objc_early_init_ptr"); + InitVar->setSection(".CRT$XCLb"); + CGM.addUsedGlobal(InitVar); + } return nullptr; } /// In the v2 ABI, ivar offset variables use the type encoding in their name @@ -1613,6 +1686,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } void GenerateClass(const ObjCImplementationDecl *OID) override { ASTContext &Context = CGM.getContext(); + bool IsCOFF = CGM.getTriple().isOSBinFormatCOFF(); // Get the class name ObjCInterfaceDecl *classDecl = @@ -1671,8 +1745,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // struct objc_property_list *properties metaclassFields.add(GeneratePropertyList(OID, classDecl, /*isClassProperty*/true)); - auto *metaclass = metaclassFields.finishAndCreateGlobal("._OBJC_METACLASS_" - + className, CGM.getPointerAlign()); + auto *metaclass = metaclassFields.finishAndCreateGlobal( + ManglePublicSymbol("OBJC_METACLASS_") + className, + CGM.getPointerAlign()); auto classFields = builder.beginStruct(); // struct objc_class *isa; @@ -1681,15 +1756,28 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Get the superclass name. 
const ObjCInterfaceDecl * SuperClassDecl = OID->getClassInterface()->getSuperClass(); + llvm::Constant *SuperClass = nullptr; if (SuperClassDecl) { auto SuperClassName = SymbolForClass(SuperClassDecl->getNameAsString()); - llvm::Constant *SuperClass = TheModule.getNamedGlobal(SuperClassName); + SuperClass = TheModule.getNamedGlobal(SuperClassName); if (!SuperClass) { SuperClass = new llvm::GlobalVariable(TheModule, PtrTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SuperClassName); + if (IsCOFF) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (SuperClassDecl->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (SuperClassDecl->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + + cast<llvm::GlobalValue>(SuperClass)->setDLLStorageClass(Storage); + } } - classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + if (!IsCOFF) + classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + else + classFields.addNullPointer(PtrTy); } else classFields.addNullPointer(PtrTy); // const char *name; @@ -1731,7 +1819,6 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { CGM.getContext().getCharWidth()); // struct objc_ivar ivars[] auto ivarArrayBuilder = ivarListBuilder.beginArray(); - CodeGenTypes &Types = CGM.getTypes(); for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { auto ivarTy = IVD->getType(); @@ -1765,8 +1852,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ivarBuilder.add(OffsetVar); // Ivar size ivarBuilder.addInt(Int32Ty, - td.getTypeSizeInBits(Types.ConvertType(ivarTy)) / - CGM.getContext().getCharWidth()); + CGM.getContext().getTypeSizeInChars(ivarTy).getQuantity()); // Alignment will be stored as a base-2 log of the alignment. int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); // Objects that require more than 2^64-byte alignment should be impossible! 
@@ -1839,19 +1925,24 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { classFields.finishAndCreateGlobal(SymbolForClass(className), CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); - if (CGM.getTriple().isOSBinFormatCOFF()) { - auto Storage = llvm::GlobalValue::DefaultStorageClass; - if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) - Storage = llvm::GlobalValue::DLLImportStorageClass; - else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) - Storage = llvm::GlobalValue::DLLExportStorageClass; - cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(Storage); - } - auto *classRefSymbol = GetClassVar(className); classRefSymbol->setSection(sectionName<ClassReferenceSection>()); classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + if (IsCOFF) { + // we can't import a class struct. + if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) { + cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + cast<llvm::GlobalValue>(classRefSymbol)->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + } + + if (SuperClass) { + std::pair<llvm::Constant*, int> v{classStruct, 1}; + EarlyInitList.emplace_back(SuperClass->getName(), std::move(v)); + } + + } + // Resolve the class aliases, if they exist. // FIXME: Class pointer aliases shouldn't exist! 
@@ -1879,7 +1970,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto classInitRef = new llvm::GlobalVariable(TheModule, classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage, - classStruct, "._OBJC_INIT_CLASS_" + className); + classStruct, ManglePublicSymbol("OBJC_INIT_CLASS_") + className); classInitRef->setSection(sectionName<ClassSection>()); CGM.addUsedGlobal(classInitRef); @@ -1916,6 +2007,18 @@ const char *const CGObjCGNUstep2::SectionsBaseNames[8] = "__objc_constant_string" }; +const char *const CGObjCGNUstep2::PECOFFSectionsBaseNames[8] = +{ +".objcrt$SEL", +".objcrt$CLS", +".objcrt$CLR", +".objcrt$CAT", +".objcrt$PCL", +".objcrt$PCR", +".objcrt$CAL", +".objcrt$STR" +}; + /// Support for the ObjFW runtime. class CGObjCObjFW: public CGObjCGNU { protected: @@ -1938,14 +2041,14 @@ protected: EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp; + llvm::CallBase *imp; if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFnSRet, args); else imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); - return imp.getInstruction(); + return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, @@ -2174,9 +2277,8 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, if (!isWeak) EmitClassRef(Name); - llvm::Constant *ClassLookupFn = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, PtrToInt8Ty, true), - "objc_lookup_class"); + llvm::FunctionCallee ClassLookupFn = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IdTy, PtrToInt8Ty, true), "objc_lookup_class"); return CGF.EmitNounwindRuntimeCall(ClassLookupFn, ClassName); } @@ -2432,7 +2534,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, ReceiverClass = EnforceType(Builder, ReceiverClass, IdTy); } else { if (isCategoryImpl) { - llvm::Constant *classLookupFunction = nullptr; + llvm::FunctionCallee classLookupFunction = nullptr; if 
(IsClassMessage) { classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( IdTy, PtrTy, true), "objc_get_meta_class"); @@ -2481,10 +2583,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, Address ObjCSuper = CGF.CreateTempAlloca(ObjCSuperTy, CGF.getPointerAlign()); - Builder.CreateStore(Receiver, - Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero())); - Builder.CreateStore(ReceiverClass, - Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + Builder.CreateStore(Receiver, Builder.CreateStructGEP(ObjCSuper, 0)); + Builder.CreateStore(ReceiverClass, Builder.CreateStructGEP(ObjCSuper, 1)); ObjCSuper = EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy); @@ -2501,7 +2601,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, CGCallee callee(CGCalleeInfo(), imp); - llvm::Instruction *call; + llvm::CallBase *call; RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call); call->setMetadata(msgSendMDKind, node); return msgRet; @@ -2595,16 +2695,21 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, case CodeGenOptions::Mixed: case CodeGenOptions::NonLegacy: if (CGM.ReturnTypeUsesFPRet(ResultType)) { - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_fpret"); + imp = + CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + "objc_msgSend_fpret") + .getCallee(); } else if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { // The actual types here don't matter - we're going to bitcast the // function anyway - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_stret"); + imp = + CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + "objc_msgSend_stret") + .getCallee(); } else { - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend"); + imp = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IdTy, IdTy, true), "objc_msgSend") + .getCallee(); } } @@ -2613,7 
+2718,7 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, imp = EnforceType(Builder, imp, MSI.MessengerType); - llvm::Instruction *call; + llvm::CallBase *call; CGCallee callee(CGCalleeInfo(), imp); RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call); call->setMetadata(msgSendMDKind, node); @@ -3697,7 +3802,8 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::FunctionType *FT = llvm::FunctionType::get(Builder.getVoidTy(), module->getType(), true); - llvm::Value *Register = CGM.CreateRuntimeFunction(FT, "__objc_exec_class"); + llvm::FunctionCallee Register = + CGM.CreateRuntimeFunction(FT, "__objc_exec_class"); Builder.CreateCall(Register, module); if (!ClassAliases.empty()) { @@ -3766,36 +3872,36 @@ llvm::Function *CGObjCGNU::GenerateMethod(const ObjCMethodDecl *OMD, return Method; } -llvm::Constant *CGObjCGNU::GetPropertyGetFunction() { +llvm::FunctionCallee CGObjCGNU::GetPropertyGetFunction() { return GetPropertyFn; } -llvm::Constant *CGObjCGNU::GetPropertySetFunction() { +llvm::FunctionCallee CGObjCGNU::GetPropertySetFunction() { return SetPropertyFn; } -llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, - bool copy) { +llvm::FunctionCallee CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, + bool copy) { return nullptr; } -llvm::Constant *CGObjCGNU::GetGetStructFunction() { +llvm::FunctionCallee CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } -llvm::Constant *CGObjCGNU::GetSetStructFunction() { +llvm::FunctionCallee CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } -llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { +llvm::FunctionCallee CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } -llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { +llvm::FunctionCallee CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } -llvm::Constant *CGObjCGNU::EnumerationMutationFunction() { +llvm::FunctionCallee 
CGObjCGNU::EnumerationMutationFunction() { return EnumerationMutationFn; } @@ -3844,13 +3950,14 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, // that was passed into the `@catch` block, then this code path is not // reached and we will instead call `objc_exception_throw` with an explicit // argument. - CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn).setDoesNotReturn(); + llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn); + Throw->setDoesNotReturn(); } else { ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); - llvm::CallSite Throw = + llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); - Throw.setDoesNotReturn(); + Throw->setDoesNotReturn(); } CGF.Builder.CreateUnreachable(); if (ClearInsertionPoint) @@ -3861,8 +3968,7 @@ llvm::Value * CGObjCGNU::EmitObjCWeakRead(CodeGenFunction &CGF, Address AddrWeakObj) { CGBuilderTy &B = CGF.Builder; AddrWeakObj = EnforceType(B, AddrWeakObj, PtrToIdTy); - return B.CreateCall(WeakReadFn.getType(), WeakReadFn, - AddrWeakObj.getPointer()); + return B.CreateCall(WeakReadFn, AddrWeakObj.getPointer()); } void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, @@ -3870,8 +3976,7 @@ void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); - B.CreateCall(WeakAssignFn.getType(), WeakAssignFn, - {src, dst.getPointer()}); + B.CreateCall(WeakAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, @@ -3882,8 +3987,7 @@ void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, dst = EnforceType(B, dst, PtrToIdTy); // FIXME. 
Add threadloca assign API assert(!threadlocal && "EmitObjCGlobalAssign - Threal Local API NYI"); - B.CreateCall(GlobalAssignFn.getType(), GlobalAssignFn, - {src, dst.getPointer()}); + B.CreateCall(GlobalAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, @@ -3892,8 +3996,7 @@ void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, IdTy); - B.CreateCall(IvarAssignFn.getType(), IvarAssignFn, - {src, dst.getPointer(), ivarOffset}); + B.CreateCall(IvarAssignFn, {src, dst.getPointer(), ivarOffset}); } void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, @@ -3901,8 +4004,7 @@ void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); - B.CreateCall(StrongCastAssignFn.getType(), StrongCastAssignFn, - {src, dst.getPointer()}); + B.CreateCall(StrongCastAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, @@ -3913,8 +4015,7 @@ void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, DestPtr = EnforceType(B, DestPtr, PtrTy); SrcPtr = EnforceType(B, SrcPtr, PtrTy); - B.CreateCall(MemMoveFn.getType(), MemMoveFn, - {DestPtr.getPointer(), SrcPtr.getPointer(), Size}); + B.CreateCall(MemMoveFn, {DestPtr.getPointer(), SrcPtr.getPointer(), Size}); } llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index d91eb43ca3..ad141d6191 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -1,9 +1,8 @@ //===------- CGObjCMac.cpp - Interface to Apple Objective-C Runtime -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,7 +30,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" @@ -61,7 +59,7 @@ private: /// /// The default messenger, used for sends whose ABI is unchanged from /// the all-integer/pointer case. - llvm::Constant *getMessageSendFn() const { + llvm::FunctionCallee getMessageSendFn() const { // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; @@ -77,12 +75,11 @@ private: /// The messenger used when the return value is an aggregate returned /// by indirect reference in the first argument, and therefore the /// self and selector parameters are shifted over by one. - llvm::Constant *getMessageSendStretFn() const { + llvm::FunctionCallee getMessageSendStretFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.VoidTy, params, true), "objc_msgSend_stret"); - } /// [double | long double] objc_msgSend_fpret(id self, SEL op, ...) @@ -90,12 +87,11 @@ private: /// The messenger used when the return value is returned on the x87 /// floating-point stack; without a special entrypoint, the nil case /// would be unbalanced. 
- llvm::Constant *getMessageSendFpretFn() const { + llvm::FunctionCallee getMessageSendFpretFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.DoubleTy, params, true), "objc_msgSend_fpret"); - } /// _Complex long double objc_msgSend_fp2ret(id self, SEL op, ...) @@ -103,7 +99,7 @@ private: /// The messenger used when the return value is returned in two values on the /// x87 floating point stack; without a special entrypoint, the nil case /// would be unbalanced. Only used on 64-bit X86. - llvm::Constant *getMessageSendFp2retFn() const { + llvm::FunctionCallee getMessageSendFp2retFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext); llvm::Type *resultType = @@ -119,7 +115,7 @@ private: /// The messenger used for super calls, which have different dispatch /// semantics. The class passed is the superclass of the current /// class. - llvm::Constant *getMessageSendSuperFn() const { + llvm::FunctionCallee getMessageSendSuperFn() const { llvm::Type *params[] = { SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, true), @@ -130,7 +126,7 @@ private: /// /// A slightly different messenger used for super calls. The class /// passed is the current class. - llvm::Constant *getMessageSendSuperFn2() const { + llvm::FunctionCallee getMessageSendSuperFn2() const { llvm::Type *params[] = { SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, true), @@ -141,7 +137,7 @@ private: /// SEL op, ...) /// /// The messenger used for super calls which return an aggregate indirectly. 
- llvm::Constant *getMessageSendSuperStretFn() const { + llvm::FunctionCallee getMessageSendSuperStretFn() const { llvm::Type *params[] = { Int8PtrTy, SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, true), @@ -152,19 +148,19 @@ private: /// SEL op, ...) /// /// objc_msgSendSuper_stret with the super2 semantics. - llvm::Constant *getMessageSendSuperStretFn2() const { + llvm::FunctionCallee getMessageSendSuperStretFn2() const { llvm::Type *params[] = { Int8PtrTy, SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, true), "objc_msgSendSuper2_stret"); } - llvm::Constant *getMessageSendSuperFpretFn() const { + llvm::FunctionCallee getMessageSendSuperFpretFn() const { // There is no objc_msgSendSuper_fpret? How can that work? return getMessageSendSuperFn(); } - llvm::Constant *getMessageSendSuperFpretFn2() const { + llvm::FunctionCallee getMessageSendSuperFpretFn2() const { // There is no objc_msgSendSuper_fpret? How can that work? return getMessageSendSuperFn2(); } @@ -233,7 +229,7 @@ public: /// CachePtrTy - LLVM type for struct objc_cache *. 
llvm::PointerType *CachePtrTy; - llvm::Constant *getGetPropertyFn() { + llvm::FunctionCallee getGetPropertyFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // id objc_getProperty (id, SEL, ptrdiff_t, bool) @@ -248,7 +244,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } - llvm::Constant *getSetPropertyFn() { + llvm::FunctionCallee getSetPropertyFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool) @@ -267,7 +263,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } - llvm::Constant *getOptimizedSetPropertyFn(bool atomic, bool copy) { + llvm::FunctionCallee getOptimizedSetPropertyFn(bool atomic, bool copy) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty_atomic(id self, SEL _cmd, @@ -302,7 +298,7 @@ public: return CGM.CreateRuntimeFunction(FTy, name); } - llvm::Constant *getCopyStructFn() { + llvm::FunctionCallee getCopyStructFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_copyStruct (void *, const void *, size_t, bool, bool) @@ -322,7 +318,7 @@ public: /// void objc_copyCppObjectAtomic( /// void *dest, const void *src, /// void (*copyHelper) (void *dest, const void *source)); - llvm::Constant *getCppAtomicObjectFunction() { + llvm::FunctionCallee getCppAtomicObjectFunction() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); /// void objc_copyCppObjectAtomic(void *dest, const void *src, void *helper); @@ -336,7 +332,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } - llvm::Constant *getEnumerationMutationFn() { + llvm::FunctionCallee getEnumerationMutationFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_enumerationMutation (id) @@ -348,7 +344,7 @@ 
public: return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } - llvm::Constant *getLookUpClassFn() { + llvm::FunctionCallee getLookUpClassFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // Class objc_lookUpClass (const char *) @@ -363,7 +359,7 @@ public: } /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. - llvm::Constant *getGcReadWeakFn() { + llvm::FunctionCallee getGcReadWeakFn() { // id objc_read_weak (id *) llvm::Type *args[] = { ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -372,7 +368,7 @@ public: } /// GcAssignWeakFn -- LLVM objc_assign_weak function. - llvm::Constant *getGcAssignWeakFn() { + llvm::FunctionCallee getGcAssignWeakFn() { // id objc_assign_weak (id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -381,7 +377,7 @@ public: } /// GcAssignGlobalFn -- LLVM objc_assign_global function. - llvm::Constant *getGcAssignGlobalFn() { + llvm::FunctionCallee getGcAssignGlobalFn() { // id objc_assign_global(id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -390,7 +386,7 @@ public: } /// GcAssignThreadLocalFn -- LLVM objc_assign_threadlocal function. - llvm::Constant *getGcAssignThreadLocalFn() { + llvm::FunctionCallee getGcAssignThreadLocalFn() { // id objc_assign_threadlocal(id src, id * dest) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -399,7 +395,7 @@ public: } /// GcAssignIvarFn -- LLVM objc_assign_ivar function. - llvm::Constant *getGcAssignIvarFn() { + llvm::FunctionCallee getGcAssignIvarFn() { // id objc_assign_ivar(id, id *, ptrdiff_t) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo(), CGM.PtrDiffTy }; @@ -409,7 +405,7 @@ public: } /// GcMemmoveCollectableFn -- LLVM objc_memmove_collectable function. 
- llvm::Constant *GcMemmoveCollectableFn() { + llvm::FunctionCallee GcMemmoveCollectableFn() { // void *objc_memmove_collectable(void *dst, const void *src, size_t size) llvm::Type *args[] = { Int8PtrTy, Int8PtrTy, LongTy }; llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, args, false); @@ -417,7 +413,7 @@ public: } /// GcAssignStrongCastFn -- LLVM objc_assign_strongCast function. - llvm::Constant *getGcAssignStrongCastFn() { + llvm::FunctionCallee getGcAssignStrongCastFn() { // id objc_assign_strongCast(id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -426,7 +422,7 @@ public: } /// ExceptionThrowFn - LLVM objc_exception_throw function. - llvm::Constant *getExceptionThrowFn() { + llvm::FunctionCallee getExceptionThrowFn() { // void objc_exception_throw(id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -435,14 +431,14 @@ public: } /// ExceptionRethrowFn - LLVM objc_exception_rethrow function. - llvm::Constant *getExceptionRethrowFn() { + llvm::FunctionCallee getExceptionRethrowFn() { // void objc_exception_rethrow(void) llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, false); return CGM.CreateRuntimeFunction(FTy, "objc_exception_rethrow"); } /// SyncEnterFn - LLVM object_sync_enter function. - llvm::Constant *getSyncEnterFn() { + llvm::FunctionCallee getSyncEnterFn() { // int objc_sync_enter (id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -451,7 +447,7 @@ public: } /// SyncExitFn - LLVM object_sync_exit function. - llvm::Constant *getSyncExitFn() { + llvm::FunctionCallee getSyncExitFn() { // int objc_sync_exit (id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -459,35 +455,35 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_sync_exit"); } - llvm::Constant *getSendFn(bool IsSuper) const { + llvm::FunctionCallee getSendFn(bool IsSuper) const { return IsSuper ? 
getMessageSendSuperFn() : getMessageSendFn(); } - llvm::Constant *getSendFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn2() : getMessageSendFn(); } - llvm::Constant *getSendStretFn(bool IsSuper) const { + llvm::FunctionCallee getSendStretFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperStretFn() : getMessageSendStretFn(); } - llvm::Constant *getSendStretFn2(bool IsSuper) const { + llvm::FunctionCallee getSendStretFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperStretFn2() : getMessageSendStretFn(); } - llvm::Constant *getSendFpretFn(bool IsSuper) const { + llvm::FunctionCallee getSendFpretFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperFpretFn() : getMessageSendFpretFn(); } - llvm::Constant *getSendFpretFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFpretFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperFpretFn2() : getMessageSendFpretFn(); } - llvm::Constant *getSendFp2retFn(bool IsSuper) const { + llvm::FunctionCallee getSendFp2retFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn() : getMessageSendFp2retFn(); } - llvm::Constant *getSendFp2RetFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFp2RetFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn2() : getMessageSendFp2retFn(); } @@ -553,7 +549,7 @@ public: llvm::StructType *ExceptionDataTy; /// ExceptionTryEnterFn - LLVM objc_exception_try_enter function. - llvm::Constant *getExceptionTryEnterFn() { + llvm::FunctionCallee getExceptionTryEnterFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), @@ -561,7 +557,7 @@ public: } /// ExceptionTryExitFn - LLVM objc_exception_try_exit function. 
- llvm::Constant *getExceptionTryExitFn() { + llvm::FunctionCallee getExceptionTryExitFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), @@ -569,7 +565,7 @@ public: } /// ExceptionExtractFn - LLVM objc_exception_extract function. - llvm::Constant *getExceptionExtractFn() { + llvm::FunctionCallee getExceptionExtractFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, false), @@ -577,7 +573,7 @@ public: } /// ExceptionMatchFn - LLVM objc_exception_match function. - llvm::Constant *getExceptionMatchFn() { + llvm::FunctionCallee getExceptionMatchFn() { llvm::Type *params[] = { ClassPtrTy, ObjectPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), @@ -585,7 +581,7 @@ public: } /// SetJmpFn - LLVM _setjmp function. - llvm::Constant *getSetJmpFn() { + llvm::FunctionCallee getSetJmpFn() { // This is specifically the prototype for x86. llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; return CGM.CreateRuntimeFunction( @@ -671,7 +667,7 @@ public: // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t* llvm::PointerType *SuperMessageRefPtrTy; - llvm::Constant *getMessageSendFixupFn() { + llvm::FunctionCallee getMessageSendFixupFn() { // id objc_msgSend_fixup(id, struct message_ref_t*, ...) llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -679,7 +675,7 @@ public: "objc_msgSend_fixup"); } - llvm::Constant *getMessageSendFpretFixupFn() { + llvm::FunctionCallee getMessageSendFpretFixupFn() { // id objc_msgSend_fpret_fixup(id, struct message_ref_t*, ...) 
llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -687,7 +683,7 @@ public: "objc_msgSend_fpret_fixup"); } - llvm::Constant *getMessageSendStretFixupFn() { + llvm::FunctionCallee getMessageSendStretFixupFn() { // id objc_msgSend_stret_fixup(id, struct message_ref_t*, ...) llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -695,7 +691,7 @@ public: "objc_msgSend_stret_fixup"); } - llvm::Constant *getMessageSendSuper2FixupFn() { + llvm::FunctionCallee getMessageSendSuper2FixupFn() { // id objc_msgSendSuper2_fixup (struct objc_super *, // struct _super_message_ref_t*, ...) llvm::Type *params[] = { SuperPtrTy, SuperMessageRefPtrTy }; @@ -704,7 +700,7 @@ public: "objc_msgSendSuper2_fixup"); } - llvm::Constant *getMessageSendSuper2StretFixupFn() { + llvm::FunctionCallee getMessageSendSuper2StretFixupFn() { // id objc_msgSendSuper2_stret_fixup(struct objc_super *, // struct _super_message_ref_t*, ...) 
llvm::Type *params[] = { SuperPtrTy, SuperMessageRefPtrTy }; @@ -713,13 +709,12 @@ public: "objc_msgSendSuper2_stret_fixup"); } - llvm::Constant *getObjCEndCatchFn() { + llvm::FunctionCallee getObjCEndCatchFn() { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.VoidTy, false), "objc_end_catch"); - } - llvm::Constant *getObjCBeginCatchFn() { + llvm::FunctionCallee getObjCBeginCatchFn() { llvm::Type *params[] = { Int8PtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(Int8PtrTy, params, false), @@ -1325,15 +1320,15 @@ public: llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) override; - llvm::Constant *GetPropertyGetFunction() override; - llvm::Constant *GetPropertySetFunction() override; - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override; - llvm::Constant *GetGetStructFunction() override; - llvm::Constant *GetSetStructFunction() override; - llvm::Constant *GetCppAtomicObjectGetFunction() override; - llvm::Constant *GetCppAtomicObjectSetFunction() override; - llvm::Constant *EnumerationMutationFunction() override; + llvm::FunctionCallee GetPropertyGetFunction() override; + llvm::FunctionCallee GetPropertySetFunction() override; + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::FunctionCallee GetGetStructFunction() override; + llvm::FunctionCallee GetSetStructFunction() override; + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override; + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override; + llvm::FunctionCallee EnumerationMutationFunction() override; void EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) override; @@ -1550,6 +1545,15 @@ private: return false; } + bool isClassLayoutKnownStatically(const ObjCInterfaceDecl *ID) { + // NSObject is a fixed size. If we can see the @implementation of a class + // which inherits from NSObject then we know that all it's offsets also must + // be fixed. 
FIXME: Can we do this if see a chain of super classes with + // implementations leading to NSObject? + return ID->getImplementation() && ID->getSuperClass() && + ID->getSuperClass()->getName() == "NSObject"; + } + public: CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm); @@ -1598,35 +1602,35 @@ public: llvm::Constant *GetEHType(QualType T) override; - llvm::Constant *GetPropertyGetFunction() override { + llvm::FunctionCallee GetPropertyGetFunction() override { return ObjCTypes.getGetPropertyFn(); } - llvm::Constant *GetPropertySetFunction() override { + llvm::FunctionCallee GetPropertySetFunction() override { return ObjCTypes.getSetPropertyFn(); } - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override { + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy); } - llvm::Constant *GetSetStructFunction() override { + llvm::FunctionCallee GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - llvm::Constant *GetGetStructFunction() override { + llvm::FunctionCallee GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - llvm::Constant *GetCppAtomicObjectSetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - llvm::Constant *GetCppAtomicObjectGetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - llvm::Constant *EnumerationMutationFunction() override { + llvm::FunctionCallee EnumerationMutationFunction() override { return ObjCTypes.getEnumerationMutationFn(); } @@ -2004,9 +2008,8 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, "objc_super"); llvm::Value *ReceiverAsObject = CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy); - CGF.Builder.CreateStore( - ReceiverAsObject, - CGF.Builder.CreateStructGEP(ObjCSuper, 
0, CharUnits::Zero())); + CGF.Builder.CreateStore(ReceiverAsObject, + CGF.Builder.CreateStructGEP(ObjCSuper, 0)); // If this is a class message the metaclass is passed as the target. llvm::Value *Target; @@ -2041,8 +2044,7 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Type *ClassTy = CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType()); Target = CGF.Builder.CreateBitCast(Target, ClassTy); - CGF.Builder.CreateStore(Target, - CGF.Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); return EmitMessageSend(CGF, Return, ResultType, EmitSelector(CGF, Sel), ObjCSuper.getPointer(), ObjCTypes.SuperPtrCTy, @@ -2129,7 +2131,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, bool RequiresNullCheck = false; - llvm::Constant *Fn = nullptr; + llvm::FunctionCallee Fn = nullptr; if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (ReceiverCanBeNull) RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) @@ -2149,6 +2151,10 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, : ObjCTypes.getSendFn(IsSuper); } + // Cast function to proper signature + llvm::Constant *BitcastFn = cast<llvm::Constant>( + CGF.Builder.CreateBitCast(Fn.getCallee(), MSI.MessengerType)); + // We don't need to emit a null check to zero out an indirect result if the // result is ignored. if (Return.isUnused()) @@ -2169,16 +2175,15 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, nullReturn.init(CGF, Arg0); } - llvm::Instruction *CallSite; - Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType); - CGCallee Callee = CGCallee::forDirect(Fn); + llvm::CallBase *CallSite; + CGCallee Callee = CGCallee::forDirect(BitcastFn); RValue rvalue = CGF.EmitCall(MSI.CallInfo, Callee, Return, ActualArgs, &CallSite); // Mark the call as noreturn if the method is marked noreturn and the // receiver cannot be null. 
if (Method && Method->hasAttr<NoReturnAttr>() && !ReceiverCanBeNull) { - llvm::CallSite(CallSite).setDoesNotReturn(); + CallSite->setDoesNotReturn(); } return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs, @@ -2954,7 +2959,7 @@ llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( CodeGenFunction &CGF, const ObjCInterfaceDecl *ID, ObjCCommonTypesHelper &ObjCTypes) { - llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn(); + llvm::FunctionCallee lookUpClassFn = ObjCTypes.getLookUpClassFn(); llvm::Value *className = CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) @@ -4011,36 +4016,36 @@ llvm::Function *CGObjCMac::ModuleInitFunction() { return nullptr; } -llvm::Constant *CGObjCMac::GetPropertyGetFunction() { +llvm::FunctionCallee CGObjCMac::GetPropertyGetFunction() { return ObjCTypes.getGetPropertyFn(); } -llvm::Constant *CGObjCMac::GetPropertySetFunction() { +llvm::FunctionCallee CGObjCMac::GetPropertySetFunction() { return ObjCTypes.getSetPropertyFn(); } -llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, - bool copy) { +llvm::FunctionCallee CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, + bool copy) { return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy); } -llvm::Constant *CGObjCMac::GetGetStructFunction() { +llvm::FunctionCallee CGObjCMac::GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } -llvm::Constant *CGObjCMac::GetSetStructFunction() { +llvm::FunctionCallee CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } -llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { +llvm::FunctionCallee CGObjCMac::GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } -llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { +llvm::FunctionCallee CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } -llvm::Constant *CGObjCMac::EnumerationMutationFunction() { +llvm::FunctionCallee 
CGObjCMac::EnumerationMutationFunction() { return ObjCTypes.getEnumerationMutationFn(); } @@ -4216,14 +4221,15 @@ void FragileHazards::emitHazardsInNewBlocks() { // Ignore instructions that aren't non-intrinsic calls. // These are the only calls that can possibly call longjmp. - if (!isa<llvm::CallInst>(I) && !isa<llvm::InvokeInst>(I)) continue; + if (!isa<llvm::CallInst>(I) && !isa<llvm::InvokeInst>(I)) + continue; if (isa<llvm::IntrinsicInst>(I)) continue; // Ignore call sites marked nounwind. This may be questionable, // since 'nounwind' doesn't necessarily mean 'does not call longjmp'. - llvm::CallSite CS(&I); - if (CS.doesNotThrow()) continue; + if (cast<llvm::CallBase>(I).doesNotThrow()) + continue; // Insert a read hazard before the call. This will ensure that // any writes to the locals are performed before making the @@ -6253,9 +6259,11 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI, return GV; } -bool -CGObjCNonFragileABIMac::ImplementationIsNonLazy(const ObjCImplDecl *OD) const { - return OD->getClassMethod(GetNullarySelector("load")) != nullptr; +bool CGObjCNonFragileABIMac::ImplementationIsNonLazy( + const ObjCImplDecl *OD) const { + return OD->getClassMethod(GetNullarySelector("load")) != nullptr || + OD->getClassInterface()->hasAttr<ObjCNonLazyClassAttr>() || + OD->hasAttr<ObjCNonLazyClassAttr>(); } void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, @@ -6702,6 +6710,12 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); } + // If ID's layout is known, then make the global constant. This serves as a + // useful assertion: we'll never use this variable to calculate ivar offsets, + // so if the runtime tries to patch it then we should crash. 
+ if (isClassLayoutKnownStatically(ID)) + IvarOffsetGV->setConstant(true); + if (CGM.getTriple().isOSBinFormatMachO()) IvarOffsetGV->setSection("__DATA, __objc_ivar"); return IvarOffsetGV; @@ -6796,7 +6810,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. llvm::SmallString<64> Protocol; - llvm::raw_svector_ostream(Protocol) << "\01l_OBJC_PROTOCOL_$_" + llvm::raw_svector_ostream(Protocol) << "_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, @@ -6888,7 +6902,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( } else { llvm::SmallString<64> symbolName; llvm::raw_svector_ostream(symbolName) - << "\01l_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); + << "_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); Entry = values.finishAndCreateGlobal(symbolName, CGM.getPointerAlign(), /*constant*/ false, @@ -6904,7 +6918,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( // Use this protocol meta-data to build protocol list table in section // __DATA, __objc_protolist llvm::SmallString<64> ProtocolRef; - llvm::raw_svector_ostream(ProtocolRef) << "\01l_OBJC_LABEL_PROTOCOL_$_" + llvm::raw_svector_ostream(ProtocolRef) << "_OBJC_LABEL_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); llvm::GlobalVariable *PTGV = @@ -6990,17 +7004,24 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( Offset); } -llvm::Value *CGObjCNonFragileABIMac::EmitIvarOffset( - CodeGen::CodeGenFunction &CGF, - const ObjCInterfaceDecl *Interface, - const ObjCIvarDecl *Ivar) { - llvm::Value *IvarOffsetValue = ObjCIvarOffsetVariable(Interface, Ivar); - IvarOffsetValue = CGF.Builder.CreateAlignedLoad(IvarOffsetValue, - CGF.getSizeAlign(), "ivar"); - if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) - cast<llvm::LoadInst>(IvarOffsetValue) - 
->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); +llvm::Value * +CGObjCNonFragileABIMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) { + llvm::Value *IvarOffsetValue; + if (isClassLayoutKnownStatically(Interface)) { + IvarOffsetValue = llvm::ConstantInt::get( + ObjCTypes.IvarOffsetVarTy, + ComputeIvarBaseOffset(CGM, Interface->getImplementation(), Ivar)); + } else { + llvm::GlobalVariable *GV = ObjCIvarOffsetVariable(Interface, Ivar); + IvarOffsetValue = + CGF.Builder.CreateAlignedLoad(GV, CGF.getSizeAlign(), "ivar"); + if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) + cast<llvm::LoadInst>(IvarOffsetValue) + ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(VMContext, None)); + } // This could be 32bit int or 64bit integer depending on the architecture. // Cast it to 64bit integer value, if it is a 32bit integer ivar offset value @@ -7069,7 +7090,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // The runtime currently never uses vtable dispatch for anything // except normal, non-super message-sends. // FIXME: don't use this for that. - llvm::Constant *fn = nullptr; + llvm::FunctionCallee fn = nullptr; std::string messageRefName("\01l_"); if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (isSuper) { @@ -7105,7 +7126,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // Build the message ref structure. ConstantInitBuilder builder(CGM); auto values = builder.beginStruct(); - values.add(fn); + values.add(cast<llvm::Constant>(fn.getCallee())); values.add(GetMethodVarName(selector)); messageRef = values.finishAndCreateGlobal(messageRefName, CharUnits::fromQuantity(16), @@ -7134,8 +7155,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, args[1].setRValue(RValue::get(mref.getPointer())); // Load the function to call from the message ref table. 
- Address calleeAddr = - CGF.Builder.CreateStructGEP(mref, 0, CharUnits::Zero()); + Address calleeAddr = CGF.Builder.CreateStructGEP(mref, 0); llvm::Value *calleePtr = CGF.Builder.CreateLoad(calleeAddr, "msgSend_fn"); calleePtr = CGF.Builder.CreateBitCast(calleePtr, MSI.MessengerType); @@ -7332,9 +7352,8 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Value *ReceiverAsObject = CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy); - CGF.Builder.CreateStore( - ReceiverAsObject, - CGF.Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero())); + CGF.Builder.CreateStore(ReceiverAsObject, + CGF.Builder.CreateStructGEP(ObjCSuper, 0)); // If this is a class message the metaclass is passed as the target. llvm::Value *Target; @@ -7348,8 +7367,7 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Type *ClassTy = CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType()); Target = CGF.Builder.CreateBitCast(Target, ClassTy); - CGF.Builder.CreateStore( - Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); return (isVTableDispatchedSelector(Sel)) ? 
EmitVTableMessageSend(CGF, Return, ResultType, Sel, @@ -7509,9 +7527,8 @@ void CGObjCNonFragileABIMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, void CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S) { - EmitAtSynchronizedStmt(CGF, S, - cast<llvm::Function>(ObjCTypes.getSyncEnterFn()), - cast<llvm::Function>(ObjCTypes.getSyncExitFn())); + EmitAtSynchronizedStmt(CGF, S, ObjCTypes.getSyncEnterFn(), + ObjCTypes.getSyncExitFn()); } llvm::Constant * @@ -7542,10 +7559,9 @@ CGObjCNonFragileABIMac::GetEHType(QualType T) { void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) { - EmitTryCatchStmt(CGF, S, - cast<llvm::Function>(ObjCTypes.getObjCBeginCatchFn()), - cast<llvm::Function>(ObjCTypes.getObjCEndCatchFn()), - cast<llvm::Function>(ObjCTypes.getExceptionRethrowFn())); + EmitTryCatchStmt(CGF, S, ObjCTypes.getObjCBeginCatchFn(), + ObjCTypes.getObjCEndCatchFn(), + ObjCTypes.getExceptionRethrowFn()); } /// EmitThrowStmt - Generate code for a throw statement. 
@@ -7555,11 +7571,13 @@ void CGObjCNonFragileABIMac::EmitThrowStmt(CodeGen::CodeGenFunction &CGF, if (const Expr *ThrowExpr = S.getThrowExpr()) { llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr); Exception = CGF.Builder.CreateBitCast(Exception, ObjCTypes.ObjectPtrTy); - CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception) - .setDoesNotReturn(); + llvm::CallBase *Call = + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception); + Call->setDoesNotReturn(); } else { - CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionRethrowFn()) - .setDoesNotReturn(); + llvm::CallBase *Call = + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionRethrowFn()); + Call->setDoesNotReturn(); } CGF.Builder.CreateUnreachable(); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 4b6f24a03f..f8b831d0e9 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -1,9 +1,8 @@ //==- CGObjCRuntime.cpp - Interface to Shared Objective-C Runtime Features ==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -127,10 +125,10 @@ namespace { }; struct CallObjCEndCatch final : EHScopeStack::Cleanup { - CallObjCEndCatch(bool MightThrow, llvm::Value *Fn) + CallObjCEndCatch(bool MightThrow, llvm::FunctionCallee Fn) : MightThrow(MightThrow), Fn(Fn) {} bool MightThrow; - llvm::Value *Fn; + llvm::FunctionCallee Fn; void Emit(CodeGenFunction &CGF, Flags flags) override { if (MightThrow) @@ -141,12 +139,11 @@ namespace { }; } - void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *exceptionRethrowFn) { + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee exceptionRethrowFn) { // Jump destination for falling out of catch bodies. 
CodeGenFunction::JumpDest Cont; if (S.getNumCatchStmts()) @@ -313,10 +310,10 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF, namespace { struct CallSyncExit final : EHScopeStack::Cleanup { - llvm::Value *SyncExitFn; + llvm::FunctionCallee SyncExitFn; llvm::Value *SyncArg; - CallSyncExit(llvm::Value *SyncExitFn, llvm::Value *SyncArg) - : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} + CallSyncExit(llvm::FunctionCallee SyncExitFn, llvm::Value *SyncArg) + : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitNounwindRuntimeCall(SyncExitFn, SyncArg); @@ -326,8 +323,8 @@ namespace { void CGObjCRuntime::EmitAtSynchronizedStmt(CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S, - llvm::Function *syncEnterFn, - llvm::Function *syncExitFn) { + llvm::FunctionCallee syncEnterFn, + llvm::FunctionCallee syncExitFn) { CodeGenFunction::RunCleanupsScope cleanups(CGF); // Evaluate the lock operand. This is guaranteed to dominate the diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index fa16c198ad..471816cb59 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -1,9 +1,8 @@ //===----- CGObjCRuntime.h - Interface to ObjC Runtimes ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -96,11 +95,10 @@ protected: /// used to rethrow exceptions. If the begin and end catch functions are /// NULL, then the function assumes that the EH personality function provides /// the thrown object directly. 
- void EmitTryCatchStmt(CodeGenFunction &CGF, - const ObjCAtTryStmt &S, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *exceptionRethrowFn); + void EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee exceptionRethrowFn); void EmitInitOfCatchParam(CodeGenFunction &CGF, llvm::Value *exn, const VarDecl *paramDecl); @@ -110,9 +108,9 @@ protected: /// the object. This function can be called by subclasses that use /// zero-cost exception handling. void EmitAtSynchronizedStmt(CodeGenFunction &CGF, - const ObjCAtSynchronizedStmt &S, - llvm::Function *syncEnterFn, - llvm::Function *syncExitFn); + const ObjCAtSynchronizedStmt &S, + llvm::FunctionCallee syncEnterFn, + llvm::FunctionCallee syncExitFn); public: virtual ~CGObjCRuntime(); @@ -208,25 +206,25 @@ public: const ObjCContainerDecl *CD) = 0; /// Return the runtime function for getting properties. - virtual llvm::Constant *GetPropertyGetFunction() = 0; + virtual llvm::FunctionCallee GetPropertyGetFunction() = 0; /// Return the runtime function for setting properties. - virtual llvm::Constant *GetPropertySetFunction() = 0; + virtual llvm::FunctionCallee GetPropertySetFunction() = 0; /// Return the runtime function for optimized setting properties. - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) = 0; + virtual llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) = 0; // API for atomic copying of qualified aggregates in getter. - virtual llvm::Constant *GetGetStructFunction() = 0; + virtual llvm::FunctionCallee GetGetStructFunction() = 0; // API for atomic copying of qualified aggregates in setter. - virtual llvm::Constant *GetSetStructFunction() = 0; + virtual llvm::FunctionCallee GetSetStructFunction() = 0; /// API for atomic copying of qualified aggregates with non-trivial copy /// assignment (c++) in setter. 
- virtual llvm::Constant *GetCppAtomicObjectSetFunction() = 0; + virtual llvm::FunctionCallee GetCppAtomicObjectSetFunction() = 0; /// API for atomic copying of qualified aggregates with non-trivial copy /// assignment (c++) in getter. - virtual llvm::Constant *GetCppAtomicObjectGetFunction() = 0; + virtual llvm::FunctionCallee GetCppAtomicObjectGetFunction() = 0; /// GetClass - Return a reference to the class for the given /// interface decl. @@ -240,7 +238,7 @@ public: /// EnumerationMutationFunction - Return the function that's called by the /// compiler when a mutation is detected during foreach iteration. - virtual llvm::Constant *EnumerationMutationFunction() = 0; + virtual llvm::FunctionCallee EnumerationMutationFunction() = 0; virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S) = 0; diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 7f6f595dd5..191a95c629 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -1,9 +1,8 @@ //===----- CGOpenCLRuntime.cpp - Interface to OpenCL Runtimes -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -123,6 +122,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } +// Get the block literal from an expression derived from the block expression. +// OpenCL v2.0 s6.12.5: +// Block variable declarations are implicitly qualified with const. 
Therefore +// all block variables must be initialized at declaration time and may not be +// reassigned. +static const BlockExpr *getBlockExpr(const Expr *E) { + const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop. + while(!isa<BlockExpr>(E) && E != Prev) { + Prev = E; + E = E->IgnoreCasts(); + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + } + return cast<BlockExpr>(E); +} + /// Record emitted llvm invoke function and llvm block literal for the /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, @@ -137,20 +153,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, EnqueuedBlockMap[E].Kernel = nullptr; } +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; +} + CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF.EmitScalarExpr(E); // The block literal may be assigned to a const variable. Chasing down // to get the block literal. - if (auto DR = dyn_cast<DeclRefExpr>(E)) { - E = cast<VarDecl>(DR->getDecl())->getInit(); - } - E = E->IgnoreImplicit(); - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - auto *Block = cast<BlockExpr>(E); + const BlockExpr *Block = getBlockExpr(E); assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && "Block expression not emitted"); diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index 750721f1b8..3f7aa9b0d8 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -1,9 +1,8 @@ //===----- CGOpenCLRuntime.h - Interface to OpenCL Runtimes -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -92,6 +91,10 @@ public: /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); + + /// \return LLVM block invoke function emitted for an expression derived from + /// the block expression. + llvm::Function *getInvokeFunction(const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 20eb0b29f4..c3f60d7f60 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/Basic/BitmaskEnum.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -432,7 +430,7 @@ public: /// Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code -/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h enum OpenMPLocationFlags : unsigned { /// Use trampoline for internal microtask. 
OMP_IDENT_IMD = 0x01, @@ -461,7 +459,7 @@ enum OpenMPLocationFlags : unsigned { /// Describes ident structure that describes a source location. /// All descriptions are taken from -/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h /// Original structure: /// typedef struct ident { /// kmp_int32 reserved_1; /**< might be used in Fortran; @@ -669,6 +667,10 @@ enum OpenMPRTLFunction { // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); OMPRTL__kmpc_task_reduction_get_th_data, + // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); + OMPRTL__kmpc_alloc, + // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); + OMPRTL__kmpc_free, // // Offloading related calls @@ -1340,7 +1342,7 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } -static llvm::Value *emitParallelOrTeamsOutlinedFunction( +static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { @@ -1370,7 +1372,7 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } -llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( +llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); @@ -1378,7 +1380,7 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); } -llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( +llvm::Function 
*CGOpenMPRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); @@ -1386,7 +1388,7 @@ llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); } -llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( +llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1417,7 +1419,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( InnermostKind, TD ? TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); + llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) NumberOfParts = Action.getNumberOfParts(); return Res; @@ -1478,7 +1480,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { // Initialize default location for psource field of ident_t structure of // all ident_t objects. Format is ";file;function;line;column;;". 
// Taken from - // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c + // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = @@ -1665,9 +1667,8 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } -llvm::Constant * -CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { - llvm::Constant *RTLFn = nullptr; +llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { + llvm::FunctionCallee RTLFn = nullptr; switch (static_cast<OpenMPRTLFunction>(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro @@ -1677,6 +1678,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. 
+ F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } break; } case OMPRTL__kmpc_global_thread_num: { @@ -2084,6 +2101,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_teams: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_teams are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } break; } case OMPRTL__kmpc_taskloop: { @@ -2166,6 +2199,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_alloc: { + // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t + // al); omp_allocator_handle_t type is void *. + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); + break; + } + case OMPRTL__kmpc_free: { + // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t + // al); omp_allocator_handle_t type is void *. 
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); + break; + } case OMPRTL__kmpc_push_target_tripcount: { // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 // size); @@ -2355,8 +2406,8 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { return RTLFn; } -llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" @@ -2381,8 +2432,8 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2403,8 +2454,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2420,8 +2471,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee 
+CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2836,7 +2887,7 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, } void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) @@ -2854,7 +2905,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); + llvm::FunctionCallee RTLFn = + RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, @@ -2915,9 +2967,8 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, return ThreadIDTemp; } -llvm::Constant * -CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, - const llvm::Twine &Name) { +llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( + llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; @@ -2932,7 +2983,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, return Elem.second = new llvm::GlobalVariable( CGM.getModule(), Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), - Elem.first()); + Elem.first(), /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, AddressSpace); } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { @@ -2944,17 +2996,18 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { namespace { /// Common 
pre(post)-action for different OpenMP constructs. class CommonActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee; + llvm::FunctionCallee EnterCallee; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee; + llvm::FunctionCallee ExitCallee; ArrayRef<llvm::Value *> ExitArgs; bool Conditional; llvm::BasicBlock *ContBlock = nullptr; public: - CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, - llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, - bool Conditional = false) + CommonActionTy(llvm::FunctionCallee EnterCallee, + ArrayRef<llvm::Value *> EnterArgs, + llvm::FunctionCallee ExitCallee, + ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), ExitArgs(ExitArgs), Conditional(Conditional) {} void Enter(CodeGenFunction &CGF) override { @@ -3059,8 +3112,7 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) { // Pull out the pointer to the variable. 
- Address PtrAddr = - CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); + Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); @@ -3176,8 +3228,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { - Address Elem = CGF.Builder.CreateConstArrayGEP( - CopyprivateList, I, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), @@ -3241,6 +3292,24 @@ unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { return Flags; } +void CGOpenMPRuntime::getDefaultScheduleAndChunk( + CodeGenFunction &CGF, const OMPLoopDirective &S, + OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { + // Check if the loop directive is actually a doacross loop directive. In this + // case choose static, 1 schedule. + if (llvm::any_of( + S.getClausesOfKind<OMPOrderedClause>(), + [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { + ScheduleKind = OMPC_SCHEDULE_static; + // Chunk size is 1 in this case. 
+ llvm::APInt ChunkSize(32, 1); + ChunkExpr = IntegerLiteral::Create( + CGF.getContext(), ChunkSize, + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + } +} + void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { @@ -3412,7 +3481,7 @@ void CGOpenMPRuntime::emitForDispatchInit( static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, - llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, + llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values) { if (!CGF.HaveInsertPoint()) @@ -3473,7 +3542,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, ? OMP_IDENT_WORK_LOOP : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Constant *StaticInitFunction = + llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -3488,7 +3557,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit( llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Constant *StaticInitFunction = + llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, @@ -3731,14 +3800,29 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: "Entry not initialized!"); assert((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address."); - if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) + if 
(Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { + if (Entry.getVarSize().isZero()) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } return; - Entry.setAddress(Addr); + } Entry.setVarSize(VarSize); Entry.setLinkage(Linkage); + Entry.setAddress(Addr); } else { - if (hasDeviceGlobalVarEntryInfo(VarName)) + if (hasDeviceGlobalVarEntryInfo(VarName)) { + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + assert(Entry.isValid() && Entry.getFlags() == Flags && + "Entry not initialized!"); + assert((!Entry.getAddress() || Entry.getAddress() == Addr) && + "Resetting with the new address."); + if (Entry.getVarSize().isZero()) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } return; + } OffloadEntriesDeviceGlobalVar.try_emplace( VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); ++OffloadingEntriesNum; @@ -4364,12 +4448,12 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// return 0; /// } /// \endcode -static llvm::Value * +static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, - QualType SharedsPtrTy, llvm::Value *TaskFunction, + QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap) { ASTContext &C = CGM.getContext(); FunctionArgList Args; @@ -4614,11 +4698,6 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, return TaskPrivatesMap; } -static bool stable_sort_comparator(const PrivateDataTy P1, - const PrivateDataTy P2) { - return P1.first > P2.first; -} - /// Emit initialization for private variables in task-based directives. 
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, @@ -4661,7 +4740,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Check if the variable is the target-based BasePointersArray, // PointersArray or SizesArray. LValue SharedRefLValue; - QualType Type = OriginalVD->getType(); + QualType Type = PrivateLValue.getType(); const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); if (IsTargetTask && !SharedField) { assert(isa<ImplicitParamDecl>(OriginalVD) && @@ -4837,7 +4916,7 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) { ASTContext &C = CGM.getContext(); llvm::SmallVector<PrivateDataTy, 4> Privates; @@ -4872,7 +4951,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr)); ++I; } - std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); + llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { + return L.first > R.first; + }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); @@ -4911,7 +4992,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Emit initial values for private copies (if any). 
llvm::Value *TaskPrivatesMap = nullptr; llvm::Type *TaskPrivatesMapTy = - std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); + std::next(TaskFunction->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( @@ -4925,7 +5006,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); - llvm::Value *TaskEntry = emitProxyTaskFunction( + llvm::Function *TaskEntry = emitProxyTaskFunction( CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); @@ -4934,7 +5015,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); // Task flags. Format is taken from - // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, + // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, // description of kmp_tasking_flags struct. 
enum { TiedFlag = 0x1, @@ -5037,7 +5118,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -5047,7 +5128,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); llvm::Value *NewTask = Result.NewTask; - llvm::Value *TaskEntry = Result.TaskEntry; + llvm::Function *TaskEntry = Result.TaskEntry; llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; @@ -5057,7 +5138,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, unsigned NumDependencies = Data.Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. 
- enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; + enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; QualType FlagsTy = @@ -5074,7 +5155,6 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, } else { KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); } - CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), @@ -5101,7 +5181,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, Size = CGF.getTypeSize(Ty); } LValue Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize), + CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), KmpDependInfoTy); // deps[i].base_addr = &<Dependences[i].second>; LValue BaseAddrLVal = CGF.EmitLValueForField( @@ -5124,6 +5204,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, case OMPC_DEPEND_inout: DepKind = DepInOut; break; + case OMPC_DEPEND_mutexinoutset: + DepKind = DepMutexInOutSet; + break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_unknown: @@ -5135,8 +5218,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, FlagsLVal); } DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), - CGF.VoidPtrTy); + CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); } // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() @@ -5231,7 +5313,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective 
&D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -5411,10 +5493,10 @@ static void emitReductionCombiner(CodeGenFunction &CGF, CGF.EmitIgnoredExpr(ReductionOp); } -llvm::Value *CGOpenMPRuntime::emitReductionFunction( - CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { +llvm::Function *CGOpenMPRuntime::emitReductionFunction( + SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); @@ -5466,8 +5548,7 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. ++Idx; - Address Elem = - CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); const VariableArrayType *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); @@ -5605,8 +5686,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = - CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), @@ -5614,8 +5694,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) @@ -5627,9 +5706,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } // 2. Emit reduce_func(). - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Function *ReductionFn = emitReductionFunction( + Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); // 3. Create static kmp_critical_name lock = { 0 }; std::string Name = getName({"reduction"}); @@ -6393,12 +6472,59 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); } -/// discard all CompoundStmts intervening between two constructs -static const Stmt *ignoreCompoundStmts(const Stmt *Body) { - while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) - Body = CS->body_front(); +/// Checks if the expression is constant or does not have non-trivial function +/// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. 
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} - return Body; +const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, + const Stmt *Body) { + const Stmt *Child = Body->IgnoreContainers(); + while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { + Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast<Expr>(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || + isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) + continue; + // Analyze declarations. + if (const auto *DS = dyn_cast<DeclStmt>(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || + isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || + isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || + isa<UsingDirectiveDecl>(D) || + isa<OMPDeclareReductionDecl>(D) || + isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) + return true; + const auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. + if (Child) + return nullptr; + Child = S; + } + if (Child) + Child = Child->IgnoreContainers(); + } + return Child; } /// Emit the number of teams for a target directive. Inspect the num_teams @@ -6410,63 +6536,208 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { /// /// Otherwise, return nullptr. 
static llvm::Value * -emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // If the target directive is combined with a teams directive: - // Return the value in the num_teams clause, if any. - // Otherwise, return 0 to denote the runtime default. - if (isOpenMPTeamsDirective(D.getDirectiveKind())) { - if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { + switch (DirectiveKind) { + case OMPD_target: { + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = + CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { + if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { + if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const Expr *NumTeams = + NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*IsSigned=*/true); + } + return Bld.getInt32(0); + } + if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || + 
isOpenMPSimdDirective(NestedDir->getDirectiveKind())) + return Bld.getInt32(1); + return Bld.getInt32(0); + } + return nullptr; + } + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + if (D.hasClausesOfKind<OMPNumTeamsClause>()) { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + const Expr *NumTeams = + D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*IsSigned=*/true); } - - // The default value is 0. return Bld.getInt32(0); } - - // If the target directive is combined with a parallel directive but not a - // teams directive, start one team. - if (isOpenMPParallelDirective(D.getDirectiveKind())) + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: return Bld.getInt32(1); - - // If the current target region has a teams region enclosed, we need to get - // the number of teams to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. 
- - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; + } + llvm_unreachable("Unexpected directive kind."); +} + +static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, + llvm::Value *DefaultThreadLimitVal) { + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + 
llvm::Value *NumThreads = nullptr; + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. + if (Dir->hasClausesOfKind<OMPIfClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + const OMPIfClause *IfClause = nullptr; + for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return CGF.Builder.getInt32(1); + } else { + CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } } - - // If we have an enclosed teams directive but no num_teams clause we use - // the default value 0. - return Bld.getInt32(0); + // Check the value of num_threads clause iff if clause was not specified + // or is not evaluated to false. 
+ if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *NumThreadsClause = + Dir->getSingleClause<OMPNumThreadsClause>(); + CodeGenFunction::LexicalScope Scope( + CGF, NumThreadsClause->getNumThreads()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); + NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, + /*IsSigned=*/false); + if (DefaultThreadLimitVal) + NumThreads = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), + DefaultThreadLimitVal, NumThreads); + } else { + NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); + } + // Process condition of the if clause. + if (CondVal) { + NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, + CGF.Builder.getInt32(1)); + } + return NumThreads; } + if (isOpenMPSimdDirective(Dir->getDirectiveKind())) + return CGF.Builder.getInt32(1); + return DefaultThreadLimitVal; } - - // No teams associated with the directive. - return nullptr; + return DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); } /// Emit the number of threads for a target directive. Inspect the @@ -6478,98 +6749,208 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, /// /// Otherwise, return nullptr. 
static llvm::Value * -emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // - // If the target directive is combined with a teams directive: - // Return the value in the thread_limit clause, if any. - // - // If the target directive is combined with a parallel directive: - // Return the value in the num_threads clause, if any. - // - // If both clauses are set, select the minimum of the two. - // - // If neither teams or parallel combined directives set the number of threads - // in a team, return 0 to denote the runtime default. - // - // If this is not a teams directive return nullptr. 
- - if (isOpenMPTeamsDirective(D.getDirectiveKind()) || - isOpenMPParallelDirective(D.getDirectiveKind())) { - llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); - llvm::Value *NumThreadsVal = nullptr; - llvm::Value *ThreadLimitVal = nullptr; - - if (const auto *ThreadLimitClause = - D.getSingleClause<OMPThreadLimitClause>()) { + llvm::Value *ThreadLimitVal = nullptr; + llvm::Value *NumThreadsVal = nullptr; + switch (DirectiveKind) { + case OMPD_target: { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *ThreadLimitClause = + Dir->getSingleClause<OMPThreadLimitClause>(); + CodeGenFunction::LexicalScope Scope( + CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + } + if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && + !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + Dir = 
dyn_cast_or_null<OMPExecutableDirective>(Child); + } + if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && + !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) + return Bld.getInt32(1); + } + return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams: { + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); - llvm::Value *ThreadLimit = - CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), - /*IgnoreResultAssign*/ true); - ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); } - - if (const auto *NumThreadsClause = - D.getSingleClause<OMPNumThreadsClause>()) { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (Dir->getDirectiveKind() == OMPD_distribute) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + } + return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams_distribute: + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + } + return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. + if (D.hasClausesOfKind<OMPIfClause>()) { + const OMPIfClause *IfClause = nullptr; + for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return Bld.getInt32(1); + } else { + CodeGenFunction::RunCleanupsScope Scope(CGF); + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } + } + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + } + if (D.hasClausesOfKind<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - 
llvm::Value *NumThreads = - CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), - /*IgnoreResultAssign*/ true); + const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); + llvm::Value *NumThreads = CGF.EmitScalarExpr( + NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); - } - - // Select the lesser of thread_limit and num_threads. - if (NumThreadsVal) + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false); ThreadLimitVal = ThreadLimitVal - ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal), NumThreadsVal, ThreadLimitVal) : NumThreadsVal; - - // Set default value passed to the runtime if either teams or a target - // parallel type directive is found but no clause is specified. + } if (!ThreadLimitVal) - ThreadLimitVal = DefaultThreadLimitVal; - + ThreadLimitVal = Bld.getInt32(0); + if (CondVal) + return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); return ThreadLimitVal; } - - // If the current target region has a teams region enclosed, we need to get - // the thread limit to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. 
- - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); - return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); - } - - // If we have an enclosed teams directive but no thread_limit clause we - // use the default value 0. - return CGF.Builder.getInt32(0); - } + case OMPD_target_teams_distribute_simd: + case OMPD_target_simd: + return Bld.getInt32(1); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: 
+ case OMPD_unknown: + break; } - - // No teams associated with the directive. - return nullptr; + llvm_unreachable("Unsupported directive kind."); } namespace { @@ -7135,7 +7516,7 @@ private: Address HB = CGF.Builder.CreateConstGEP( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, CGF.VoidPtrTy), - TypeSize.getQuantity() - 1, CharUnits::One()); + TypeSize.getQuantity() - 1); PartialStruct.HighestElem = { std::numeric_limits<decltype( PartialStruct.HighestElem.first)>::max(), @@ -7169,15 +7550,13 @@ private: Pointers.push_back(LB.getPointer()); Sizes.push_back(Size); Types.push_back(Flags); - LB = CGF.Builder.CreateConstGEP(ComponentLB, 1, - CGF.getPointerSize()); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr( - CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One()) - .getPointer()), + CGF.Builder.CreateConstGEP(HB, 1).getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); Sizes.push_back(Size); Types.push_back(Flags); @@ -7260,9 +7639,17 @@ private: // A first private variable captured by reference will use only the // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. 
- if (FirstPrivateDecls.count(Cap.getCapturedVar())) + if (FirstPrivateDecls.count(Cap.getCapturedVar())) { + if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && + Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) + return MappableExprsHandler::OMP_MAP_ALWAYS | + MappableExprsHandler::OMP_MAP_TO; + if (Cap.getCapturedVar()->getType()->isAnyPointerType()) + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; + } return MappableExprsHandler::OMP_MAP_TO | MappableExprsHandler::OMP_MAP_FROM; } @@ -7889,9 +8276,6 @@ public: } } else { assert(CI.capturesVariable() && "Expected captured reference."); - CurBasePointers.push_back(CV); - CurPointers.push_back(CV); - const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); CurSizes.push_back(CGF.getTypeSize(ElementType)); @@ -7899,6 +8283,30 @@ public: // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); + const VarDecl *VD = CI.getCapturedVar(); + if (FirstPrivateDecls.count(VD) && + VD->getType().isConstant(CGF.getContext())) { + llvm::Constant *Addr = + CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); + // Copy the value of the original variable to the new global copy. + CGF.Builder.CreateMemCpy( + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), + CurSizes.back(), /*isVolatile=*/false); + // Use new global variable as the base pointers. 
+ CurBasePointers.push_back(Addr); + CurPointers.push_back(Addr); + } else { + CurBasePointers.push_back(CV); + if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) { + Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( + CV, ElementType, CGF.getContext().getDeclAlign(VD), + AlignmentSource::Decl)); + CurPointers.push_back(PtrAddr.getPointer()); + } else { + CurPointers.push_back(CV); + } + } } // Every default map produces a single argument which is a target parameter. CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; @@ -8065,70 +8473,17 @@ static void emitOffloadingArraysArgument( } } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. - // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. -static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast<Expr>(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || - isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) - continue; - // Analyze declarations. 
- if (const auto *DS = dyn_cast<DeclStmt>(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || - isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || - isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || - isa<UsingDirectiveDecl>(D) || - isa<OMPDeclareReductionDecl>(D) || - isa<OMPThreadPrivateDecl>(D)) - return true; - const auto *VD = dyn_cast<VarDecl>(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - /// Check for inner distribute directive. static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = + CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -8139,8 +8494,9 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /*IgnoreCaptured=*/true); if (!Body) return nullptr; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if 
(isOpenMPDistributeDirective(DKind)) return NND; @@ -8170,6 +8526,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -8200,6 +8557,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -8244,7 +8602,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) @@ -8295,8 +8653,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. 
llvm::Value *Return; - llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); - llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); + llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime @@ -8592,6 +8950,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -8622,6 +8981,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -8698,6 +9058,40 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { return false; } +llvm::Constant * +CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, + const VarDecl *VD) { + assert(VD->getType().isConstant(CGM.getContext()) && + "Expected constant variable."); + StringRef VarName; + llvm::Constant *Addr; + llvm::GlobalValue::LinkageTypes Linkage; + QualType Ty = VD->getType(); + SmallString<128> Buffer; + { + unsigned DeviceID; + unsigned FileID; + unsigned Line; + getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, + FileID, Line); + llvm::raw_svector_ostream OS(Buffer); + OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; + VarName = OS.str(); + } + Linkage = llvm::GlobalValue::InternalLinkage; + Addr = + getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, + getDefaultFirstprivateAddressSpace()); + cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); + CharUnits VarSize = 
CGM.getContext().getTypeSizeInChars(Ty); + CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); + return Addr; +} + void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = @@ -8788,6 +9182,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( " Expected target-based directive."); } +bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPConstMemAlloc: + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { @@ -8836,7 +9254,7 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { if (!CGF.HaveInsertPoint()) return; @@ -8853,7 +9271,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, RealArgs.append(std::begin(Args), 
std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); CGF.EmitRuntimeCall(RTLFn, RealArgs); } @@ -9075,6 +9493,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -9102,6 +9521,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_target: @@ -9299,6 +9719,307 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, } } +// This are the Functions that are needed to mangle the name of the +// vector functions generated by the compiler, according to the rules +// defined in the "Vector Function ABI specifications for AArch64", +// available at +// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. + +/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. +/// +/// TODO: Need to implement the behavior for reference marked with a +/// var or no linear modifiers (1.b in the section). For this, we +/// need to extend ParamKindTy to support the linear modifiers. +static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { + QT = QT.getCanonicalType(); + + if (QT->isVoidType()) + return false; + + if (Kind == ParamKindTy::Uniform) + return false; + + if (Kind == ParamKindTy::Linear) + return false; + + // TODO: Handle linear references with modifiers + + if (Kind == ParamKindTy::LinearWithVarStride) + return false; + + return true; +} + +/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 
+static bool getAArch64PBV(QualType QT, ASTContext &C) { + QT = QT.getCanonicalType(); + unsigned Size = C.getTypeSize(QT); + + // Only scalars and complex within 16 bytes wide set PVB to true. + if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) + return false; + + if (QT->isFloatingType()) + return true; + + if (QT->isIntegerType()) + return true; + + if (QT->isPointerType()) + return true; + + // TODO: Add support for complex types (section 3.1.2, item 2). + + return false; +} + +/// Computes the lane size (LS) of a return type or of an input parameter, +/// as defined by `LS(P)` in 3.2.1 of the AAVFABI. +/// TODO: Add support for references, section 3.2.1, item 1. +static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { + if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { + QualType PTy = QT.getCanonicalType()->getPointeeType(); + if (getAArch64PBV(PTy, C)) + return C.getTypeSize(PTy); + } + if (getAArch64PBV(QT, C)) + return C.getTypeSize(QT); + + return C.getTypeSize(C.getUIntPtrType()); +} + +// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the +// signature of the scalar function, as defined in 3.2.2 of the +// AAVFABI. 
+static std::tuple<unsigned, unsigned, bool> +getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { + QualType RetType = FD->getReturnType().getCanonicalType(); + + ASTContext &C = FD->getASTContext(); + + bool OutputBecomesInput = false; + + llvm::SmallVector<unsigned, 8> Sizes; + if (!RetType->isVoidType()) { + Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); + if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) + OutputBecomesInput = true; + } + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); + Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); + } + + assert(!Sizes.empty() && "Unable to determine NDS and WDS."); + // The LS of a function parameter / return value can only be a power + // of 2, starting from 8 bits, up to 128. + assert(std::all_of(Sizes.begin(), Sizes.end(), + [](unsigned Size) { + return Size == 8 || Size == 16 || Size == 32 || + Size == 64 || Size == 128; + }) && + "Invalid size"); + + return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), + *std::max_element(std::begin(Sizes), std::end(Sizes)), + OutputBecomesInput); +} + +/// Mangle the parameter part of the vector function name according to +/// their OpenMP classification. The mangling function is defined in +/// section 3.5 of the AAVFABI. +static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case LinearWithVarStride: + Out << "ls" << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + // Don't print the step value if it is not present or if it is + // equal to 1. 
+ if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + + return Out.str(); +} + +// Function used to add the attribute. The parameter `VLEN` is +// templated to allow the use of "x" when targeting scalable functions +// for SVE. +template <typename T> +static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, + char ISA, StringRef ParSeq, + StringRef MangledName, bool OutputBecomesInput, + llvm::Function *Fn) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << ISA << LMask << VLEN; + if (OutputBecomesInput) + Out << "v"; + Out << ParSeq << "_" << MangledName; + Fn->addFnAttr(Out.str()); +} + +// Helper function to generate the Advanced SIMD names depending on +// the value of the NDS when simdlen is not present. +static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, + StringRef Prefix, char ISA, + StringRef ParSeq, StringRef MangledName, + bool OutputBecomesInput, + llvm::Function *Fn) { + switch (NDS) { + case 8: + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 16: + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 32: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 64: + case 128: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + default: + llvm_unreachable("Scalar type is too wide."); + } +} + 
+/// Emit vector function attributes for AArch64, as defined in the AAVFABI. +static void emitAArch64DeclareSimdFunction( + CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, + char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { + + // Get basic data for building the vector signature. + const auto Data = getNDSWDS(FD, ParamAttrs); + const unsigned NDS = std::get<0>(Data); + const unsigned WDS = std::get<1>(Data); + const bool OutputBecomesInput = std::get<2>(Data); + + // Check the values provided via `simdlen` by the user. + // 1. A `simdlen(1)` doesn't produce vector signatures, + if (UserVLEN == 1) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, + "The clause simdlen(1) has no effect when targeting aarch64."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 2. Section 3.3.1, item 1: user input must be a power of 2 for + // Advanced SIMD output. + if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The value specified in simdlen must be a " + "power of 2 when targeting Advanced SIMD."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 3. Section 3.4.1. SVE fixed lengh must obey the architectural + // limits. + if (ISA == 's' && UserVLEN != 0) { + if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " + "lanes in the architectural constraints " + "for SVE (min is 128-bit, max is " + "2048-bit, by steps of 128-bit)"); + CGM.getDiags().Report(SLoc, DiagID) << WDS; + return; + } + } + + // Sort out parameter sequence. 
+ const std::string ParSeq = mangleVectorParameters(ParamAttrs); + StringRef Prefix = "_ZGV"; + // Generate simdlen from user input (if any). + if (UserVLEN) { + if (ISA == 's') { + // SVE generates only a masked function. + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else { + assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); + // Advanced SIMD generates one or two functions, depending on + // the `[not]inbranch` clause. + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } else { + // If no user simdlen is provided, follow the AAVFABI rules for + // generating the vector length. + if (ISA == 's') { + // SVE, section 3.4.1, item 1. + addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else { + assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); + // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or + // two vector names depending on the use of the clause + // `[not]inbranch`. 
+ switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } +} + void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); @@ -9385,12 +10106,26 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ++MI; } llvm::APSInt VLENVal; - if (const Expr *VLEN = Attr->getSimdlen()) - VLENVal = VLEN->EvaluateKnownConstInt(C); + SourceLocation ExprLoc; + const Expr *VLENExpr = Attr->getSimdlen(); + if (VLENExpr) { + VLENVal = VLENExpr->EvaluateKnownConstInt(C); + ExprLoc = VLENExpr->getExprLoc(); + } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) + CGM.getTriple().getArch() == llvm::Triple::x86_64) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + unsigned VLEN = VLENVal.getExtValue(); + StringRef MangledName = Fn->getName(); + if (CGM.getTarget().hasFeature("sve")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 's', 128, Fn, ExprLoc); + if (CGM.getTarget().hasFeature("neon")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 'n', 128, Fn, ExprLoc); + } } FD = FD->getPreviousDecl(); } @@ -9403,11 +10138,12 @@ public: static const int DoacrossFinArgs = 2; private: - llvm::Value *RTLFn; + 
llvm::FunctionCallee RTLFn; llvm::Value *Args[DoacrossFinArgs]; public: - DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) + DoacrossCleanupTy(llvm::FunctionCallee RTLFn, + ArrayRef<llvm::Value *> CallArgs) : RTLFn(RTLFn) { assert(CallArgs.size() == DoacrossFinArgs); std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); @@ -9454,10 +10190,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, enum { LowerFD = 0, UpperFD, StrideFD }; // Fill dims with data. for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { - LValue DimsLVal = - CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( - DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)), - KmpDimTy); + LValue DimsLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); // dims.upper = num_iterations; LValue UpperLVal = CGF.EmitLValueForField( DimsLVal, *std::next(RD->field_begin(), UpperFD)); @@ -9480,16 +10214,16 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, getThreadID(CGF, D.getBeginLoc()), llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder - .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy)) - .getPointer(), + CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), CGM.VoidPtrTy)}; - llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); + llvm::FunctionCallee RTLFn = + createRuntimeFunction(OMPRTL__kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; - llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + llvm::FunctionCallee FiniRTLFn = + createRuntimeFunction(OMPRTL__kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); } @@ -9508,20 +10242,14 @@ void 
CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, llvm::Value *CntVal = CGF.EmitScalarConversion( CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, CounterVal->getExprLoc()); - CGF.EmitStoreOfScalar( - CntVal, - CGF.Builder.CreateConstArrayGEP( - CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)), - /*Volatile=*/false, Int64Ty); + CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), + /*Volatile=*/false, Int64Ty); } llvm::Value *Args[] = { emitUpdateLocation(CGF, C->getBeginLoc()), getThreadID(CGF, C->getBeginLoc()), - CGF.Builder - .CreateConstArrayGEP(CntAddr, 0, - CGM.getContext().getTypeSizeInChars(Int64Ty)) - .getPointer()}; - llvm::Value *RTLFn; + CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; + llvm::FunctionCallee RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); } else { @@ -9532,12 +10260,12 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *Callee, + llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args) const { assert(Loc.isValid() && "Outlined function call location must be valid."); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { + if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { if (Fn->doesNotThrow()) { CGF.EmitNounwindRuntimeCall(Fn, Args); return; @@ -9547,7 +10275,7 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, } void CGOpenMPRuntime::emitOutlinedFunctionCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args) const { emitCall(CGF, Loc, OutlinedFn, Args); } @@ -9558,24 +10286,99 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, return 
CGF.GetAddrOfLocalVar(NativeParam); } +namespace { +/// Cleanup action for allocate support. +class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { +public: + static const int CleanupArgs = 3; + +private: + llvm::FunctionCallee RTLFn; + llvm::Value *Args[CleanupArgs]; + +public: + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, + ArrayRef<llvm::Value *> CallArgs) + : RTLFn(RTLFn) { + assert(CallArgs.size() == CleanupArgs && + "Size of arguments does not match."); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } +}; +} // namespace + Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { - return Address::invalid(); -} - -llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( + if (!VD) + return Address::invalid(); + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return Address::invalid(); + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. 
+ if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is a enum (integer). + // Convert to pointer type, if required. 
+ if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, + CGM.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, + CVD->getName() + ".void.addr"); + llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, + Allocator}; + llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); + + CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + CVD->getName() + ".addr"); + return Address(Addr, Align); +} + +llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } -llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( +llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } -llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( +llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -9585,7 +10388,7 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + 
llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { llvm_unreachable("Not supported in SIMD-only mode"); @@ -9716,7 +10519,7 @@ void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -9725,7 +10528,7 @@ void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPSIMDRuntime::emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9785,9 +10588,10 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { + const Expr *IfCond, + const Expr *Device) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9810,7 +10614,7 @@ llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9857,4 +10661,3 @@ CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *TargetParam) const { llvm_unreachable("Not supported in SIMD-only mode"); } - diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 1822a6fd19..42dc4d473b 100644 --- 
a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntime.h - Interface to OpenMP Runtimes -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -272,7 +271,8 @@ protected: virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } /// Emits \p Callee function call with arguments \p Args with location \p Loc. - void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, + void emitCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args = llvm::None) const; /// Emits address of the word in a memory where current thread id is @@ -672,23 +672,27 @@ private: /// Returns specified OpenMP runtime function. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createRuntimeFunction(unsigned Function); + llvm::FunctionCallee createRuntimeFunction(unsigned Function); /// Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createForStaticInitFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. 
- llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_next_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_fini_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createDispatchFiniFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize, + bool IVSigned); /// If the specified mangled name is not in the module, create and /// return threadprivate cache object. This object is a pointer's worth of @@ -704,7 +708,8 @@ private: /// must be the same. /// \param Name Name of the variable. llvm::Constant *getOrCreateInternalVariable(llvm::Type *Ty, - const llvm::Twine &Name); + const llvm::Twine &Name, + unsigned AddressSpace = 0); /// Set of threadprivate variables with the generated initializer. llvm::StringSet<> ThreadPrivateWithDefinition; @@ -724,7 +729,7 @@ private: struct TaskResultTy { llvm::Value *NewTask = nullptr; - llvm::Value *TaskEntry = nullptr; + llvm::Function *TaskEntry = nullptr; llvm::Value *NewTaskNewTaskTTy = nullptr; LValue TDBase; const RecordDecl *KmpTaskTQTyRD = nullptr; @@ -754,15 +759,24 @@ private: /// state, list of privates etc. TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data); + /// Returns default address space for the constant firstprivates, 0 by + /// default. 
+ virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; } + public: explicit CGOpenMPRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, ".", ".") {} virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Checks if the \p Body is the \a CompoundStmt and returns its child + /// statement iff there is only one that is not evaluatable at the compile + /// time. + static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body); + /// Get the platform-specific name separator. std::string getName(ArrayRef<StringRef> Parts) const; @@ -781,7 +795,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitParallelOutlinedFunction( + virtual llvm::Function *emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -793,7 +807,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitTeamsOutlinedFunction( + virtual llvm::Function *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -812,7 +826,7 @@ public: /// \param NumberOfParts Number of parts in untied task. Ignored for tied /// tasks. /// - virtual llvm::Value *emitTaskOutlinedFunction( + virtual llvm::Function *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -833,7 +847,7 @@ public: /// specified, nullptr otherwise. 
/// virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); @@ -1162,7 +1176,7 @@ public: /// state, list of privates etc. virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data); @@ -1195,10 +1209,11 @@ public: /// otherwise. /// \param Data Additional data for task generation like tiednsee, final /// state, list of privates etc. - virtual void emitTaskLoopCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, const OMPTaskDataTy &Data); + virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPLoopDirective &D, + llvm::Function *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, const OMPTaskDataTy &Data); /// Emit code for the directive that does not require outlining. /// @@ -1219,12 +1234,12 @@ public: /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. 
- llvm::Value *emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, - llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps); + llvm::Function *emitReductionFunction(SourceLocation Loc, + llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps); /// Emits single reduction combiner void emitSingleReductionCombiner(CodeGenFunction &CGF, @@ -1389,7 +1404,7 @@ public: /// target directive, or null if no device clause is used. virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device); @@ -1409,6 +1424,11 @@ public: virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr); + /// Registers provided target firstprivate variable as global on the + /// target. + llvm::Constant *registerTargetFirstprivateCopy(CodeGenFunction &CGF, + const VarDecl *VD); + /// Emit the global \a GD if it is meaningful for the target. Returns /// if it was emitted successfully. /// \param GD Global to scan. @@ -1429,7 +1449,7 @@ public: /// virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars); /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 @@ -1550,13 +1570,13 @@ public: /// schedule clause. 
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, - const Expr *&ChunkExpr) const {} + const Expr *&ChunkExpr) const; /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args = llvm::None) const; /// Emits OpenMP-specific function prolog. @@ -1582,8 +1602,12 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, - const OMPRequiresDecl *D) const {} + virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS); }; /// Class supports emissionof SIMD-only code. @@ -1600,7 +1624,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1614,7 +1638,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- llvm::Value * + llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1635,7 +1659,7 @@ public: /// \param NumberOfParts Number of parts in untied task. Ignored for tied /// tasks. /// - llvm::Value *emitTaskOutlinedFunction( + llvm::Function *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1652,7 +1676,7 @@ public: /// specified, nullptr otherwise. /// void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; @@ -1878,8 +1902,9 @@ public: /// \param Data Additional data for task generation like tiednsee, final /// state, list of privates etc. void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPExecutableDirective &D, llvm::Value *TaskFunction, - QualType SharedsTy, Address Shareds, const Expr *IfCond, + const OMPExecutableDirective &D, + llvm::Function *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override; /// Emit task region for the taskloop directive. The taskloop region is @@ -1912,7 +1937,7 @@ public: /// \param Data Additional data for task generation like tiednsee, final /// state, list of privates etc. void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPLoopDirective &D, llvm::Value *TaskFunction, + const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override; @@ -2055,7 +2080,7 @@ public: /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. 
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) override; /// Emit the target regions enclosed in \a GD function definition or @@ -2088,7 +2113,7 @@ public: /// variables used in \a OutlinedFn function. /// void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 @@ -2147,6 +2172,12 @@ public: /// \param TargetParam Corresponding target-specific parameter. Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override; + + /// Gets the OpenMP-specific address of the local variable. + Address getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) override { + return Address::invalid(); + } }; } // namespace CodeGen diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 7046ab3aa3..ca1e9311b6 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1,9 +1,8 @@ //===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -61,13 +60,19 @@ enum OpenMPRTLFunctionNVPTX { /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 - /// global_tid, kmp_critical_name *lck) - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple, - /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, - /// kmp_int32 global_tid, kmp_critical_name *lck) - OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple, + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, + /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + /// global_tid, void *global_buffer, int32_t num_of_records, void* + /// reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + /// *buffer, int idx, void *reduce_data)); + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); OMPRTL_NVPTX__kmpc_end_reduce_nowait, /// Call to void __kmpc_data_sharing_init_stack(); @@ -106,17 +111,18 @@ enum OpenMPRTLFunctionNVPTX { /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. 
class NVPTXActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee = nullptr; + llvm::FunctionCallee EnterCallee = nullptr; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee = nullptr; + llvm::FunctionCallee ExitCallee = nullptr; ArrayRef<llvm::Value *> ExitArgs; bool Conditional = false; llvm::BasicBlock *ContBlock = nullptr; public: - NVPTXActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, - llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, - bool Conditional = false) + NVPTXActionTy(llvm::FunctionCallee EnterCallee, + ArrayRef<llvm::Value *> EnterArgs, + llvm::FunctionCallee ExitCallee, + ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), ExitArgs(ExitArgs), Conditional(Conditional) {} void Enter(CodeGenFunction &CGF) override { @@ -215,16 +221,13 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); } -typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; -static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { - return P1.first > P2.first; -} static RecordDecl *buildRecordForGlobalizedVars( ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, ArrayRef<const ValueDecl *> EscapedDeclsForTeams, llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> - &MappedDeclsFields) { + &MappedDeclsFields, int BufSize) { + using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>; if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) return nullptr; SmallVector<VarsDataTy, 4> GlobalizedVars; @@ -236,8 +239,10 @@ static RecordDecl *buildRecordForGlobalizedVars( D); for (const ValueDecl *D : EscapedDeclsForTeams) GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), - stable_sort_comparator); + llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, 
VarsDataTy R) { + return L.first > R.first; + }); + // Build struct _globalized_locals_ty { // /* globalized vars */[WarSize] align (max(decl_align, // GlobalMemoryAlignment)) @@ -270,7 +275,7 @@ static RecordDecl *buildRecordForGlobalizedVars( Field->addAttr(*I); } } else { - llvm::APInt ArraySize(32, WarpSize); + llvm::APInt ArraySize(32, BufSize); Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); Field = FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, @@ -312,6 +317,9 @@ class CheckVarsEscapingDeclContext final OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) return; VD = cast<ValueDecl>(VD->getCanonicalDecl()); + // Use user-specified allocation. + if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>()) + return; // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { @@ -419,7 +427,7 @@ class CheckVarsEscapingDeclContext final EscapedDeclsForParallel = EscapedDecls.getArrayRef(); GlobalizedRD = ::buildRecordForGlobalizedVars( CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams, - MappedDeclsFields); + MappedDeclsFields, WarpSize); } public: @@ -705,112 +713,37 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. - // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. 
-static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast<Expr>(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || - isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) - continue; - // Analyze declarations. - if (const auto *DS = dyn_cast<DeclStmt>(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || - isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || - isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || - isa<UsingDirectiveDecl>(D) || - isa<OMPDeclareReductionDecl>(D) || - isa<OMPThreadPrivateDecl>(D)) - return true; - const auto *VD = dyn_cast<VarDecl>(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - -/// Check if the parallel directive has an 'if' clause with non-constant or -/// false condition. Also, check if the number of threads is strictly specified -/// and run those directives in non-SPMD mode. 
-static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, - const OMPExecutableDirective &D) { - if (D.hasClausesOfKind<OMPNumThreadsClause>()) - return true; - for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { - OpenMPDirectiveKind NameModifier = C->getNameModifier(); - if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown) - continue; - const Expr *Cond = C->getCondition(); - bool Result; - if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result) - return true; - } - return false; -} - /// Check for inner (nested) SPMD construct, if any static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) + if (isOpenMPParallelDirective(DKind)) return true; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NND)) + if (isOpenMPParallelDirective(DKind)) return true; } } return false; case OMPD_target_teams: - return 
isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir); + return isOpenMPParallelDirective(DKind); case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -829,6 +762,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -859,6 +793,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -882,10 +817,10 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: - return !hasParallelIfNumThreadsClause(Ctx, D); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: @@ -897,6 +832,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -927,6 +863,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -958,9 +895,10 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir 
= dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -968,13 +906,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -985,8 +926,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && isOpenMPWorksharingDirective(DKind) && @@ -997,8 +939,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = 
NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -1013,13 +956,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -1028,6 +974,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, } return false; case OMPD_target_parallel: + if (DKind == OMPD_simd) + return true; return isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); case OMPD_target_teams_distribute: @@ -1047,6 +995,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -1077,6 +1026,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -1107,8 +1057,9 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, // (Last|First)-privates must be shared in parallel region. 
return hasStaticScheduling(D); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: @@ -1120,6 +1071,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -1150,6 +1102,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -1512,14 +1465,14 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // directive. auto *ParallelFnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty}, - /*isVarArg=*/false) - ->getPointerTo(); - llvm::Value *WorkFnCast = Bld.CreateBitCast(WorkID, ParallelFnTy); + /*isVarArg=*/false); + llvm::Value *WorkFnCast = + Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo()); // Insert call to work function via shared wrapper. The shared // wrapper takes two arguments: // - the parallelism level; // - the thread ID; - emitCall(CGF, WST.Loc, WorkFnCast, + emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast}, {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -1547,9 +1500,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. /// \return Specified function. 
-llvm::Constant * +llvm::FunctionCallee CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { - llvm::Constant *RTLFn = nullptr; + llvm::FunctionCallee RTLFn = nullptr; switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { case OMPRTL_NVPTX__kmpc_kernel_init: { // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t @@ -1647,7 +1600,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); break; } - case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: { + case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t @@ -1684,28 +1637,47 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); break; } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: { - // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *lck) - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { + // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + // global_tid, void *global_buffer, int32_t num_of_records, void* + // reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t shortCircuit), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + 
// *buffer, int idx, void *reduce_data)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, + CGM.VoidPtrTy}; + auto *GlobalListFnTy = + llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: { - // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *lck) - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { @@ -1806,7 +1778,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ 
"__kmpc_barrier"); - cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + cast<llvm::Function>(RTLFn.getCallee()) + ->addFnAttr(llvm::Attribute::Convergent); break; } case OMPRTL__kmpc_barrier_simple_spmd: { @@ -1817,7 +1790,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + cast<llvm::Function>(RTLFn.getCallee()) + ->addFnAttr(llvm::Attribute::Convergent); break; } } @@ -1928,7 +1902,7 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc) {} -llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( +llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. 
@@ -1976,11 +1950,11 @@ getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, "expected teams directive."); const OMPExecutableDirective *Dir = &D; if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { - if (const Stmt *S = getSingleCompoundChild( + if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild( Ctx, D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true))) { - Dir = dyn_cast<OMPExecutableDirective>(S); + Dir = dyn_cast_or_null<OMPExecutableDirective>(S); if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) Dir = nullptr; } @@ -2005,7 +1979,7 @@ getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, } } -llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( +llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { SourceLocation Loc = D.getBeginLoc(); @@ -2014,13 +1988,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; // Globalize team reductions variable unconditionally in all modes. 
- getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); + if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) + getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); if (!LastPrivatesReductions.empty()) { GlobalizedRD = ::buildRecordForGlobalizedVars( CGM.getContext(), llvm::None, LastPrivatesReductions, - MappedDeclsFields); + MappedDeclsFields, WarpSize); } } else if (!LastPrivatesReductions.empty()) { assert(!TeamAndReductions.first && @@ -2068,9 +2043,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( } } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); - llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( + llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); - llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); @@ -2235,8 +2209,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, .getPointerType(CGM.getContext().VoidPtrTy) .castAs<PointerType>()); llvm::Value *GlobalRecValue = - Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One()) - .getPointer(); + Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer(); I->getSecond().GlobalRecordAddr = GlobalRecValue; I->getSecond().IsInSPMDModeFlag = nullptr; GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( @@ -2429,7 +2402,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { 
if (!CGF.HaveInsertPoint()) return; @@ -2446,7 +2419,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, } void CGOpenMPRuntimeNVPTX::emitParallelCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) return; @@ -2536,8 +2509,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy)) .castAs<PointerType>()); for (llvm::Value *V : CapturedVars) { - Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, - CGF.getPointerSize()); + Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); llvm::Value *PtrV; if (V->getType()->isIntegerTy()) PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); @@ -2625,7 +2597,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( } void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { // Just call the outlined function to execute the parallel region. 
// OutlinedFn(&GTid, &zero, CapturedStruct); @@ -2846,7 +2818,7 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address ElemPtr = DestAddr; Address Ptr = SrcAddr; Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast( - Bld.CreateConstGEP(SrcAddr, 1, Size), CGF.VoidPtrTy); + Bld.CreateConstGEP(SrcAddr, 1), CGF.VoidPtrTy); for (int IntSize = 8; IntSize >= 1; IntSize /= 2) { if (Size < CharUnits::fromQuantity(IntSize)) continue; @@ -2881,10 +2853,8 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), IntType, Offset, Loc); CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); - Address LocalPtr = - Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); - Address LocalElemPtr = - Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); + Address LocalPtr = Bld.CreateConstGEP(Ptr, 1); + Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1); PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB); PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB); CGF.EmitBranch(PreCondBB); @@ -2894,9 +2864,8 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), IntType, Offset, Loc); CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); - Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); - ElemPtr = - Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); + Ptr = Bld.CreateConstGEP(Ptr, 1); + ElemPtr = Bld.CreateConstGEP(ElemPtr, 1); } Size = Size % IntSize; } @@ -2959,16 +2928,14 @@ static void emitReductionListCopy( switch (Action) { case RemoteLaneToThread: { // Step 1.1: Get the address for the src element in the Reduce list.
- Address SrcElementPtrAddr = - Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); SrcElementAddr = CGF.EmitLoadOfPointer( SrcElementPtrAddr, C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. - DestElementPtrAddr = - Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); DestElementAddr = CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); ShuffleInElement = true; @@ -2977,16 +2944,14 @@ static void emitReductionListCopy( } case ThreadCopy: { // Step 1.1: Get the address for the src element in the Reduce list. - Address SrcElementPtrAddr = - Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); SrcElementAddr = CGF.EmitLoadOfPointer( SrcElementPtrAddr, C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. - DestElementPtrAddr = - Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); DestElementAddr = CGF.EmitLoadOfPointer( DestElementPtrAddr, C.getPointerType(Private->getType())->castAs<PointerType>()); @@ -2994,8 +2959,7 @@ static void emitReductionListCopy( } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. 
- Address SrcElementPtrAddr = - Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); SrcElementAddr = CGF.EmitLoadOfPointer( SrcElementPtrAddr, C.getPointerType(Private->getType())->castAs<PointerType>()); @@ -3030,8 +2994,7 @@ static void emitReductionListCopy( // Step 1.2: Create a temporary to store the element in the destination // Reduce list. - DestElementPtrAddr = - Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); DestElementAddr = CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); UpdateDestListPtr = true; @@ -3052,18 +3015,31 @@ static void emitReductionListCopy( shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(), RemoteLaneOffset, Private->getExprLoc()); } else { - if (Private->getType()->isScalarType()) { + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { llvm::Value *Elem = CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, Private->getType(), Private->getExprLoc()); // Store the source element value to the dest element address. 
CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, Private->getType()); - } else { + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy Elem = CGF.EmitLoadOfComplex( + CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), + Private->getExprLoc()); + CGF.EmitStoreOfComplex( + Elem, CGF.MakeAddrLValue(DestElementAddr, Private->getType()), + /*isInit=*/false); + break; + } + case TEK_Aggregate: CGF.EmitAggregateCopy( CGF.MakeAddrLValue(DestElementAddr, Private->getType()), CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), Private->getType(), AggValueSlot::DoesNotOverlap); + break; } } @@ -3147,9 +3123,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, const CGFunctionInfo &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, + "_omp_reduction_inter_warp_copy_func", &M); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); @@ -3246,8 +3222,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, CGF.EmitBlock(ThenBB); // Reduce element = LocalReduceList[i] - Address ElemPtrPtrAddr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); // elemptr = ((CopyType*)(elemptrptr)) + I @@ -3313,8 +3288,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType); // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I - Address TargetElemPtrPtr = - 
Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); Address TargetElemPtr = Address(TargetElemPtrVal, Align); @@ -3418,9 +3392,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, /// (2k+1)th thread is ignored in the value aggregation. Therefore /// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so /// that the contiguity assumption still holds. -static llvm::Value *emitShuffleAndReduceFunction( +static llvm::Function *emitShuffleAndReduceFunction( CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { + QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) { ASTContext &C = CGM.getContext(); // Thread local Reduce list used to host the values of data to be reduced. @@ -3568,6 +3542,406 @@ static llvm::Value *emitShuffleAndReduceFunction( return Fn; } +/// This function emits a helper that copies all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) +/// For all data entries D in reduce_data: +/// Copy local D to buffer.D[Idx] +static llvm::Value *emitListToGlobalCopyFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. 
+ ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_list_to_global_copy_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, Loc), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (const Expr *Private : Privates) { + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, 
/*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); + // Global = Buffer.VD[Idx]; + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { + llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, + Private->getType(), Loc); + CGF.EmitStoreOfScalar(V, GlobLVal); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex( + CGF.MakeAddrLValue(ElemPtr, Private->getType()), Loc); + CGF.EmitStoreOfComplex(V, GlobLVal, /*isInit=*/false); + break; + } + case TEK_Aggregate: + CGF.EmitAggregateCopy(GlobLVal, + CGF.MakeAddrLValue(ElemPtr, Private->getType()), + Private->getType(), AggValueSlot::DoesNotOverlap); + break; + } + ++Idx; + } + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that reduces all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) +/// void *GlobPtrs[]; +/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; +/// ... 
+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; +/// reduce_function(GlobPtrs, reduce_data); +static llvm::Value *emitListToGlobalReduceFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap, + llvm::Function *ReduceFn) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_list_to_global_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + + // 1. Build a list of reduction variables. 
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + // Global = Buffer.VD[Idx]; + const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); + CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + // Call reduce_function(GlobalReduceList, ReduceList) + llvm::Value *GlobalReduceList = + CGF.EmitCastToVoidPtr(ReductionList.getPointer()); + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( + AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr}); + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that copies all the reduction variables from +/// the provided global buffer into the local reduce list. +/// +/// void global_to_list_copy_func(void *buffer, int Idx, void *reduce_data) +/// For all data entries D in reduce_data: +/// Copy buffer.D[Idx] to local D; +static llvm::Value *emitGlobalToListCopyFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list.
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_global_to_list_copy_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, Loc), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (const Expr *Private : Privates) { + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + ElemPtrPtr = 
Bld.CreatePointerBitCastOrAddrSpaceCast( + ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); + // Global = Buffer.VD[Idx]; + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { + llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc); + CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType()); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(GlobLVal, Loc); + CGF.EmitStoreOfComplex(V, CGF.MakeAddrLValue(ElemPtr, Private->getType()), + /*isInit=*/false); + break; + } + case TEK_Aggregate: + CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), + GlobLVal, Private->getType(), + AggValueSlot::DoesNotOverlap); + break; + } + ++Idx; + } + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that reduces all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data) +/// void *GlobPtrs[]; +/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; +/// ... 
+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; +/// reduce_function(reduce_data, GlobPtrs); +static llvm::Value *emitGlobalToListReduceFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap, + llvm::Function *ReduceFn) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_global_to_list_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + + // 1. Build a list of reduction variables. 
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + // Global = Buffer.VD[Idx]; + const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); + CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + // Call reduce_function(ReduceList, GlobalReduceList) + llvm::Value *GlobalReduceList = + CGF.EmitCastToVoidPtr(ReductionList.getPointer()); + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( + AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList}); + CGF.FinishFunction(); + return Fn; +} + /// /// Design of OpenMP reductions on the GPU /// @@ -3841,57 +4215,55 @@ void CGOpenMPRuntimeNVPTX::emitReduction( llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::Value *Res; - if (ParallelReduction) { - ASTContext &C = CGM.getContext(); - // 1. Build a list of reduction variables. - // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; - auto Size = RHSExprs.size(); - for (const Expr *E : Privates) { - if (E->getType()->isVariablyModifiedType()) - // Reserve place for array size. 
- ++Size; - } - llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address ReductionList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - auto IPriv = Privates.begin(); - unsigned Idx = 0; - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), - Elem); - if ((*IPriv)->getType()->isVariablyModifiedType()) { - // Store array size. - ++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - llvm::Value *Size = CGF.Builder.CreateIntCast( - CGF.getVLASize( - CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .NumElts, - CGF.SizeTy, /*isSigned=*/false); - CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), - Elem); - } + ASTContext &C = CGM.getContext(); + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + auto Size = RHSExprs.size(); + for (const Expr *E : Privates) { + if (E->getType()->isVariablyModifiedType()) + // Reserve place for array size. 
+ ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. + ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); } + } - llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - ReductionList.getPointer(), CGF.VoidPtrTy); - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); - llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( - CGM, Privates, ReductionArrayTy, ReductionFn, Loc); - llvm::Value *InterWarpCopyFn = - emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + llvm::Function *ReductionFn = emitReductionFunction( + Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Function 
*ShuffleAndReduceFn = emitShuffleAndReduceFunction( + CGM, Privates, ReductionArrayTy, ReductionFn, Loc); + llvm::Value *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); + if (ParallelReduction) { llvm::Value *Args[] = {RTLoc, ThreadId, CGF.Builder.getInt32(RHSExprs.size()), @@ -3900,17 +4272,59 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ShuffleAndReduceFn, InterWarpCopyFn}; - Res = CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2), - Args); + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + Args); } else { assert(TeamsReduction && "expected teams reduction."); - std::string Name = getName({"reduction"}); - llvm::Value *Lock = getCriticalRegionLock(Name); - llvm::Value *Args[] = {RTLoc, ThreadId, Lock}; + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap; + llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size()); + int Cnt = 0; + for (const Expr *DRE : Privates) { + PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl(); + ++Cnt; + } + const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( + CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap, + C.getLangOpts().OpenMPCUDAReductionBufNum); + TeamsReductions.push_back(TeamReductionRec); + if (!KernelTeamsReductionPtr) { + KernelTeamsReductionPtr = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_teams_reductions_buffer_$_$ptr"); + } + llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar( + Address(KernelTeamsReductionPtr, CGM.getPointerAlign()), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction( + 
CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + ReductionFn); + llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + ReductionFn); + + llvm::Value *Args[] = { + RTLoc, + ThreadId, + GlobalBufferPtr, + CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum), + RL, + ShuffleAndReduceFn, + InterWarpCopyFn, + GlobalToBufferCpyFn, + GlobalToBufferRedFn, + BufferToGlobalCpyFn, + BufferToGlobalRedFn}; + Res = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple), + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -3941,30 +4355,14 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ++IRHS; } }; - if (ParallelReduction) { - llvm::Value *EndArgs[] = {ThreadId}; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - } else { - assert(TeamsReduction && "expected teams reduction."); - llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - std::string Name = getName({"reduction"}); - llvm::Value *Lock = getCriticalRegionLock(Name); - llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock}; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - } + llvm::Value *EndArgs[] = {ThreadId}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); // There is no need to emit line number for unconditional branch. 
(void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(ExitBB, /*IsFinished=*/true); @@ -3983,6 +4381,10 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, if (Attr->getCaptureKind() == OMPC_map) { PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, LangAS::opencl_global); + } else if (Attr->getCaptureKind() == OMPC_firstprivate && + PointeeTy.isConstant(CGM.getContext())) { + PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, + LangAS::opencl_generic); } } ArgType = CGM.getContext().getPointerType(PointeeTy); @@ -4034,12 +4436,11 @@ CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, } void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args) const { SmallVector<llvm::Value *, 4> TargetArgs; TargetArgs.reserve(Args.size()); - auto *FnType = - cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType()); + auto *FnType = OutlinedFn.getFunctionType(); for (unsigned I = 0, E = Args.size(); I < E; ++I) { if (FnType->isVarArg() && FnType->getNumParams() <= I) { TargetArgs.append(std::next(Args.begin(), I), Args.end()); @@ -4137,8 +4538,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( } unsigned Idx = 0; if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, - CGF.getPointerSize()); + Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( Src, CGF.SizeTy->getPointerTo()); llvm::Value *LB = CGF.EmitLoadOfScalar( @@ -4148,8 +4548,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc()); Args.emplace_back(LB); ++Idx; - Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, - 
CGF.getPointerSize()); + Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( Src, CGF.SizeTy->getPointerTo()); llvm::Value *UB = CGF.EmitLoadOfScalar( @@ -4164,8 +4563,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( ASTContext &CGFContext = CGF.getContext(); for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) { QualType ElemTy = CurField->getType(); - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx, - CGF.getPointerSize()); + Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx); Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy))); llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress, @@ -4266,6 +4664,58 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { + if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) { + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch (A->getAllocatorType()) { + // Use the default allocator here as by default local vars are + // threadlocal. + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + // Follow the user decision - use default allocation. + return Address::invalid(); + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + // TODO: implement aupport for user-defined allocators. 
+ return Address::invalid(); + case OMPAllocateDeclAttr::OMPConstMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + } + } + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) return Address::invalid(); @@ -4287,6 +4737,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, return VDI->second.PrivateAddr; } } + return Address::invalid(); } @@ -4374,6 +4825,38 @@ void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( } } +unsigned 
CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const { + return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); +} + +bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPConstMemAlloc: + AS = LangAS::cuda_constant; + return true; + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::cuda_shared; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + // Get current CudaArch and ignore any unknown values static CudaArch getCudaArch(CodeGenModule &CGM) { if (!CGM.getTarget().hasFeature("ptx")) @@ -4395,7 +4878,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) { /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". 
void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( - CodeGenModule &CGM, const OMPRequiresDecl *D) const { + const OMPRequiresDecl *D) const { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { switch (getCudaArch(CGM)) { @@ -4587,9 +5070,12 @@ void CGOpenMPRuntimeNVPTX::clear() { QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, /*IndexTypeQuals=*/0); llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); + // FIXME: nvlink does not handle weak linkage correctly (object with the + // different size are reported as erroneous). + // Restore CommonLinkage as soon as nvlink is fixed. auto *GV = new llvm::GlobalVariable( CGM.getModule(), LLVMArr2Ty, - /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(LLVMArr2Ty), "_openmp_static_glob_rd_$_"); auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( @@ -4600,5 +5086,36 @@ void CGOpenMPRuntimeNVPTX::clear() { } } } + if (!TeamsReductions.empty()) { + ASTContext &C = CGM.getContext(); + RecordDecl *StaticRD = C.buildImplicitRecord( + "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union); + StaticRD->startDefinition(); + for (const RecordDecl *TeamReductionRec : TeamsReductions) { + QualType RecTy = C.getRecordType(TeamReductionRec); + auto *Field = FieldDecl::Create( + C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy, + C.getTrivialTypeSourceInfo(RecTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + StaticRD->addDecl(Field); + } + StaticRD->completeDefinition(); + QualType StaticTy = C.getRecordType(StaticRD); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + // FIXME: nvlink does not handle weak linkage correctly (object with the + // different size are reported as 
erroneous). + // Restore CommonLinkage as soon as nvlink is fixed. + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMReductionsBufferTy, + /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(LLVMReductionsBufferTy), + "_openmp_teams_reductions_buffer_$_"); + KernelTeamsReductionPtr->setInitializer( + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, + CGM.VoidPtrTy)); + } CGOpenMPRuntime::clear(); } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 6091610c37..6709ae322a 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,7 +17,6 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/StmtOpenMP.h" -#include "llvm/IR/CallSite.h" namespace clang { namespace CodeGen { @@ -173,7 +171,7 @@ private: /// specified, nullptr otherwise. /// void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); @@ -230,7 +228,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- llvm::Value * + llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -245,7 +243,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -260,7 +258,7 @@ public: /// variables used in \a OutlinedFn function. /// void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; /// Emits code for parallel or serial call of the \a OutlinedFn with @@ -273,7 +271,7 @@ public: /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; @@ -323,7 +321,7 @@ public: /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function); /// Translates the native parameter of outlined function if this is required /// for target. @@ -342,7 +340,7 @@ public: /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. 
void emitOutlinedFunctionCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args = llvm::None) const override; /// Emits OpenMP-specific function prolog. @@ -385,8 +383,16 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - void checkArchForUnifiedAddressing(CodeGenModule &CGM, - const OMPRequiresDecl *D) const override; + void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const override; + + /// Returns default address space for the constant firstprivates, __constant__ + /// address space by default. + unsigned getDefaultFirstprivateAddressSpace() const override; + + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override; private: /// Track the execution mode when codegening directives within a target @@ -463,6 +469,12 @@ private: unsigned RegionCounter = 0; }; llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords; + llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr; + /// List of the records with the list of fields for the reductions across the + /// teams. Used to build the intermediate buffer for the fast teams + /// reductions. + /// All the records are gathered into a union `union.type` is created. + llvm::SmallVector<const RecordDecl *, 4> TeamsReductions; /// Shared pointer for the global memory in the global memory buffer used for /// the given kernel. 
llvm::GlobalVariable *KernelStaticGlobalized = nullptr; diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h index 41084294ab..730ee4c438 100644 --- a/lib/CodeGen/CGRecordLayout.h +++ b/lib/CodeGen/CGRecordLayout.h @@ -1,9 +1,8 @@ //===--- CGRecordLayout.h - LLVM Record Layout Information ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index c754541ac1..b5102bb154 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -1,9 +1,8 @@ //===--- CGRecordLayoutBuilder.cpp - CGRecordLayout builder ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -274,7 +273,7 @@ void CGRecordLowering::lower(bool NVBaseType) { if (!NVBaseType) accumulateVBases(); } - std::stable_sort(Members.begin(), Members.end()); + llvm::stable_sort(Members); Members.push_back(StorageInfo(Size, getIntNType(8))); clipTailPadding(); determinePacked(NVBaseType); @@ -659,7 +658,7 @@ void CGRecordLowering::insertPadding() { Pad = Padding.begin(), PadEnd = Padding.end(); Pad != PadEnd; ++Pad) Members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second))); - std::stable_sort(Members.begin(), Members.end()); + llvm::stable_sort(Members); } void CGRecordLowering::fillOutputFields() { diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 0242b48659..c617b198d7 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -1,9 +1,8 @@ //===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,7 +19,6 @@ #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -393,26 +391,35 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, // at the end of a statement expression, they yield the value of their // subexpression. 
Handle this by walking through all labels we encounter, // emitting them before we evaluate the subexpr. + // Similar issues arise for attributed statements. const Stmt *LastStmt = S.body_back(); - while (const LabelStmt *LS = dyn_cast<LabelStmt>(LastStmt)) { - EmitLabel(LS->getDecl()); - LastStmt = LS->getSubStmt(); + while (!isa<Expr>(LastStmt)) { + if (const auto *LS = dyn_cast<LabelStmt>(LastStmt)) { + EmitLabel(LS->getDecl()); + LastStmt = LS->getSubStmt(); + } else if (const auto *AS = dyn_cast<AttributedStmt>(LastStmt)) { + // FIXME: Update this if we ever have attributes that affect the + // semantics of an expression. + LastStmt = AS->getSubStmt(); + } else { + llvm_unreachable("unknown value statement"); + } } EnsureInsertPoint(); - QualType ExprTy = cast<Expr>(LastStmt)->getType(); + const Expr *E = cast<Expr>(LastStmt); + QualType ExprTy = E->getType(); if (hasAggregateEvaluationKind(ExprTy)) { - EmitAggExpr(cast<Expr>(LastStmt), AggSlot); + EmitAggExpr(E, AggSlot); } else { // We can't return an RValue here because there might be cleanups at // the end of the StmtExpr. Because of that, we have to emit the result // here into a temporary alloca. RetAlloca = CreateMemTemp(ExprTy); - EmitAnyExprToMem(cast<Expr>(LastStmt), RetAlloca, Qualifiers(), - /*IsInit*/false); + EmitAnyExprToMem(E, RetAlloca, Qualifiers(), + /*IsInit*/ false); } - } return RetAlloca; @@ -529,6 +536,16 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { } EmitBlock(Dest.getBlock()); + + // Emit debug info for labels. + if (CGDebugInfo *DI = getDebugInfo()) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { + DI->setLocation(D->getLocation()); + DI->EmitLabel(D, Builder); + } + } + incrementProfileCounter(D->getStmt()); } @@ -1821,8 +1838,15 @@ llvm::Value* CodeGenFunction::EmitAsmInput( // (immediate or symbolic), try to emit it as such. 
if (!Info.allowsRegister() && !Info.allowsMemory()) { if (Info.requiresImmediateConstant()) { - llvm::APSInt AsmConst = InputExpr->EvaluateKnownConstInt(getContext()); - return llvm::ConstantInt::get(getLLVMContext(), AsmConst); + Expr::EvalResult EVResult; + InputExpr->EvaluateAsRValue(EVResult, getContext(), true); + + llvm::APSInt IntResult; + if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + getContext())) + llvm_unreachable("Invalid immediate constant!"); + + return llvm::ConstantInt::get(getLLVMContext(), IntResult); } Expr::EvalResult Result; @@ -1915,6 +1939,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Value*> InOutArgs; std::vector<llvm::Type*> InOutArgTypes; + // Keep track of out constraints for tied input operand. + std::vector<std::string> OutputConstraints; + // An inline asm can be marked readonly if it meets the following conditions: // - it doesn't have any sideeffects // - it doesn't clobber memory @@ -1937,7 +1964,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S, Info.earlyClobber()); - + OutputConstraints.push_back(OutputConstraint); LValue Dest = EmitLValue(OutExpr); if (!Constraints.empty()) Constraints += ','; @@ -2055,6 +2082,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()), getTarget(), CGM, S, false /* No EarlyClobber */); + std::string ReplaceConstraint (InputConstraint); llvm::Value *Arg = EmitAsmInput(Info, InputExpr, Constraints); // If this input argument is tied to a larger output result, extend the @@ -2082,9 +2110,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Arg = Builder.CreateFPExt(Arg, OutputTy); } } + // Deal with the tied operands' constraint code in adjustInlineAsmType. 
+ ReplaceConstraint = OutputConstraints[Output]; } if (llvm::Type* AdjTy = - getTargetHooks().adjustInlineAsmType(*this, InputConstraint, + getTargetHooks().adjustInlineAsmType(*this, ReplaceConstraint, Arg->getType())) Arg = Builder.CreateBitCast(Arg, AdjTy); else diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index eb1304d893..d27afcdd33 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1,9 +1,8 @@ //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,7 +18,6 @@ #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/DeclOpenMP.h" -#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; @@ -727,6 +725,9 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; + bool DeviceConstTarget = + getLangOpts().OpenMPIsDevice && + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); bool FirstprivateIsLastprivate = false; llvm::DenseSet<const VarDecl *> Lastprivates; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { @@ -749,17 +750,29 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, bool ThisFirstprivateIsLastprivate = Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); if (!MustEmitFirstprivateCopy && 
!ThisFirstprivateIsLastprivate && FD && - !FD->getType()->isReferenceType()) { + !FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); ++IRef; ++InitsRef; continue; } + // Do not emit copy for firstprivate constant variables in target regions, + // captured by reference. + if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && + FD && FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, + OrigVD); + ++IRef; + ++InitsRef; + continue; + } FirstprivateIsLastprivate = FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { - const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); const auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; @@ -1227,7 +1240,7 @@ static void emitCommonOMPParallelDirective( OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); - llvm::Value *OutlinedFn = + llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { @@ -1518,8 +1531,9 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( I < E; ++I) { const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); const auto *VD = cast<VarDecl>(DRE->getDecl()); - // Override only those variables that are really emitted already. - if (LocalDeclMap.count(VD)) { + // Override only those variables that can be captured to avoid re-emission + // of the variables declared within the loops. 
+ if (DRE->refersToEnclosingVariableOrCapture()) { (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { return CreateMemTemp(DRE->getType(), VD->getName()); }); @@ -2893,6 +2907,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -2925,8 +2941,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), - CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), @@ -3028,7 +3044,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Action.Enter(CGF); BodyGen(CGF); }; - llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); OMPLexicalScope Scope(*this, S); @@ -3127,6 +3143,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( // Set proper addresses for generated private copies. 
OMPPrivateScope Scope(CGF); if (!Data.FirstprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -3144,8 +3162,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), - CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); @@ -3156,18 +3174,18 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( (void)Scope.Privatize(); if (InputInfo.NumberOfTargetItems > 0) { InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); } Action.Enter(CGF); OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); BodyGen(CGF); }; - llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, Data.NumberOfParts); llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
1 : 0); @@ -3200,7 +3218,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, const OMPTaskDataTy &Data) { CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, @@ -3933,6 +3951,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: + case OMPC_allocator: + case OMPC_allocate: case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: @@ -4080,8 +4100,9 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, /*IsSigned=*/false); return NumIterations; }; - CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, - SizeEmitter); + if (IsOffloadEntry) + CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, + SizeEmitter); CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); } @@ -4124,7 +4145,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); - llvm::Value *OutlinedFn = + llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -4970,7 +4991,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { } }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, const OMPTaskDataTy &Data) { auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, &Data](CodeGenFunction &CGF, PrePostActionTy &) { @@ -5077,4 +5098,3 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( : 
D.getDirectiveKind(), CodeGen); } - diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index fbd8146702..e79f3f3dd8 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -1,9 +1,8 @@ //===--- CGVTT.cpp - Emit LLVM Code for C++ VTTs --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index bfb089ff90..3cb3d35448 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -1,9 +1,8 @@ //===--- CGVTables.cpp - Emit LLVM Code for C++ vtables -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -279,7 +278,7 @@ void CodeGenFunction::FinishThunk() { FinishFunction(); } -void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, +void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, const ThunkInfo *Thunk, bool IsUnprototyped) { assert(isa<CXXMethodDecl>(CurGD.getDecl()) && @@ -304,7 +303,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, CGM.ErrorUnsupported( MD, "non-trivial argument copy for return-adjusting thunk"); } - EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr); + EmitMustTailThunk(CurGD, AdjustedThisPtr, Callee); return; } @@ -327,7 +326,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, #ifndef NDEBUG const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( - CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs); + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1), PrefixArgs); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -354,9 +353,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. - llvm::Instruction *CallOrInvoke; - CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD); - RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke); + llvm::CallBase *CallOrInvoke; + RValue RV = EmitCall(*CurFnInfo, CGCallee::forDirect(Callee, CurGD), Slot, + CallArgs, &CallOrInvoke); // Consider return adjustment if we have ThunkInfo. 
if (Thunk && !Thunk->Return.isEmpty()) @@ -376,7 +375,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, - llvm::Value *CalleePtr) { + llvm::FunctionCallee Callee) { // Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery // to translate AST arguments into LLVM IR arguments. For thunks, we know // that the caller prototype more or less matches the callee prototype with @@ -405,14 +404,14 @@ void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, // Emit the musttail call manually. Even if the prologue pushed cleanups, we // don't actually want to run them. - llvm::CallInst *Call = Builder.CreateCall(CalleePtr, Args); + llvm::CallInst *Call = Builder.CreateCall(Callee, Args); Call->setTailCallKind(llvm::CallInst::TCK_MustTail); // Apply the standard set of call attributes. unsigned CallingConv; llvm::AttributeList Attrs; - CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs, - CallingConv, /*AttrOnCallSite=*/true); + CGM.ConstructAttributeList(Callee.getCallee()->getName(), *CurFnInfo, GD, + Attrs, CallingConv, /*AttrOnCallSite=*/true); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -450,7 +449,8 @@ void CodeGenFunction::generateThunk(llvm::Function *Fn, Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType()); // Make the call and return the result. 
- EmitCallAndReturnForThunk(Callee, &Thunk, IsUnprototyped); + EmitCallAndReturnForThunk(llvm::FunctionCallee(Fn->getFunctionType(), Callee), + &Thunk, IsUnprototyped); } static bool shouldEmitVTableThunk(CodeGenModule &CGM, const CXXMethodDecl *MD, @@ -649,7 +649,8 @@ void CodeGenVTables::addVTableComponent( auto getSpecialVirtualFn = [&](StringRef name) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); - llvm::Constant *fn = CGM.CreateRuntimeFunction(fnTy, name); + llvm::Constant *fn = cast<llvm::Constant>( + CGM.CreateRuntimeFunction(fnTy, name).getCallee()); if (auto f = dyn_cast<llvm::Function>(fn)) f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy); @@ -760,7 +761,6 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align); - CGM.setGVProperties(VTable, RD); // V-tables are always unnamed_addr. VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); @@ -774,6 +774,11 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, createVTableInitializer(components, *VTLayout, RTTI); components.finishAndSetAsInitializer(VTable); + // Set properties only after the initializer has been set to ensure that the + // GV is treated as definition and not declaration. 
+ assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration"); + CGM.setGVProperties(VTable, RD); + CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); return VTable; diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h index 6377659e4c..a47841bfc6 100644 --- a/lib/CodeGen/CGVTables.h +++ b/lib/CodeGen/CGVTables.h @@ -1,9 +1,8 @@ //===--- CGVTables.h - Emit LLVM Code for C++ vtables -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index da8a8efb84..71f95abe48 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -1,9 +1,8 @@ //===-- CGValue.h - LLVM CodeGen wrappers for llvm::Value* ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 29c6793c60..416bc4dc31 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -88,6 +88,7 @@ add_clang_library(clangCodeGen MicrosoftCXXABI.cpp ModuleBuilder.cpp ObjectFilePCHContainerOperations.cpp + PatternInit.cpp SanitizerMetadata.cpp SwiftCallingConv.cpp TargetInfo.cpp diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index 27f5d53ffe..c047587dc0 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -1,9 +1,8 @@ //==--- CodeGenABITypes.cpp - Convert Clang types to LLVM types for ABI ----==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -35,9 +34,8 @@ CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM, const CGFunctionInfo & CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, - CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD) { - return CGM.getTypes().arrangeFreeFunctionType(Ty, FD); + CanQual<FunctionProtoType> Ty) { + return CGM.getTypes().arrangeFreeFunctionType(Ty); } const CGFunctionInfo & @@ -68,7 +66,7 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, llvm::FunctionType * CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) { assert(FD != nullptr && "Expected a non-null function declaration!"); - llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD); + llvm::Type *T = CGM.getTypes().ConvertType(FD->getType()); if (auto FT = dyn_cast<llvm::FunctionType>(T)) return FT; diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index fd4506f2d1..1f61dc3783 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -1,9 +1,8 @@ //===--- CodeGenAction.cpp - LLVM Code Generation Frontend Action ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -20,6 +19,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Driver/DriverDiagnostic.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" @@ -31,6 +31,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/RemarkStreamer.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Pass.h" @@ -277,8 +278,14 @@ namespace clang { return; } - Ctx.setDiagnosticsOutputFile( - llvm::make_unique<yaml::Output>(OptRecordFile->os())); + Ctx.setRemarkStreamer(llvm::make_unique<RemarkStreamer>( + CodeGenOpts.OptRecordFile, OptRecordFile->os())); + + if (!CodeGenOpts.OptRecordPasses.empty()) + if (Error E = Ctx.getRemarkStreamer()->setFilter( + CodeGenOpts.OptRecordPasses)) + Diags.Report(diag::err_drv_optimization_remark_pattern) + << toString(std::move(E)) << CodeGenOpts.OptRecordPasses; if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) Ctx.setDiagnosticsHotnessRequested(true); @@ -936,7 +943,8 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, Diags->Report(DiagID).AddString("cannot compile inline asm"); } -std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { +std::unique_ptr<llvm::Module> +CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); @@ -1014,7 +1022,7 @@ void CodeGenAction::ExecuteAction() { bool Invalid; SourceManager &SM = CI.getSourceManager(); FileID FID = SM.getMainFileID(); - llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); + const llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); if (Invalid) 
return; diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 1713e40c31..7a8d79ba3b 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -1,9 +1,8 @@ //===--- CodeGenFunction.cpp - Emit LLVM Code from ASTs for a Function ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -256,6 +255,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { if (CurBB->empty() || ReturnBlock.getBlock()->use_empty()) { ReturnBlock.getBlock()->replaceAllUsesWith(CurBB); delete ReturnBlock.getBlock(); + ReturnBlock = JumpDest(); } else EmitBlock(ReturnBlock.getBlock()); return llvm::DebugLoc(); @@ -275,6 +275,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { Builder.SetInsertPoint(BI->getParent()); BI->eraseFromParent(); delete ReturnBlock.getBlock(); + ReturnBlock = JumpDest(); return Loc; } } @@ -449,6 +450,19 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // 5. Width of vector aguments and return types for functions called by this // function. CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); + + // If we generated an unreachable return block, delete it now. 
+ if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) { + Builder.ClearInsertionPoint(); + ReturnBlock.getBlock()->eraseFromParent(); + } + if (ReturnValue.isValid()) { + auto *RetAlloca = dyn_cast<llvm::AllocaInst>(ReturnValue.getPointer()); + if (RetAlloca && RetAlloca->use_empty()) { + RetAlloca->eraseFromParent(); + ReturnValue = Address::invalid(); + } + } } /// ShouldInstrumentFunction - Return true if the current function should be @@ -2250,7 +2264,7 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, OffsetValue); } -llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn, +llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location) { @@ -2278,7 +2292,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); llvm::Value *V = Addr.getPointer(); llvm::Type *VTy = V->getType(); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, CGM.Int8PtrTy); for (const auto *I : D->specific_attrs<AnnotateAttr>()) { diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 89cb850ab1..fddbe1443c 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1,9 +1,8 @@ //===-- CodeGenFunction.h - Per-Function state for LLVM CodeGen -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -48,7 +47,6 @@ class Module; class SwitchInst; class Twine; class Value; -class CallSite; } namespace clang { @@ -568,7 +566,7 @@ public: JumpDest RethrowDest; /// A function to call to enter the catch. - llvm::Constant *BeginCatchFn; + llvm::FunctionCallee BeginCatchFn; /// An i1 variable indicating whether or not the @finally is /// running for an exception. @@ -580,8 +578,8 @@ public: public: void enter(CodeGenFunction &CGF, const Stmt *Finally, - llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn, - llvm::Constant *rethrowFn); + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, llvm::FunctionCallee rethrowFn); void exit(CodeGenFunction &CGF); }; @@ -1836,6 +1834,9 @@ public: void EmitLambdaBlockInvokeBody(); void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); + void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) { + EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); + } void EmitAsanPrologueOrEpilogue(bool Prologue); /// Emit the unified return block, trying to avoid its emission when @@ -1851,14 +1852,14 @@ public: void StartThunk(llvm::Function *Fn, GlobalDecl GD, const CGFunctionInfo &FnInfo, bool IsUnprototyped); - void EmitCallAndReturnForThunk(llvm::Constant *Callee, const ThunkInfo *Thunk, - bool IsUnprototyped); + void EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, + const ThunkInfo *Thunk, bool IsUnprototyped); void FinishThunk(); /// Emit a musttail call for a thunk with a potentially adjusted this pointer. void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, - llvm::Value *Callee); + llvm::FunctionCallee Callee); /// Generate a thunk for the given method. 
void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, @@ -2502,16 +2503,13 @@ public: void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, - Address This, const CXXConstructExpr *E, - AggValueSlot::Overlap_t Overlap, - bool NewPointerIsChecked); + AggValueSlot ThisAVS, const CXXConstructExpr *E); void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, Address This, CallArgList &Args, AggValueSlot::Overlap_t Overlap, - SourceLocation Loc, - bool NewPointerIsChecked); + SourceLocation Loc, bool NewPointerIsChecked); /// Emit assumption load for all bases. Requires to be be called only on /// most-derived class and not under construction of the object. @@ -2618,10 +2616,12 @@ public: bool sanitizePerformTypeCheck() const; /// Emit a check that \p V is the address of storage of the - /// appropriate size and alignment for an object of type \p Type. + /// appropriate size and alignment for an object of type \p Type + /// (or if ArraySize is provided, for an array of that bound). void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V, QualType Type, CharUnits Alignment = CharUnits::Zero(), - SanitizerSet SkippedChecks = SanitizerSet()); + SanitizerSet SkippedChecks = SanitizerSet(), + llvm::Value *ArraySize = nullptr); /// Emit a check that \p Base points into an array object, which /// we can access at index \p Index. 
\p Accessed should be \c false if we @@ -3084,7 +3084,7 @@ public: bool EmitOMPLinearClauseInit(const OMPLoopDirective &D); typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, - llvm::Value * /*OutlinedFn*/, + llvm::Function * /*OutlinedFn*/, const OMPTaskDataTy & /*Data*/)> TaskGenTy; void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -3560,7 +3560,6 @@ public: LValue EmitCXXConstructLValue(const CXXConstructExpr *E); LValue EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E); - LValue EmitLambdaLValue(const LambdaExpr *E); LValue EmitCXXTypeidLValue(const CXXTypeidExpr *E); LValue EmitCXXUuidofLValue(const CXXUuidofExpr *E); @@ -3580,10 +3579,10 @@ public: /// LLVM arguments and the types they were derived from. RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, - llvm::Instruction **callOrInvoke, SourceLocation Loc); + llvm::CallBase **callOrInvoke, SourceLocation Loc); RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, - llvm::Instruction **callOrInvoke = nullptr) { + llvm::CallBase **callOrInvoke = nullptr) { return EmitCall(CallInfo, Callee, ReturnValue, Args, callOrInvoke, SourceLocation()); } @@ -3596,30 +3595,30 @@ public: void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl); - llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, + llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); - llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, + llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name = ""); - llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, + llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); - llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, - 
ArrayRef<llvm::Value*> args, + llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name = ""); SmallVector<llvm::OperandBundleDef, 1> getBundlesForFunclet(llvm::Value *Callee); - llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - const Twine &Name = ""); - llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const Twine &name = ""); - llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, - const Twine &name = ""); - void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args); + llvm::CallBase *EmitCallOrInvoke(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name = ""); + llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, + const Twine &name = ""); + llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + const Twine &name = ""); + void EmitNoreturnRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args); CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD, NestedNameSpecifier *Qual, @@ -3659,11 +3658,10 @@ public: llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E, CallArgList *RtlArgs); - RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD, + RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *E, - StructorType Type); + QualType ImplicitParamTy, const CallExpr *E); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE, @@ -3727,9 +3725,6 @@ public: Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch); - llvm::Value *EmitISOVolatileLoad(const CallExpr *E); - llvm::Value *EmitISOVolatileStore(const CallExpr *E); - llvm::Function 
*LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E); @@ -3825,6 +3820,8 @@ public: llvm::Type *returnType); llvm::Value *EmitObjCAllocWithZone(llvm::Value *value, llvm::Type *returnType); + llvm::Value *EmitObjCAllocInit(llvm::Value *value, llvm::Type *resultType); + llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); @@ -3922,12 +3919,12 @@ public: void EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::Constant *DeclPtr, bool PerformInit); - llvm::Constant *createAtExitStub(const VarDecl &VD, llvm::Constant *Dtor, + llvm::Function *createAtExitStub(const VarDecl &VD, llvm::FunctionCallee Dtor, llvm::Constant *Addr); /// Call atexit() with a function that passes the given argument to /// the given function. - void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::Constant *fn, + void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn, llvm::Constant *addr); /// Call atexit() with function dtorStub. @@ -3960,8 +3957,8 @@ public: /// variables. void GenerateCXXGlobalDtorsFunc( llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> - &DtorsAndObjects); + const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, + llvm::Constant *>> &DtorsAndObjects); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, @@ -3982,16 +3979,14 @@ public: void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); - void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest); - RValue EmitAtomicExpr(AtomicExpr *E); //===--------------------------------------------------------------------===// // Annotations Emission //===--------------------------------------------------------------------===// - /// Emit an annotation call (intrinsic or builtin). 
- llvm::Value *EmitAnnotationCall(llvm::Value *AnnotationFn, + /// Emit an annotation call (intrinsic). + llvm::Value *EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location); @@ -4084,8 +4079,8 @@ public: /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); - /// Create a basic block that will call a handler function in a - /// sanitizer runtime with the provided arguments, and create a conditional + /// Create a basic block that will either trap or call a handler function in + /// the UBSan runtime with the provided arguments, and create a conditional /// branch to it. void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked, SanitizerHandler Check, ArrayRef<llvm::Constant *> StaticArgs, @@ -4177,14 +4172,16 @@ private: /// If EmittedExpr is non-null, this will use that instead of re-emitting E. llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE); + llvm::Value *EmittedE, + bool IsDynamic); /// Emits the size of E, as required by __builtin_object_size. This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE); + llvm::Value *EmittedE, + bool IsDynamic); public: #ifndef NDEBUG diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 244738042c..b490fa0faf 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1,9 +1,8 @@ //===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -34,6 +33,7 @@ #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" @@ -47,17 +47,18 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" using namespace clang; using namespace CodeGen; @@ -418,7 +419,9 @@ void CodeGenModule::Release() { OpenMPRuntime->clear(); } if (PGOReader) { - getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); + getModule().setProfileSummary( + PGOReader->getSummary(/* UseCS */ false).getMD(VMContext), + llvm::ProfileSummary::PSK_Instr); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } @@ -731,9 +734,11 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, } if (!D) return; - // Set visibility for definitions. + // Set visibility for definitions, and for declarations if requested globally + // or set explicitly. 
LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.isVisibilityExplicit() || !GV->isDeclarationForLinker()) + if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls || + !GV->isDeclarationForLinker()) GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } @@ -1047,8 +1052,17 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // Keep the first result in the case of a mangling collision. const auto *ND = cast<NamedDecl>(GD.getDecl()); - auto Result = - Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD)); + std::string MangledName = getMangledNameImpl(*this, GD, ND); + + // Postfix kernel stub names with .stub to differentiate them from kernel + // names in device binaries. This is to facilitate the debugger to find + // the correct symbols for kernels in the device binary. + if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) + if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice && + FD->hasAttr<CUDAGlobalAttr>()) + MangledName = MangledName + ".stub"; + + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } @@ -1544,12 +1558,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, const auto *FD = cast<FunctionDecl>(GD.getDecl()); - if (!IsIncompleteFunction) { + if (!IsIncompleteFunction) SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F); - // Setup target-specific attributes. - if (F->isDeclaration()) - getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); - } // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with @@ -1569,6 +1579,10 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, setLinkageForGV(F, FD); setGVProperties(F, FD); + // Setup target-specific attributes. 
+ if (!IsIncompleteFunction && F->isDeclaration()) + getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); + if (const auto *CSA = FD->getAttr<CodeSegAttr>()) F->setSection(CSA->getName()); else if (const auto *SA = FD->getAttr<SectionAttr>()) @@ -1603,6 +1617,23 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) getOpenMPRuntime().emitDeclareSimdFunction(FD, F); + + if (const auto *CB = FD->getAttr<CallbackAttr>()) { + // Annotate the callback behavior as metadata: + // - The callback callee (as argument number). + // - The callback payloads (as argument numbers). + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + + // The payload indices are all but the first one in the encoding. The first + // identifies the callback callee. + int CalleeIdx = *CB->encoding_begin(); + ArrayRef<int> PayloadIndices(CB->encoding_begin() + 1, CB->encoding_end()); + F->addMetadata(llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + CalleeIdx, PayloadIndices, + /* VarArgsArePassed */ false)})); + } } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { @@ -2173,6 +2204,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; + } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Global)) { + if (MustBeEmitted(Global)) + EmitOMPDeclareMapper(DMD); + return; } } @@ -2265,35 +2300,36 @@ static bool HasNonDllImportDtor(QualType T) { } namespace { - struct FunctionIsDirectlyRecursive : - public RecursiveASTVisitor<FunctionIsDirectlyRecursive> { + struct FunctionIsDirectlyRecursive + : public ConstStmtVisitor<FunctionIsDirectlyRecursive, bool> { const StringRef Name; const Builtin::Context &BI; - bool Result; - FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) : - Name(N), BI(C), Result(false) { - } - typedef 
RecursiveASTVisitor<FunctionIsDirectlyRecursive> Base; + FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) + : Name(N), BI(C) {} - bool TraverseCallExpr(CallExpr *E) { + bool VisitCallExpr(const CallExpr *E) { const FunctionDecl *FD = E->getDirectCallee(); if (!FD) - return true; - AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>(); - if (Attr && Name == Attr->getLabel()) { - Result = true; return false; - } + AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>(); + if (Attr && Name == Attr->getLabel()) + return true; unsigned BuiltinID = FD->getBuiltinID(); if (!BuiltinID || !BI.isLibFunction(BuiltinID)) - return true; + return false; StringRef BuiltinName = BI.getName(BuiltinID); if (BuiltinName.startswith("__builtin_") && Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) { - Result = true; - return false; + return true; } - return true; + return false; + } + + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) + if (Child && this->Visit(Child)) + return true; + return false; } }; @@ -2378,8 +2414,8 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { } FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo); - Walker.TraverseFunctionDecl(const_cast<FunctionDecl*>(FD)); - return Walker.Result; + const Stmt *Body = FD->getBody(); + return Body ? Walker.Visit(Body) : false; } bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { @@ -2447,13 +2483,14 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { if (!shouldEmitFunction(GD)) return; + llvm::TimeTraceScope TimeScope( + "CodeGen Function", [&]() { return FD->getQualifiedNameAsString(); }); + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) { // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. 
- if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method)) - ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType())); - else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method)) - ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType())); + if (isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method)) + ABI->emitCXXStructor(GD); else if (FD->isMultiVersion()) EmitMultiVersionFunctionDefinition(GD, GV); else @@ -2537,10 +2574,9 @@ void CodeGenModule::emitMultiVersionFunctions() { ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); - std::stable_sort( - Options.begin(), Options.end(), - [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, - const CodeGenFunction::MultiVersionResolverOption &RHS) { + llvm::stable_sort( + Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); }); CodeGenFunction CGF(*this); @@ -2553,8 +2589,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr<CPUDispatchAttr>(); assert(DD && "Not a cpu_dispatch Function?"); - QualType CanonTy = Context.getCanonicalType(FD->getType()); - llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD); + llvm::Type *DeclTy = getTypes().ConvertType(FD->getType()); if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) { const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); @@ -2893,8 +2928,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, // If there was no specific requested type, just convert it now. 
if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); - auto CanonTy = Context.getCanonicalType(FD->getType()); - Ty = getTypes().ConvertFunctionType(CanonTy, FD); + Ty = getTypes().ConvertType(FD->getType()); } // Devirtualized destructor calls may come through here instead of via @@ -2953,7 +2987,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// CreateRuntimeFunction - Create a new runtime function with the specified /// type and name. -llvm::Constant * +llvm::FunctionCallee CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeList ExtraAttrs, bool Local) { @@ -2966,9 +3000,13 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, if (F->empty()) { F->setCallingConv(getRuntimeCC()); - if (!Local && getTriple().isOSBinFormatCOFF() && - !getCodeGenOpts().LTOVisibilityPublicStd && - !getTriple().isWindowsGNUEnvironment()) { + // In Windows Itanium environments, try to mark runtime functions + // dllimport. For Mingw and MSVC, don't. We don't really know if the user + // will link their standard library statically or dynamically. Marking + // functions imported when they are not imported can cause linker errors + // and warnings. + if (!Local && getTriple().isWindowsItaniumEnvironment() && + !getCodeGenOpts().LTOVisibilityPublicStd) { const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name); if (!FD || FD->hasAttr<DLLImportAttr>()) { F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -2979,15 +3017,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, } } - return C; -} - -/// CreateBuiltinFunction - Create a new builtin function with the specified -/// type and name. 
-llvm::Constant * -CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeList ExtraAttrs) { - return CreateRuntimeFunction(FTy, Name, ExtraAttrs, true); + return {FTy, C}; } /// isTypeConstant - Determine whether an object of this type can be emitted @@ -3199,6 +3229,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, ExpectedAS, Ty); + if (GV->isDeclaration()) + getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); + return GV; } @@ -3206,15 +3239,8 @@ llvm::Constant * CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); - if (isa<CXXConstructorDecl>(D)) - return getAddrOfCXXStructor(cast<CXXConstructorDecl>(D), - getFromCtorType(GD.getCtorType()), - /*FnInfo=*/nullptr, /*FnType=*/nullptr, - /*DontDefer=*/false, IsForDefinition); - else if (isa<CXXDestructorDecl>(D)) - return getAddrOfCXXStructor(cast<CXXDestructorDecl>(D), - getFromDtorType(GD.getDtorType()), - /*FnInfo=*/nullptr, /*FnType=*/nullptr, + if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) + return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); else if (isa<CXXMethodDecl>(D)) { auto FInfo = &getTypes().arrangeCXXMethodDeclaration( @@ -3359,6 +3385,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::cuda_device; } + if (LangOpts.OpenMP) { + LangAS AS; + if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) + return AS; + } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } @@ -3619,7 +3650,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Extern global variables will be registered in the TU where they are // defined. 
if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceVar(*GV, Flags); + getCUDARuntime().registerDeviceVar(D, *GV, Flags); } else if (D->hasAttr<CUDASharedAttr>()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they @@ -3762,13 +3793,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, } } - // Microsoft's link.exe doesn't support alignments greater than 32 for common - // symbols, so symbols with greater alignment requirements cannot be common. + // Microsoft's link.exe doesn't support alignments greater than 32 bytes for + // common symbols, so symbols with greater alignment requirements cannot be + // common. // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two // alignments for common symbols via the aligncomm directive, so this // restriction only applies to MSVC environments. if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && - Context.getTypeAlignIfKnown(D->getType()) > 32) + Context.getTypeAlignIfKnown(D->getType()) > + Context.toBits(CharUnits::fromQuantity(32))) return true; return false; @@ -3877,9 +3910,10 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, } // Recognize calls to the function. - llvm::CallSite callSite(user); + llvm::CallBase *callSite = dyn_cast<llvm::CallBase>(user); if (!callSite) continue; - if (!callSite.isCallee(&*use)) continue; + if (!callSite->isCallee(&*use)) + continue; // If the return types don't match exactly, then we can't // transform this call unless it's dead. @@ -3888,18 +3922,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // Get the call site's attribute list. SmallVector<llvm::AttributeSet, 8> newArgAttrs; - llvm::AttributeList oldAttrs = callSite.getAttributes(); + llvm::AttributeList oldAttrs = callSite->getAttributes(); // If the function was passed too few arguments, don't transform. 
unsigned newNumArgs = newFn->arg_size(); - if (callSite.arg_size() < newNumArgs) continue; + if (callSite->arg_size() < newNumArgs) + continue; // If extra arguments were passed, we silently drop them. // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; for (llvm::Argument &A : newFn->args()) { - if (callSite.getArgument(argNo)->getType() != A.getType()) { + if (callSite->getArgOperand(argNo)->getType() != A.getType()) { dontTransform = true; break; } @@ -3913,35 +3948,33 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // Okay, we can transform this. Create the new call instruction and copy // over the required information. - newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); + newArgs.append(callSite->arg_begin(), callSite->arg_begin() + argNo); // Copy over any operand bundles. - callSite.getOperandBundlesAsDefs(newBundles); + callSite->getOperandBundlesAsDefs(newBundles); - llvm::CallSite newCall; - if (callSite.isCall()) { - newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "", - callSite.getInstruction()); + llvm::CallBase *newCall; + if (dyn_cast<llvm::CallInst>(callSite)) { + newCall = + llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite); } else { - auto *oldInvoke = cast<llvm::InvokeInst>(callSite.getInstruction()); - newCall = llvm::InvokeInst::Create(newFn, - oldInvoke->getNormalDest(), - oldInvoke->getUnwindDest(), - newArgs, newBundles, "", - callSite.getInstruction()); + auto *oldInvoke = cast<llvm::InvokeInst>(callSite); + newCall = llvm::InvokeInst::Create(newFn, oldInvoke->getNormalDest(), + oldInvoke->getUnwindDest(), newArgs, + newBundles, "", callSite); } newArgs.clear(); // for the next iteration if (!newCall->getType()->isVoidTy()) - newCall->takeName(callSite.getInstruction()); - newCall.setAttributes(llvm::AttributeList::get( + newCall->takeName(callSite); + newCall->setAttributes(llvm::AttributeList::get( newFn->getContext(), 
oldAttrs.getFnAttributes(), oldAttrs.getRetAttributes(), newArgAttrs)); - newCall.setCallingConv(callSite.getCallingConv()); + newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. if (!callSite->use_empty()) - callSite->replaceAllUsesWith(newCall.getInstruction()); + callSite->replaceAllUsesWith(newCall); // Copy debug location attached to CI. if (callSite->getDebugLoc()) @@ -4376,6 +4409,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); + case llvm::Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); case llvm::Triple::COFF: case llvm::Triple::ELF: case llvm::Triple::Wasm: @@ -4504,7 +4539,8 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getQuantity()); - return ConstantAddress(GV, Alignment); + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } } @@ -4566,7 +4602,8 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getQuantity()); - return ConstantAddress(GV, Alignment); + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } } @@ -5030,10 +5067,17 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPThreadPrivateDecl(cast<OMPThreadPrivateDecl>(D)); break; + case Decl::OMPAllocate: + break; + case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); break; + case Decl::OMPDeclareMapper: + EmitOMPDeclareMapper(cast<OMPDeclareMapperDecl>(D)); + break; + case Decl::OMPRequires: EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D)); break; diff --git a/lib/CodeGen/CodeGenModule.h 
b/lib/CodeGen/CodeGenModule.h index 75679d11c1..83ddac70b0 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -1,9 +1,8 @@ //===--- CodeGenModule.h - Per-Module state for LLVM CodeGen ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -120,90 +119,93 @@ struct ObjCEntrypoints { ObjCEntrypoints() { memset(this, 0, sizeof(*this)); } /// void objc_alloc(id); - llvm::Constant *objc_alloc; + llvm::FunctionCallee objc_alloc; /// void objc_allocWithZone(id); - llvm::Constant *objc_allocWithZone; + llvm::FunctionCallee objc_allocWithZone; + + /// void objc_alloc_init(id); + llvm::FunctionCallee objc_alloc_init; /// void objc_autoreleasePoolPop(void*); - llvm::Constant *objc_autoreleasePoolPop; + llvm::FunctionCallee objc_autoreleasePoolPop; /// void objc_autoreleasePoolPop(void*); /// Note this method is used when we are using exception handling - llvm::Constant *objc_autoreleasePoolPopInvoke; + llvm::FunctionCallee objc_autoreleasePoolPopInvoke; /// void *objc_autoreleasePoolPush(void); - llvm::Constant *objc_autoreleasePoolPush; + llvm::Function *objc_autoreleasePoolPush; /// id objc_autorelease(id); - llvm::Constant *objc_autorelease; + llvm::Function *objc_autorelease; /// id objc_autorelease(id); /// Note this is the runtime method not the intrinsic. 
- llvm::Constant *objc_autoreleaseRuntimeFunction; + llvm::FunctionCallee objc_autoreleaseRuntimeFunction; /// id objc_autoreleaseReturnValue(id); - llvm::Constant *objc_autoreleaseReturnValue; + llvm::Function *objc_autoreleaseReturnValue; /// void objc_copyWeak(id *dest, id *src); - llvm::Constant *objc_copyWeak; + llvm::Function *objc_copyWeak; /// void objc_destroyWeak(id*); - llvm::Constant *objc_destroyWeak; + llvm::Function *objc_destroyWeak; /// id objc_initWeak(id*, id); - llvm::Constant *objc_initWeak; + llvm::Function *objc_initWeak; /// id objc_loadWeak(id*); - llvm::Constant *objc_loadWeak; + llvm::Function *objc_loadWeak; /// id objc_loadWeakRetained(id*); - llvm::Constant *objc_loadWeakRetained; + llvm::Function *objc_loadWeakRetained; /// void objc_moveWeak(id *dest, id *src); - llvm::Constant *objc_moveWeak; + llvm::Function *objc_moveWeak; /// id objc_retain(id); - llvm::Constant *objc_retain; + llvm::Function *objc_retain; /// id objc_retain(id); /// Note this is the runtime method not the intrinsic. - llvm::Constant *objc_retainRuntimeFunction; + llvm::FunctionCallee objc_retainRuntimeFunction; /// id objc_retainAutorelease(id); - llvm::Constant *objc_retainAutorelease; + llvm::Function *objc_retainAutorelease; /// id objc_retainAutoreleaseReturnValue(id); - llvm::Constant *objc_retainAutoreleaseReturnValue; + llvm::Function *objc_retainAutoreleaseReturnValue; /// id objc_retainAutoreleasedReturnValue(id); - llvm::Constant *objc_retainAutoreleasedReturnValue; + llvm::Function *objc_retainAutoreleasedReturnValue; /// id objc_retainBlock(id); - llvm::Constant *objc_retainBlock; + llvm::Function *objc_retainBlock; /// void objc_release(id); - llvm::Constant *objc_release; + llvm::Function *objc_release; /// void objc_release(id); /// Note this is the runtime method not the intrinsic. 
- llvm::Constant *objc_releaseRuntimeFunction; + llvm::FunctionCallee objc_releaseRuntimeFunction; /// void objc_storeStrong(id*, id); - llvm::Constant *objc_storeStrong; + llvm::Function *objc_storeStrong; /// id objc_storeWeak(id*, id); - llvm::Constant *objc_storeWeak; + llvm::Function *objc_storeWeak; /// id objc_unsafeClaimAutoreleasedReturnValue(id); - llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue; + llvm::Function *objc_unsafeClaimAutoreleasedReturnValue; /// A void(void) inline asm to use to mark that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; /// void clang.arc.use(...); - llvm::Constant *clang_arc_use; + llvm::Function *clang_arc_use; }; /// This class records statistics on instrumentation based profiling. @@ -452,7 +454,9 @@ private: SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. - std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors; + std::vector< + std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, llvm::Constant *>> + CXXGlobalDtors; /// The complete set of modules that has been imported. 
llvm::SetVector<clang::Module *> ImportedModules; @@ -501,8 +505,8 @@ private: llvm::Constant *NSConcreteGlobalBlock = nullptr; llvm::Constant *NSConcreteStackBlock = nullptr; - llvm::Constant *BlockObjectAssign = nullptr; - llvm::Constant *BlockObjectDispose = nullptr; + llvm::FunctionCallee BlockObjectAssign = nullptr; + llvm::FunctionCallee BlockObjectDispose = nullptr; llvm::Type *BlockDescriptorType = nullptr; llvm::Type *GenericBlockLiteralType = nullptr; @@ -512,10 +516,10 @@ private: } Block; /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeStartFn = nullptr; + llvm::Function *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeEndFn = nullptr; + llvm::Function *LifetimeEndFn = nullptr; GlobalDecl initializedGlobalDecl; @@ -586,7 +590,7 @@ public: // Version checking function, used to implement ObjC's @available: // i32 @__isOSVersionAtLeast(i32, i32, i32) - llvm::Constant *IsOSVersionAtLeastFn = nullptr; + llvm::FunctionCallee IsOSVersionAtLeastFn = nullptr; InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } @@ -950,16 +954,24 @@ public: // Produce code for this constructor/destructor. This method doesn't try // to apply any ABI rules about which other constructors/destructors // are needed or if they are alias to each other. - llvm::Function *codegenCXXStructor(const CXXMethodDecl *MD, - StructorType Type); + llvm::Function *codegenCXXStructor(GlobalDecl GD); /// Return the address of the constructor/destructor of the given type. 
llvm::Constant * - getAddrOfCXXStructor(const CXXMethodDecl *MD, StructorType Type, - const CGFunctionInfo *FnInfo = nullptr, + getAddrOfCXXStructor(GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr, llvm::FunctionType *FnType = nullptr, bool DontDefer = false, - ForDefinition_t IsForDefinition = NotForDefinition); + ForDefinition_t IsForDefinition = NotForDefinition) { + return cast<llvm::Constant>(getAddrAndTypeOfCXXStructor(GD, FnInfo, FnType, + DontDefer, + IsForDefinition) + .getCallee()); + } + + llvm::FunctionCallee getAddrAndTypeOfCXXStructor( + GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr, + llvm::FunctionType *FnType = nullptr, bool DontDefer = false, + ForDefinition_t IsForDefinition = NotForDefinition); /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". @@ -999,20 +1011,18 @@ public: void addCompilerUsedGlobal(llvm::GlobalValue *GV); /// Add a destructor and object to add to the C++ global destructor function. - void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) { - CXXGlobalDtors.emplace_back(DtorFn, Object); + void AddCXXDtorEntry(llvm::FunctionCallee DtorFn, llvm::Constant *Object) { + CXXGlobalDtors.emplace_back(DtorFn.getFunctionType(), DtorFn.getCallee(), + Object); } - /// Create a new runtime function with the specified type and name. - llvm::Constant * + /// Create or return a runtime function declaration with the specified type + /// and name. + llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false); - /// Create a new compiler builtin function with the specified type and name. - llvm::Constant * - CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeList ExtraAttrs = llvm::AttributeList()); /// Create a new runtime global variable with the specified type and name. 
llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -1022,13 +1032,13 @@ public: llvm::Constant *getNSConcreteGlobalBlock(); llvm::Constant *getNSConcreteStackBlock(); - llvm::Constant *getBlockObjectAssign(); - llvm::Constant *getBlockObjectDispose(); + llvm::FunctionCallee getBlockObjectAssign(); + llvm::FunctionCallee getBlockObjectDispose(); ///@} - llvm::Constant *getLLVMLifetimeStartFn(); - llvm::Constant *getLLVMLifetimeEndFn(); + llvm::Function *getLLVMLifetimeStartFn(); + llvm::Function *getLLVMLifetimeEndFn(); // Make sure that this type is translated. void UpdateCompletedType(const TagDecl *TD); @@ -1244,6 +1254,10 @@ public: void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF = nullptr); + /// Emit a code for declare mapper construct. + void EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF = nullptr); + /// Emit a code for requires directive. /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); @@ -1294,7 +1308,7 @@ public: getMostBaseClasses(const CXXRecordDecl *RD); /// Get the declaration of std::terminate for the platform. - llvm::Constant *getTerminateFn(); + llvm::FunctionCallee getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index 776060743a..d10a321dc3 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -1,9 +1,8 @@ //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -772,14 +771,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { // If so, instrument only base variant, others are implemented by delegation // to the base one, it would be counted twice otherwise. if (CGM.getTarget().getCXXABI().hasConstructorVariants()) { - if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base) - return; - if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D)) if (GD.getCtorType() != Ctor_Base && CodeGenFunction::IsConstructorDelegationValid(CCD)) return; } + if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base) + return; + CGM.ClearUnusedCoverageMapping(D); setFuncName(Fn); diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 120ab651a4..2e740f7892 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -1,9 +1,8 @@ //===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index 27d39716d2..a41cfcee4c 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -1,9 +1,8 @@ -//===--- CodeGenTypes.cpp - TBAA information for LLVM CodeGen -------------===// +//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index 86ba407c05..e8e006f416 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -1,9 +1,8 @@ //===--- CodeGenTBAA.h - TBAA information for LLVM CodeGen ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h index 901aed6c00..ed4b773afd 100644 --- a/lib/CodeGen/CodeGenTypeCache.h +++ b/lib/CodeGen/CodeGenTypeCache.h @@ -1,9 +1,8 @@ //===--- CodeGenTypeCache.h - Commonly used LLVM types and info -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 2acf1ac161..79b29b3d91 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -1,9 +1,8 @@ //===--- CodeGenTypes.cpp - Type translation for LLVM CodeGen -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -309,8 +308,7 @@ static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, llvm_unreachable("Unknown float format!"); } -llvm::Type *CodeGenTypes::ConvertFunctionType(QualType QFT, - const FunctionDecl *FD) { +llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) { assert(QFT.isCanonical()); const Type *Ty = QFT.getTypePtr(); const FunctionType *FT = cast<FunctionType>(QFT.getTypePtr()); @@ -348,7 +346,7 @@ llvm::Type *CodeGenTypes::ConvertFunctionType(QualType QFT, const CGFunctionInfo *FI; if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT)) { FI = &arrangeFreeFunctionType( - CanQual<FunctionProtoType>::CreateUnsafe(QualType(FPT, 0)), FD); + CanQual<FunctionProtoType>::CreateUnsafe(QualType(FPT, 0))); } else { const FunctionNoProtoType *FNPT = cast<FunctionNoProtoType>(FT); FI = &arrangeFreeFunctionType( @@ -597,7 +595,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::FunctionNoProto: case Type::FunctionProto: - ResultType = ConvertFunctionType(T); + ResultType = ConvertFunctionTypeInternal(T); break; case Type::ObjCObject: ResultType = 
ConvertType(cast<ObjCObjectType>(Ty)->getBaseType()); @@ -637,7 +635,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::BlockPointer: { const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); - llvm::Type *PointeeType = ConvertTypeForMem(FTy); + llvm::Type *PointeeType = CGM.getLangOpts().OpenCL + ? CGM.getGenericBlockLiteralType() + : ConvertTypeForMem(FTy); unsigned AS = Context.getTargetAddressSpace(FTy); ResultType = llvm::PointerType::get(PointeeType, AS); break; diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 8e344e91b8..0310232950 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -1,9 +1,8 @@ //===--- CodeGenTypes.h - Type translation for LLVM CodeGen -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -55,65 +54,6 @@ class CGRecordLayout; class CodeGenModule; class RequiredArgs; -enum class StructorType { - Complete, // constructor or destructor - Base, // constructor or destructor - Deleting // destructor only -}; - -inline CXXCtorType toCXXCtorType(StructorType T) { - switch (T) { - case StructorType::Complete: - return Ctor_Complete; - case StructorType::Base: - return Ctor_Base; - case StructorType::Deleting: - llvm_unreachable("cannot have a deleting ctor"); - } - llvm_unreachable("not a StructorType"); -} - -inline StructorType getFromCtorType(CXXCtorType T) { - switch (T) { - case Ctor_Complete: - return StructorType::Complete; - case Ctor_Base: - return StructorType::Base; - case Ctor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - case Ctor_CopyingClosure: - case Ctor_DefaultClosure: - llvm_unreachable("not expecting a closure"); - } - llvm_unreachable("not a CXXCtorType"); -} - -inline CXXDtorType toCXXDtorType(StructorType T) { - switch (T) { - case StructorType::Complete: - return Dtor_Complete; - case StructorType::Base: - return Dtor_Base; - case StructorType::Deleting: - return Dtor_Deleting; - } - llvm_unreachable("not a StructorType"); -} - -inline StructorType getFromDtorType(CXXDtorType T) { - switch (T) { - case Dtor_Deleting: - return StructorType::Deleting; - case Dtor_Complete: - return StructorType::Complete; - case Dtor_Base: - return StructorType::Base; - case Dtor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - } - llvm_unreachable("not a CXXDtorType"); -} - /// This class organizes the cross-module state that is used while lowering /// AST types to LLVM types. class CodeGenTypes { @@ -163,6 +103,9 @@ class CodeGenTypes { llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; + /// Helper for ConvertType. 
+ llvm::Type *ConvertFunctionTypeInternal(QualType FT); + public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -180,17 +123,13 @@ public: /// Convert clang calling convention to LLVM callilng convention. unsigned ClangCallConvToLLVMCallConv(CallingConv CC); + /// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR + /// qualification. + CanQualType DeriveThisType(const CXXRecordDecl *RD, const CXXMethodDecl *MD); + /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); - /// Converts the GlobalDecl into an llvm::Type. This should be used - /// when we know the target of the function we want to convert. This is - /// because some functions (explicitly, those with pass_object_size - /// parameters) may not have the same signature as their type portrays, and - /// can only be called directly. - llvm::Type *ConvertFunctionType(QualType FT, - const FunctionDecl *FD = nullptr); - /// ConvertTypeForMem - Convert type T into a llvm::Type. This differs from /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the @@ -263,8 +202,7 @@ public: const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, const FunctionType *Ty, bool ChainCall); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty); const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); /// A nullary function is a freestanding function of type 'void ()'. @@ -299,8 +237,7 @@ public: /// C++ methods have some special rules and also have implicit parameters. 
const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD); - const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, - StructorType Type); + const CGFunctionInfo &arrangeCXXStructorDeclaration(GlobalDecl GD); const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args, const CXXConstructorDecl *D, CXXCtorType CtorKind, diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h index 7ad8e5d37c..59a19730f4 100644 --- a/lib/CodeGen/ConstantEmitter.h +++ b/lib/CodeGen/ConstantEmitter.h @@ -1,9 +1,8 @@ //===--- ConstantEmitter.h - IR constant emission ---------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp index 59e66b88fb..40b1607b56 100644 --- a/lib/CodeGen/ConstantInitBuilder.cpp +++ b/lib/CodeGen/ConstantInitBuilder.cpp @@ -1,9 +1,8 @@ //===--- ConstantInitBuilder.cpp - Global initializer builder -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 35962c73d9..ad014b5a17 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -1,9 +1,8 @@ //===--- CoverageMappingGen.cpp - Coverage mapping generation ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -244,7 +243,7 @@ public: ++Depth; FileLocs.push_back(std::make_pair(Loc, Depth)); } - std::stable_sort(FileLocs.begin(), FileLocs.end(), llvm::less_second()); + llvm::stable_sort(FileLocs, llvm::less_second()); for (const auto &FL : FileLocs) { SourceLocation Loc = FL.first; diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index c62db09695..3bf51f5904 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -1,9 +1,8 @@ //===---- CoverageMappingGen.h - Coverage mapping generation ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index c7bdeac58a..3b0db35d98 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -1,9 +1,8 @@ //===-- EHScopeStack.h - Stack for cleanup IR generation --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index b53304528c..d46130432b 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -1,9 +1,8 @@ //===------- ItaniumCXXABI.cpp - Emit LLVM Code from ASTs for a Module ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,7 +28,6 @@ #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -64,13 +62,9 @@ public: bool classifyReturnType(CGFunctionInfo &FI) const override; - bool passClassIndirect(const CXXRecordDecl *RD) const { - return !canCopyArgument(RD); - } - RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { // If C++ prohibits us from making a copy, pass by address. - if (passClassIndirect(RD)) + if (!RD->canPassInRegisters()) return RAA_Indirect; return RAA_Default; } @@ -218,7 +212,7 @@ public: void EmitCXXConstructors(const CXXConstructorDecl *D) override; AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, @@ -330,7 +324,8 @@ public: llvm::GlobalVariable *DeclPtr, bool PerformInit) override; void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *dtor, llvm::Constant *addr) override; + llvm::FunctionCallee dtor, + llvm::Constant *addr) override; llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::Value *Val); @@ -377,7 +372,7 @@ public: llvm::GlobalValue::LinkageTypes Linkage) const; friend class ItaniumRTTIBuilder; - void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; + void emitCXXStructor(GlobalDecl GD) override; std::pair<llvm::Value *, const CXXRecordDecl *> LoadVTablePtr(CodeGenFunction &CGF, Address This, @@ -1094,7 +1089,7 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { return false; // If C++ prohibits us from making a copy, return by address. 
- if (passClassIndirect(RD)) { + if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); return true; @@ -1158,7 +1153,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); - llvm::Constant *Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); + llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); if (isNoReturn) CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None); @@ -1166,7 +1161,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { CGF.EmitRuntimeCallOrInvoke(Fn); } -static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getAllocateExceptionFn(CodeGenModule &CGM) { // void *__cxa_allocate_exception(size_t thrown_size); llvm::FunctionType *FTy = @@ -1175,7 +1170,7 @@ static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception"); } -static llvm::Constant *getThrowFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getThrowFn(CodeGenModule &CGM) { // void __cxa_throw(void *thrown_exception, std::type_info *tinfo, // void (*dest) (void *)); @@ -1192,7 +1187,7 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { llvm::Type *SizeTy = CGF.ConvertType(getContext().getSizeType()); uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity(); - llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM); + llvm::FunctionCallee AllocExceptionFn = getAllocateExceptionFn(CGM); llvm::CallInst *ExceptionPtr = CGF.EmitNounwindRuntimeCall( AllocExceptionFn, llvm::ConstantInt::get(SizeTy, TypeSize), "exception"); @@ -1210,7 +1205,7 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { CXXRecordDecl *Record = 
cast<CXXRecordDecl>(RecordTy->getDecl()); if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); - Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete); + Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)); Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); } } @@ -1220,7 +1215,7 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args); } -static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { // void *__dynamic_cast(const void *sub, // const abi::__class_type_info *src, // const abi::__class_type_info *dst, @@ -1243,7 +1238,7 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } -static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getBadCastFn(CodeGenFunction &CGF) { // void __cxa_bad_cast(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast"); @@ -1301,7 +1296,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, return Offset; } -static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getBadTypeidFn(CodeGenFunction &CGF) { // void __cxa_bad_typeid(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); @@ -1314,8 +1309,9 @@ bool ItaniumCXXABI::shouldTypeidBeNullChecked(bool IsDeref, } void ItaniumCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadTypeidFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + llvm::FunctionCallee Fn = getBadTypeidFn(CGF); + llvm::CallBase *Call = CGF.EmitRuntimeCallOrInvoke(Fn); + Call->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } @@ -1411,8 +1407,9 @@ llvm::Value 
*ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, } bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadCastFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + llvm::FunctionCallee Fn = getBadCastFn(CGF); + llvm::CallBase *Call = CGF.EmitRuntimeCallOrInvoke(Fn); + Call->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); return true; } @@ -1457,7 +1454,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } CGCXXABI::AddedStructorArgs -ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, +ItaniumCXXABI::buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); @@ -1465,7 +1462,9 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). - if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) { + if ((isa<CXXConstructorDecl>(GD.getDecl()) ? 
GD.getCtorType() == Ctor_Base + : GD.getDtorType() == Dtor_Base) && + cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); return AddedStructorArgs::prefix(1); @@ -1563,12 +1562,9 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, Type != Dtor_Base && DD->isVirtual()) Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent()); else - Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD); + Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD); - CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(), - This.getPointer(), VTT, VTTTy, - nullptr, nullptr); + CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), VTT, VTTTy, nullptr); } void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, @@ -1760,15 +1756,14 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( assert(CE == nullptr || CE->arg_begin() == CE->arg_end()); assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); - const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, getFromDtorType(DtorType)); + GlobalDecl GD(Dtor, DtorType); + const CGFunctionInfo *FInfo = + &CGM.getTypes().arrangeCXXStructorDeclaration(GD); llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); - CGCallee Callee = - CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty); + CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); - CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(), - This.getPointer(), /*ImplicitParam=*/nullptr, - QualType(), CE, nullptr); + CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), nullptr, QualType(), + nullptr); return nullptr; } @@ -1958,7 +1953,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); llvm::FunctionType *FTy = 
llvm::FunctionType::get(CGM.VoidTy, NumElementsPtr.getType(), false); - llvm::Constant *F = + llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_poison_cxx_array_cookie"); CGF.Builder.CreateCall(F, NumElementsPtr.getPointer()); } @@ -1989,7 +1984,7 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, // the metadata may be lost. llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.SizeTy, CGF.SizeTy->getPointerTo(0), false); - llvm::Constant *F = + llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie"); return CGF.Builder.CreateCall(F, numElementsPtr.getPointer()); } @@ -2024,7 +2019,7 @@ Address ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CGF.Builder.CreateStore(elementSize, cookie); // The second element is the element count. - cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1, CGF.getSizeSize()); + cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1); CGF.Builder.CreateStore(numElements, cookie); // Finally, compute a pointer to the actual data buffer by skipping @@ -2047,8 +2042,8 @@ llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, /*********************** Static local initialization **************************/ -static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardAcquireFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // int __cxa_guard_acquire(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), @@ -2060,8 +2055,8 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::Attribute::NoUnwind)); } -static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardReleaseFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // void __cxa_guard_release(__guard *guard_object); llvm::FunctionType *FTy = 
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); @@ -2072,8 +2067,8 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, llvm::Attribute::NoUnwind)); } -static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardAbortFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // void __cxa_guard_abort(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); @@ -2286,9 +2281,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, /// Register a global destructor using __cxa_atexit. static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, - llvm::Constant *dtor, - llvm::Constant *addr, - bool TLS) { + llvm::FunctionCallee dtor, + llvm::Constant *addr, bool TLS) { const char *Name = "__cxa_atexit"; if (TLS) { const llvm::Triple &T = CGF.getTarget().getTriple(); @@ -2307,8 +2301,8 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, llvm::FunctionType::get(CGF.IntTy, paramTys, false); // Fetch the actual function. - llvm::Constant *atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name); - if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit)) + llvm::FunctionCallee atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name); + if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit.getCallee())) fn->setDoesNotThrow(); // Create a variable that binds the atexit to this shared object. @@ -2324,11 +2318,10 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // function. 
addr = llvm::Constant::getNullValue(CGF.Int8PtrTy); - llvm::Value *args[] = { - llvm::ConstantExpr::getBitCast(dtor, dtorTy), - llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), - handle - }; + llvm::Value *args[] = {llvm::ConstantExpr::getBitCast( + cast<llvm::Constant>(dtor.getCallee()), dtorTy), + llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), + handle}; CGF.EmitNounwindRuntimeCall(atexit, args); } @@ -2377,9 +2370,8 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { } /// Register a global destructor as best as we know how. -void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, - const VarDecl &D, - llvm::Constant *dtor, +void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, + llvm::FunctionCallee dtor, llvm::Constant *addr) { if (D.isNoDestroy(CGM.getContext())) return; @@ -2463,10 +2455,12 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); // Always resolve references to the wrapper at link time. 
- if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) && - !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) && - !llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()))) - Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!Wrapper->hasLocalLinkage()) + if (!isThreadWrapperReplaceable(VD, CGM) || + llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) || + llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()) || + VD->getVisibility() == HiddenVisibility) + Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); if (isThreadWrapperReplaceable(VD, CGM)) { Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); @@ -2541,6 +2535,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( getMangleContext().mangleItaniumThreadLocalInit(VD, Out); } + llvm::FunctionType *InitFnTy = llvm::FunctionType::get(CGM.VoidTy, false); + // If we have a definition for the variable, emit the initialization // function as an alias to the global Init function (if any). Otherwise, // produce a declaration of the initialization function. @@ -2559,8 +2555,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // This function will not exist if the TU defining the thread_local // variable in question does not need any dynamic initialization for // its thread_local variables. 
- llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false); - Init = llvm::Function::Create(FnTy, + Init = llvm::Function::Create(InitFnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); @@ -2578,7 +2573,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CGBuilderTy Builder(CGM, Entry); if (InitIsInitFunc) { if (Init) { - llvm::CallInst *CallVal = Builder.CreateCall(Init); + llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init); if (isThreadWrapperReplaceable(VD, CGM)) { CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); llvm::Function *Fn = @@ -2594,7 +2589,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Builder.CreateCondBr(Have, InitBB, ExitBB); Builder.SetInsertPoint(InitBB); - Builder.CreateCall(Init); + Builder.CreateCall(InitFnTy, Init); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(ExitBB); @@ -2960,7 +2955,7 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, bool IsDLLImport = RD->hasAttr<DLLImportAttr>(); // Don't import the RTTI but emit it locally. - if (CGM.getTriple().isWindowsGNUEnvironment() && IsDLLImport) + if (CGM.getTriple().isWindowsGNUEnvironment()) return false; if (CGM.getVTables().isVTableExternal(RD)) @@ -3846,31 +3841,28 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM, CGM.SetCommonAttributes(AliasDecl, Alias); } -void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { +void ItaniumCXXABI::emitCXXStructor(GlobalDecl GD) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); auto *CD = dyn_cast<CXXConstructorDecl>(MD); const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD); StructorCodegen CGType = getCodegenToUse(CGM, MD); - if (Type == StructorType::Complete) { - GlobalDecl CompleteDecl; + if (CD ? 
GD.getCtorType() == Ctor_Complete + : GD.getDtorType() == Dtor_Complete) { GlobalDecl BaseDecl; - if (CD) { - CompleteDecl = GlobalDecl(CD, Ctor_Complete); - BaseDecl = GlobalDecl(CD, Ctor_Base); - } else { - CompleteDecl = GlobalDecl(DD, Dtor_Complete); - BaseDecl = GlobalDecl(DD, Dtor_Base); - } + if (CD) + BaseDecl = GD.getWithCtorType(Ctor_Base); + else + BaseDecl = GD.getWithDtorType(Dtor_Base); if (CGType == StructorCodegen::Alias || CGType == StructorCodegen::COMDAT) { - emitConstructorDestructorAlias(CGM, CompleteDecl, BaseDecl); + emitConstructorDestructorAlias(CGM, GD, BaseDecl); return; } if (CGType == StructorCodegen::RAUW) { - StringRef MangledName = CGM.getMangledName(CompleteDecl); + StringRef MangledName = CGM.getMangledName(GD); auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl); CGM.addReplacement(MangledName, Aliasee); return; @@ -3881,7 +3873,8 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, // base class if there is exactly one non-virtual base class with a // non-trivial destructor, there are no fields with a non-trivial // destructor, and the body of the destructor is trivial. - if (DD && Type == StructorType::Base && CGType != StructorCodegen::COMDAT && + if (DD && GD.getDtorType() == Dtor_Base && + CGType != StructorCodegen::COMDAT && !CGM.TryEmitBaseDestructorAsAlias(DD)) return; @@ -3897,7 +3890,7 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, // In such cases we should try to emit the deleting dtor as an alias to the // selected 'operator delete'. 
- llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type); + llvm::Function *Fn = CGM.codegenCXXStructor(GD); if (CGType == StructorCodegen::COMDAT) { SmallString<256> Buffer; @@ -3913,7 +3906,7 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, } } -static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) { // void *__cxa_begin_catch(void*); llvm::FunctionType *FTy = llvm::FunctionType::get( CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); @@ -3921,7 +3914,7 @@ static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch"); } -static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) { // void __cxa_end_catch(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); @@ -3929,7 +3922,7 @@ static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) { return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch"); } -static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getGetExceptionPtrFn(CodeGenModule &CGM) { // void *__cxa_get_exception_ptr(void*); llvm::FunctionType *FTy = llvm::FunctionType::get( CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); @@ -4204,14 +4197,14 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, /// Get or define the following function: /// void @__clang_call_terminate(i8* %exn) nounwind noreturn /// This code is used only in C++. 
-static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - llvm::Constant *fnRef = CGM.CreateRuntimeFunction( - fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); - - llvm::Function *fn = dyn_cast<llvm::Function>(fnRef); - if (fn && fn->empty()) { + llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction( + fnTy, "__clang_call_terminate", llvm::AttributeList(), /*IsLocal=*/true); + llvm::Function *fn = + cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts()); + if (fn->empty()) { fn->setDoesNotThrow(); fn->setDoesNotReturn(); @@ -4229,7 +4222,7 @@ static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { // Set up the function. llvm::BasicBlock *entry = - llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn); + llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn); CGBuilderTy builder(CGM, entry); // Pull the exception pointer out of the parameter list. @@ -4249,7 +4242,6 @@ static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { // std::terminate cannot return. builder.CreateUnreachable(); } - return fnRef; } diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index 013ca15e23..92800e738b 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -1,9 +1,8 @@ //===--- MacroPPCallbacks.cpp ---------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h index b87a4005d4..32906a0002 100644 --- a/lib/CodeGen/MacroPPCallbacks.h +++ b/lib/CodeGen/MacroPPCallbacks.h @@ -1,9 +1,8 @@ //===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 5545bc6647..c37bfe3a59 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1,9 +1,8 @@ //===--- MicrosoftCXXABI.cpp - Emit LLVM Code from ASTs for a Module ------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -27,7 +26,6 @@ #include "clang/AST/VTableBuilder.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -207,7 +205,7 @@ public: // delegate to or alias the base destructor. 
AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) override; /// Non-base dtors should be emitted as delegating thunks in this ABI. @@ -396,7 +394,8 @@ public: llvm::GlobalVariable *DeclPtr, bool PerformInit) override; void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *Dtor, llvm::Constant *Addr) override; + llvm::FunctionCallee Dtor, + llvm::Constant *Addr) override; // ==== Notes on array cookies ========= // @@ -674,7 +673,7 @@ public: llvm::Value *MemPtr, const MemberPointerType *MPT) override; - void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; + void emitCXXStructor(GlobalDecl GD) override; llvm::StructType *getCatchableTypeType() { if (CatchableTypeType) @@ -726,18 +725,20 @@ public: return ThrowInfoType; } - llvm::Constant *getThrowFn() { + llvm::FunctionCallee getThrowFn() { // _CxxThrowException is passed an exception object and a ThrowInfo object // which describes the exception. llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false); - auto *Fn = cast<llvm::Function>( - CGM.CreateRuntimeFunction(FTy, "_CxxThrowException")); + llvm::FunctionCallee Throw = + CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"); // _CxxThrowException is stdcall on 32-bit x86 platforms. 
- if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86) - Fn->setCallingConv(llvm::CallingConv::X86_StdCall); - return Fn; + if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86) { + if (auto *Fn = dyn_cast<llvm::Function>(Throw.getCallee())) + Fn->setCallingConv(llvm::CallingConv::X86_StdCall); + } + return Throw; } llvm::Function *getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, @@ -810,7 +811,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { // Use the simple Itanium rules for now. // FIXME: This is incompatible with MSVC for arguments with a dtor and no // copy ctor. - return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default; + return !RD->canPassInRegisters() ? RAA_Indirect : RAA_Default; case llvm::Triple::x86: // All record arguments are passed in memory on x86. Decide whether to @@ -819,7 +820,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { // If C++ prohibits us from making a copy, construct the arguments directly // into argument memory. - if (!canCopyArgument(RD)) + if (!RD->canPassInRegisters()) return RAA_DirectInMemory; // Otherwise, construct the argument into a temporary and copy the bytes @@ -828,7 +829,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { case llvm::Triple::x86_64: case llvm::Triple::aarch64: - return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default; + return !RD->canPassInRegisters() ? 
RAA_Indirect : RAA_Default; } llvm_unreachable("invalid enum"); @@ -853,7 +854,7 @@ void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { llvm::Value *Args[] = { llvm::ConstantPointerNull::get(CGM.Int8PtrTy), llvm::ConstantPointerNull::get(getThrowInfoType()->getPointerTo())}; - auto *Fn = getThrowFn(); + llvm::FunctionCallee Fn = getThrowFn(); if (isNoReturn) CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args); else @@ -927,20 +928,20 @@ bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref, !getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr(); } -static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF, - llvm::Value *Argument) { +static llvm::CallBase *emitRTtypeidCall(CodeGenFunction &CGF, + llvm::Value *Argument) { llvm::Type *ArgTypes[] = {CGF.Int8PtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false); llvm::Value *Args[] = {Argument}; - llvm::Constant *Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid"); + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid"); return CGF.EmitRuntimeCallOrInvoke(Fn, Args); } void MicrosoftCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) { - llvm::CallSite Call = + llvm::CallBase *Call = emitRTtypeidCall(CGF, llvm::Constant::getNullValue(CGM.VoidPtrTy)); - Call.setDoesNotReturn(); + Call->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } @@ -950,7 +951,7 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF, llvm::Type *StdTypeInfoPtrTy) { std::tie(ThisPtr, std::ignore, std::ignore) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); - auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction(); + llvm::CallBase *Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()); return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy); } @@ -985,13 +986,13 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( // BOOL isReference) llvm::Type *ArgTypes[] = {CGF.Int8PtrTy, CGF.Int32Ty, CGF.Int8PtrTy, CGF.Int8PtrTy, 
CGF.Int32Ty}; - llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee Function = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false), "__RTDynamicCast"); llvm::Value *Args[] = { ThisPtr, Offset, SrcRTTI, DestRTTI, llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())}; - ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args).getInstruction(); + ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args); return CGF.Builder.CreateBitCast(ThisPtr, DestLTy); } @@ -1005,7 +1006,7 @@ MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, // PVOID __RTCastToVoid( // PVOID inptr) llvm::Type *ArgTypes[] = {CGF.Int8PtrTy}; - llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee Function = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false), "__RTCastToVoid"); llvm::Value *Args[] = {Value.getPointer()}; @@ -1050,33 +1051,55 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { return isDeletingDtor(GD); } +static bool IsSizeGreaterThan128(const CXXRecordDecl *RD) { + return RD->getASTContext().getTypeSize(RD->getTypeForDecl()) > 128; +} + +static bool hasMicrosoftABIRestrictions(const CXXRecordDecl *RD) { + // For AArch64, we use the C++14 definition of an aggregate, so we also + // check for: + // No private or protected non static data members. + // No base classes + // No virtual functions + // Additionally, we need to ensure that there is a trivial copy assignment + // operator, a trivial destructor and no user-provided constructors. 
+ if (RD->hasProtectedFields() || RD->hasPrivateFields()) + return true; + if (RD->getNumBases() > 0) + return true; + if (RD->isPolymorphic()) + return true; + if (RD->hasNonTrivialCopyAssignment()) + return true; + for (const CXXConstructorDecl *Ctor : RD->ctors()) + if (Ctor->isUserProvided()) + return true; + if (RD->hasNonTrivialDestructor()) + return true; + return false; +} + bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl(); if (!RD) return false; - CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - if (FI.isInstanceMethod()) { - // If it's an instance method, aggregates are always returned indirectly via - // the second parameter. - FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); + bool isAArch64 = CGM.getTarget().getTriple().isAArch64(); + bool isSimple = !isAArch64 || !hasMicrosoftABIRestrictions(RD); + bool isIndirectReturn = + isAArch64 ? (!RD->canPassInRegisters() || + IsSizeGreaterThan128(RD)) + : !RD->isPOD(); + bool isInstanceMethod = FI.isInstanceMethod(); - // aarch64-windows requires that instance methods use X1 for the return - // address. So for aarch64-windows we do not mark the - // return as SRet. - FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() == - llvm::Triple::aarch64); - return true; - } else if (!RD->isPOD()) { - // If it's a free function, non-POD types are returned indirectly. 
+ if (isIndirectReturn || !isSimple || isInstanceMethod) { + CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + FI.getReturnInfo().setSRetAfterThis(isInstanceMethod); + + FI.getReturnInfo().setInReg(isAArch64 && + !(isSimple && IsSizeGreaterThan128(RD))); - // aarch64-windows requires that non-POD, non-instance returns use X0 for - // the return address. So for aarch64-windows we do not mark the return as - // SRet. - FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() == - llvm::Triple::aarch64); return true; } @@ -1233,16 +1256,17 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, } CGCXXABI::AddedStructorArgs -MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, +MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) { AddedStructorArgs Added; // TODO: 'for base' flag - if (T == StructorType::Deleting) { + if (isa<CXXDestructorDecl>(GD.getDecl()) && + GD.getDtorType() == Dtor_Deleting) { // The scalar deleting destructor takes an implicit int parameter. 
ArgTys.push_back(getContext().IntTy); ++Added.Suffix; } - auto *CD = dyn_cast<CXXConstructorDecl>(MD); + auto *CD = dyn_cast<CXXConstructorDecl>(GD.getDecl()); if (!CD) return Added; @@ -1552,9 +1576,8 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0) Type = Dtor_Base; - CGCallee Callee = - CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), - GlobalDecl(DD, Type)); + GlobalDecl GD(DD, Type); + CGCallee Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD); if (DD->isVirtual()) { assert(Type != CXXDtorType::Dtor_Deleting && @@ -1568,10 +1591,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF); } - CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(), + CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), /*ImplicitParam=*/nullptr, - /*ImplicitParamTy=*/QualType(), nullptr, - getFromDtorType(Type)); + /*ImplicitParamTy=*/QualType(), nullptr); if (BaseDtorEndBB) { // Complete object handler should continue to be the remaining CGF.Builder.CreateBr(BaseDtorEndBB); @@ -1885,8 +1907,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( // We have only one destructor in the vftable but can get both behaviors // by passing an implicit int parameter. 
GlobalDecl GD(Dtor, Dtor_Deleting); - const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, StructorType::Deleting); + const CGFunctionInfo *FInfo = + &CGM.getTypes().arrangeCXXStructorDeclaration(GD); llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); @@ -1896,9 +1918,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( DtorType == Dtor_Deleting); This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); - RValue RV = - CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam, - Context.IntTy, CE, StructorType::Deleting); + RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), + ImplicitParam, Context.IntTy, CE); return RV.getScalarVal(); } @@ -1996,7 +2017,7 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD, llvm::Value *Callee = CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); - CGF.EmitMustTailThunk(MD, getThisValue(CGF), Callee); + CGF.EmitMustTailThunk(MD, getThisValue(CGF), {ThunkTy, Callee}); return ThunkFn; } @@ -2222,7 +2243,7 @@ Address MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, } static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, - llvm::Constant *Dtor, + llvm::FunctionCallee Dtor, llvm::Constant *Addr) { // Create a function which calls the destructor. 
llvm::Constant *DtorStub = CGF.createAtExitStub(VD, Dtor, Addr); @@ -2231,16 +2252,17 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get( CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false); - llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee TLRegDtor = CGF.CGM.CreateRuntimeFunction( TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true); - if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor)) + if (llvm::Function *TLRegDtorFn = + dyn_cast<llvm::Function>(TLRegDtor.getCallee())) TLRegDtorFn->setDoesNotThrow(); CGF.EmitNounwindRuntimeCall(TLRegDtor, DtorStub); } void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *Dtor, + llvm::FunctionCallee Dtor, llvm::Constant *Addr) { if (D.isNoDestroy(CGM.getContext())) return; @@ -2325,7 +2347,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) { return ConstantAddress(GV, Align); } -static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getInitThreadHeaderFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), CGM.IntTy->getPointerTo(), /*isVarArg=*/false); @@ -2337,7 +2359,7 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) { /*Local=*/true); } -static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getInitThreadFooterFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), CGM.IntTy->getPointerTo(), /*isVarArg=*/false); @@ -2349,7 +2371,7 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) { /*Local=*/true); } -static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getInitThreadAbortFn(CodeGenModule &CGM) { llvm::FunctionType *FTy = 
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()), CGM.IntTy->getPointerTo(), /*isVarArg=*/false); @@ -3816,44 +3838,36 @@ MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD, return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info); } -static void emitCXXConstructor(CodeGenModule &CGM, - const CXXConstructorDecl *ctor, - StructorType ctorType) { - // There are no constructor variants, always emit the complete destructor. - llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete); - CGM.maybeSetTrivialComdat(*ctor, *Fn); -} +void MicrosoftCXXABI::emitCXXStructor(GlobalDecl GD) { + if (auto *ctor = dyn_cast<CXXConstructorDecl>(GD.getDecl())) { + // There are no constructor variants, always emit the complete destructor. + llvm::Function *Fn = + CGM.codegenCXXStructor(GD.getWithCtorType(Ctor_Complete)); + CGM.maybeSetTrivialComdat(*ctor, *Fn); + return; + } + + auto *dtor = cast<CXXDestructorDecl>(GD.getDecl()); -static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor, - StructorType dtorType) { // Emit the base destructor if the base and complete (vbase) destructors are // equivalent. This effectively implements -mconstructor-aliases as part of // the ABI. - if (dtorType == StructorType::Complete && + if (GD.getDtorType() == Dtor_Complete && dtor->getParent()->getNumVBases() == 0) - dtorType = StructorType::Base; + GD = GD.getWithDtorType(Dtor_Base); // The base destructor is equivalent to the base destructor of its // base class if there is exactly one non-virtual base class with a // non-trivial destructor, there are no fields with a non-trivial // destructor, and the body of the destructor is trivial. 
- if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor)) + if (GD.getDtorType() == Dtor_Base && !CGM.TryEmitBaseDestructorAsAlias(dtor)) return; - llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType); + llvm::Function *Fn = CGM.codegenCXXStructor(GD); if (Fn->isWeakForLinker()) Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName())); } -void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { - if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - emitCXXConstructor(CGM, CD, Type); - return; - } - emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type); -} - llvm::Function * MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT) { @@ -3955,7 +3969,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, /*Delegating=*/false, Args); // Call the destructor with our arguments. llvm::Constant *CalleePtr = - CGM.getAddrOfCXXStructor(CD, StructorType::Complete); + CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete)); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete)); const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall( @@ -4006,7 +4020,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T, if (CT == Ctor_CopyingClosure) CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure); else - CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete); + CopyCtor = CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete)); CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy); } else { @@ -4219,7 +4233,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { if (CXXDestructorDecl *DtorD = RD->getDestructor()) if (!DtorD->isTrivial()) CleanupFn = llvm::ConstantExpr::getBitCast( - CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete), + CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)), CGM.Int8PtrTy); // This is unused as far as we can tell, initialize 
it to null. llvm::Constant *ForwardCompat = diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index c0a37698e7..3b4e06045a 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -1,9 +1,8 @@ //===--- ModuleBuilder.cpp - Emit LLVM Code from ASTs ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 6f00c836f9..db53959ea0 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -1,9 +1,8 @@ //===--- ObjectFilePCHContainerOperations.cpp -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/PatternInit.cpp b/lib/CodeGen/PatternInit.cpp new file mode 100644 index 0000000000..7a1baf96cf --- /dev/null +++ b/lib/CodeGen/PatternInit.cpp @@ -0,0 +1,93 @@ +//===--- PatternInit.cpp - Pattern Initialization -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PatternInit.h" +#include "CodeGenModule.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Type.h" + +llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM, + llvm::Type *Ty) { + // The following value is a guaranteed unmappable pointer value and has a + // repeated byte-pattern which makes it easier to synthesize. We use it for + // pointers as well as integers so that aggregates are likely to be + // initialized with this repeated value. + constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; + // For 32-bit platforms it's a bit trickier because, across systems, only the + // zero page can reasonably be expected to be unmapped, and even then we need + // a very low address. We use a smaller value, and that value sadly doesn't + // have a repeated byte-pattern. We don't use it for integers. + constexpr uint32_t SmallValue = 0x000000AA; + // Floating-point values are initialized as NaNs because they propagate. Using + // a repeated byte pattern means that it will be easier to initialize + // all-floating-point aggregates and arrays with memset. Further, aggregates + // which mix integral and a few floats might also initialize with memset + // followed by a handful of stores for the floats. Using fairly unique NaNs + // also means they'll be easier to distinguish in a crash. + constexpr bool NegativeNaN = true; + constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; + if (Ty->isIntOrIntVectorTy()) { + unsigned BitWidth = cast<llvm::IntegerType>( + Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getBitWidth(); + if (BitWidth <= 64) + return llvm::ConstantInt::get(Ty, LargeValue); + return llvm::ConstantInt::get( + Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); + } + if (Ty->isPtrOrPtrVectorTy()) { + auto *PtrTy = cast<llvm::PointerType>( + Ty->isVectorTy() ? 
Ty->getVectorElementType() : Ty); + unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( + PtrTy->getAddressSpace()); + llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); + uint64_t IntValue; + switch (PtrWidth) { + default: + llvm_unreachable("pattern initialization of unsupported pointer width"); + case 64: + IntValue = LargeValue; + break; + case 32: + IntValue = SmallValue; + break; + } + auto *Int = llvm::ConstantInt::get(IntTy, IntValue); + return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); + } + if (Ty->isFPOrFPVectorTy()) { + unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( + (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getFltSemantics()); + llvm::APInt Payload(64, NaNPayload); + if (BitWidth >= 64) + Payload = llvm::APInt::getSplat(BitWidth, Payload); + return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); + } + if (Ty->isArrayTy()) { + // Note: this doesn't touch tail padding (at the end of an object, before + // the next array object). It is instead handled by replaceUndef. + auto *ArrTy = cast<llvm::ArrayType>(Ty); + llvm::SmallVector<llvm::Constant *, 8> Element( + ArrTy->getNumElements(), + initializationPatternFor(CGM, ArrTy->getElementType())); + return llvm::ConstantArray::get(ArrTy, Element); + } + + // Note: this doesn't touch struct padding. It will initialize as much union + // padding as is required for the largest type in the union. Padding is + // instead handled by replaceUndef. Stores to structs with volatile members + // don't have a volatile qualifier when initialized according to C++. This is + // fine because stack-based volatiles don't really have volatile semantics + // anyways, and the initialization shouldn't be observable. 
+ auto *StructTy = cast<llvm::StructType>(Ty); + llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements()); + for (unsigned El = 0; El != Struct.size(); ++El) + Struct[El] = initializationPatternFor(CGM, StructTy->getElementType(El)); + return llvm::ConstantStruct::get(StructTy, Struct); +} diff --git a/lib/CodeGen/PatternInit.h b/lib/CodeGen/PatternInit.h new file mode 100644 index 0000000000..f117dde9ac --- /dev/null +++ b/lib/CodeGen/PatternInit.h @@ -0,0 +1,27 @@ +//===- PatternInit - Pattern initialization ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H +#define LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H + +namespace llvm { +class Constant; +class Type; +} // namespace llvm + +namespace clang { +namespace CodeGen { + +class CodeGenModule; + +llvm::Constant *initializationPatternFor(CodeGenModule &, llvm::Type *); + +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp index 23cf9e4908..3211a3e74d 100644 --- a/lib/CodeGen/SanitizerMetadata.cpp +++ b/lib/CodeGen/SanitizerMetadata.cpp @@ -1,9 +1,8 @@ //===--- SanitizerMetadata.cpp - Blacklist for sanitizers -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SanitizerMetadata.h b/lib/CodeGen/SanitizerMetadata.h index 166f0e6c9b..7ffac4360d 100644 --- a/lib/CodeGen/SanitizerMetadata.h +++ b/lib/CodeGen/SanitizerMetadata.h @@ -1,9 +1,8 @@ //===--- SanitizerMetadata.h - Metadata for sanitizers ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp index 75a0fa5ce1..8bce93b71c 100644 --- a/lib/CodeGen/SwiftCallingConv.cpp +++ b/lib/CodeGen/SwiftCallingConv.cpp @@ -1,9 +1,8 @@ //===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 89ec73670a..432e55da41 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -1,9 +1,8 @@ //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -310,10 +309,9 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, // Advance the pointer past the argument, then store that back. CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); - llvm::Value *NextPtr = - CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, - "argp.next"); - CGF.Builder.CreateStore(NextPtr, VAListAddr); + Address NextPtr = + CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next"); + CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr); // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. 
@@ -464,8 +462,11 @@ TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, } llvm::SyncScope::ID -TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { - return C.getOrInsertSyncScopeID(""); /* default sync scope */ +TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const { + return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */ } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); @@ -761,6 +762,22 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-module", Attr->getImportModule()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B; + B.addAttribute("wasm-import-name", Attr->getImportName()); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + } + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { llvm::Function *Fn = cast<llvm::Function>(GV); if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) @@ -2254,6 +2271,12 @@ public: return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); } + /// Disable tail call on x86-64. The epilogue code before the tail jump blocks + /// the autoreleaseRV/retainRV optimization. 
+ bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const override { + return true; + } + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } @@ -3627,8 +3650,8 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) { - Address overflow_arg_area_p = CGF.Builder.CreateStructGEP( - VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p"); + Address overflow_arg_area_p = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); llvm::Value *overflow_arg_area = CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); @@ -3699,18 +3722,14 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; if (neededInt) { - gp_offset_p = - CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), - "gp_offset_p"); + gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); } if (neededSSE) { - fp_offset_p = - CGF.Builder.CreateStructGEP(VAListAddr, 1, CharUnits::fromQuantity(4), - "fp_offset_p"); + fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); llvm::Value *FitsInFP = llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); @@ -3739,8 +3758,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // loads than necessary. Can we clean this up? 
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(16)), - "reg_save_area"); + CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); Address RegAddr = Address::invalid(); if (neededInt && neededSSE) { @@ -3766,16 +3784,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Value *V = CGF.Builder.CreateAlignedLoad( TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo))); - CGF.Builder.CreateStore(V, - CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); // Copy the second element. V = CGF.Builder.CreateAlignedLoad( TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi))); - CharUnits Offset = CharUnits::fromQuantity( - getDataLayout().getStructLayout(ST)->getElementOffset(1)); - CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } else if (neededInt) { @@ -3822,12 +3837,10 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( RegAddrLo, ST->getStructElementType(0))); - CGF.Builder.CreateStore(V, - CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( RegAddrHi, ST->getStructElementType(1))); - CGF.Builder.CreateStore(V, - CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } @@ -4169,9 
+4182,9 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, // The calling convention either uses 1-2 GPRs or 1 FPR. Address NumRegsAddr = Address::invalid(); if (isInt || IsSoftFloatABI) { - NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(), "gpr"); + NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr"); } else { - NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(), "fpr"); + NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr"); } llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); @@ -4199,8 +4212,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, { CGF.EmitBlock(UsingRegs); - Address RegSaveAreaPtr = - Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8)); + Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4); RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CharUnits::fromQuantity(8)); assert(RegAddr.getElementType() == CGF.Int8Ty); @@ -4248,8 +4260,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, Size = CGF.getPointerSize(); } - Address OverflowAreaAddr = - Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4)); + Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3); Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); // Round up address of argument to alignment @@ -5289,25 +5300,18 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, Address reg_offs_p = Address::invalid(); llvm::Value *reg_offs = nullptr; int reg_top_index; - CharUnits reg_top_offset; int RegSize = IsIndirect ? 
8 : TyInfo.first.getQuantity(); if (!IsFPR) { // 3 is the field number of __gr_offs - reg_offs_p = - CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), - "gr_offs_p"); + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top - reg_top_offset = CharUnits::fromQuantity(8); RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. - reg_offs_p = - CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28), - "vr_offs_p"); + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top - reg_top_offset = CharUnits::fromQuantity(16); RegSize = 16 * NumRegs; } @@ -5369,8 +5373,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, CGF.EmitBlock(InRegBlock); llvm::Value *reg_top = nullptr; - Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, - reg_top_offset, "reg_top_p"); + Address reg_top_p = + CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs), CharUnits::fromQuantity(IsFPR ? 
16 : 8)); @@ -5410,8 +5414,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); - Address StoreAddr = - CGF.Builder.CreateConstArrayGEP(Tmp, i, BaseTyInfo.first); + Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i); llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); CGF.Builder.CreateStore(Elem, StoreAddr); @@ -5440,8 +5443,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, //======================================= CGF.EmitBlock(OnStackBlock); - Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, - CharUnits::Zero(), "stack_p"); + Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); // Again, stack arguments may need realignment. In this case both integer and @@ -5598,8 +5600,10 @@ public: ABIKind getABIKind() const { return Kind; } private: - ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; + ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; @@ -5609,6 +5613,8 @@ private: bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; + bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; + void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -5729,11 +5735,13 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes( void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!::classifyReturnType(getCXXABI(), 
FI, *this)) - FI.getReturnInfo() = - classifyReturnType(FI.getReturnType(), FI.isVariadic()); + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), + FI.getCallingConvention()); for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, FI.isVariadic()); + I.info = classifyArgumentType(I.type, FI.isVariadic(), + FI.getCallingConvention()); + // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) @@ -5812,8 +5820,8 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, - bool isVariadic) const { +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, + unsigned functionCallConv) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; @@ -5821,7 +5829,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. - bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic; + // Variadic functions should always marshal to the base standard. + bool IsAAPCS_VFP = + !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -5834,7 +5844,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // half type natively, and does not need to interwork with AAPCS code. if ((Ty->isFloat16Type() || Ty->isHalfType()) && !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? + llvm::Type *ResType = IsAAPCS_VFP ? 
llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -5858,7 +5868,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (IsEffectivelyAAPCS_VFP) { + if (IsAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. const Type *Base = nullptr; @@ -6015,10 +6025,12 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context, return true; } -ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, - bool isVariadic) const { - bool IsEffectivelyAAPCS_VFP = - (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic; +ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const { + + // Variadic functions should always marshal to the base standard. + bool IsAAPCS_VFP = + !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -6039,7 +6051,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // half type natively, and does not need to interwork with AAPCS code. if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? + llvm::Type *ResType = IsAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -6088,7 +6100,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. 
- if (IsEffectivelyAAPCS_VFP) { + if (IsAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) @@ -6193,6 +6205,16 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return Members <= 4; } +bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, + bool acceptHalf) const { + // Give precedence to user-specified calling conventions. + if (callConvention != llvm::CallingConv::C) + return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); + else + return (getABIKind() == AAPCS_VFP) || + (acceptHalf && (getABIKind() == AAPCS16_VFP)); +} + Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CharUnits SlotSize = CharUnits::fromQuantity(4); @@ -6275,10 +6297,56 @@ private: static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; +/// Checks if the type is unsupported directly by the current target. +static bool isUnsupportedType(ASTContext &Context, QualType T) { + if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) + return true; + if (!Context.getTargetInfo().hasFloat128Type() && T->isFloat128Type()) + return true; + if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && + Context.getTypeSize(T) > 64) + return true; + if (const auto *AT = T->getAsArrayTypeUnsafe()) + return isUnsupportedType(Context, AT->getElementType()); + const auto *RT = T->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + for (const CXXBaseSpecifier &I : CXXRD->bases()) + if (isUnsupportedType(Context, I.getType())) + return true; + + for (const FieldDecl *I : RD->fields()) + if (isUnsupportedType(Context, I->getType())) + return true; + return false; +} + +/// Coerce the given type into an array with maximum allowed size of elements. 
+static ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, ASTContext &Context, + llvm::LLVMContext &LLVMContext, + unsigned MaxSize) { + // Alignment and Size are measured in bits. + const uint64_t Size = Context.getTypeSize(Ty); + const uint64_t Alignment = Context.getTypeAlign(Ty); + const unsigned Div = std::min<unsigned>(MaxSize, Alignment); + llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Div); + const uint64_t NumElements = (Size + Div - 1) / Div; + return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); +} + ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + if (getContext().getLangOpts().OpenMP && + getContext().getLangOpts().OpenMPIsDevice && + isUnsupportedType(getContext(), RetTy)) + return coerceToIntArrayWithLimit(RetTy, getContext(), getVMContext(), 64); + // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); @@ -6584,8 +6652,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Vector arguments are always passed in the high bits of a // single (8 byte) or double (16 byte) stack slot. 
Address OverflowArgAreaPtr = - CGF.Builder.CreateStructGEP(VAListAddr, 2, CharUnits::fromQuantity(16), - "overflow_arg_area_ptr"); + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), TyInfo.second); @@ -6617,9 +6684,8 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, RegPadding = Padding; // values are passed in the low bits of a GPR } - Address RegCountPtr = CGF.Builder.CreateStructGEP( - VAListAddr, RegCountField, RegCountField * CharUnits::fromQuantity(8), - "reg_count_ptr"); + Address RegCountPtr = + CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr"); llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, @@ -6642,8 +6708,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Value *RegOffset = CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); Address RegSaveAreaPtr = - CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), - "reg_save_area_ptr"); + CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr"); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset, @@ -6663,8 +6728,8 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.EmitBlock(InMemBlock); // Work out the address of a stack argument. 
- Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP( - VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); + Address OverflowArgAreaPtr = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), PaddedSize); @@ -6774,21 +6839,19 @@ void MSP430TargetCodeGenInfo::setTargetAttributes( if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { - // Handle 'interrupt' attribute: - llvm::Function *F = cast<llvm::Function>(GV); + const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>(); + if (!InterruptAttr) + return; - // Step 1: Set ISR calling convention. - F->setCallingConv(llvm::CallingConv::MSP430_INTR); + // Handle 'interrupt' attribute: + llvm::Function *F = cast<llvm::Function>(GV); - // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attribute::NoInline); + // Step 1: Set ISR calling convention. + F->setCallingConv(llvm::CallingConv::MSP430_INTR); - // Step 3: Emit ISR vector alias. - unsigned Num = attr->getNumber() / 2; - llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, - "__isr_" + Twine(Num), F); - } + // Step 2: Add attributes goodness. 
+ F->addFnAttr(llvm::Attribute::NoInline); + F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); } } @@ -7764,8 +7827,10 @@ public: } LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; - llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; llvm::Function * createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *BlockInvokeFunc, @@ -7775,8 +7840,24 @@ public: }; } +static bool requiresAMDGPUProtectedVisibility(const Decl *D, + llvm::GlobalValue *GV) { + if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) + return false; + + return D->hasAttr<OpenCLKernelAttr>() || + (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || + (isa<VarDecl>(D) && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())); +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (requiresAMDGPUProtectedVisibility(D, GV)) { + GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); + GV->setDSOLocal(true); + } + if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); @@ -7794,8 +7875,16 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); if (ReqdWGS || FlatWGS) { - unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; - unsigned Max = FlatWGS ? 
FlatWGS->getMax() : 0; + unsigned Min = 0; + unsigned Max = 0; + if (FlatWGS) { + Min = FlatWGS->getMin() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + Max = FlatWGS->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + } if (ReqdWGS && Min == 0 && Max == 0) Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); @@ -7809,8 +7898,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( } if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + unsigned Min = + Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); + unsigned Max = Attr->getMax() ? Attr->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue() + : 0; if (Min != 0) { assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); @@ -7884,10 +7977,12 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, } llvm::SyncScope::ID -AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const { - StringRef Name; - switch (S) { +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const { + std::string Name; + switch (Scope) { case SyncScope::OpenCLWorkGroup: Name = "workgroup"; break; @@ -7898,9 +7993,17 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, Name = ""; break; case SyncScope::OpenCLSubGroup: - Name = "subgroup"; + Name = "wavefront"; + } + + if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { + if (!Name.empty()) + Name = Twine(Twine(Name) + Twine("-")).str(); + + Name = Twine(Twine(Name) + Twine("one-as")).str(); } - return C.getOrInsertSyncScopeID(Name); + + return Ctx.getOrInsertSyncScopeID(Name); } bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { @@ -8198,9 +8301,8 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } // 
Update VAList. - llvm::Value *NextPtr = - Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), Stride, "ap.next"); - Builder.CreateStore(NextPtr, VAListAddr); + Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next"); + Builder.CreateStore(NextPtr.getPointer(), VAListAddr); return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr"); } @@ -8553,9 +8655,8 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, // Increment the VAList. if (!ArgSize.isZero()) { - llvm::Value *APN = - Builder.CreateConstInBoundsByteGEP(AP.getPointer(), ArgSize); - Builder.CreateStore(APN, VAListAddr); + Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize); + Builder.CreateStore(APN.getPointer(), VAListAddr); } return Val; diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index b530260ea4..d7e9eee9c5 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -1,9 +1,8 @@ //===---- TargetInfo.h - Encapsulate target details -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -157,6 +156,12 @@ public: return ""; } + /// Determine whether a call to objc_retainAutoreleasedReturnValue should be + /// marked as 'notail'. + virtual bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const { + return false; + } + /// Return a constant used by UBSan as a signature to identify functions /// possessing type information, or 0 if the platform is unsupported. virtual llvm::Constant * @@ -263,8 +268,10 @@ public: llvm::Type *DestTy) const; /// Get the syncscope used in LLVM IR. 
- virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const; + virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const; /// Interface class for filling custom fields of a block literal for OpenCL. class TargetOpenCLBlockHelper { diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp index 859cdd4282..f3a172e91c 100644 --- a/lib/CodeGen/VarBypassDetector.cpp +++ b/lib/CodeGen/VarBypassDetector.cpp @@ -1,9 +1,8 @@ //===--- VarBypassDetector.h - Bypass jumps detector --------------*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/VarBypassDetector.h b/lib/CodeGen/VarBypassDetector.h index 47fe13cfac..8a2e388eae 100644 --- a/lib/CodeGen/VarBypassDetector.h +++ b/lib/CodeGen/VarBypassDetector.h @@ -1,9 +1,8 @@ //===--- VarBypassDetector.cpp - Bypass jumps detector ------------*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // |