diff options
Diffstat (limited to 'lib/CodeGen')
67 files changed, 11140 insertions, 4530 deletions
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index c0be60ef53..575506da84 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -24,6 +24,7 @@ namespace llvm { namespace clang { class ASTContext; + class CodeGenOptions; class TargetInfo; namespace CodeGen { @@ -68,6 +69,7 @@ namespace swiftcall { llvm::LLVMContext &getVMContext() const; const llvm::DataLayout &getDataLayout() const; const TargetInfo &getTarget() const; + const CodeGenOptions &getCodeGenOpts() const; /// Return the calling convention to use for system runtime /// functions. @@ -149,7 +151,6 @@ namespace swiftcall { return info->supportsSwift(); } }; - } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index f0e0b71ddf..893967a9d6 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -35,7 +35,6 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -50,10 +49,12 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> using namespace clang; @@ -83,9 +84,6 @@ class EmitAssemblyHelper { return TargetIRAnalysis(); } - /// Set LLVM command line options passed through -backend-option. - void setCommandLineOpts(); - void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM); /// Generates the TargetMachine. @@ -132,16 +130,20 @@ public: // that we add to the PassManagerBuilder. class PassManagerBuilderWrapper : public PassManagerBuilder { public: - PassManagerBuilderWrapper(const CodeGenOptions &CGOpts, + PassManagerBuilderWrapper(const Triple &TargetTriple, + const CodeGenOptions &CGOpts, const LangOptions &LangOpts) - : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {} + : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts), + LangOpts(LangOpts) {} + const Triple &getTargetTriple() const { return TargetTriple; } const CodeGenOptions &getCGOpts() const { return CGOpts; } const LangOptions &getLangOpts() const { return LangOpts; } + private: + const Triple &TargetTriple; const CodeGenOptions &CGOpts; const LangOptions &LangOpts; }; - } static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { @@ -185,19 +187,44 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters; Opts.TracePC = CGOpts.SanitizeCoverageTracePC; Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard; + Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune; + Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters; + Opts.PCTable = CGOpts.SanitizeCoveragePCTable; + Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; PM.add(createSanitizerCoverageModulePass(Opts)); } +// Check if ASan should use GC-friendly instrumentation for globals. +// First of all, there is no point if -fdata-sections is off (expect for MachO, +// where this is not a factor). Also, on ELF this feature requires an assembler +// extension that only works with -integrated-as at the moment. +static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { + if (!CGOpts.SanitizeAddressGlobalsDeadStripping) + return false; + switch (T.getObjectFormat()) { + case Triple::MachO: + case Triple::COFF: + return true; + case Triple::ELF: + return CGOpts.DataSections && !CGOpts.DisableIntegratedAS; + default: + return false; + } +} + static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { const PassManagerBuilderWrapper &BuilderWrapper = static_cast<const PassManagerBuilderWrapper&>(Builder); + const Triple &T = BuilderWrapper.getTargetTriple(); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); + PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, + UseGlobalsGC)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -294,6 +321,142 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } +static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) { + switch (CodeGenOpts.OptimizationLevel) { + default: + llvm_unreachable("Invalid optimization level!"); + case 0: + return CodeGenOpt::None; + case 1: + return CodeGenOpt::Less; + case 2: + return CodeGenOpt::Default; // O2/Os/Oz + case 3: + return CodeGenOpt::Aggressive; + } +} + +static Optional<llvm::CodeModel::Model> +getCodeModel(const CodeGenOptions &CodeGenOpts) { + unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) + .Case("small", llvm::CodeModel::Small) + .Case("kernel", llvm::CodeModel::Kernel) + .Case("medium", llvm::CodeModel::Medium) + .Case("large", llvm::CodeModel::Large) + .Case("default", ~1u) + .Default(~0u); + assert(CodeModel != ~0u && "invalid code model!"); + if (CodeModel == ~1u) + return None; + return static_cast<llvm::CodeModel::Model>(CodeModel); +} + +static llvm::Reloc::Model getRelocModel(const CodeGenOptions &CodeGenOpts) { + // Keep this synced with the equivalent code in + // lib/Frontend/CompilerInvocation.cpp + llvm::Optional<llvm::Reloc::Model> RM; + RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) + .Case("static", llvm::Reloc::Static) + .Case("pic", llvm::Reloc::PIC_) + .Case("ropi", llvm::Reloc::ROPI) + .Case("rwpi", llvm::Reloc::RWPI) + .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) + .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); + assert(RM.hasValue() && "invalid PIC model!"); + return *RM; +} + +static TargetMachine::CodeGenFileType getCodeGenFileType(BackendAction Action) { + if (Action == Backend_EmitObj) + return TargetMachine::CGFT_ObjectFile; + else if (Action == Backend_EmitMCNull) + return TargetMachine::CGFT_Null; + else { + assert(Action == Backend_EmitAssembly && "Invalid action!"); + return TargetMachine::CGFT_AssemblyFile; + } +} + +static void initTargetOptions(llvm::TargetOptions &Options, + const CodeGenOptions &CodeGenOpts, + const clang::TargetOptions &TargetOpts, + const LangOptions &LangOpts, + const HeaderSearchOptions &HSOpts) { + Options.ThreadModel = + llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) + .Case("posix", llvm::ThreadModel::POSIX) + .Case("single", llvm::ThreadModel::Single); + + // Set float ABI type. + assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || + CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && + "Invalid Floating Point ABI!"); + Options.FloatABIType = + llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) + .Case("soft", llvm::FloatABI::Soft) + .Case("softfp", llvm::FloatABI::Soft) + .Case("hard", llvm::FloatABI::Hard) + .Default(llvm::FloatABI::Default); + + // Set FP fusion mode. + switch (LangOpts.getDefaultFPContractMode()) { + case LangOptions::FPC_Off: + // Preserve any contraction performed by the front-end. (Strict performs + // splitting of the muladd instrinsic in the backend.) + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_On: + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_Fast: + Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; + break; + } + + Options.UseInitArray = CodeGenOpts.UseInitArray; + Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; + Options.CompressDebugSections = CodeGenOpts.getCompressDebugSections(); + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; + + // Set EABI version. + Options.EABIVersion = TargetOpts.EABIVersion; + + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + + Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; + Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; + Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; + Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; + Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; + Options.FunctionSections = CodeGenOpts.FunctionSections; + Options.DataSections = CodeGenOpts.DataSections; + Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); + + if (CodeGenOpts.EnableSplitDwarf) + Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; + Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; + Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; + Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; + Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; + Options.MCOptions.MCIncrementalLinkerCompatible = + CodeGenOpts.IncrementalLinkerCompatible; + Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; + Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; + Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; + Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; + Options.MCOptions.ABIName = TargetOpts.ABI; + for (const auto &Entry : HSOpts.UserEntries) + if (!Entry.IsFramework && + (Entry.Group == frontend::IncludeDirGroup::Quoted || + Entry.Group == frontend::IncludeDirGroup::Angled || + Entry.Group == frontend::IncludeDirGroup::System)) + Options.MCOptions.IASSearchPaths.push_back( + Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -301,8 +464,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (CodeGenOpts.DisableLLVMPasses) return; - PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts); - // Figure out TargetLibraryInfo. This needs to be added to MPM and FPM // manually (and not via PMBuilder), since some passes (eg. InstrProfiling) // are inserted before PMBuilder ones - they'd get the default-constructed @@ -311,6 +472,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); + // At O0 and O1 we only run the always inliner which is more efficient. At // higher optimization levels we run the normal inliner. if (CodeGenOpts.OptimizationLevel <= 1) { @@ -318,13 +481,17 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, !CodeGenOpts.DisableLifetimeMarkers); PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics); } else { + // We do not want to inline hot callsites for SamplePGO module-summary build + // because profile annotation will happen again in ThinLTO backend, and we + // want the IR of the hot path to match the profile. PMBuilder.Inliner = createFunctionInliningPass( - CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize); + CodeGenOpts.OptimizationLevel, CodeGenOpts.OptimizeSize, + (!CodeGenOpts.SampleProfileFile.empty() && + CodeGenOpts.EmitSummaryIndex)); } PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; - PMBuilder.BBVectorize = CodeGenOpts.VectorizeBB; PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; @@ -465,7 +632,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.populateModulePassManager(MPM); } -void EmitAssemblyHelper::setCommandLineOpts() { +static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { SmallVector<const char *, 16> BackendArgs; BackendArgs.push_back("clang"); // Fake program name. if (!CodeGenOpts.DebugPass.empty()) { @@ -494,126 +661,14 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { return; } - unsigned CodeModel = - llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) - .Case("small", llvm::CodeModel::Small) - .Case("kernel", llvm::CodeModel::Kernel) - .Case("medium", llvm::CodeModel::Medium) - .Case("large", llvm::CodeModel::Large) - .Case("default", llvm::CodeModel::Default) - .Default(~0u); - assert(CodeModel != ~0u && "invalid code model!"); - llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - + Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); - - // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. - llvm::Optional<llvm::Reloc::Model> RM; - RM = llvm::StringSwitch<llvm::Reloc::Model>(CodeGenOpts.RelocationModel) - .Case("static", llvm::Reloc::Static) - .Case("pic", llvm::Reloc::PIC_) - .Case("ropi", llvm::Reloc::ROPI) - .Case("rwpi", llvm::Reloc::RWPI) - .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) - .Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC); - assert(RM.hasValue() && "invalid PIC model!"); - - CodeGenOpt::Level OptLevel; - switch (CodeGenOpts.OptimizationLevel) { - default: - llvm_unreachable("Invalid optimization level!"); - case 0: - OptLevel = CodeGenOpt::None; - break; - case 1: - OptLevel = CodeGenOpt::Less; - break; - case 2: - OptLevel = CodeGenOpt::Default; - break; // O2/Os/Oz - case 3: - OptLevel = CodeGenOpt::Aggressive; - break; - } + llvm::Reloc::Model RM = getRelocModel(CodeGenOpts); + CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); llvm::TargetOptions Options; - - Options.ThreadModel = - llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel) - .Case("posix", llvm::ThreadModel::POSIX) - .Case("single", llvm::ThreadModel::Single); - - // Set float ABI type. - assert((CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp" || - CodeGenOpts.FloatABI == "hard" || CodeGenOpts.FloatABI.empty()) && - "Invalid Floating Point ABI!"); - Options.FloatABIType = - llvm::StringSwitch<llvm::FloatABI::ABIType>(CodeGenOpts.FloatABI) - .Case("soft", llvm::FloatABI::Soft) - .Case("softfp", llvm::FloatABI::Soft) - .Case("hard", llvm::FloatABI::Hard) - .Default(llvm::FloatABI::Default); - - // Set FP fusion mode. - switch (CodeGenOpts.getFPContractMode()) { - case CodeGenOptions::FPC_Off: - Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; - break; - case CodeGenOptions::FPC_On: - Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; - break; - case CodeGenOptions::FPC_Fast: - Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; - break; - } - - Options.UseInitArray = CodeGenOpts.UseInitArray; - Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; - Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; - Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; - - // Set EABI version. - Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) - .Case("4", llvm::EABI::EABI4) - .Case("5", llvm::EABI::EABI5) - .Case("gnu", llvm::EABI::GNU) - .Default(llvm::EABI::Default); - - if (LangOpts.SjLjExceptions) - Options.ExceptionModel = llvm::ExceptionHandling::SjLj; - - Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; - Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; - Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; - Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; - Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; - Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.FunctionSections = CodeGenOpts.FunctionSections; - Options.DataSections = CodeGenOpts.DataSections; - Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; - Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); - - Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; - Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; - Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; - Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack; - Options.MCOptions.MCIncrementalLinkerCompatible = - CodeGenOpts.IncrementalLinkerCompatible; - Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations; - Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings; - Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; - Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments; - Options.MCOptions.ABIName = TargetOpts.ABI; - for (const auto &Entry : HSOpts.UserEntries) - if (!Entry.IsFramework && - (Entry.Group == frontend::IncludeDirGroup::Quoted || - Entry.Group == frontend::IncludeDirGroup::Angled || - Entry.Group == frontend::IncludeDirGroup::System)) - Options.MCOptions.IASSearchPaths.push_back( - Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); - + initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts); TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); } @@ -629,13 +684,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. - TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile; - if (Action == Backend_EmitObj) - CGFT = TargetMachine::CGFT_ObjectFile; - else if (Action == Backend_EmitMCNull) - CGFT = TargetMachine::CGFT_Null; - else - assert(Action == Backend_EmitAssembly && "Invalid action!"); + TargetMachine::CodeGenFileType CGFT = getCodeGenFileType(Action); // Add ObjC ARC final-cleanup optimizations. This is done as part of the // "codegen" passes so that it isn't run multiple times when there is @@ -656,7 +705,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); - setCommandLineOpts(); + setCommandLineOpts(CodeGenOpts); bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && @@ -682,13 +731,28 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + std::unique_ptr<raw_fd_ostream> ThinLinkOS; + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - if (CodeGenOpts.EmitSummaryIndex) - PerModulePasses.add(createWriteThinLTOBitcodePass(*OS)); + if (CodeGenOpts.EmitSummaryIndex) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + std::error_code EC; + ThinLinkOS.reset(new llvm::raw_fd_ostream( + CodeGenOpts.ThinLinkBitcodeFile, EC, + llvm::sys::fs::F_None)); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) << CodeGenOpts.ThinLinkBitcodeFile + << EC.message(); + return; + } + } + PerModulePasses.add( + createWriteThinLTOBitcodePass(*OS, ThinLinkOS.get())); + } else PerModulePasses.add( createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists)); @@ -770,7 +834,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); - setCommandLineOpts(); + setCommandLineOpts(CodeGenOpts); // The new pass manager always makes a target machine available to passes // during construction. @@ -780,24 +844,27 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( return; TheModule->setDataLayout(TM->createDataLayout()); - PGOOptions PGOOpt; - - // -fprofile-generate. - PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr(); - if (PGOOpt.RunProfileGen) - PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ? - DefaultProfileGenName : CodeGenOpts.InstrProfileOutput; - - // -fprofile-use. - if (CodeGenOpts.hasProfileIRUse()) - PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath; - - // Only pass a PGO options struct if -fprofile-generate or - // -fprofile-use were passed on the cmdline. - PassBuilder PB(TM.get(), - (PGOOpt.RunProfileGen || - !PGOOpt.ProfileUseFile.empty()) ? - Optional<PGOOptions>(PGOOpt) : None); + Optional<PGOOptions> PGOOpt; + + if (CodeGenOpts.hasProfileIRInstr()) + // -fprofile-generate. + PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput, + "", "", true, CodeGenOpts.DebugInfoForProfiling); + else if (CodeGenOpts.hasProfileIRUse()) + // -fprofile-use. + PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false, + CodeGenOpts.DebugInfoForProfiling); + else if (!CodeGenOpts.SampleProfileFile.empty()) + // -fprofile-sample-use + PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false, + CodeGenOpts.DebugInfoForProfiling); + else if (CodeGenOpts.DebugInfoForProfiling) + // -fdebug-info-for-profiling + PGOOpt = PGOOptions("", "", "", false, true); + + PassBuilder PB(TM.get(), PGOOpt); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -807,6 +874,14 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // Register the AA manager first so that our version is the one used. FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); }); + // Register the target library analysis directly and give it a customized + // preset TLI. + Triple TargetTriple(TheModule->getTargetTriple()); + std::unique_ptr<TargetLibraryInfoImpl> TLII( + createTLII(TargetTriple, CodeGenOpts)); + FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); + MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); + // Register all the basic analyses with the managers. PB.registerModuleAnalyses(MAM); PB.registerCGSCCAnalyses(CGAM); @@ -814,20 +889,34 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PB.registerLoopAnalyses(LAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); - ModulePassManager MPM; + ModulePassManager MPM(CodeGenOpts.DebugPassManager); if (!CodeGenOpts.DisableLLVMPasses) { + bool IsThinLTO = CodeGenOpts.EmitSummaryIndex; + bool IsLTO = CodeGenOpts.PrepareForLTO; + if (CodeGenOpts.OptimizationLevel == 0) { // Build a minimal pipeline based on the semantics required by Clang, // which is just that always inlining occurs. MPM.addPass(AlwaysInlinerPass()); + if (IsThinLTO) + MPM.addPass(NameAnonGlobalPass()); } else { - // Otherwise, use the default pass pipeline. We also have to map our - // optimization levels into one of the distinct levels used to configure - // the pipeline. + // Map our optimization levels into one of the distinct levels used to + // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); - MPM = PB.buildPerModuleDefaultPipeline(Level); + if (IsThinLTO) { + MPM = PB.buildThinLTOPreLinkDefaultPipeline( + Level, CodeGenOpts.DebugPassManager); + MPM.addPass(NameAnonGlobalPass()); + } else if (IsLTO) { + MPM = PB.buildLTOPreLinkDefaultPipeline(Level, + CodeGenOpts.DebugPassManager); + } else { + MPM = PB.buildPerModuleDefaultPipeline(Level, + CodeGenOpts.DebugPassManager); + } } } @@ -835,6 +924,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // create that pass manager here and use it as needed below. legacy::PassManager CodeGenPasses; bool NeedCodeGen = false; + Optional<raw_fd_ostream> ThinLinkOS; // Append any output we need to the pass manager. switch (Action) { @@ -842,9 +932,24 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, - CodeGenOpts.EmitSummaryIndex, - CodeGenOpts.EmitSummaryIndex)); + if (CodeGenOpts.EmitSummaryIndex) { + if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { + std::error_code EC; + ThinLinkOS.emplace(CodeGenOpts.ThinLinkBitcodeFile, EC, + llvm::sys::fs::F_None); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) + << CodeGenOpts.ThinLinkBitcodeFile << EC.message(); + return; + } + } + MPM.addPass( + ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &*ThinLinkOS : nullptr)); + } else { + MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, + CodeGenOpts.EmitSummaryIndex, + CodeGenOpts.EmitSummaryIndex)); + } break; case Backend_EmitLL: @@ -884,11 +989,11 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { if (!BMsOrErr) return BMsOrErr.takeError(); - // The bitcode file may contain multiple modules, we want the one with a - // summary. + // The bitcode file may contain multiple modules, we want the one that is + // marked as being the ThinLTO module. for (BitcodeModule &BM : *BMsOrErr) { - Expected<bool> HasSummary = BM.hasSummary(); - if (HasSummary && *HasSummary) + Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); + if (LTOInfo && LTOInfo->IsThinLTO) return BM; } @@ -897,21 +1002,32 @@ Expected<BitcodeModule> clang::FindThinLTOModule(MemoryBufferRef MBRef) { } static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, + const HeaderSearchOptions &HeaderOpts, + const CodeGenOptions &CGOpts, + const clang::TargetOptions &TOpts, + const LangOptions &LOpts, std::unique_ptr<raw_pwrite_stream> OS, - std::string SampleProfile) { - StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + std::string SampleProfile, + BackendAction Action) { + StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + setCommandLineOpts(CGOpts); + // We can simply import the values mentioned in the combined index, since // we should only invoke this using the individual indexes written out // via a WriteIndexesThinBackend. FunctionImporter::ImportMapTy ImportList; for (auto &GlobalList : *CombinedIndex) { + // Ignore entries for undefined references. + if (GlobalList.second.SummaryList.empty()) + continue; + auto GUID = GlobalList.first; - assert(GlobalList.second.size() == 1 && + assert(GlobalList.second.SummaryList.size() == 1 && "Expected individual combined index to have one summary per GUID"); - auto &Summary = GlobalList.second[0]; + auto &Summary = GlobalList.second.SummaryList[0]; // Skip the summaries for the importing module. These are included to // e.g. record required linkage changes. if (Summary->modulePath() == M->getModuleIdentifier()) @@ -949,7 +1065,36 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, return llvm::make_unique<lto::NativeObjectStream>(std::move(OS)); }; lto::Config Conf; - Conf.SampleProfile = SampleProfile; + Conf.CPU = TOpts.CPU; + Conf.CodeModel = getCodeModel(CGOpts); + Conf.MAttrs = TOpts.Features; + Conf.RelocModel = getRelocModel(CGOpts); + Conf.CGOptLevel = getCGOptLevel(CGOpts); + initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); + Conf.SampleProfile = std::move(SampleProfile); + Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; + switch (Action) { + case Backend_EmitNothing: + Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { + return false; + }; + break; + case Backend_EmitLL: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + case Backend_EmitBC: + Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) { + WriteBitcodeToFile(M, *OS, CGOpts.EmitLLVMUseLists); + return false; + }; + break; + default: + Conf.CGFileType = getCodeGenFileType(Action); + break; + } if (Error E = thinBackend( Conf, 0, AddStream, *M, *CombinedIndex, ImportList, ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { @@ -972,7 +1117,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // into memory and pass it into runThinLTOBackend, which will run the // function importer and invoke LTO passes. Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = - llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile); + llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile, + /*IgnoreEmptyThinLTOIndexFile*/true); if (!IndexOrErr) { logAllUnhandledErrors(IndexOrErr.takeError(), errs(), "Error loading index file '" + @@ -985,8 +1131,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // of an error). bool DoThinLTOBackend = CombinedIndex != nullptr; if (DoThinLTOBackend) { - runThinLTOBackend(CombinedIndex.get(), M, std::move(OS), - CGOpts.SampleProfileFile); + runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, + LOpts, std::move(OS), CGOpts.SampleProfileFile, Action); return; } } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 28e20b53d6..d90c3a53a6 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -15,8 +15,10 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" @@ -94,9 +96,8 @@ namespace { BFI.StorageSize = AtomicSizeInBits; BFI.StorageOffset += OffsetInChars; LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()), - BFI, lvalue.getType(), - lvalue.getAlignmentSource()); - LVal.setTBAAInfo(lvalue.getTBAAInfo()); + BFI, lvalue.getType(), lvalue.getBaseInfo(), + lvalue.getTBAAInfo()); AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned); if (AtomicTy.isNull()) { llvm::APInt Size( @@ -203,7 +204,7 @@ namespace { addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); return LValue::MakeAddr(addr, getValueType(), CGF.getContext(), - LVal.getAlignmentSource(), LVal.getTBAAInfo()); + LVal.getBaseInfo(), LVal.getTBAAInfo()); } /// \brief Emits atomic load. @@ -359,13 +360,15 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, Address Val1, Address Val2, uint64_t Size, llvm::AtomicOrdering SuccessOrder, - llvm::AtomicOrdering FailureOrder) { + llvm::AtomicOrdering FailureOrder, + llvm::SyncScope::ID Scope) { // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment. llvm::Value *Expected = CGF.Builder.CreateLoad(Val1); llvm::Value *Desired = CGF.Builder.CreateLoad(Val2); llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg( - Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder); + Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder, + Scope); Pair->setVolatile(E->isVolatile()); Pair->setWeak(IsWeak); @@ -407,7 +410,8 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size, - llvm::AtomicOrdering SuccessOrder) { + llvm::AtomicOrdering SuccessOrder, + llvm::SyncScope::ID Scope) { llvm::AtomicOrdering FailureOrder; if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) { auto FOS = FO->getSExtValue(); @@ -435,7 +439,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); } emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, - FailureOrder); + FailureOrder, Scope); return; } @@ -460,13 +464,13 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, // doesn't fold to a constant for the ordering. CGF.Builder.SetInsertPoint(MonotonicBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::AtomicOrdering::Monotonic); + Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope); CGF.Builder.CreateBr(ContBB); if (AcquireBB) { CGF.Builder.SetInsertPoint(AcquireBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::AtomicOrdering::Acquire); + Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope); CGF.Builder.CreateBr(ContBB); SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); @@ -476,7 +480,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, if (SeqCstBB) { CGF.Builder.SetInsertPoint(SeqCstBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, - llvm::AtomicOrdering::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent, Scope); CGF.Builder.CreateBr(ContBB); SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); @@ -488,27 +492,31 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, Address Ptr, Address Val1, Address Val2, llvm::Value *IsWeak, llvm::Value *FailureOrder, - uint64_t Size, llvm::AtomicOrdering Order) { + uint64_t Size, llvm::AtomicOrdering Order, + llvm::SyncScope::ID Scope) { llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled!"); case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: { if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) { emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr, - Val1, Val2, FailureOrder, Size, Order); + Val1, Val2, FailureOrder, Size, Order, Scope); } else { // Create all the relevant BB's llvm::BasicBlock *StrongBB = @@ -522,12 +530,12 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, CGF.Builder.SetInsertPoint(StrongBB); emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); CGF.Builder.CreateBr(ContBB); CGF.Builder.SetInsertPoint(WeakBB); emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); CGF.Builder.CreateBr(ContBB); CGF.Builder.SetInsertPoint(ContBB); @@ -535,26 +543,29 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, return; } case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: { llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); - Load->setAtomic(Order); + Load->setAtomic(Order, Scope); Load->setVolatile(E->isVolatile()); CGF.Builder.CreateStore(Load, Dest); return; } case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: { llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr); - Store->setAtomic(Order); + Store->setAtomic(Order, Scope); Store->setVolatile(E->isVolatile()); return; } case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: Op = llvm::AtomicRMWInst::Xchg; @@ -564,6 +575,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Add; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: Op = llvm::AtomicRMWInst::Add; break; @@ -572,14 +584,26 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Sub; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: Op = llvm::AtomicRMWInst::Sub; break; + case AtomicExpr::AO__opencl_atomic_fetch_min: + Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::UMin; + break; + + case AtomicExpr::AO__opencl_atomic_fetch_max: + Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::UMax; + break; + case AtomicExpr::AO__atomic_and_fetch: PostOp = llvm::Instruction::And; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: Op = llvm::AtomicRMWInst::And; break; @@ -588,6 +612,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Or; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: Op = llvm::AtomicRMWInst::Or; break; @@ -596,6 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Xor; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: Op = llvm::AtomicRMWInst::Xor; break; @@ -610,7 +636,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::AtomicRMWInst *RMWI = - CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order); + CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope); RMWI->setVolatile(E->isVolatile()); // For __atomic_*_fetch operations, perform the operation again to @@ -633,6 +659,61 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) { return DeclPtr; } +static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, + Address Ptr, Address Val1, Address Val2, + llvm::Value *IsWeak, llvm::Value *FailureOrder, + uint64_t Size, llvm::AtomicOrdering Order, + llvm::Value *Scope) { + auto ScopeModel = Expr->getScopeModel(); + + // LLVM atomic instructions always have synch scope. If clang atomic + // expression has no scope operand, use default LLVM synch scope. + if (!ScopeModel) { + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID("")); + return; + } + + // Handle constant scope. + if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) { + auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( + ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, SCID); + return; + } + + // Handle non-constant scope. + auto &Builder = CGF.Builder; + auto Scopes = ScopeModel->getRuntimeValues(); + llvm::DenseMap<unsigned, llvm::BasicBlock *> BB; + for (auto S : Scopes) + BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn); + + llvm::BasicBlock *ContBB = + CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn); + + auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false); + // If unsupported synch scope is encountered at run time, assume a fallback + // synch scope value. + auto FallBack = ScopeModel->getFallBackValue(); + llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]); + for (auto S : Scopes) { + auto *B = BB[S]; + if (S != FallBack) + SI->addCase(Builder.getInt32(S), B); + + Builder.SetInsertPoint(B); + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, + CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getLLVMContext())); + Builder.CreateBr(ContBB); + } + + Builder.SetInsertPoint(ContBB); +} + static void AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args, bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy, @@ -663,33 +744,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { QualType MemTy = AtomicTy; if (const AtomicType *AT = AtomicTy->getAs<AtomicType>()) MemTy = AT->getValueType(); - CharUnits sizeChars, alignChars; - std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy); - uint64_t Size = sizeChars.getQuantity(); - unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth(); - bool UseLibcall = (sizeChars != alignChars || - getContext().toBits(sizeChars) > MaxInlineWidthInBits); - llvm::Value *IsWeak = nullptr, *OrderFail = nullptr; Address Val1 = Address::invalid(); Address Val2 = Address::invalid(); Address Dest = Address::invalid(); - Address Ptr(EmitScalarExpr(E->getPtr()), alignChars); + Address Ptr = EmitPointerWithAlignment(E->getPtr()); + + CharUnits sizeChars, alignChars; + std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy); + uint64_t Size = sizeChars.getQuantity(); + unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth(); + bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 || + getContext().toBits(sizeChars) > MaxInlineWidthInBits); - if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { + if (E->getOp() == AtomicExpr::AO__c11_atomic_init || + E->getOp() == AtomicExpr::AO__opencl_atomic_init) { LValue lvalue = MakeAddrLValue(Ptr, AtomicTy); EmitAtomicInit(E->getVal1(), lvalue); return RValue::get(nullptr); } llvm::Value *Order = EmitScalarExpr(E->getOrder()); + llvm::Value *Scope = + E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load_n: break; @@ -708,6 +794,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__atomic_compare_exchange: Val1 = EmitPointerWithAlignment(E->getVal1()); @@ -716,12 +804,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { else Val2 = EmitValToTemp(*this, E->getVal2()); OrderFail = EmitScalarExpr(E->getOrderFail()); - if (E->getNumSubExprs() == 6) + if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n || + E->getOp() == AtomicExpr::AO__atomic_compare_exchange) IsWeak = EmitScalarExpr(E->getWeak()); break; case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_sub: if (MemTy->isPointerType()) { // For pointer arithmetic, we're required to do a bit of math: // adding 1 to an int* is not the same as adding 1 to a uintptr_t. @@ -744,11 +835,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_store_n: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_xor: @@ -784,18 +882,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { bool UseOptimizedLibcall = false; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_nand: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_and_fetch: @@ -812,6 +918,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_store_n: @@ -833,7 +944,24 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { getContext().getSizeType()); } // Atomic address is the first or second parameter - Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())), + // The OpenCL atomic library functions only accept pointer arguments to + // generic address space. + auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) { + if (!E->isOpenCL()) + return V; + auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace(); + if (AS == LangAS::opencl_generic) + return V; + auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto T = V->getType(); + auto *DestType = T->getPointerElementType()->getPointerTo(DestAS); + + return getTargetHooks().performAddrSpaceCast( + *this, V, AS, LangAS::opencl_generic, DestType, false); + }; + + Args.add(RValue::get(CastToGenericAddrSpace( + EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())), getContext().VoidPtrTy); std::string LibCallName; @@ -844,6 +972,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled!"); // There is only one libcall for compare an exchange, because there is no @@ -855,13 +984,17 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // int success, int failure) case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: LibCallName = "__atomic_compare_exchange"; RetTy = getContext().BoolTy; HaveRetTy = true; - Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())), - getContext().VoidPtrTy); + Args.add( + RValue::get(CastToGenericAddrSpace( + EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())), + getContext().VoidPtrTy); AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(), MemTy, E->getExprLoc(), sizeChars); Args.add(RValue::get(Order), getContext().IntTy); @@ -871,6 +1004,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // int order) // T __atomic_exchange_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: LibCallName = "__atomic_exchange"; @@ -880,6 +1014,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void __atomic_store(size_t size, void *mem, void *val, int order) // void __atomic_store_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: LibCallName = "__atomic_store"; @@ -891,6 +1026,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void __atomic_load(size_t size, void *mem, void *return, int order) // T __atomic_load_N(T *mem, int order) case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_load_n: LibCallName = "__atomic_load"; @@ -901,6 +1037,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { PostOp = llvm::Instruction::Add; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: LibCallName = "__atomic_fetch_add"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -912,6 +1049,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { PostOp = llvm::Instruction::And; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: LibCallName = "__atomic_fetch_and"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -923,6 +1061,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { PostOp = llvm::Instruction::Or; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: LibCallName = "__atomic_fetch_or"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -934,6 +1073,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { PostOp = llvm::Instruction::Sub; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: LibCallName = "__atomic_fetch_sub"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -945,11 +1085,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { PostOp = llvm::Instruction::Xor; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: LibCallName = "__atomic_fetch_xor"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__opencl_atomic_fetch_min: + LibCallName = E->getValueType()->isSignedIntegerType() + ? "__atomic_fetch_min" + : "__atomic_fetch_umin"; + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), + LoweredMemTy, E->getExprLoc(), sizeChars); + break; + case AtomicExpr::AO__opencl_atomic_fetch_max: + LibCallName = E->getValueType()->isSignedIntegerType() + ? "__atomic_fetch_max" + : "__atomic_fetch_umax"; + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), + LoweredMemTy, E->getExprLoc(), sizeChars); + break; // T __atomic_nand_fetch_N(T *mem, T val, int order) // T __atomic_fetch_nand_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_nand_fetch: @@ -962,6 +1117,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { break; } + if (E->isOpenCL()) { + LibCallName = std::string("__opencl") + + StringRef(LibCallName).drop_front(1).str(); + + } // Optimized functions have the size in their name. if (UseOptimizedLibcall) LibCallName += "_" + llvm::utostr(Size); @@ -982,6 +1142,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // order is always the last parameter Args.add(RValue::get(Order), getContext().IntTy); + if (E->isOpenCL()) + Args.add(RValue::get(Scope), getContext().IntTy); // PostOp is only needed for the atomic_*_fetch operations, and // thus is only needed for and implemented in the @@ -1018,9 +1180,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { } bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || + E->getOp() == AtomicExpr::AO__opencl_atomic_store || E->getOp() == AtomicExpr::AO__atomic_store || E->getOp() == AtomicExpr::AO__atomic_store_n; bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || + E->getOp() == AtomicExpr::AO__opencl_atomic_load || E->getOp() == AtomicExpr::AO__atomic_load || E->getOp() == AtomicExpr::AO__atomic_load_n; @@ -1032,37 +1196,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { switch ((llvm::AtomicOrderingCABI)ord) { case llvm::AtomicOrderingCABI::relaxed: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Monotonic); + llvm::AtomicOrdering::Monotonic, Scope); break; case llvm::AtomicOrderingCABI::consume: case llvm::AtomicOrderingCABI::acquire: if (IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Acquire); + llvm::AtomicOrdering::Acquire, Scope); break; case llvm::AtomicOrderingCABI::release: if (IsLoad) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Release); + llvm::AtomicOrdering::Release, Scope); break; case llvm::AtomicOrderingCABI::acq_rel: if (IsLoad || IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::AcquireRelease); + llvm::AtomicOrdering::AcquireRelease, Scope); break; case llvm::AtomicOrderingCABI::seq_cst: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent, Scope); break; } if (RValTy->isVoidType()) return RValue::get(nullptr); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1091,13 +1256,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AtomicOrdering::Monotonic); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Monotonic, Scope); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AtomicOrdering::Acquire); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Acquire, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); @@ -1106,23 +1271,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { } if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AtomicOrdering::Release); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Release, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release), ReleaseBB); } if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AtomicOrdering::AcquireRelease); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::AcquireRelease, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel), AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AtomicOrdering::SequentiallyConsistent); + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::SequentiallyConsistent, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); @@ -1134,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits()); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1181,15 +1347,15 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, if (LVal.isBitField()) return CGF.EmitLoadOfBitfieldLValue( LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(), - LVal.getAlignmentSource()), loc); + LVal.getBaseInfo(), TBAAAccessInfo()), loc); if (LVal.isVectorElt()) return CGF.EmitLoadOfLValue( LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(), - LVal.getAlignmentSource()), loc); + LVal.getBaseInfo(), TBAAAccessInfo()), loc); assert(LVal.isExtVectorElt()); return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt( addr, LVal.getExtVectorElts(), LVal.getType(), - LVal.getAlignmentSource())); + LVal.getBaseInfo(), TBAAAccessInfo())); } RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, @@ -1260,8 +1426,7 @@ llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO, // Other decoration. if (IsVolatile) Load->setVolatile(true); - if (LVal.getTBAAInfo()) - CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo()); + CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo()); return Load; } @@ -1506,29 +1671,30 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, UpdateLVal = LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), - AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else if (AtomicLVal.isVectorElt()) { UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeVectorElt( DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } else { assert(AtomicLVal.isExtVectorElt()); UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); - DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation()); } // Store new value in the corresponding memory area @@ -1611,20 +1777,19 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, if (AtomicLVal.isBitField()) { DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), - AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else if (AtomicLVal.isVectorElt()) { DesiredLVal = LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(), - AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else { assert(AtomicLVal.isExtVectorElt()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getAlignmentSource()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); // Store new value in the corresponding memory area assert(UpdateRVal.isScalar()); CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal); @@ -1777,8 +1942,7 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, // Other decoration. if (IsVolatile) store->setVolatile(true); - if (dest.getTBAAInfo()) - CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo()); + CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo()); return; } diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 31632ab6de..850681471d 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -14,10 +14,13 @@ #include "CGBlocks.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/DeclObjC.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -266,7 +269,7 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { - // Return if this is a function paramter. We shouldn't try to + // Return if this is a function parameter. We shouldn't try to // rematerialize default arguments of function parameters. if (isa<ParmVarDecl>(var)) return nullptr; @@ -290,7 +293,7 @@ static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, const Expr *init = var->getInit(); if (!init) return nullptr; - return CGM.EmitConstantInit(*var, CGF); + return ConstantEmitter(CGM, CGF).tryEmitAbstractForInitializer(*var); } /// Get the low bit of a nonzero character count. This is the @@ -301,21 +304,57 @@ static CharUnits getLowBit(CharUnits v) { static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, SmallVectorImpl<llvm::Type*> &elementTypes) { - // The header is basically 'struct { void *; int; int; void *; void *; }'. - // Assert that that struct is packed. - assert(CGM.getIntSize() <= CGM.getPointerSize()); - assert(CGM.getIntAlign() <= CGM.getPointerAlign()); - assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); - - info.BlockAlign = CGM.getPointerAlign(); - info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); assert(elementTypes.empty()); - elementTypes.push_back(CGM.VoidPtrTy); - elementTypes.push_back(CGM.IntTy); - elementTypes.push_back(CGM.IntTy); - elementTypes.push_back(CGM.VoidPtrTy); - elementTypes.push_back(CGM.getBlockDescriptorType()); + if (CGM.getLangOpts().OpenCL) { + // The header is basically 'struct { int; int; generic void *; + // custom_fields; }'. Assert that struct is packed. + auto GenericAS = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto GenPtrAlign = + CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); + auto GenPtrSize = + CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); + assert(CGM.getIntSize() <= GenPtrSize); + assert(CGM.getIntAlign() <= GenPtrAlign); + assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); + elementTypes.push_back(CGM.IntTy); /* total size */ + elementTypes.push_back(CGM.IntTy); /* align */ + elementTypes.push_back( + CGM.getOpenCLRuntime() + .getGenericVoidPointerType()); /* invoke function */ + unsigned Offset = + 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); + unsigned BlockAlign = GenPtrAlign.getQuantity(); + if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { + // TargetOpenCLBlockHelp needs to make sure the struct is packed. + // If necessary, add padding fields to the custom fields. + unsigned Align = CGM.getDataLayout().getABITypeAlignment(I); + if (BlockAlign < Align) + BlockAlign = Align; + assert(Offset % Align == 0); + Offset += CGM.getDataLayout().getTypeAllocSize(I); + elementTypes.push_back(I); + } + } + info.BlockAlign = CharUnits::fromQuantity(BlockAlign); + info.BlockSize = CharUnits::fromQuantity(Offset); + } else { + // The header is basically 'struct { void *; int; int; void *; void *; }'. + // Assert that that struct is packed. + assert(CGM.getIntSize() <= CGM.getPointerSize()); + assert(CGM.getIntAlign() <= CGM.getPointerAlign()); + assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); + info.BlockAlign = CGM.getPointerAlign(); + info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); + elementTypes.push_back(CGM.VoidPtrTy); + elementTypes.push_back(CGM.IntTy); + elementTypes.push_back(CGM.IntTy); + elementTypes.push_back(CGM.VoidPtrTy); + elementTypes.push_back(CGM.getBlockDescriptorType()); + } } static QualType getCaptureFieldType(const CodeGenFunction &CGF, @@ -340,8 +379,12 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, SmallVector<llvm::Type*, 8> elementTypes; initializeForBlockHeader(CGM, info, elementTypes); - - if (!block->hasCaptures()) { + bool hasNonConstantCustomFields = false; + if (auto *OpenCLHelper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) + hasNonConstantCustomFields = + !OpenCLHelper->areAllCustomFieldValuesConstant(info); + if (!block->hasCaptures() && !hasNonConstantCustomFields) { info.StructureType = llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true); info.CanBeGlobal = true; @@ -619,7 +662,17 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { // Block captures count as local values and have imprecise semantics. // They also can't be arrays, so need to worry about that. - if (dtorKind == QualType::DK_objc_strong_lifetime) { + // + // For const-qualified captures, emit clang.arc.use to ensure the captured + // object doesn't get released while we are still depending on its validity + // within the block. + if (VT.isConstQualified() && + VT.getObjCLifetime() == Qualifiers::OCL_Strong && + CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) { + assert(CGF.CGM.getLangOpts().ObjCAutoRefCount && + "expected ObjC ARC to be enabled"); + destroyer = CodeGenFunction::emitARCIntrinsicUse; + } else if (dtorKind == QualType::DK_objc_strong_lifetime) { destroyer = CodeGenFunction::destroyARCStrongImprecise; } else { destroyer = CGF.getDestroyer(dtorKind); @@ -687,16 +740,27 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) { } /// Emit a block literal expression in the current function. -llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, + llvm::Function **InvokeF) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { - if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) + // The block literal is emitted as a global variable, and the block invoke + // function has to be extracted from its initializer. + if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { + if (InvokeF) { + auto *GV = cast<llvm::GlobalVariable>( + cast<llvm::Constant>(Block)->stripPointerCasts()); + auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer()); + *InvokeF = cast<llvm::Function>( + BlockInit->getAggregateElement(2)->stripPointerCasts()); + } return Block; + } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; - return EmitBlockLiteral(blockInfo); + return EmitBlockLiteral(blockInfo, InvokeF); } // Find the block info for this block and take ownership of it. @@ -705,44 +769,57 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo); + return EmitBlockLiteral(*blockInfo, InvokeF); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, + llvm::Function **InvokeF) { + bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; + auto GenVoidPtrTy = + IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; + LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; + auto GenVoidPtrSize = CharUnits::fromQuantity( + CGM.getTarget().getPointerWidth( + CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / + 8); // Using the computed layout, generate the actual block function. bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); - llvm::Constant *blockFn - = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo, - LocalDeclMap, - isLambdaConv); - blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); + auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction( + CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); + if (InvokeF) + *InvokeF = InvokeFn; + auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) - return buildGlobalBlock(CGM, blockInfo, blockFn); + return CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression); // Otherwise, we have to emit this as a local block. - llvm::Constant *isa = - (!CGM.getContext().getLangOpts().OpenCL) - ? CGM.getNSConcreteStackBlock() - : CGM.getNullPointer(cast<llvm::PointerType>( - CGM.getNSConcreteStackBlock()->getType()), - QualType(getContext().VoidPtrTy)); - isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy); - - // Build the block descriptor. - llvm::Constant *descriptor = buildBlockDescriptor(CGM, blockInfo); - Address blockAddr = blockInfo.LocalAddress; assert(blockAddr.isValid() && "block has no address!"); - // Compute the initial on-stack block flags. - BlockFlags flags = BLOCK_HAS_SIGNATURE; - if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT; - if (blockInfo.NeedsCopyDispose) flags |= BLOCK_HAS_COPY_DISPOSE; - if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ; - if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; + llvm::Constant *isa; + llvm::Constant *descriptor; + BlockFlags flags; + if (!IsOpenCL) { + isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(), + VoidPtrTy); + + // Build the block descriptor. + descriptor = buildBlockDescriptor(CGM, blockInfo); + + // Compute the initial on-stack block flags. + flags = BLOCK_HAS_SIGNATURE; + if (blockInfo.HasCapturedVariableLayout) + flags |= BLOCK_HAS_EXTENDED_LAYOUT; + if (blockInfo.NeedsCopyDispose) + flags |= BLOCK_HAS_COPY_DISPOSE; + if (blockInfo.HasCXXObject) + flags |= BLOCK_HAS_CXX_OBJ; + if (blockInfo.UsesStret) + flags |= BLOCK_USE_STRET; + } auto projectField = [&](unsigned index, CharUnits offset, const Twine &name) -> Address { @@ -766,13 +843,33 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { index++; }; - addHeaderField(isa, getPointerSize(), "block.isa"); - addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()), - getIntSize(), "block.flags"); - addHeaderField(llvm::ConstantInt::get(IntTy, 0), - getIntSize(), "block.reserved"); - addHeaderField(blockFn, getPointerSize(), "block.invoke"); - addHeaderField(descriptor, getPointerSize(), "block.descriptor"); + if (!IsOpenCL) { + addHeaderField(isa, getPointerSize(), "block.isa"); + addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()), + getIntSize(), "block.flags"); + addHeaderField(llvm::ConstantInt::get(IntTy, 0), getIntSize(), + "block.reserved"); + } else { + addHeaderField( + llvm::ConstantInt::get(IntTy, blockInfo.BlockSize.getQuantity()), + getIntSize(), "block.size"); + addHeaderField( + llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), + getIntSize(), "block.align"); + } + addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); + if (!IsOpenCL) + addHeaderField(descriptor, getPointerSize(), "block.descriptor"); + else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { + addHeaderField( + I.first, + CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(I.first->getType())), + I.second); + } + } } // Finally, capture all the values into the block. @@ -866,6 +963,13 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else if (type->isReferenceType()) { Builder.CreateStore(src.getPointer(), blockField); + // If type is const-qualified, copy the value into the block field. + } else if (type.isConstQualified() && + type.getObjCLifetime() == Qualifiers::OCL_Strong && + CGM.getCodeGenOpts().OptimizationLevel != 0) { + llvm::Value *value = Builder.CreateLoad(src, "captured"); + Builder.CreateStore(value, blockField); + // If this is an ARC __strong block-pointer variable, don't do a // block copy. // @@ -886,9 +990,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } else { // Fake up a new variable so that EmitScalarInit doesn't think // we're referring to the variable in its own initializer. - ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr, - SourceLocation(), /*name*/ nullptr, - type); + ImplicitParamDecl BlockFieldPseudoVar(getContext(), type, + ImplicitParamDecl::Other); // We use one of these or the other depending on whether the // reference is nested. @@ -901,7 +1004,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // FIXME: Pass a specific location for the expr init so that the store is // attributed to a reasonable location - otherwise it may be attributed to // locations of subexpressions in the initialization. - EmitExprAsInit(&l2r, &blockFieldPseudoVar, + EmitExprAsInit(&l2r, &BlockFieldPseudoVar, MakeAddrLValue(blockField, type, AlignmentSource::Decl), /*captured by init*/ false); } @@ -944,9 +1047,8 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { // const char *signature; // the block signature // const char *layout; // reserved // }; - BlockDescriptorType = - llvm::StructType::create("struct.__block_descriptor", - UnsignedLongTy, UnsignedLongTy, nullptr); + BlockDescriptorType = llvm::StructType::create( + "struct.__block_descriptor", UnsignedLongTy, UnsignedLongTy); // Now form a pointer to that. unsigned AddrSpace = 0; @@ -962,22 +1064,38 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() { llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); - // struct __block_literal_generic { - // void *__isa; - // int __flags; - // int __reserved; - // void (*__invoke)(void *); - // struct __block_descriptor *__descriptor; - // }; - GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", - VoidPtrTy, IntTy, IntTy, VoidPtrTy, - BlockDescPtrTy, nullptr); + if (getLangOpts().OpenCL) { + // struct __opencl_block_literal_generic { + // int __size; + // int __align; + // __generic void *__invoke; + // /* custom fields */ + // }; + SmallVector<llvm::Type *, 8> StructFields( + {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()}); + if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) + StructFields.push_back(I); + } + GenericBlockLiteralType = llvm::StructType::create( + StructFields, "struct.__opencl_block_literal_generic"); + } else { + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // }; + GenericBlockLiteralType = + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + } return GenericBlockLiteralType; } -RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, +RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs<BlockPointerType>(); @@ -1002,8 +1120,8 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, // Get the function pointer from the literal. llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, + CGM.getLangOpts().OpenCL ? 2 : 3); // Add the block literal. CallArgList Args; @@ -1011,8 +1129,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, QualType VoidPtrQualTy = getContext().VoidPtrTy; llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { - GenericVoidPtrTy = Builder.getInt8PtrTy( - getContext().getTargetAddressSpace(LangAS::opencl_generic)); + GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); VoidPtrQualTy = getContext().getPointerType(getContext().getAddrSpaceQualType( getContext().VoidTy, LangAS::opencl_generic)); @@ -1037,7 +1154,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy); - Func = Builder.CreateBitCast(Func, BlockFTyPtr); + Func = Builder.CreatePointerCast(Func, BlockFTyPtr); // Prepare the callee. CGCallee Callee(CGCalleeInfo(), Func); @@ -1072,8 +1189,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, variable->getName()); } - if (auto refType = capture.fieldType()->getAs<ReferenceType>()) - addr = EmitLoadOfReference(addr, refType); + if (capture.fieldType()->isReferenceType()) + addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType())); return addr; } @@ -1098,17 +1215,14 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE, computeBlockInfo(*this, nullptr, blockInfo); // Using that metadata, generate the actual block function. - llvm::Constant *blockFn; { CodeGenFunction::DeclMapTy LocalDeclMap; - blockFn = CodeGenFunction(*this).GenerateBlockFunction(GlobalDecl(), - blockInfo, - LocalDeclMap, - false); + CodeGenFunction(*this).GenerateBlockFunction( + GlobalDecl(), blockInfo, LocalDeclMap, + /*IsLambdaConversionToBlock*/ false, /*BuildGlobalBlock*/ true); } - blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); - return buildGlobalBlock(*this, blockInfo, blockFn); + return getAddrOfGlobalBlockIfEmitted(BE); } static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, @@ -1125,28 +1239,37 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, ConstantInitBuilder builder(CGM); auto fields = builder.beginStruct(); - // isa - fields.add( - (!CGM.getContext().getLangOpts().OpenCL) - ? CGM.getNSConcreteGlobalBlock() - : CGM.getNullPointer(cast<llvm::PointerType>( - CGM.getNSConcreteGlobalBlock()->getType()), - QualType(CGM.getContext().VoidPtrTy))); + bool IsOpenCL = CGM.getLangOpts().OpenCL; + if (!IsOpenCL) { + // isa + fields.add(CGM.getNSConcreteGlobalBlock()); + + // __flags + BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; + if (blockInfo.UsesStret) + flags |= BLOCK_USE_STRET; - // __flags - BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; - if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; - - fields.addInt(CGM.IntTy, flags.getBitMask()); + fields.addInt(CGM.IntTy, flags.getBitMask()); - // Reserved - fields.addInt(CGM.IntTy, 0); + // Reserved + fields.addInt(CGM.IntTy, 0); + } else { + fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); + fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); + } // Function fields.add(blockFn); - // Descriptor - fields.add(buildBlockDescriptor(CGM, blockInfo)); + if (!IsOpenCL) { + // Descriptor + fields.add(buildBlockDescriptor(CGM, blockInfo)); + } else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) { + fields.add(I); + } + } unsigned AddrSpace = 0; if (CGM.getContext().getLangOpts().OpenCL) @@ -1170,20 +1293,17 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, llvm::Value *arg) { assert(BlockInfo && "not emitting prologue of block invocation function?!"); - llvm::Value *localAddr = nullptr; - if (CGM.getCodeGenOpts().OptimizationLevel == 0) { - // Allocate a stack slot to let the debug info survive the RA. - Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr"); - Builder.CreateStore(arg, alloc); - localAddr = Builder.CreateLoad(alloc); - } - + // Allocate a stack slot like for any local variable to guarantee optimal + // debug info at -O0. The mem2reg pass will eliminate it when optimizing. + Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr"); + Builder.CreateStore(arg, alloc); if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->setLocation(D->getLocation()); - DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum, - localAddr, Builder); + DI->EmitDeclareOfBlockLiteralArgVariable( + *BlockInfo, D->getName(), argNum, + cast<llvm::AllocaInst>(alloc.getPointer()), Builder); } } @@ -1211,7 +1331,8 @@ llvm::Function * CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const CGBlockInfo &blockInfo, const DeclMapTy &ldm, - bool IsLambdaConversionToBlock) { + bool IsLambdaConversionToBlock, + bool BuildGlobalBlock) { const BlockDecl *blockDecl = blockInfo.getBlockDecl(); CurGD = GD; @@ -1240,7 +1361,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // For OpenCL passed block pointer can be private AS local variable or // global AS program scope variable (for the case with and without captures). - // Generic AS is used therefore to be able to accomodate both private and + // Generic AS is used therefore to be able to accommodate both private and // generic AS in one implementation. if (getLangOpts().OpenCL) selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( @@ -1248,9 +1369,10 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl), - SourceLocation(), II, selfTy); - args.push_back(&selfDecl); + ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl), + SourceLocation(), II, selfTy, + ImplicitParamDecl::ObjCSelf); + args.push_back(&SelfDecl); // Now add the rest of the parameters. args.append(blockDecl->param_begin(), blockDecl->param_end()); @@ -1269,6 +1391,14 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, fnLLVMType, llvm::GlobalValue::InternalLinkage, name, &CGM.getModule()); CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo); + if (BuildGlobalBlock) { + auto GenVoidPtrTy = getContext().getLangOpts().OpenCL + ? CGM.getOpenCLRuntime().getGenericVoidPointerType() + : VoidPtrTy; + buildGlobalBlock(CGM, blockInfo, + llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy)); + } + // Begin generating the function. StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, blockDecl->getLocation(), @@ -1483,12 +1613,12 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -1514,17 +1644,15 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); - auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(FD, C.VoidTy, Fn, FI, args); - // Create a scope with an artificial location for the body of this function. - auto AL = ApplyDebugLocation::CreateArtificial(*this); + ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); - Address src = GetAddrOfLocalVar(&srcDecl); + Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block.source"); - Address dst = GetAddrOfLocalVar(&dstDecl); + Address dst = GetAddrOfLocalVar(&DstDecl); dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign); dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest"); @@ -1660,9 +1788,9 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; - ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr, - C.VoidPtrTy); - args.push_back(&srcDecl); + ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -1686,14 +1814,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); - // Create a scope with an artificial location for the body of this function. - auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(FD, C.VoidTy, Fn, FI, args); - auto AL = ApplyDebugLocation::CreateArtificial(*this); + ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); - Address src = GetAddrOfLocalVar(&srcDecl); + Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block"); @@ -1902,13 +2028,13 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&dst); + ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Dst); - ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&src); + ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Src); const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); @@ -1939,7 +2065,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); // dst->x - Address destField = CGF.GetAddrOfLocalVar(&dst); + Address destField = CGF.GetAddrOfLocalVar(&Dst); destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.ByrefAlignment); destField = CGF.Builder.CreateBitCast(destField, byrefPtrType); @@ -1947,7 +2073,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, "dest-object"); // src->x - Address srcField = CGF.GetAddrOfLocalVar(&src); + Address srcField = CGF.GetAddrOfLocalVar(&Src); srcField = Address(CGF.Builder.CreateLoad(srcField), byrefInfo.ByrefAlignment); srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType); @@ -1979,9 +2105,9 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, QualType R = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr, - Context.VoidPtrTy); - args.push_back(&src); + ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Src); const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); @@ -2010,7 +2136,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, CGF.StartFunction(FD, R, Fn, FI, args); if (generator.needsDispose()) { - Address addr = CGF.GetAddrOfLocalVar(&src); + Address addr = CGF.GetAddrOfLocalVar(&Src); addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.ByrefAlignment); auto byrefPtrType = byrefInfo.Type->getPointerTo(0); addr = CGF.Builder.CreateBitCast(addr, byrefPtrType); diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 42f9a428bb..61fe4aac3a 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -145,6 +145,13 @@ public: Addr.getAlignment()); } + using CGBuilderBaseTy::CreateAddrSpaceCast; + Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, + const llvm::Twine &Name = "") { + return Address(CreateAddrSpaceCast(Addr.getPointer(), Ty, Name), + Addr.getAlignment()); + } + /// Cast the element type of the given address to a different type, /// preserving information like the alignment and address space. Address CreateElementBitCast(Address Addr, llvm::Type *Ty, diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 23b3a80180..bc37afa284 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -16,6 +16,7 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -29,6 +30,8 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/ConvertUTF.h" #include <sstream> using namespace clang; @@ -470,10 +473,13 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); - // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. - auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); - return Builder.CreateCall(F, {Ptr, CI}); + + // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. + Value *Min = Builder.getInt1((Type & 2) != 0); + // For GCC compatability, __builtin_object_size treat NULL as unknown size. + Value *NullIsUnknown = Builder.getTrue(); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); } // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we @@ -489,6 +495,7 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _interlockedbittestandset, __fastfail, }; @@ -556,6 +563,22 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); + case MSVCIntrin::_interlockedbittestandset: { + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Or, Addr, + Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), + llvm::AtomicOrdering::SequentiallyConsistent); + // Shift the relevant bit to the least significant position, truncate to + // the result type, and test the low bit. + llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); + llvm::Value *Truncated = + Builder.CreateTrunc(Shifted, ConvertType(E->getType())); + return Builder.CreateAnd(Truncated, + ConstantInt::get(Truncated->getType(), 1)); + } + case MSVCIntrin::_InterlockedDecrement: { llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( @@ -598,9 +621,9 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); - llvm::AttributeSet NoReturnAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoReturn); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); CS.setAttributes(NoReturnAttr); return CS.getInstruction(); @@ -621,6 +644,200 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup { }; } +Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, + BuiltinCheckKind Kind) { + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) + && "Unsupported builtin check kind"); + + Value *ArgValue = EmitScalarExpr(E); + if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) + return ArgValue; + + SanitizerScope SanScope(this); + Value *Cond = Builder.CreateICmpNE( + ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); + EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), + SanitizerHandler::InvalidBuiltin, + {EmitCheckSourceLocation(E->getExprLoc()), + llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, + None); + return ArgValue; +} + +/// Get the argument type for arguments to os_log_helper. +static CanQualType getOSLogArgType(ASTContext &C, int Size) { + QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); + return C.getCanonicalType(UnsignedTy); +} + +llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( + const analyze_os_log::OSLogBufferLayout &Layout, + CharUnits BufferAlignment) { + ASTContext &Ctx = getContext(); + + llvm::SmallString<64> Name; + { + raw_svector_ostream OS(Name); + OS << "__os_log_helper"; + OS << "_" << BufferAlignment.getQuantity(); + OS << "_" << int(Layout.getSummaryByte()); + OS << "_" << int(Layout.getNumArgsByte()); + for (const auto &Item : Layout.Items) + OS << "_" << int(Item.getSizeByte()) << "_" + << int(Item.getDescriptorByte()); + } + + if (llvm::Function *F = CGM.getModule().getFunction(Name)) + return F; + + llvm::SmallVector<ImplicitParamDecl, 4> Params; + Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), + Ctx.VoidPtrTy, ImplicitParamDecl::Other); + + for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { + char Size = Layout.Items[I].getSizeByte(); + if (!Size) + continue; + + Params.emplace_back( + Ctx, nullptr, SourceLocation(), + &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), + getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); + } + + FunctionArgList Args; + for (auto &P : Params) + Args.push_back(&P); + + // The helper function has linkonce_odr linkage to enable the linker to merge + // identical functions. To ensure the merging always happens, 'noinline' is + // attached to the function when compiling with -Oz. + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = llvm::Function::Create( + FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); + Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); + CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + + // Attach 'noinline' at -Oz. + if (CGM.getCodeGenOpts().OptimizeSize == 2) + Fn->addFnAttr(llvm::Attribute::NoInline); + + auto NL = ApplyDebugLocation::CreateEmpty(*this); + IdentifierInfo *II = &Ctx.Idents.get(Name); + FunctionDecl *FD = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + + StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); + + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(*this); + + CharUnits Offset; + Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), + BufferAlignment); + Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); + Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); + + unsigned I = 1; + for (const auto &Item : Layout.Items) { + Builder.CreateStore( + Builder.getInt8(Item.getDescriptorByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); + Builder.CreateStore( + Builder.getInt8(Item.getSizeByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); + + CharUnits Size = Item.size(); + if (!Size.getQuantity()) + continue; + + Address Arg = GetAddrOfLocalVar(&Params[I]); + Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); + Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), + "argDataCast"); + Builder.CreateStore(Builder.CreateLoad(Arg), Addr); + Offset += Size; + ++I; + } + + FinishFunction(); + + return Fn; +} + +RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { + assert(E.getNumArgs() >= 2 && + "__builtin_os_log_format takes at least 2 arguments"); + ASTContext &Ctx = getContext(); + analyze_os_log::OSLogBufferLayout Layout; + analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); + Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); + llvm::SmallVector<llvm::Value *, 4> RetainableOperands; + + // Ignore argument 1, the format string. It is not currently used. + CallArgList Args; + Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); + + for (const auto &Item : Layout.Items) { + int Size = Item.getSizeByte(); + if (!Size) + continue; + + llvm::Value *ArgVal; + + if (const Expr *TheExpr = Item.getExpr()) { + ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); + + // Check if this is a retainable type. + if (TheExpr->getType()->isObjCRetainableType()) { + assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && + "Only scalar can be a ObjC retainable type"); + // Check if the object is constant, if not, save it in + // RetainableOperands. + if (!isa<Constant>(ArgVal)) + RetainableOperands.push_back(ArgVal); + } + } else { + ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); + } + + unsigned ArgValSize = + CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); + llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), + ArgValSize); + ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); + CanQualType ArgTy = getOSLogArgType(Ctx, Size); + // If ArgVal has type x86_fp80, zero-extend ArgVal. + ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); + Args.add(RValue::get(ArgVal), ArgTy); + } + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); + llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( + Layout, BufAddr.getAlignment()); + EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); + + // Push a clang.arc.use cleanup for each object in RetainableOperands. The + // cleanup will cause the use to appear after the final log call, keeping + // the object valid while it’s held in the log buffer. Note that if there’s + // a release cleanup on the object, it will already be active; since + // cleanups are emitted in reverse order, the use will occur before the + // object is released. + if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && + CGM.getCodeGenOpts().OptimizationLevel != 0) + for (llvm::Value *Object : RetainableOperands) + pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object); + + return RValue::get(BufAddr.getPointer()); +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -640,7 +857,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, default: break; // Handle intrinsics and libm functions below. case Builtin::BI__builtin___CFStringMakeConstantString: case Builtin::BI__builtin___NSStringMakeConstantString: - return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); + return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: case Builtin::BI__va_start: @@ -772,7 +989,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); @@ -789,7 +1006,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); @@ -1326,8 +1543,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { - Value *Depth = - CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); + Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), + getContext().UnsignedIntTy); Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } @@ -1336,8 +1553,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { - Value *Depth = - CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); + Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), + getContext().UnsignedIntTy); Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } @@ -1790,12 +2007,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__atomic_signal_fence: case Builtin::BI__c11_atomic_thread_fence: case Builtin::BI__c11_atomic_signal_fence: { - llvm::SynchronizationScope Scope; + llvm::SyncScope::ID SSID; if (BuiltinID == Builtin::BI__atomic_signal_fence || BuiltinID == Builtin::BI__c11_atomic_signal_fence) - Scope = llvm::SingleThread; + SSID = llvm::SyncScope::SingleThread; else - Scope = llvm::CrossThread; + SSID = llvm::SyncScope::System; Value *Order = EmitScalarExpr(E->getArg(0)); if (isa<llvm::ConstantInt>(Order)) { int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); @@ -1805,17 +2022,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; case 1: // memory_order_consume case 2: // memory_order_acquire - Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); break; case 3: // memory_order_release - Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); break; case 4: // memory_order_acq_rel - Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); break; case 5: // memory_order_seq_cst - Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); break; } return RValue::get(nullptr); @@ -1832,23 +2048,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); - Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); - Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); - Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); - Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); @@ -1856,24 +2072,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } - // Library functions with special handling. case Builtin::BIsqrt: case Builtin::BIsqrtf: - case Builtin::BIsqrtl: { - // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only - // in finite- or unsafe-math mode (the intrinsic has different semantics - // for handling negative numbers compared to the library function, so - // -fmath-errno=0 is not enough). - if (!FD->hasAttr<ConstAttr>()) - break; - if (!(CGM.getCodeGenOpts().UnsafeFPMath || - CGM.getCodeGenOpts().NoNaNsFPMath)) - break; - Value *Arg0 = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = Arg0->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); - return RValue::get(Builder.CreateCall(F, Arg0)); - } + case Builtin::BIsqrtl: + // Builtins have the same semantics as library functions. The LLVM intrinsic + // has the same semantics as the library function except it does not set + // errno. Thus, we can transform either sqrt or __builtin_sqrt to @llvm.sqrt + // if the call is 'const' (the call must not set errno). + // + // FIXME: The builtin cases are not here because they are marked 'const' in + // Builtins.def. So that means they are wrongly defined to have different + // semantics than the library functions. If we included them here, we would + // turn them into LLVM intrinsics regardless of whether -fmath-errno was on. + if (FD->hasAttr<ConstAttr>()) + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); + break; case Builtin::BI__builtin_pow: case Builtin::BI__builtin_powf: @@ -1913,6 +2126,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); } + case Builtin::BI__annotation: { + // Re-encode each wide string to UTF8 and make an MDString. + SmallVector<Metadata *, 1> Strings; + for (const Expr *Arg : E->arguments()) { + const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); + assert(Str->getCharByteWidth() == 2); + StringRef WideBytes = Str->getBytes(); + std::string StrUtf8; + if (!convertUTF16ToUTF8String( + makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { + CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); + continue; + } + Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); + } + + // Build and MDTuple of MDStrings and emit the intrinsic call. + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); + Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); + return RValue::getIgnored(); + } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, @@ -2235,16 +2470,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedXor16: case Builtin::BI_InterlockedXor: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); - case Builtin::BI__readfsdword: { - llvm::Type *IntTy = ConvertType(E->getType()); - Value *IntToPtr = - Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(IntTy, 257)); - LoadInst *Load = Builder.CreateAlignedLoad( - IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType())); - Load->setVolatile(true); - return RValue::get(Load); - } + case Builtin::BI_interlockedbittestandset: + return RValue::get( + EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); case Builtin::BI__exception_code: case Builtin::BI_exception_code: @@ -2258,9 +2486,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_setjmpex: { if (getTarget().getTriple().isOSMSVCRT()) { llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); @@ -2278,9 +2506,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI_setjmp: { if (getTarget().getTriple().isOSMSVCRT()) { - llvm::AttributeSet ReturnsTwiceAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::ReturnsTwice); + llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReturnsTwice); llvm::Value *Buf = Builder.CreateBitOrPointerCast( EmitScalarExpr(E->getArg(0)), Int8PtrTy); llvm::CallSite CS; @@ -2316,10 +2544,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } - case Builtin::BI__fastfail: { + case Builtin::BI__fastfail: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); - break; - } case Builtin::BI__builtin_coro_size: { auto & Context = getContext(); @@ -2550,47 +2776,77 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, + GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + llvm::Value *Block = + Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; B.addAttribute(Attribute::ByVal); - AttributeSet ByValAttrSet = - AttributeSet::get(CGM.getModule().getContext(), 3U, B); + llvm::AttributeList ByValAttrSet = + llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); auto RTCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), - {Queue, Flags, Range, Block}); + {Queue, Flags, Range, Kernel, Block}); RTCall->setAttributes(ByValAttrSet); return RValue::get(RTCall); } assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); + // Create a temporary array to hold the sizes of local pointer arguments + // for the block. \p First is the position of the first size argument. + auto CreateArrayForSizeVar = [=](unsigned First) { + auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); + auto *Arr = Builder.CreateAlloca(AT); + llvm::Value *Ptr; + // Each of the following arguments specifies the size of the corresponding + // argument passed to the enqueued block. + auto *Zero = llvm::ConstantInt::get(IntTy, 0); + for (unsigned I = First; I < NumArgs; ++I) { + auto *Index = llvm::ConstantInt::get(IntTy, I - First); + auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); + if (I == First) + Ptr = GEP; + auto *V = + Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); + Builder.CreateAlignedStore( + V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); + } + return Ptr; + }; + // Could have events and/or vaargs. if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); + auto *PtrToSizeArray = CreateArrayForSizeVar(4); + // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. - std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, - ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, - GenericVoidPtrTy, IntTy}; - - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. - for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) - Args.push_back( - Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + std::vector<llvm::Value *> Args = { + Queue, Flags, Range, + Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), + PtrToSizeArray}; + std::vector<llvm::Type *> ArgTys = { + QueueTy, IntTy, RangeTy, + GenericVoidPtrTy, GenericVoidPtrTy, IntTy, + PtrToSizeArray->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef<llvm::Value *>(Args))); @@ -2611,15 +2867,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + llvm::Value *Block = + Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); std::vector<llvm::Type *> ArgTys = { - QueueTy, Int32Ty, RangeTy, Int32Ty, - EventPtrTy, EventPtrTy, GenericVoidPtrTy}; + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, + EventList, ClkEvent, Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -2636,42 +2896,87 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_vaargs"; - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. - for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) - Args.push_back( - Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + auto *PtrToSizeArray = CreateArrayForSizeVar(7); + Args.push_back(PtrToSizeArray); + ArgTys.push_back(PtrToSizeArray->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef<llvm::Value *>(Args))); } + LLVM_FALLTHROUGH; } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. case Builtin::BIget_kernel_work_group_size: { llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); - Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, + false), "__get_kernel_work_group_size_impl"), - Arg)); + {Kernel, Arg})); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); - Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, + false), "__get_kernel_preferred_work_group_multiple_impl"), - Arg)); + {Kernel, Arg})); + } + case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: + case Builtin::BIget_kernel_sub_group_count_for_ndrange: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); + llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); + const char *Name = + BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange + ? "__get_kernel_max_sub_group_size_for_ndrange_impl" + : "__get_kernel_sub_group_count_for_ndrange_impl"; + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, + false), + Name), + {NDRange, Kernel, Block})); + } + + case Builtin::BI__builtin_store_half: + case Builtin::BI__builtin_store_halff: { + Value *Val = EmitScalarExpr(E->getArg(0)); + Address Address = EmitPointerWithAlignment(E->getArg(1)); + Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); + return RValue::get(Builder.CreateStore(HalfVal, Address)); + } + case Builtin::BI__builtin_load_half: { + Address Address = EmitPointerWithAlignment(E->getArg(0)); + Value *HalfVal = Builder.CreateLoad(Address); + return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); + } + case Builtin::BI__builtin_load_halff: { + Address Address = EmitPointerWithAlignment(E->getArg(0)); + Value *HalfVal = Builder.CreateLoad(Address); + return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX()) @@ -2688,69 +2993,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Fall through - it's already mapped to the intrinsic by GCCBuiltin. break; } - case Builtin::BI__builtin_os_log_format: { - assert(E->getNumArgs() >= 2 && - "__builtin_os_log_format takes at least 2 arguments"); - analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); - Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); - // Ignore argument 1, the format string. It is not currently used. - CharUnits Offset; - Builder.CreateStore( - Builder.getInt8(Layout.getSummaryByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); - Builder.CreateStore( - Builder.getInt8(Layout.getNumArgsByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); - - llvm::SmallVector<llvm::Value *, 4> RetainableOperands; - for (const auto &Item : Layout.Items) { - Builder.CreateStore( - Builder.getInt8(Item.getDescriptorByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); - Builder.CreateStore( - Builder.getInt8(Item.getSizeByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); - Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); - if (const Expr *TheExpr = Item.getExpr()) { - Addr = Builder.CreateElementBitCast( - Addr, ConvertTypeForMem(TheExpr->getType())); - // Check if this is a retainable type. - if (TheExpr->getType()->isObjCRetainableType()) { - assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && - "Only scalar can be a ObjC retainable type"); - llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); - RValue RV = RValue::get(SV); - LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); - EmitStoreThroughLValue(RV, LV); - // Check if the object is constant, if not, save it in - // RetainableOperands. - if (!isa<Constant>(SV)) - RetainableOperands.push_back(SV); - } else { - EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); - } - } else { - Addr = Builder.CreateElementBitCast(Addr, Int32Ty); - Builder.CreateStore( - Builder.getInt32(Item.getConstValue().getQuantity()), Addr); - } - Offset += Item.size(); - } - - // Push a clang.arc.use cleanup for each object in RetainableOperands. The - // cleanup will cause the use to appear after the final log call, keeping - // the object valid while it’s held in the log buffer. Note that if there’s - // a release cleanup on the object, it will already be active; since - // cleanups are emitted in reverse order, the use will occur before the - // object is released. - if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && - CGM.getCodeGenOpts().OptimizationLevel != 0) - for (llvm::Value *object : RetainableOperands) - pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); - - return RValue::get(BufAddr.getPointer()); - } + case Builtin::BI__builtin_os_log_format: + return emitBuiltinOSLogFormat(*E); case Builtin::BI__builtin_os_log_format_buffer_size: { analyze_os_log::OSLogBufferLayout Layout; @@ -2758,6 +3002,59 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(ConstantInt::get(ConvertType(E->getType()), Layout.size().getQuantity())); } + + case Builtin::BI__xray_customevent: { + if (!ShouldXRayInstrumentFunction()) + return RValue::getIgnored(); + if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { + if (XRayAttr->neverXRayInstrument()) + return RValue::getIgnored(); + } + Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); + auto FTy = F->getFunctionType(); + auto Arg0 = E->getArg(0); + auto Arg0Val = EmitScalarExpr(Arg0); + auto Arg0Ty = Arg0->getType(); + auto PTy0 = FTy->getParamType(0); + if (PTy0 != Arg0Val->getType()) { + if (Arg0Ty->isArrayType()) + Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); + else + Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); + } + auto Arg1 = EmitScalarExpr(E->getArg(1)); + auto PTy1 = FTy->getParamType(1); + if (PTy1 != Arg1->getType()) + Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); + return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); + } + + case Builtin::BI__builtin_ms_va_start: + case Builtin::BI__builtin_ms_va_end: + return RValue::get( + EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + BuiltinID == Builtin::BI__builtin_ms_va_start)); + + case Builtin::BI__builtin_ms_va_copy: { + // Lower this manually. We can't reliably determine whether or not any + // given va_copy() is for a Win64 va_list from the calling convention + // alone, because it's legal to do this from a System V ABI function. + // With opaque pointer types, we won't have enough information in LLVM + // IR to determine this from the argument types, either. Best to do it + // now, while we have enough information. + Address DestAddr = EmitMSVAListRef(E->getArg(0)); + Address SrcAddr = EmitMSVAListRef(E->getArg(1)); + + llvm::Type *BPP = Int8PtrPtrTy; + + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), + DestAddr.getAlignment()); + SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), + SrcAddr.getAlignment()); + + Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); + return RValue::get(Builder.CreateStore(ArgPtr, DestAddr)); + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -3776,6 +4073,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcalt_v: case NEON::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcage_v: case NEON::BI__builtin_neon_vcageq_v: case NEON::BI__builtin_neon_vcagt_v: @@ -4534,7 +4832,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd); - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); @@ -5019,6 +5317,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vsri_n_v: case NEON::BI__builtin_neon_vsriq_n_v: rightShift = true; + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vsli_n_v: case NEON::BI__builtin_neon_vsliq_n_v: Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); @@ -5364,7 +5663,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp); - llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); + llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); @@ -7163,6 +7462,18 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, std::max(NumElts, 8U))); } +static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { + + llvm::Type *Ty = Ops[0]->getType(); + Value *Zero = llvm::Constant::getNullValue(Ty); + Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); + Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); + Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); + if (Ops.size() == 1) + return Res; + return EmitX86Select(CGF, Ops[2], Res, Ops[1]); +} + static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, ArrayRef<Value *> Ops) { Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); @@ -7175,33 +7486,263 @@ static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, return EmitX86Select(CGF, Ops[3], Res, Ops[2]); } -Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_ms_va_start || - BuiltinID == X86::BI__builtin_ms_va_end) - return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), - BuiltinID == X86::BI__builtin_ms_va_start); - if (BuiltinID == X86::BI__builtin_ms_va_copy) { - // Lower this manually. We can't reliably determine whether or not any - // given va_copy() is for a Win64 va_list from the calling convention - // alone, because it's legal to do this from a System V ABI function. - // With opaque pointer types, we won't have enough information in LLVM - // IR to determine this from the argument types, either. Best to do it - // now, while we have enough information. - Address DestAddr = EmitMSVAListRef(E->getArg(0)); - Address SrcAddr = EmitMSVAListRef(E->getArg(1)); +static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, + llvm::Type *DstTy) { + unsigned NumberOfElements = DstTy->getVectorNumElements(); + Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); + return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); +} - llvm::Type *BPP = Int8PtrPtrTy; +Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + return EmitX86CpuIs(CPUStr); +} - DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), - DestAddr.getAlignment()); - SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), - SrcAddr.getAlignment()); +Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { + + // This enum contains the vendor, type, and subtype enums from the + // runtime library concatenated together. The _START labels mark + // the start and are used to adjust the value into the correct + // encoding space. + enum X86CPUs { + INTEL = 1, + AMD, + CPU_TYPE_START, + INTEL_BONNELL, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + CPU_SUBTYPE_START, + INTEL_COREI7_NEHALEM, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + }; - Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); - return Builder.CreateStore(ArgPtr, DestAddr); + X86CPUs CPU = + StringSwitch<X86CPUs>(CPUStr) + .Case("amd", AMD) + .Case("amdfam10h", AMDFAM10H) + .Case("amdfam10", AMDFAM10H) + .Case("amdfam15h", AMDFAM15H) + .Case("amdfam15", AMDFAM15H) + .Case("amdfam17h", AMDFAM17H) + .Case("atom", INTEL_BONNELL) + .Case("barcelona", AMDFAM10H_BARCELONA) + .Case("bdver1", AMDFAM15H_BDVER1) + .Case("bdver2", AMDFAM15H_BDVER2) + .Case("bdver3", AMDFAM15H_BDVER3) + .Case("bdver4", AMDFAM15H_BDVER4) + .Case("bonnell", INTEL_BONNELL) + .Case("broadwell", INTEL_COREI7_BROADWELL) + .Case("btver1", AMD_BTVER1) + .Case("btver2", AMD_BTVER2) + .Case("core2", INTEL_CORE2) + .Case("corei7", INTEL_COREI7) + .Case("haswell", INTEL_COREI7_HASWELL) + .Case("intel", INTEL) + .Case("istanbul", AMDFAM10H_ISTANBUL) + .Case("ivybridge", INTEL_COREI7_IVYBRIDGE) + .Case("knl", INTEL_KNL) + .Case("nehalem", INTEL_COREI7_NEHALEM) + .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE) + .Case("shanghai", AMDFAM10H_SHANGHAI) + .Case("silvermont", INTEL_SILVERMONT) + .Case("skylake", INTEL_COREI7_SKYLAKE) + .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512) + .Case("slm", INTEL_SILVERMONT) + .Case("westmere", INTEL_COREI7_WESTMERE) + .Case("znver1", AMDFAM17H_ZNVER1); + + llvm::Type *Int32Ty = Builder.getInt32Ty(); + + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Calculate the index needed to access the correct field based on the + // range. Also adjust the expected value. + unsigned Index; + unsigned Value; + if (CPU > CPU_SUBTYPE_START) { + Index = 2; + Value = CPU - CPU_SUBTYPE_START; + } else if (CPU > CPU_TYPE_START) { + Index = 1; + Value = CPU - CPU_TYPE_START; + } else { + Index = 0; + Value = CPU; } + // Grab the appropriate field from __cpu_model. + llvm::Value *Idxs[] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, Index) + }; + llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); + CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); + + // Check the value of the field against the requested value. + return Builder.CreateICmpEQ(CpuValue, + llvm::ConstantInt::get(Int32Ty, Value)); +} + +Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { + const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); + return EmitX86CpuSupports(FeatureStr); +} + +Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { + // TODO: When/if this becomes more than x86 specific then use a TargetInfo + // based mapping. + // Processor features and mapping to processor feature value. + enum X86Features { + CMOV = 0, + MMX, + POPCNT, + SSE, + SSE2, + SSE3, + SSSE3, + SSE4_1, + SSE4_2, + AVX, + AVX2, + SSE4_A, + FMA4, + XOP, + FMA, + AVX512F, + BMI, + BMI2, + AES, + PCLMUL, + AVX512VL, + AVX512BW, + AVX512DQ, + AVX512CD, + AVX512ER, + AVX512PF, + AVX512VBMI, + AVX512IFMA, + AVX5124VNNIW, + AVX5124FMAPS, + AVX512VPOPCNTDQ, + MAX + }; + + uint32_t FeaturesMask = 0; + + for (const StringRef &FeatureStr : FeatureStrs) { + X86Features Feature = + StringSwitch<X86Features>(FeatureStr) + .Case("cmov", X86Features::CMOV) + .Case("mmx", X86Features::MMX) + .Case("popcnt", X86Features::POPCNT) + .Case("sse", X86Features::SSE) + .Case("sse2", X86Features::SSE2) + .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) + .Case("sse4.1", X86Features::SSE4_1) + .Case("sse4.2", X86Features::SSE4_2) + .Case("avx", X86Features::AVX) + .Case("avx2", X86Features::AVX2) + .Case("sse4a", X86Features::SSE4_A) + .Case("fma4", X86Features::FMA4) + .Case("xop", X86Features::XOP) + .Case("fma", X86Features::FMA) + .Case("avx512f", X86Features::AVX512F) + .Case("bmi", X86Features::BMI) + .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) + .Case("avx5124vnniw", X86Features::AVX5124VNNIW) + .Case("avx5124fmaps", X86Features::AVX5124FMAPS) + .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) + .Default(X86Features::MAX); + assert(Feature != X86Features::MAX && "Invalid feature!"); + FeaturesMask |= (1U << Feature); + } + + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Grab the first (0th) element from the field __cpu_features off of the + // global in the struct STy. + Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), + ConstantInt::get(Int32Ty, 0)}; + Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. + Value *Bitset = Builder.CreateAnd( + Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); + return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); +} + +Value *CodeGenFunction::EmitX86CpuInit() { + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, + /*Variadic*/ false); + llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + return Builder.CreateCall(Func); +} + +Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (BuiltinID == X86::BI__builtin_cpu_is) + return EmitX86CpuIs(E); + if (BuiltinID == X86::BI__builtin_cpu_supports) + return EmitX86CpuSupports(E); + if (BuiltinID == X86::BI__builtin_cpu_init) + return EmitX86CpuInit(); + SmallVector<Value*, 4> Ops; // Find out if any arguments are required to be integer constant expressions. @@ -7252,105 +7793,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: return nullptr; - case X86::BI__builtin_cpu_supports: { - const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); - - // TODO: When/if this becomes more than x86 specific then use a TargetInfo - // based mapping. - // Processor features and mapping to processor feature value. - enum X86Features { - CMOV = 0, - MMX, - POPCNT, - SSE, - SSE2, - SSE3, - SSSE3, - SSE4_1, - SSE4_2, - AVX, - AVX2, - SSE4_A, - FMA4, - XOP, - FMA, - AVX512F, - BMI, - BMI2, - AES, - PCLMUL, - AVX512VL, - AVX512BW, - AVX512DQ, - AVX512CD, - AVX512ER, - AVX512PF, - AVX512VBMI, - AVX512IFMA, - MAX - }; - - X86Features Feature = StringSwitch<X86Features>(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Default(X86Features::MAX); - assert(Feature != X86Features::MAX && "Invalid feature!"); - - // Matching the struct layout from the compiler-rt/libgcc structure that is - // filled in: - // unsigned int __cpu_vendor; - // unsigned int __cpu_type; - // unsigned int __cpu_subtype; - // unsigned int __cpu_features[1]; - llvm::Type *STy = llvm::StructType::get( - Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr); - - // Grab the global __cpu_model. - llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); - - // Grab the first (0th) element from the field __cpu_features off of the - // global in the struct STy. - Value *Idxs[] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, 3), - ConstantInt::get(Int32Ty, 0) - }; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); - Value *Features = Builder.CreateAlignedLoad(CpuFeatures, - CharUnits::fromQuantity(4)); - - // Check the value of the bit corresponding to the feature requested. - Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); - return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); - } case X86::BI_mm_prefetch: { Value *Address = Ops[0]; Value *RW = ConstantInt::get(Int32Ty, 0); @@ -7473,6 +7915,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntd_512: + case X86::BI__builtin_ia32_vpopcntq_512: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, Ops); + } + case X86::BI__builtin_ia32_cvtmask2b128: + case X86::BI__builtin_ia32_cvtmask2b256: + case X86::BI__builtin_ia32_cvtmask2b512: + case X86::BI__builtin_ia32_cvtmask2w128: + case X86::BI__builtin_ia32_cvtmask2w256: + case X86::BI__builtin_ia32_cvtmask2w512: + case X86::BI__builtin_ia32_cvtmask2d128: + case X86::BI__builtin_ia32_cvtmask2d256: + case X86::BI__builtin_ia32_cvtmask2d512: + case X86::BI__builtin_ia32_cvtmask2q128: + case X86::BI__builtin_ia32_cvtmask2q256: + case X86::BI__builtin_ia32_cvtmask2q512: + return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -7596,6 +8058,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86Select(*this, Ops[4], Align, Ops[3]); } + case X86::BI__builtin_ia32_vperm2f128_pd256: + case X86::BI__builtin_ia32_vperm2f128_ps256: + case X86::BI__builtin_ia32_vperm2f128_si256: + case X86::BI__builtin_ia32_permti256: { + unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + + // This takes a very simple approach since there are two lanes and a + // shuffle can have 2 inputs. So we reserve the first input for the first + // lane and the second input for the second lane. This may result in + // duplicate sources, but this can be dealt with in the backend. + + Value *OutOps[2]; + uint32_t Indices[8]; + for (unsigned l = 0; l != 2; ++l) { + // Determine the source for this lane. + if (Imm & (1 << ((l * 4) + 3))) + OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); + else if (Imm & (1 << ((l * 4) + 1))) + OutOps[l] = Ops[1]; + else + OutOps[l] = Ops[0]; + + for (unsigned i = 0; i != NumElts/2; ++i) { + // Start with ith element of the source for this lane. + unsigned Idx = (l * NumElts) + i; + // If bit 0 of the immediate half is set, switch to the high half of + // the source. + if (Imm & (1 << (l * 4))) + Idx += NumElts/2; + Indices[(l * (NumElts/2)) + i] = Idx; + } + } + + return Builder.CreateShuffleVector(OutOps[0], OutOps[1], + makeArrayRef(Indices, NumElts), + "vperm"); + } + case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: @@ -7710,6 +8211,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[1]); } + case X86::BI__builtin_ia32_pabsb128: + case X86::BI__builtin_ia32_pabsw128: + case X86::BI__builtin_ia32_pabsd128: + case X86::BI__builtin_ia32_pabsb256: + case X86::BI__builtin_ia32_pabsw256: + case X86::BI__builtin_ia32_pabsd256: + case X86::BI__builtin_ia32_pabsq128_mask: + case X86::BI__builtin_ia32_pabsq256_mask: + case X86::BI__builtin_ia32_pabsb512_mask: + case X86::BI__builtin_ia32_pabsw512_mask: + case X86::BI__builtin_ia32_pabsd512_mask: + case X86::BI__builtin_ia32_pabsq512_mask: + return EmitX86Abs(*this, Ops); + case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: @@ -7853,6 +8368,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } // We can't handle 8-31 immediates with native IR, use the intrinsic. + // Except for predicates that create constants. Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); @@ -7860,12 +8376,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, ID = Intrinsic::x86_sse_cmp_ps; break; case X86::BI__builtin_ia32_cmpps256: + // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector + // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... + if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { + Value *Constant = (CC == 0xf || CC == 0x1f) ? + llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) : + llvm::Constant::getNullValue(Builder.getInt32Ty()); + Value *Vec = Builder.CreateVectorSplat( + Ops[0]->getType()->getVectorNumElements(), Constant); + return Builder.CreateBitCast(Vec, Ops[0]->getType()); + } ID = Intrinsic::x86_avx_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd: ID = Intrinsic::x86_sse2_cmp_pd; break; case X86::BI__builtin_ia32_cmppd256: + // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector + // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0... + if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) { + Value *Constant = (CC == 0xf || CC == 0x1f) ? + llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) : + llvm::Constant::getNullValue(Builder.getInt64Ty()); + Value *Vec = Builder.CreateVectorSplat( + Ops[0]->getType()->getVectorNumElements(), Constant); + return Builder.CreateBitCast(Vec, Ops[0]->getType()); + } ID = Intrinsic::x86_avx_cmp_pd_256; break; } @@ -7946,13 +8482,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__faststorefence: { return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - llvm::CrossThread); + llvm::SyncScope::System); } case X86::BI_ReadWriteBarrier: case X86::BI_ReadBarrier: case X86::BI_WriteBarrier: { return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, - llvm::SingleThread); + llvm::SyncScope::SingleThread); } case X86::BI_BitScanForward: case X86::BI_BitScanForward64: @@ -7995,13 +8531,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); - llvm::AttributeSet NoReturnAttr = - AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoReturn); + llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); CallSite CS = Builder.CreateCall(IA); CS.setAttributes(NoReturnAttr); return CS.getInstruction(); } + case X86::BI__readfsbyte: + case X86::BI__readfsword: + case X86::BI__readfsdword: + case X86::BI__readfsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 257)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } + case X86::BI__readgsbyte: + case X86::BI__readgsword: + case X86::BI__readgsdword: + case X86::BI__readgsqword: { + llvm::Type *IntTy = ConvertType(E->getType()); + Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), + llvm::PointerType::get(IntTy, 256)); + LoadInst *Load = Builder.CreateAlignedLoad( + IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); + Load->setVolatile(true); + return Load; + } } } @@ -8359,6 +8919,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops); } } + + case PPC::BI__builtin_vsx_xxpermdi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third arg must be constant integer!"); + + unsigned Index = ArgCI->getZExtValue(); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + // Element zero comes from the first input vector and element one comes from + // the second. The element indices within each vector are numbered in big + // endian order so the shuffle mask must be adjusted for this on little + // endian platforms (i.e. index is complemented and source vector reversed). + unsigned ElemIdx0; + unsigned ElemIdx1; + if (getTarget().isLittleEndian()) { + ElemIdx0 = (~Index & 1) + 2; + ElemIdx1 = (~Index & 2) >> 1; + } else { // BigEndian + ElemIdx0 = (Index & 2) >> 1; + ElemIdx1 = 2 + (Index & 1); + } + + Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1)}; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_vsx_xxsldwi: { + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && "Third argument must be a compile time constant"); + unsigned Index = ArgCI->getZExtValue() & 0x3; + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); + + // Create a shuffle mask + unsigned ElemIdx0; + unsigned ElemIdx1; + unsigned ElemIdx2; + unsigned ElemIdx3; + if (getTarget().isLittleEndian()) { + // Little endian element N comes from element 8+N-Index of the + // concatenated wide vector (of course, using modulo arithmetic on + // the total number of elements). + ElemIdx0 = (8 - Index) % 8; + ElemIdx1 = (9 - Index) % 8; + ElemIdx2 = (10 - Index) % 8; + ElemIdx3 = (11 - Index) % 8; + } else { + // Big endian ElemIdx<N> = Index + N + ElemIdx0 = Index; + ElemIdx1 = Index + 1; + ElemIdx2 = Index + 2; + ElemIdx3 = Index + 3; + } + + Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1), + ConstantInt::get(Int32Ty, ElemIdx2), + ConstantInt::get(Int32Ty, ElemIdx3)}; + + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } } } @@ -8488,6 +9122,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CI->setConvergent(); return CI; } + case AMDGPU::BI__builtin_amdgcn_read_exec_lo: + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { + StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? + "exec_lo" : "exec_hi"; + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); + CI->setConvergent(); + return CI; + } // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: @@ -8600,12 +9243,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Undef}); } + case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8614,6 +9259,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Z}); } + case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8623,12 +9269,35 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); } + case SystemZ::BI__builtin_s390_vfnmasb: + case SystemZ::BI__builtin_s390_vfnmadb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub"); + } + case SystemZ::BI__builtin_s390_vfnmssb: + case SystemZ::BI__builtin_s390_vfnmsdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + Value *NegZ = Builder.CreateFSub(Zero, Z, "sub"); + return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ})); + } + case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } + case SystemZ::BI__builtin_s390_vflnsb: case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8636,6 +9305,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); } + case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); @@ -8645,8 +9315,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); (void)IsConstM4; (void)IsConstM5; - // Check whether this instance of vfidb can be represented via a LLVM - // standard intrinsic. We only support some combinations of M4 and M5. + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; switch (M4.getZExtValue()) { default: break; @@ -8671,11 +9341,76 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } - Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; + case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); return Builder.CreateCall(F, {X, M4Value, M5Value}); } + case SystemZ::BI__builtin_s390_vfmaxsb: + case SystemZ::BI__builtin_s390_vfmaxdb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::maxnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; + case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } + case SystemZ::BI__builtin_s390_vfminsb: + case SystemZ::BI__builtin_s390_vfmindb: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + // Constant-fold the M4 mask argument. + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; + // Check whether this instance can be represented via a LLVM standard + // intrinsic. We only support some values of M4. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + switch (M4.getZExtValue()) { + default: break; + case 4: ID = Intrinsic::minnum; break; + } + if (ID != Intrinsic::not_intrinsic) { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + switch (BuiltinID) { + case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; + case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; + default: llvm_unreachable("Unknown BuiltinID"); + } + Function *F = CGM.getIntrinsic(ID); + Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); + return Builder.CreateCall(F, {X, Y, M4Value}); + } // Vector intrisincs that output the post-instruction CC value. @@ -8742,10 +9477,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vstrczhs); INTRINSIC_WITH_CC(s390_vstrczfs); + INTRINSIC_WITH_CC(s390_vfcesbs); INTRINSIC_WITH_CC(s390_vfcedbs); + INTRINSIC_WITH_CC(s390_vfchsbs); INTRINSIC_WITH_CC(s390_vfchdbs); + INTRINSIC_WITH_CC(s390_vfchesbs); INTRINSIC_WITH_CC(s390_vfchedbs); + INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); #undef INTRINSIC_WITH_CC @@ -8759,9 +9498,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); - AlignmentSource AlignSource; clang::CharUnits Align = - getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); + getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); return Builder.CreateCall( CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), Ptr->getType()}), @@ -8995,6 +9733,219 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, {Ptr->getType()->getPointerElementType(), Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } + case NVPTX::BI__nvvm_match_all_sync_i32p: + case NVPTX::BI__nvvm_match_all_sync_i64p: { + Value *Mask = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); + Value *ResultPair = Builder.CreateCall( + CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p + ? Intrinsic::nvvm_match_all_sync_i32p + : Intrinsic::nvvm_match_all_sync_i64p), + {Mask, Val}); + Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), + PredOutPtr.getElementType()); + Builder.CreateStore(Pred, PredOutPtr); + return Builder.CreateExtractValue(ResultPair, 0); + } + case NVPTX::BI__hmma_m16n16k16_ld_a: + case NVPTX::BI__hmma_m16n16k16_ld_b: + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: { + Address Dst = EmitPointerWithAlignment(E->getArg(0)); + Value *Src = EmitScalarExpr(E->getArg(1)); + Value *Ldm = EmitScalarExpr(E->getArg(2)); + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + return nullptr; + bool isColMajor = isColMajorArg.getSExtValue(); + unsigned IID; + unsigned NumResults; + switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_ld_a: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_load_a_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m16n16k16_ld_b: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_load_b_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_load_c_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_load_c_f32_row_stride; + NumResults = 8; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } + Value *Result = + Builder.CreateCall(CGM.getIntrinsic(IID), + {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm}); + + // Save returned values. + for (unsigned i = 0; i < NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } + return Result; + } + + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + case NVPTX::BI__hmma_m16n16k16_st_c_f32: { + Value *Dst = EmitScalarExpr(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); + Value *Ldm = EmitScalarExpr(E->getArg(2)); + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + return nullptr; + bool isColMajor = isColMajorArg.getSExtValue(); + unsigned IID; + unsigned NumResults = 8; + // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet + // for some reason nvcc builtins use _c_. + switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_store_d_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_store_d_f32_row_stride; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } + Function *Intrinsic = CGM.getIntrinsic(IID); + llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); + SmallVector<Value *, 10> Values; + Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy)); + for (unsigned i = 0; i < NumResults; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ParamType)); + } + Values.push_back(Ldm); + Value *Result = Builder.CreateCall(Intrinsic, Values); + return Result; + } + + // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) + // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf> + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: { + Address Dst = EmitPointerWithAlignment(E->getArg(0)); + Address SrcA = EmitPointerWithAlignment(E->getArg(1)); + Address SrcB = EmitPointerWithAlignment(E->getArg(2)); + Address SrcC = EmitPointerWithAlignment(E->getArg(3)); + llvm::APSInt LayoutArg; + if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) + return nullptr; + int Layout = LayoutArg.getSExtValue(); + if (Layout < 0 || Layout > 3) + return nullptr; + llvm::APSInt SatfArg; + if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + return nullptr; + bool Satf = SatfArg.getSExtValue(); + + // clang-format off +#define MMA_VARIANTS(type) {{ \ + Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ + Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ + Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ + Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ + Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ + }} + // clang-format on + + auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { + unsigned Index = Layout * 2 + Satf; + assert(Index < 8); + return Variants[Index]; + }; + unsigned IID; + unsigned NumEltsC; + unsigned NumEltsD; + switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16)); + NumEltsC = 4; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16)); + NumEltsC = 4; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32)); + NumEltsC = 8; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32)); + NumEltsC = 8; + NumEltsD = 8; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS + + SmallVector<Value *, 24> Values; + Function *Intrinsic = CGM.getIntrinsic(IID); + llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + // Load A + for (unsigned i = 0; i < 8; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(SrcA.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ABType)); + } + // Load B + for (unsigned i = 0; i < 8; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(SrcB.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ABType)); + } + // Load C + llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); + for (unsigned i = 0; i < NumEltsC; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(SrcC.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, CType)); + } + Value *Result = Builder.CreateCall(Intrinsic, Values); + llvm::Type *DType = Dst.getElementType(); + for (unsigned i = 0; i < NumEltsD; ++i) + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), + Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + return Result; + } default: return nullptr; } @@ -9013,6 +9964,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); return Builder.CreateCall(Callee, X); } + case WebAssembly::BI__builtin_wasm_throw: { + Value *Tag = EmitScalarExpr(E->getArg(0)); + Value *Obj = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + return Builder.CreateCall(Callee, {Tag, Obj}); + } + case WebAssembly::BI__builtin_wasm_rethrow: { + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + return Builder.CreateCall(Callee); + } default: return nullptr; diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 813cd74001..d24ef0a8a9 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -265,7 +265,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { "__cudaRegisterFatBinary"); // struct { int magic, int version, void * gpu_binary, void * dont_care }; llvm::StructType *FatbinWrapperTy = - llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr); + llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy); llvm::Function *ModuleCtorFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrTy, false), diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 59010f4407..5ef4dc45fb 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -110,16 +110,14 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return true; return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base), - GlobalDecl(BaseD, Dtor_Base), - false); + GlobalDecl(BaseD, Dtor_Base)); } /// Try to emit a definition as a global alias for another definition. /// If \p InEveryTU is true, we know that an equivalent alias can be produced /// in every translation unit. bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, - GlobalDecl TargetDecl, - bool InEveryTU) { + GlobalDecl TargetDecl) { if (!getCodeGenOpts().CXXCtorDtorAliases) return true; @@ -134,11 +132,6 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, llvm::GlobalValue::LinkageTypes TargetLinkage = getFunctionLinkage(TargetDecl); - // available_externally definitions aren't real definitions, so we cannot - // create an alias to one. - if (TargetLinkage == llvm::GlobalValue::AvailableExternallyLinkage) - return true; - // Check if we have it already. StringRef MangledName = getMangledName(AliasDecl); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); @@ -161,7 +154,14 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, // Instead of creating as alias to a linkonce_odr, replace all of the uses // of the aliasee. - if (llvm::GlobalValue::isDiscardableIfUnused(Linkage)) { + if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) && + !(TargetLinkage == llvm::GlobalValue::AvailableExternallyLinkage && + TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) { + // FIXME: An extern template instantiation will create functions with + // linkage "AvailableExternally". In libc++, some classes also define + // members with attribute "AlwaysInline" and expect no reference to + // be generated. It is desirable to reenable this optimisation after + // corresponding LLVM changes. addReplacement(MangledName, Aliasee); return false; } @@ -176,13 +176,11 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, return true; } - if (!InEveryTU) { - // If we don't have a definition for the destructor yet, don't - // emit. We can't emit aliases to declarations; that's just not - // how aliases work. - if (Ref->isDeclaration()) - return true; - } + // If we don't have a definition for the destructor yet or the definition is + // avaialable_externally, don't emit an alias. We can't emit aliases to + // declarations; that's just not how aliases work. + if (Ref->isDeclarationForLinker()) + return true; // Don't create an alias to a linker weak symbol. This avoids producing // different COMDATs in different TUs. Another option would be to @@ -256,7 +254,7 @@ llvm::Constant *CodeGenModule::getAddrOfCXXStructor( return GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeSet(), IsForDefinition); + /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index df75a7d7ff..033258643d 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -30,38 +30,9 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { } bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // If RD has a non-trivial move or copy constructor, we cannot copy the - // argument. - if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor()) - return false; - - // If RD has a non-trivial destructor, we cannot copy the argument. - if (RD->hasNonTrivialDestructor()) - return false; - // We can only copy the argument if there exists at least one trivial, // non-deleted copy or move constructor. - // FIXME: This assumes that all lazily declared copy and move constructors are - // not deleted. This assumption might not be true in some corner cases. - bool CopyDeleted = false; - bool MoveDeleted = false; - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor() || CD->isMoveConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy or move ctor. Return - // directly. - if (!CD->isDeleted()) - return true; - if (CD->isCopyConstructor()) - CopyDeleted = true; - else - MoveDeleted = true; - } - } - - // If all trivial copy and move constructors are deleted, we cannot copy the - // argument. - return !(CopyDeleted && MoveDeleted); + return RD->canPassInRegisters(); } llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { @@ -159,10 +130,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList ¶ms) { // FIXME: I'm not entirely sure I like using a fake decl just for code // generation. Maybe we can come up with a better way? - ImplicitParamDecl *ThisDecl - = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(), - &CGM.getContext().Idents.get("this"), - MD->getThisType(CGM.getContext())); + auto *ThisDecl = ImplicitParamDecl::Create( + CGM.getContext(), nullptr, MD->getLocation(), + &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()), + ImplicitParamDecl::CXXThis); params.push_back(ThisDecl); CGF.CXXABIThisDecl = ThisDecl; diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 186abc3c6d..7b912e3aca 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -392,7 +392,7 @@ public: isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) = 0; - /// Checks if ABI requires to initilize vptrs for given dynamic class. + /// Checks if ABI requires to initialize vptrs for given dynamic class. virtual bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) = 0; /// Get the address point of the vtable for the given base subobject. diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 5f069bfe46..971455a873 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -50,7 +50,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86FastCall: return llvm::CallingConv::X86_FastCall; case CC_X86RegCall: return llvm::CallingConv::X86_RegCall; case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall; - case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64; + case CC_Win64: return llvm::CallingConv::Win64; case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV; case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS; case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; @@ -129,7 +129,7 @@ static void addExtParameterInfosForCall( paramInfos.resize(totalArgs); } -/// Adds the formal paramaters in FPT to the given prefix. If any parameter in +/// Adds the formal parameters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, @@ -218,7 +218,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { return CC_IntelOclBicc; if (D->hasAttr<MSABIAttr>()) - return IsWindows ? CC_C : CC_X86_64Win64; + return IsWindows ? CC_C : CC_Win64; if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; @@ -455,11 +455,15 @@ const CGFunctionInfo & CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, QualType receiverType) { SmallVector<CanQualType, 16> argTys; + SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(2); argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); + auto extParamInfo = FunctionProtoType::ExtParameterInfo().withIsNoEscape( + I->hasAttr<NoEscapeAttr>()); + extParamInfos.push_back(extParamInfo); } FunctionType::ExtInfo einfo; @@ -475,7 +479,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, return arrangeLLVMFunctionInfo( GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, {}, required); + /*chainCall=*/false, argTys, einfo, extParamInfos, required); } const CGFunctionInfo & @@ -707,6 +711,12 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, signature.getRequiredArgs()); } +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); +} +} + /// Arrange the argument and result information for an abstract value /// of a given function type. This is the method which all of the /// above functions ultimately defer to. @@ -719,7 +729,7 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), - std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); + [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; @@ -741,12 +751,16 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, bool inserted = FunctionsBeingProcessed.insert(FI).second; (void)inserted; assert(inserted && "Recursively being processed?"); - + // Compute ABI information. - if (info.getCC() != CC_Swift) { - getABIInfo().computeInfo(*FI); - } else { + if (CC == llvm::CallingConv::SPIR_KERNEL) { + // Force target independent argument handling for the host visible + // kernel functions. + computeSPIRKernelABIInfo(CGM, *FI); + } else if (info.getCC() == CC_Swift) { swiftcall::computeABIInfo(CGM, *FI); + } else { + getABIInfo().computeInfo(*FI); } // Loop over all of the computed argument and return value info. If any of @@ -788,6 +802,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ChainCall = chainCall; FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); + FI->NoCallerSavedRegs = info.getNoCallerSavedRegs(); FI->Required = required; FI->HasRegParm = info.getHasRegParm(); FI->RegParm = info.getRegParm(); @@ -1212,7 +1227,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // // FIXME: Assert that we aren't truncating non-padding bits when have access // to that information. - Src = CGF.Builder.CreateBitCast(Src, llvm::PointerType::getUnqual(Ty)); + Src = CGF.Builder.CreateBitCast(Src, + Ty->getPointerTo(Src.getAddressSpace())); return CGF.Builder.CreateLoad(Src); } @@ -1286,7 +1302,7 @@ static void CreateCoercedStore(llvm::Value *Src, // If store is legal, just bitcast the src pointer. if (SrcSize <= DstSize) { - Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy)); + Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); BuildAggStore(CGF, Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. This is stupid, but @@ -1586,9 +1602,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::Indirect: { assert(NumIRArgs == 1); - // indirect arguments are always on the stack, which is addr space #0. + // indirect arguments are always on the stack, which is alloca addr space. llvm::Type *LTy = ConvertTypeForMem(it->type); - ArgTypes[FirstIRArg] = LTy->getPointerTo(); + ArgTypes[FirstIRArg] = LTy->getPointerTo( + CGM.getDataLayout().getAllocaAddrSpace()); break; } @@ -1725,7 +1742,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; if (!Recips.empty()) FuncAttrs.addAttribute("reciprocal-estimates", - llvm::join(Recips.begin(), Recips.end(), ",")); + llvm::join(Recips, ",")); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); @@ -1733,13 +1750,16 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("backchain"); } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all functions and calls in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as - // __syncthreads(), and so can't have certain optimizations applied around - // them). LLVM will remove this attribute where it safely can. + if (getLangOpts().assumeFunctionsAreConvergent()) { + // Conservatively, mark all functions and calls in CUDA and OpenCL as + // convergent (meaning, they may call an intrinsically convergent op, such + // as __syncthreads() / barrier(), and so can't have certain optimizations + // applied around them). LLVM will remove this attribute where it safely + // can. FuncAttrs.addAttribute(llvm::Attribute::Convergent); + } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Exceptions aren't supported in CUDA device code. FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); @@ -1754,14 +1774,12 @@ void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { ConstructDefaultFnAttrList(F.getName(), F.hasFnAttribute(llvm::Attribute::OptimizeNone), /* AttrOnCallsite = */ false, FuncAttrs); - llvm::AttributeSet AS = llvm::AttributeSet::get( - getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); - F.addAttributes(llvm::AttributeSet::FunctionIndex, AS); + F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } void CodeGenModule::ConstructAttributeList( StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo, - AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) { + llvm::AttributeList &AttrList, unsigned &CallingConv, bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; @@ -1785,6 +1803,8 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); if (TargetDecl->hasAttr<NoReturnAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + if (TargetDecl->hasAttr<ColdAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::Cold); if (TargetDecl->hasAttr<NoDuplicateAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) @@ -1815,6 +1835,8 @@ void CodeGenModule::ConstructAttributeList( RetAttrs.addAttribute(llvm::Attribute::NoAlias); if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); + if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>()) + FuncAttrs.addAttribute("no_caller_saved_registers"); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) { @@ -1863,15 +1885,16 @@ void CodeGenModule::ConstructAttributeList( // the function. const auto *TD = FD->getAttr<TargetAttr>(); TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "" && + getTarget().isValidCPUName(ParsedAttr.Architecture)) + TargetCPU = ParsedAttr.Architecture; if (TargetCPU != "") - FuncAttrs.addAttribute("target-cpu", TargetCPU); + FuncAttrs.addAttribute("target-cpu", TargetCPU); if (!Features.empty()) { std::sort(Features.begin(), Features.end()); FuncAttrs.addAttribute( "target-features", - llvm::join(Features.begin(), Features.end(), ",")); + llvm::join(Features, ",")); } } else { // Otherwise just add the existing target cpu and target features to the @@ -1883,7 +1906,7 @@ void CodeGenModule::ConstructAttributeList( std::sort(Features.begin(), Features.end()); FuncAttrs.addAttribute( "target-features", - llvm::join(Features.begin(), Features.end(), ",")); + llvm::join(Features, ",")); } } } @@ -1930,13 +1953,8 @@ void CodeGenModule::ConstructAttributeList( RetAttrs.addAttribute(llvm::Attribute::NonNull); } - // Attach return attributes. - if (RetAttrs.hasAttributes()) { - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); - } - bool hasUsedSRet = false; + SmallVector<llvm::AttributeSet, 4> ArgAttrs(IRFunctionArgs.totalIRArgs()); // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { @@ -1945,16 +1963,16 @@ void CodeGenModule::ConstructAttributeList( hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs)); + ArgAttrs[IRFunctionArgs.getSRetArgNo()] = + llvm::AttributeSet::get(getLLVMContext(), SRETAttrs); } // Attach attributes to inalloca argument. if (IRFunctionArgs.hasInallocaArg()) { llvm::AttrBuilder Attrs; Attrs.addAttribute(llvm::Attribute::InAlloca); - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs)); + ArgAttrs[IRFunctionArgs.getInallocaArgNo()] = + llvm::AttributeSet::get(getLLVMContext(), Attrs); } unsigned ArgNo = 0; @@ -1967,10 +1985,12 @@ void CodeGenModule::ConstructAttributeList( // Add attribute for padding argument, if necessary. if (IRFunctionArgs.hasPaddingArg(ArgNo)) { - if (AI.getPaddingInReg()) - PAL.push_back(llvm::AttributeSet::get( - getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1, - llvm::Attribute::InReg)); + if (AI.getPaddingInReg()) { + ArgAttrs[IRFunctionArgs.getPaddingArgNo(ArgNo)] = + llvm::AttributeSet::get( + getLLVMContext(), + llvm::AttrBuilder().addAttribute(llvm::Attribute::InReg)); + } } // 'restrict' -> 'noalias' is done in EmitFunctionProlog when we @@ -2081,21 +2101,22 @@ void CodeGenModule::ConstructAttributeList( break; } + if (FI.getExtParameterInfo(ArgNo).isNoEscape()) + Attrs.addAttribute(llvm::Attribute::NoCapture); + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); for (unsigned i = 0; i < NumIRArgs; i++) - PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), - FirstIRArg + i + 1, Attrs)); + ArgAttrs[FirstIRArg + i] = + llvm::AttributeSet::get(getLLVMContext(), Attrs); } } assert(ArgNo == FI.arg_size()); - if (FuncAttrs.hasAttributes()) - PAL.push_back(llvm:: - AttributeSet::get(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - FuncAttrs)); + AttrList = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeSet::get(getLLVMContext(), FuncAttrs), + llvm::AttributeSet::get(getLLVMContext(), RetAttrs), ArgAttrs); } /// An argument came in as a promoted argument; demote it back to its @@ -2206,8 +2227,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (IRFunctionArgs.hasSRetArg()) { auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::Attribute::NoAlias); } // Track if we received the parameter as a pointer (indirect, byval, or @@ -2298,9 +2318,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), PVD->getFunctionScopeIndex())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); QualType OTy = PVD->getOriginalType(); if (const auto *ArrTy = @@ -2317,12 +2335,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::AttrBuilder Attrs; Attrs.addDereferenceableAttr( getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + AI->addAttrs(Attrs); } else if (getContext().getTargetAddressSpace(ETy) == 0) { - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); } } } else if (const auto *ArrTy = @@ -2332,35 +2347,26 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // we know that it must be nonnull. if (ArrTy->getSizeModifier() == VariableArrayType::Static && !getContext().getTargetAddressSpace(ArrTy->getElementType())) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NonNull)); + AI->addAttr(llvm::Attribute::NonNull); } const auto *AVAttr = PVD->getAttr<AlignValueAttr>(); if (!AVAttr) if (const auto *TOTy = dyn_cast<TypedefType>(OTy)) AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>(); - if (AVAttr) { + if (AVAttr) { llvm::Value *AlignmentValue = EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - unsigned Alignment = - std::min((unsigned) AlignmentCI->getZExtValue(), - +llvm::Value::MaximumAlignment); - - llvm::AttrBuilder Attrs; - Attrs.addAlignmentAttr(Alignment); - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, Attrs)); + unsigned Alignment = std::min((unsigned)AlignmentCI->getZExtValue(), + +llvm::Value::MaximumAlignment); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); } } if (Arg->getType().isRestrictQualified()) - AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), - AI->getArgNo() + 1, - llvm::Attribute::NoAlias)); + AI->addAttr(llvm::Attribute::NoAlias); // LLVM expects swifterror parameters to be used in very restricted // ways. Copy the value into a less-restricted temporary. @@ -2418,8 +2424,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, Address AddrToStoreInto = Address::invalid(); if (SrcSize <= DstSize) { - AddrToStoreInto = - Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy)); + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); } else { AddrToStoreInto = CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); @@ -2912,7 +2917,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, llvm::Instruction *Ret; if (RV) { - EmitReturnValueCheck(RV, EndLoc); + EmitReturnValueCheck(RV); Ret = Builder.CreateRet(RV); } else { Ret = Builder.CreateRetVoid(); @@ -2922,8 +2927,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, Ret->setDebugLoc(std::move(RetDbgLoc)); } -void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV, - SourceLocation EndLoc) { +void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) { // A current decl may not be available when emitting vtable thunks. if (!CurCodeDecl) return; @@ -2938,44 +2942,48 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV, // Prefer the returns_nonnull attribute if it's present. SourceLocation AttrLoc; SanitizerMask CheckKind; + SanitizerHandler Handler; if (RetNNAttr) { assert(!requiresReturnValueNullabilityCheck() && "Cannot check nullability and the nonnull attribute"); AttrLoc = RetNNAttr->getLocation(); CheckKind = SanitizerKind::ReturnsNonnullAttribute; + Handler = SanitizerHandler::NonnullReturn; } else { - // FIXME: The runtime shouldn't refer to the 'returns_nonnull' attribute. if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl)) if (auto *TSI = DD->getTypeSourceInfo()) if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>()) AttrLoc = FTL.getReturnLoc().findNullabilityLoc(); CheckKind = SanitizerKind::NullabilityReturn; + Handler = SanitizerHandler::NullabilityReturn; } SanitizerScope SanScope(this); - llvm::BasicBlock *Check = nullptr; - llvm::BasicBlock *NoCheck = nullptr; - if (requiresReturnValueNullabilityCheck()) { - // Before doing the nullability check, make sure that the preconditions for - // the check are met. - Check = createBasicBlock("nullcheck"); - NoCheck = createBasicBlock("no.nullcheck"); - Builder.CreateCondBr(RetValNullabilityPrecondition, Check, NoCheck); - EmitBlock(Check); - } + // Make sure the "return" source location is valid. If we're checking a + // nullability annotation, make sure the preconditions for the check are met. + llvm::BasicBlock *Check = createBasicBlock("nullcheck"); + llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck"); + llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load"); + llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr); + if (requiresReturnValueNullabilityCheck()) + CanNullCheck = + Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition); + Builder.CreateCondBr(CanNullCheck, Check, NoCheck); + EmitBlock(Check); - // Now do the null check. If the returns_nonnull attribute is present, this - // is done unconditionally. + // Now do the null check. llvm::Value *Cond = Builder.CreateIsNotNull(RV); - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(EndLoc), EmitCheckSourceLocation(AttrLoc), - }; - EmitCheck(std::make_pair(Cond, CheckKind), SanitizerHandler::NonnullReturn, - StaticData, None); + llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)}; + llvm::Value *DynamicData[] = {SLocPtr}; + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData); - if (requiresReturnValueNullabilityCheck()) - EmitBlock(NoCheck); + EmitBlock(NoCheck); + +#ifndef NDEBUG + // The return location should not be used after the check has been emitted. + ReturnLocation = Address::invalid(); +#endif } static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) { @@ -3058,7 +3066,8 @@ static void emitWriteback(CodeGenFunction &CGF, // If the argument wasn't provably non-null, we need to null check // before doing the store. - bool provablyNonNull = llvm::isKnownNonNull(srcAddr.getPointer()); + bool provablyNonNull = llvm::isKnownNonZero(srcAddr.getPointer(), + CGF.CGM.getDataLayout()); if (!provablyNonNull) { llvm::BasicBlock *writebackBB = CGF.createBasicBlock("icr.writeback"); contBB = CGF.createBasicBlock("icr.done"); @@ -3198,7 +3207,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, // If the address is *not* known to be non-null, we need to switch. llvm::Value *finalArgument; - bool provablyNonNull = llvm::isKnownNonNull(srcAddr.getPointer()); + bool provablyNonNull = llvm::isKnownNonZero(srcAddr.getPointer(), + CGF.CGM.getDataLayout()); if (provablyNonNull) { finalArgument = temp.getPointer(); } else { @@ -3314,12 +3324,15 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation AttrLoc; SanitizerMask CheckKind; + SanitizerHandler Handler; if (NNAttr) { AttrLoc = NNAttr->getLocation(); CheckKind = SanitizerKind::NonnullAttribute; + Handler = SanitizerHandler::NonnullArg; } else { AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc(); CheckKind = SanitizerKind::NullabilityArg; + Handler = SanitizerHandler::NullabilityArg; } SanitizerScope SanScope(this); @@ -3331,8 +3344,7 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc), llvm::ConstantInt::get(Int32Ty, ArgNo + 1), }; - EmitCheck(std::make_pair(Cond, CheckKind), SanitizerHandler::NonnullArg, - StaticData, None); + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); } void CodeGenFunction::EmitCallArgs( @@ -3390,6 +3402,14 @@ void CodeGenFunction::EmitCallArgs( unsigned Idx = LeftToRight ? I : E - I - 1; CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx; unsigned InitialArgSize = Args.size(); + // If *Arg is an ObjCIndirectCopyRestoreExpr, check that either the types of + // the argument and parameter match or the objc method is parameterized. + assert((!isa<ObjCIndirectCopyRestoreExpr>(*Arg) || + getContext().hasSameUnqualifiedType((*Arg)->getType(), + ArgTypes[Idx]) || + (isa<ObjCMethodDecl>(AC.getDecl()) && + isObjCMethodWithTypeParams(cast<ObjCMethodDecl>(AC.getDecl())))) && + "Argument and parameter types don't match"); EmitCallArg(Args, *Arg, ArgTypes[Idx]); // In particular, we depend on it being the last arg in Args, and the // objectsize bits depend on there only being one arg if !LeftToRight. @@ -3450,7 +3470,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, if (const ObjCIndirectCopyRestoreExpr *CRE = dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) { assert(getLangOpts().ObjCAutoRefCount); - assert(getContext().hasSameUnqualifiedType(E->getType(), type)); return emitWritebackArg(*this, args, CRE); } @@ -3710,12 +3729,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address ArgMemory = Address::invalid(); const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { - ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct); + const llvm::DataLayout &DL = CGM.getDataLayout(); + ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { IP = IP->getNextNode(); - AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(), + "argmem", IP); } else { AI = CreateTempAlloca(ArgStruct, "argmem"); } @@ -3814,7 +3835,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. - Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", false); IRCallArgs[FirstIRArg] = Addr.getPointer(); LValue argLV = MakeAddrLValue(Addr, I->Ty); @@ -3843,7 +3865,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", false); IRCallArgs[FirstIRArg] = AI.getPointer(); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { @@ -3937,7 +3960,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateMemCpy(TempAlloca, Src, SrcSize); Src = TempAlloca; } else { - Src = Builder.CreateBitCast(Src, llvm::PointerType::getUnqual(STy)); + Src = Builder.CreateBitCast(Src, + STy->getPointerTo(Src.getAddressSpace())); } auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); @@ -4111,13 +4135,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Compute the calling convention and attributes. unsigned CallingConv; - CodeGen::AttributeListType AttributeList; + llvm::AttributeList Attrs; CGM.ConstructAttributeList(CalleePtr->getName(), CallInfo, - Callee.getAbstractInfo(), - AttributeList, CallingConv, + Callee.getAbstractInfo(), Attrs, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(), - AttributeList); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -4128,15 +4149,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, !(Callee.getAbstractInfo().getCalleeDecl() && Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) { Attrs = - Attrs.addAttribute(getLLVMContext(), - llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); } // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoInline); } @@ -4153,7 +4173,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. - CannotThrow = Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex, + CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); } llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); @@ -4266,6 +4286,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateStore(elt, eltAddr); } // FALLTHROUGH + LLVM_FALLTHROUGH; } case ABIArgInfo::InAlloca: @@ -4349,6 +4370,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(), OffsetValue); + } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { + llvm::Value *ParamVal = + CallArgs[AA->getParamIndex() - 1].RV.getScalarVal(); + EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal); } } diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 031ce831cb..7e10407fc3 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -25,10 +25,10 @@ #include "ABIInfo.h" namespace llvm { - class AttributeSet; - class Function; - class Type; - class Value; +class AttributeList; +class Function; +class Type; +class Value; } namespace clang { @@ -39,28 +39,27 @@ namespace clang { class VarDecl; namespace CodeGen { - typedef SmallVector<llvm::AttributeSet, 8> AttributeListType; - /// Abstract information about a function or function prototype. - class CGCalleeInfo { - /// \brief The function prototype of the callee. - const FunctionProtoType *CalleeProtoTy; - /// \brief The function declaration of the callee. - const Decl *CalleeDecl; - - public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} - CGCalleeInfo(const Decl *calleeDecl) - : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} - - const FunctionProtoType *getCalleeFunctionProtoType() const { - return CalleeProtoTy; - } - const Decl *getCalleeDecl() const { return CalleeDecl; } +/// Abstract information about a function or function prototype. +class CGCalleeInfo { + /// \brief The function prototype of the callee. + const FunctionProtoType *CalleeProtoTy; + /// \brief The function declaration of the callee. + const Decl *CalleeDecl; + +public: + explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} + CGCalleeInfo(const Decl *calleeDecl) + : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} + + const FunctionProtoType *getCalleeFunctionProtoType() const { + return CalleeProtoTy; + } + const Decl *getCalleeDecl() const { return CalleeDecl; } }; /// All available information about a concrete callee. diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 7ba03a0d42..1cd59d1442 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -129,14 +129,16 @@ Address CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, - AlignmentSource *alignSource) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { // Ask the ABI to compute the actual address. llvm::Value *ptr = CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base, memberPtr, memberPtrType); QualType memberType = memberPtrType->getPointeeType(); - CharUnits memberAlign = getNaturalTypeAlignment(memberType, alignSource); + CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo, + TBAAInfo); memberAlign = CGM.getDynamicOffsetAlignment(base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), @@ -1413,10 +1415,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // possible to delegate the destructor body to the complete // destructor. Do so. if (DtorType == Dtor_Deleting) { + RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); - EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); - PopCleanupBlock(); + if (HaveInsertPoint()) + EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, LoadCXXThisAddress()); return; } @@ -1512,6 +1515,13 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) } namespace { + llvm::Value *LoadThisForDtorDelete(CodeGenFunction &CGF, + const CXXDestructorDecl *DD) { + if (Expr *ThisArg = DD->getOperatorDeleteThisArg()) + return CGF.EmitScalarExpr(ThisArg); + return CGF.LoadCXXThis(); + } + /// Call the operator delete associated with the current destructor. struct CallDtorDelete final : EHScopeStack::Cleanup { CallDtorDelete() {} @@ -1519,11 +1529,38 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); const CXXRecordDecl *ClassDecl = Dtor->getParent(); - CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), + LoadThisForDtorDelete(CGF, Dtor), CGF.getContext().getTagDeclType(ClassDecl)); } }; + void EmitConditionalDtorDeleteCall(CodeGenFunction &CGF, + llvm::Value *ShouldDeleteCondition, + bool ReturnAfterDelete) { + llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); + llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); + llvm::Value *ShouldCallDelete + = CGF.Builder.CreateIsNull(ShouldDeleteCondition); + CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); + + CGF.EmitBlock(callDeleteBB); + const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), + LoadThisForDtorDelete(CGF, Dtor), + CGF.getContext().getTagDeclType(ClassDecl)); + assert(Dtor->getOperatorDelete()->isDestroyingOperatorDelete() == + ReturnAfterDelete && + "unexpected value for ReturnAfterDelete"); + if (ReturnAfterDelete) + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + else + CGF.Builder.CreateBr(continueBB); + + CGF.EmitBlock(continueBB); + } + struct CallDtorDeleteConditional final : EHScopeStack::Cleanup { llvm::Value *ShouldDeleteCondition; @@ -1534,20 +1571,8 @@ namespace { } void Emit(CodeGenFunction &CGF, Flags flags) override { - llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); - llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); - llvm::Value *ShouldCallDelete - = CGF.Builder.CreateIsNull(ShouldDeleteCondition); - CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); - - CGF.EmitBlock(callDeleteBB); - const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); - const CXXRecordDecl *ClassDecl = Dtor->getParent(); - CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), - CGF.getContext().getTagDeclType(ClassDecl)); - CGF.Builder.CreateBr(continueBB); - - CGF.EmitBlock(continueBB); + EmitConditionalDtorDeleteCall(CGF, ShouldDeleteCondition, + /*ReturnAfterDelete*/false); } }; @@ -1577,6 +1602,7 @@ namespace { static void EmitSanitizerDtorCallback(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits::QuantityType PoisonSize) { + CodeGenFunction::SanitizerScope SanScope(&CGF); // Pass in void pointer and size of region as arguments to runtime // function llvm::Value *Args[] = {CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy), @@ -1705,6 +1731,9 @@ namespace { /// \brief Emit all code that comes at the end of class's /// destructor. This is to call destructors on members and base classes /// in reverse order of their construction. +/// +/// For a deleting destructor, this also handles the case where a destroying +/// operator delete completely overrides the definition. void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, CXXDtorType DtorType) { assert((!DD->isTrivial() || DD->hasAttr<DLLExportAttr>()) && @@ -1717,11 +1746,23 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, "operator delete missing - EnterDtorCleanups"); if (CXXStructorImplicitParamValue) { // If there is an implicit param to the deleting dtor, it's a boolean - // telling whether we should call delete at the end of the dtor. - EHStack.pushCleanup<CallDtorDeleteConditional>( - NormalAndEHCleanup, CXXStructorImplicitParamValue); + // telling whether this is a deleting destructor. + if (DD->getOperatorDelete()->isDestroyingOperatorDelete()) + EmitConditionalDtorDeleteCall(*this, CXXStructorImplicitParamValue, + /*ReturnAfterDelete*/true); + else + EHStack.pushCleanup<CallDtorDeleteConditional>( + NormalAndEHCleanup, CXXStructorImplicitParamValue); } else { - EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + if (DD->getOperatorDelete()->isDestroyingOperatorDelete()) { + const CXXRecordDecl *ClassDecl = DD->getParent(); + EmitDeleteCall(DD->getOperatorDelete(), + LoadThisForDtorDelete(*this, DD), + getContext().getTagDeclType(ClassDecl)); + EmitBranchThroughCleanup(ReturnBlock); + } else { + EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + } } return; } @@ -2382,7 +2423,7 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); - CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAInfoForVTablePtr()); + CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAVTablePtrAccessInfo()); if (CGM.getCodeGenOpts().OptimizationLevel > 0 && CGM.getCodeGenOpts().StrictVTablePointers) CGM.DecorateInstructionWithInvariantGroup(Store, Vptr.VTableClass); @@ -2476,7 +2517,7 @@ llvm::Value *CodeGenFunction::GetVTablePtr(Address This, const CXXRecordDecl *RD) { Address VTablePtrSrc = Builder.CreateElementBitCast(This, VTableTy); llvm::Instruction *VTable = Builder.CreateLoad(VTablePtrSrc, "vtable"); - CGM.DecorateInstructionWithTBAA(VTable, CGM.getTBAAInfoForVTablePtr()); + CGM.DecorateInstructionWithTBAA(VTable, CGM.getTBAAVTablePtrAccessInfo()); if (CGM.getCodeGenOpts().OptimizationLevel > 0 && CGM.getCodeGenOpts().StrictVTablePointers) @@ -2523,8 +2564,10 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) { void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, llvm::Value *VTable, SourceLocation Loc) { - if (CGM.getCodeGenOpts().WholeProgramVTables && - CGM.HasHiddenLTOVisibility(RD)) { + if (SanOpts.has(SanitizerKind::CFIVCall)) + EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); + else if (CGM.getCodeGenOpts().WholeProgramVTables && + CGM.HasHiddenLTOVisibility(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = @@ -2536,9 +2579,6 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, {CastedVTable, TypeId}); Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); } - - if (SanOpts.has(SanitizerKind::CFIVCall)) - EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); } void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, @@ -2604,28 +2644,34 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, !CGM.HasHiddenLTOVisibility(RD)) return; - std::string TypeName = RD->getQualifiedNameAsString(); - if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName)) - return; - - SanitizerScope SanScope(this); + SanitizerMask M; llvm::SanitizerStatKind SSK; switch (TCK) { case CFITCK_VCall: + M = SanitizerKind::CFIVCall; SSK = llvm::SanStat_CFI_VCall; break; case CFITCK_NVCall: + M = SanitizerKind::CFINVCall; SSK = llvm::SanStat_CFI_NVCall; break; case CFITCK_DerivedCast: + M = SanitizerKind::CFIDerivedCast; SSK = llvm::SanStat_CFI_DerivedCast; break; case CFITCK_UnrelatedCast: + M = SanitizerKind::CFIUnrelatedCast; SSK = llvm::SanStat_CFI_UnrelatedCast; break; case CFITCK_ICall: llvm_unreachable("not expecting CFITCK_ICall"); } + + std::string TypeName = RD->getQualifiedNameAsString(); + if (getContext().getSanitizerBlacklist().isBlacklistedType(M, TypeName)) + return; + + SanitizerScope SanScope(this); EmitSanitizerStatReport(SSK); llvm::Metadata *MD = @@ -2636,24 +2682,6 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *TypeTest = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); - SanitizerMask M; - switch (TCK) { - case CFITCK_VCall: - M = SanitizerKind::CFIVCall; - break; - case CFITCK_NVCall: - M = SanitizerKind::CFINVCall; - break; - case CFITCK_DerivedCast: - M = SanitizerKind::CFIDerivedCast; - break; - case CFITCK_UnrelatedCast: - M = SanitizerKind::CFIUnrelatedCast; - break; - case CFITCK_ICall: - llvm_unreachable("not expecting CFITCK_ICall"); - } - llvm::Constant *StaticData[] = { llvm::ConstantInt::get(Int8Ty, TCK), EmitCheckSourceLocation(Loc), @@ -2688,7 +2716,8 @@ bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { return false; std::string TypeName = RD->getQualifiedNameAsString(); - return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName); + return !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName); } llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( @@ -2716,79 +2745,6 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( cast<llvm::PointerType>(VTable->getType())->getElementType()); } -bool -CodeGenFunction::CanDevirtualizeMemberFunctionCall(const Expr *Base, - const CXXMethodDecl *MD) { - // When building with -fapple-kext, all calls must go through the vtable since - // the kernel linker can do runtime patching of vtables. - if (getLangOpts().AppleKext) - return false; - - // If the member function is marked 'final', we know that it can't be - // overridden and can therefore devirtualize it unless it's pure virtual. - if (MD->hasAttr<FinalAttr>()) - return !MD->isPure(); - - // If the base expression (after skipping derived-to-base conversions) is a - // class prvalue, then we can devirtualize. - Base = Base->getBestDynamicClassTypeExpr(); - if (Base->isRValue() && Base->getType()->isRecordType()) - return true; - - // If we don't even know what we would call, we can't devirtualize. - const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); - if (!BestDynamicDecl) - return false; - - // There may be a method corresponding to MD in a derived class. - const CXXMethodDecl *DevirtualizedMethod = - MD->getCorrespondingMethodInClass(BestDynamicDecl); - - // If that method is pure virtual, we can't devirtualize. If this code is - // reached, the result would be UB, not a direct call to the derived class - // function, and we can't assume the derived class function is defined. - if (DevirtualizedMethod->isPure()) - return false; - - // If that method is marked final, we can devirtualize it. - if (DevirtualizedMethod->hasAttr<FinalAttr>()) - return true; - - // Similarly, if the class itself is marked 'final' it can't be overridden - // and we can therefore devirtualize the member function call. - if (BestDynamicDecl->hasAttr<FinalAttr>()) - return true; - - if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) { - if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) { - // This is a record decl. We know the type and can devirtualize it. - return VD->getType()->isRecordType(); - } - - return false; - } - - // We can devirtualize calls on an object accessed by a class member access - // expression, since by C++11 [basic.life]p6 we know that it can't refer to - // a derived class object constructed in the same location. - if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base)) - if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl())) - return VD->getType()->isRecordType(); - - // Likewise for calls on an object accessed by a (non-reference) pointer to - // member access. - if (auto *BO = dyn_cast<BinaryOperator>(Base)) { - if (BO->isPtrMemOp()) { - auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>(); - if (MPT->getPointeeType()->isRecordType()) - return true; - } - } - - // We can't devirtualize the call. - return false; -} - void CodeGenFunction::EmitForwardingCallToLambda( const CXXMethodDecl *callOperator, CallArgList &callArgs) { @@ -2828,6 +2784,15 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { const BlockDecl *BD = BlockInfo->getBlockDecl(); const VarDecl *variable = BD->capture_begin()->getVariable(); const CXXRecordDecl *Lambda = variable->getType()->getAsCXXRecordDecl(); + const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); + + if (CallOp->isVariadic()) { + // FIXME: Making this work correctly is nasty because it requires either + // cloning the body of the call operator or making the call operator + // forward. + CGM.ErrorUnsupported(CurCodeDecl, "lambda conversion to variadic function"); + return; + } // Start building arguments for forwarding call CallArgList CallArgs; @@ -2842,18 +2807,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { assert(!Lambda->isGenericLambda() && "generic lambda interconversion to block not implemented"); - EmitForwardingCallToLambda(Lambda->getLambdaCallOperator(), CallArgs); -} - -void CodeGenFunction::EmitLambdaToBlockPointerBody(FunctionArgList &Args) { - if (cast<CXXMethodDecl>(CurCodeDecl)->isVariadic()) { - // FIXME: Making this work correctly is nasty because it requires either - // cloning the body of the call operator or making the call operator forward. - CGM.ErrorUnsupported(CurCodeDecl, "lambda conversion to variadic function"); - return; - } - - EmitFunctionBody(Args, cast<FunctionDecl>(CurGD.getDecl())->getBody()); + EmitForwardingCallToLambda(CallOp, CallArgs); } void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { @@ -2886,7 +2840,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { if (MD->isVariadic()) { // FIXME: Making this work correctly is nasty because it requires either // cloning the body of the call operator or making the call operator forward. diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index df6741da4a..22055b2cb9 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -51,8 +51,7 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { if (rv.isComplex()) { CodeGenFunction::ComplexPairTy V = rv.getComplexVal(); llvm::Type *ComplexTy = - llvm::StructType::get(V.first->getType(), V.second->getType(), - (void*) nullptr); + llvm::StructType::get(V.first->getType(), V.second->getType()); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); @@ -449,6 +448,13 @@ void CodeGenFunction::PopCleanupBlocks( auto *Inst = dyn_cast_or_null<llvm::Instruction>(*ReloadedValue); if (!Inst) continue; + + // Don't spill static allocas, they dominate all cleanups. These are created + // by binding a reference to a local variable or temporary. + auto *AI = dyn_cast<llvm::AllocaInst>(Inst); + if (AI && AI->isStaticAlloca()) + continue; + Address Tmp = CreateDefaultAlignTempAlloca(Inst->getType(), "tmp.exprcleanup"); @@ -609,7 +615,7 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, llvm::SwitchInst *si = cast<llvm::SwitchInst>(use.getUser()); if (si->getNumCases() == 1 && si->getDefaultDest() == unreachableBB) { // Replace the switch with a branch. - llvm::BranchInst::Create(si->case_begin().getCaseSuccessor(), si); + llvm::BranchInst::Create(si->case_begin()->getCaseSuccessor(), si); // The switch operand is a load from the cleanup-dest alloca. llvm::LoadInst *condition = cast<llvm::LoadInst>(si->getCondition()); @@ -1090,7 +1096,7 @@ void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest) { break; } - // Otherwise, tell the scope that there's a jump propoagating + // Otherwise, tell the scope that there's a jump propagating // through it. If this isn't new information, all the rest of // the work has been done before. if (!Scope.addBranchThrough(Dest.getBlock())) diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 77497f3275..5842e7b3ff 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -11,36 +11,70 @@ // //===----------------------------------------------------------------------===// +#include "CGCleanup.h" #include "CodeGenFunction.h" +#include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" using namespace clang; using namespace CodeGen; -namespace clang { -namespace CodeGen { +using llvm::Value; +using llvm::BasicBlock; -struct CGCoroData { +namespace { +enum class AwaitKind { Init, Normal, Yield, Final }; +static constexpr llvm::StringLiteral AwaitKindStr[] = {"init", "await", "yield", + "final"}; +} - // Stores the jump destination just before the final suspend. Coreturn - // statements jumps to this point after calling return_xxx promise member. - CodeGenFunction::JumpDest FinalJD; +struct clang::CodeGen::CGCoroData { + // What is the current await expression kind and how many + // await/yield expressions were encountered so far. + // These are used to generate pretty labels for await expressions in LLVM IR. + AwaitKind CurrentAwaitKind = AwaitKind::Init; + unsigned AwaitNum = 0; + unsigned YieldNum = 0; + // How many co_return statements are in the coroutine. Used to decide whether + // we need to add co_return; equivalent at the end of the user authored body. unsigned CoreturnCount = 0; + // A branch to this block is emitted when coroutine needs to suspend. + llvm::BasicBlock *SuspendBB = nullptr; + + // Stores the jump destination just before the coroutine memory is freed. + // This is the destination that every suspend point jumps to for the cleanup + // branch. + CodeGenFunction::JumpDest CleanupJD; + + // Stores the jump destination just before the final suspend. The co_return + // statements jumps to this point after calling return_xxx promise member. + CodeGenFunction::JumpDest FinalJD; + // Stores the llvm.coro.id emitted in the function so that we can supply it // as the first argument to coro.begin, coro.alloc and coro.free intrinsics. // Note: llvm.coro.id returns a token that cannot be directly expressed in a // builtin. llvm::CallInst *CoroId = nullptr; + + // Stores the llvm.coro.begin emitted in the function so that we can replace + // all coro.frame intrinsics with direct SSA value of coro.begin that returns + // the address of the coroutine frame of the current coroutine. + llvm::CallInst *CoroBegin = nullptr; + + // Stores the last emitted coro.free for the deallocate expressions, we use it + // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem). + llvm::CallInst *LastCoroFree = nullptr; + // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by // EmitCoroutineBody. CallExpr const *CoroIdExpr = nullptr; }; -} -} +// Defining these here allows to keep CGCoroData private to this file. clang::CodeGen::CodeGenFunction::CGCoroInfo::CGCoroInfo() {} CodeGenFunction::CGCoroInfo::~CGCoroInfo() {} @@ -66,46 +100,533 @@ static void createCoroData(CodeGenFunction &CGF, CurCoro.Data->CoroIdExpr = CoroIdExpr; } +// Synthesize a pretty name for a suspend point. +static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) { + unsigned No = 0; + switch (Kind) { + case AwaitKind::Init: + case AwaitKind::Final: + break; + case AwaitKind::Normal: + No = ++Coro.AwaitNum; + break; + case AwaitKind::Yield: + No = ++Coro.YieldNum; + break; + } + SmallString<32> Prefix(AwaitKindStr[static_cast<unsigned>(Kind)]); + if (No > 1) { + Twine(No).toVector(Prefix); + } + return Prefix; +} + +// Emit suspend expression which roughly looks like: +// +// auto && x = CommonExpr(); +// if (!x.await_ready()) { +// llvm_coro_save(); +// x.await_suspend(...); (*) +// llvm_coro_suspend(); (**) +// } +// x.await_resume(); +// +// where the result of the entire expression is the result of x.await_resume() +// +// (*) If x.await_suspend return type is bool, it allows to veto a suspend: +// if (x.await_suspend(...)) +// llvm_coro_suspend(); +// +// (**) llvm_coro_suspend() encodes three possible continuations as +// a switch instruction: +// +// %where-to = call i8 @llvm.coro.suspend(...) +// switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend +// i8 0, label %yield.ready ; go here when resumed +// i8 1, label %yield.cleanup ; go here when destroyed +// ] +// +// See llvm's docs/Coroutines.rst for more details. +// +namespace { + struct LValueOrRValue { + LValue LV; + RValue RV; + }; +} +static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, + CoroutineSuspendExpr const &S, + AwaitKind Kind, AggValueSlot aggSlot, + bool ignoreResult, bool forLValue) { + auto *E = S.getCommonExpr(); + + auto Binder = + CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); + auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); + + auto Prefix = buildSuspendPrefixStr(Coro, Kind); + BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready")); + BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend")); + BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup")); + + // If expression is ready, no need to suspend. + CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0); + + // Otherwise, emit suspend logic. + CGF.EmitBlock(SuspendBlock); + + auto &Builder = CGF.Builder; + llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save); + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); + auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + + auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); + if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { + // Veto suspension if requested by bool returning await_suspend. + BasicBlock *RealSuspendBlock = + CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); + CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); + SuspendBlock = RealSuspendBlock; + CGF.EmitBlock(RealSuspendBlock); + } + + // Emit the suspend point. + const bool IsFinalSuspend = (Kind == AwaitKind::Final); + llvm::Function *CoroSuspend = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); + auto *SuspendResult = Builder.CreateCall( + CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); + + // Create a switch capturing three possible continuations. + auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); + Switch->addCase(Builder.getInt8(0), ReadyBlock); + Switch->addCase(Builder.getInt8(1), CleanupBlock); + + // Emit cleanup for this suspend point. + CGF.EmitBlock(CleanupBlock); + CGF.EmitBranchThroughCleanup(Coro.CleanupJD); + + // Emit await_resume expression. + CGF.EmitBlock(ReadyBlock); + LValueOrRValue Res; + if (forLValue) + Res.LV = CGF.EmitLValue(S.getResumeExpr()); + else + Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); + return Res; +} + +RValue CodeGenFunction::EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, + CurCoro.Data->CurrentAwaitKind, aggSlot, + ignoreResult, /*forLValue*/false).RV; +} +RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return emitSuspendExpression(*this, *CurCoro.Data, E, AwaitKind::Yield, + aggSlot, ignoreResult, /*forLValue*/false).RV; +} + void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { ++CurCoro.Data->CoreturnCount; + const Expr *RV = S.getOperand(); + if (RV && RV->getType()->isVoidType()) { + // Make sure to evaluate the expression of a co_return with a void + // expression for side effects. + RunCleanupsScope cleanupScope(*this); + EmitIgnoredExpr(RV); + } EmitStmt(S.getPromiseCall()); EmitBranchThroughCleanup(CurCoro.Data->FinalJD); } + +#ifndef NDEBUG +static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx, + const CoroutineSuspendExpr *E) { + const auto *RE = E->getResumeExpr(); + // Is it possible for RE to be a CXXBindTemporaryExpr wrapping + // a MemberCallExpr? + assert(isa<CallExpr>(RE) && "unexpected suspend expression type"); + return cast<CallExpr>(RE)->getCallReturnType(Ctx); +} +#endif + +LValue +CodeGenFunction::EmitCoawaitLValue(const CoawaitExpr *E) { + assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() && + "Can't have a scalar return unless the return type is a " + "reference type!"); + return emitSuspendExpression(*this, *CurCoro.Data, *E, + CurCoro.Data->CurrentAwaitKind, AggValueSlot::ignored(), + /*ignoreResult*/false, /*forLValue*/true).LV; +} + +LValue +CodeGenFunction::EmitCoyieldLValue(const CoyieldExpr *E) { + assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() && + "Can't have a scalar return unless the return type is a " + "reference type!"); + return emitSuspendExpression(*this, *CurCoro.Data, *E, + AwaitKind::Yield, AggValueSlot::ignored(), + /*ignoreResult*/false, /*forLValue*/true).LV; +} + +// Hunts for the parameter reference in the parameter copy/move declaration. +namespace { +struct GetParamRef : public StmtVisitor<GetParamRef> { +public: + DeclRefExpr *Expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *E) { + assert(Expr == nullptr && "multilple declref in param move"); + Expr = E; + } + void VisitStmt(Stmt *S) { + for (auto *C : S->children()) { + if (C) + Visit(C); + } + } +}; +} + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. + +namespace { + struct ParamReferenceReplacerRAII { + CodeGenFunction::DeclMapTy SavedLocals; + CodeGenFunction::DeclMapTy& LocalDeclMap; + + ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap) + : LocalDeclMap(LocalDeclMap) {} + + void addCopy(DeclStmt const *PM) { + // Figure out what param it refers to. + + assert(PM->isSingleDecl()); + VarDecl const*VD = static_cast<VarDecl const*>(PM->getSingleDecl()); + Expr const *InitExpr = VD->getInit(); + GetParamRef Visitor; + Visitor.Visit(const_cast<Expr*>(InitExpr)); + assert(Visitor.Expr); + auto *DREOrig = cast<DeclRefExpr>(Visitor.Expr); + auto *PD = DREOrig->getDecl(); + + auto it = LocalDeclMap.find(PD); + assert(it != LocalDeclMap.end() && "parameter is not found"); + SavedLocals.insert({ PD, it->second }); + + auto copyIt = LocalDeclMap.find(VD); + assert(copyIt != LocalDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto&& SavedLocal : SavedLocals) { + LocalDeclMap.insert({SavedLocal.first, SavedLocal.second}); + } + } + }; +} + +// For WinEH exception representation backend needs to know what funclet coro.end +// belongs to. That information is passed in a funclet bundle. +static SmallVector<llvm::OperandBundleDef, 1> +getBundlesForCoroEnd(CodeGenFunction &CGF) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + + if (llvm::Instruction *EHPad = CGF.CurrentFuncletPad) + BundleList.emplace_back("funclet", EHPad); + + return BundleList; +} + +namespace { +// We will insert coro.end to cut any of the destructors for objects that +// do not need to be destroyed once the coroutine is resumed. +// See llvm/docs/Coroutines.rst for more details about coro.end. +struct CallCoroEnd final : public EHScopeStack::Cleanup { + void Emit(CodeGenFunction &CGF, Flags flags) override { + auto &CGM = CGF.CGM; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + // See if we have a funclet bundle to associate coro.end with. (WinEH) + auto Bundles = getBundlesForCoroEnd(CGF); + auto *CoroEnd = CGF.Builder.CreateCall( + CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles); + if (Bundles.empty()) { + // Otherwise, (landingpad model), create a conditional branch that leads + // either to a cleanup block or a block with EH resume instruction. + auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); + CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); + CGF.EmitBlock(CleanupContBB); + } + } +}; +} + +namespace { +// Make sure to call coro.delete on scope exit. +struct CallCoroDelete final : public EHScopeStack::Cleanup { + Stmt *Deallocate; + + // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;" + + // Note: That deallocation will be emitted twice: once for a normal exit and + // once for exceptional exit. This usage is safe because Deallocate does not + // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() + // builds a single call to a deallocation function which is safe to emit + // multiple times. + void Emit(CodeGenFunction &CGF, Flags) override { + // Remember the current point, as we are going to emit deallocation code + // first to get to coro.free instruction that is an argument to a delete + // call. + BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock(); + + auto *FreeBB = CGF.createBasicBlock("coro.free"); + CGF.EmitBlock(FreeBB); + CGF.EmitStmt(Deallocate); + + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + CGF.EmitBlock(AfterFreeBB); + + // We should have captured coro.free from the emission of deallocate. + auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; + if (!CoroFree) { + CGF.CGM.Error(Deallocate->getLocStart(), + "Deallocation expressoin does not refer to coro.free"); + return; + } + + // Get back to the block we were originally and move coro.free there. + auto *InsertPt = SaveInsertBlock->getTerminator(); + CoroFree->moveBefore(InsertPt); + CGF.Builder.SetInsertPoint(InsertPt); + + // Add if (auto *mem = coro.free) Deallocate; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr); + CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB); + + // No longer need old terminator. + InsertPt->eraseFromParent(); + CGF.Builder.SetInsertPoint(AfterFreeBB); + } + explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} +}; +} + +namespace { +struct GetReturnObjectManager { + CodeGenFunction &CGF; + CGBuilderTy &Builder; + const CoroutineBodyStmt &S; + + Address GroActiveFlag; + CodeGenFunction::AutoVarEmission GroEmission; + + GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S) + : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()), + GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {} + + // The gro variable has to outlive coroutine frame and coroutine promise, but, + // it can only be initialized after coroutine promise was created, thus, we + // split its emission in two parts. EmitGroAlloca emits an alloca and sets up + // cleanups. Later when coroutine promise is available we initialize the gro + // and sets the flag that the cleanup is now active. + + void EmitGroAlloca() { + auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl()); + if (!GroDeclStmt) { + // If get_return_object returns void, no need to do an alloca. + return; + } + + auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl()); + + // Set GRO flag that it is not initialized yet + GroActiveFlag = + CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active"); + Builder.CreateStore(Builder.getFalse(), GroActiveFlag); + + GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl); + + // Remember the top of EHStack before emitting the cleanup. + auto old_top = CGF.EHStack.stable_begin(); + CGF.EmitAutoVarCleanups(GroEmission); + auto top = CGF.EHStack.stable_begin(); + + // Make the cleanup conditional on gro.active + for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top); + b != e; b++) { + if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) { + assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?"); + Cleanup->setActiveFlag(GroActiveFlag); + Cleanup->setTestFlagInEHCleanup(); + Cleanup->setTestFlagInNormalCleanup(); + } + } + } + + void EmitGroInit() { + if (!GroActiveFlag.isValid()) { + // No Gro variable was allocated. Simply emit the call to + // get_return_object. + CGF.EmitStmt(S.getResultDecl()); + return; + } + + CGF.EmitAutoVarInit(GroEmission); + Builder.CreateStore(Builder.getTrue(), GroActiveFlag); + } +}; +} + +static void emitBodyAndFallthrough(CodeGenFunction &CGF, + const CoroutineBodyStmt &S, Stmt *Body) { + CGF.EmitStmt(Body); + const bool CanFallthrough = CGF.Builder.GetInsertBlock(); + if (CanFallthrough) + if (Stmt *OnFallthrough = S.getFallthroughHandler()) + CGF.EmitStmt(OnFallthrough); +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *EntryBB = Builder.GetInsertBlock(); + auto *AllocBB = createBasicBlock("coro.alloc"); + auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); + auto *RetBB = createBasicBlock("coro.ret"); auto *CoroId = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::coro_id), {Builder.getInt32(NewAlign), NullPtr, NullPtr, NullPtr}); createCoroData(*this, CurCoro, CoroId); + CurCoro.Data->SuspendBB = RetBB; + + // Backend is allowed to elide memory allocations, to help it, emit + // auto mem = coro.alloc() ? 0 : ... allocation code ...; + auto *CoroAlloc = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); + + Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + + EmitBlock(AllocBB); + auto *AllocateCall = EmitScalarExpr(S.getAllocate()); + auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); + + // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. + if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { + auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); + + // See if allocation was successful. + auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); + auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr); + Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB); + + // If not, return OnAllocFailure object. + EmitBlock(RetOnFailureBB); + EmitStmt(RetOnAllocFailure); + } + else { + Builder.CreateBr(InitBB); + } + + EmitBlock(InitBB); + + // Pass the result of the allocation to coro.begin. + auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + Phi->addIncoming(NullPtr, EntryBB); + Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + auto *CoroBegin = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); + CurCoro.Data->CoroBegin = CoroBegin; - EmitScalarExpr(S.getAllocate()); + GetReturnObjectManager GroManager(*this, S); + GroManager.EmitGroAlloca(); - // FIXME: Setup cleanup scopes. + CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB); + { + ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); + CodeGenFunction::RunCleanupsScope ResumeScope(*this); + EHStack.pushCleanup<CallCoroDelete>(NormalAndEHCleanup, S.getDeallocate()); + + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. + for (auto *PM : S.getParamMoves()) { + EmitStmt(PM); + ParamReplacer.addCopy(cast<DeclStmt>(PM)); + // TODO: if(CoroParam(...)) need to surround ctor and dtor + // for the copy, so that llvm can elide it if the copy is + // not needed. + } - EmitStmt(S.getPromiseDeclStmt()); + EmitStmt(S.getPromiseDeclStmt()); - CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); + Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl()); + auto *PromiseAddrVoidPtr = + new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId); + // Update CoroId to refer to the promise. We could not do it earlier because + // promise local variable was not emitted yet. + CoroId->setArgOperand(1, PromiseAddrVoidPtr); - // FIXME: Emit initial suspend and more before the body. + // Now we have the promise, initialize the GRO + GroManager.EmitGroInit(); - EmitStmt(S.getBody()); + EHStack.pushCleanup<CallCoroEnd>(EHCleanup); - // See if we need to generate final suspend. - const bool CanFallthrough = Builder.GetInsertBlock(); - const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; - if (CanFallthrough || HasCoreturns) { - EmitBlock(FinalBB); - // FIXME: Emit final suspend. + CurCoro.Data->CurrentAwaitKind = AwaitKind::Init; + EmitStmt(S.getInitSuspendStmt()); + CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); + + CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; + + if (auto *OnException = S.getExceptionHandler()) { + auto Loc = S.getLocStart(); + CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException); + auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); + + EnterCXXTryStmt(*TryStmt); + emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock()); + ExitCXXTryStmt(*TryStmt); + } + else { + emitBodyAndFallthrough(*this, S, S.getBody()); + } + + // See if we need to generate final suspend. + const bool CanFallthrough = Builder.GetInsertBlock(); + const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; + if (CanFallthrough || HasCoreturns) { + EmitBlock(FinalBB); + CurCoro.Data->CurrentAwaitKind = AwaitKind::Final; + EmitStmt(S.getFinalSuspendStmt()); + } else { + // We don't need FinalBB. Emit it to make sure the block is deleted. + EmitBlock(FinalBB, /*IsFinished=*/true); + } } - EmitStmt(S.getDeallocate()); - // FIXME: Emit return for the coroutine return object. + EmitBlock(RetBB); + // Emit coro.end before getReturnStmt (and parameter destructors), since + // resume and destroy parts of the coroutine should not include them. + llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); + Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); + + if (Stmt *Ret = S.getReturnStmt()) + EmitStmt(Ret); } // Emit coroutine intrinsic and patch up arguments of the token type. @@ -115,6 +636,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, switch (IID) { default: break; + // The coro.frame builtin is replaced with an SSA value of the coro.begin + // intrinsic. + case llvm::Intrinsic::coro_frame: { + if (CurCoro.Data && CurCoro.Data->CoroBegin) { + return RValue::get(CurCoro.Data->CoroBegin); + } + CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin " + "has been used earlier in this function"); + auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + return RValue::get(NullPtr); + } // The following three intrinsics take a token parameter referring to a token // returned by earlier call to @llvm.coro.id. Since we cannot represent it in // builtins, we patch it up here. @@ -128,6 +660,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has" " been used earlier in this function"); // Fallthrough to the next case to add TokenNone as the first argument. + LLVM_FALLTHROUGH; } // @llvm.coro.suspend takes a token parameter. Add token 'none' as the first // argument. @@ -141,10 +674,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, llvm::Value *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); + // Note: The following code is to enable to emit coro.id and coro.begin by + // hand to experiment with coroutines in C. // If we see @llvm.coro.id remember it in the CoroData. We will update // coro.alloc, coro.begin and coro.free intrinsics to refer to it. if (IID == llvm::Intrinsic::coro_id) { createCoroData(*this, CurCoro, Call, E); } + else if (IID == llvm::Intrinsic::coro_begin) { + if (CurCoro.Data) + CurCoro.Data->CoroBegin = Call; + } + else if (IID == llvm::Intrinsic::coro_free) { + // Remember the last coro_free as we need it to build the conditional + // deletion of the coroutine frame. + if (CurCoro.Data) + CurCoro.Data->LastCoroFree = Call; + } return RValue::get(Call); } diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 05987be845..3e4acd208f 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -18,6 +18,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" @@ -28,6 +29,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" #include "clang/Frontend/CodeGenOptions.h" +#include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PreprocessorOptions.h" @@ -95,6 +97,10 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation, } OriginalLocation = CGF->Builder.getCurrentDebugLocation(); + + if (OriginalLocation && !DI->CGM.getExpressionLocationsEnabled()) + return; + if (TemporaryLocation.isValid()) { DI->EmitLocation(CGF->Builder, TemporaryLocation); return; @@ -209,7 +215,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, // Check namespace. if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context)) - return getOrCreateNameSpace(NSDecl); + return getOrCreateNamespace(NSDecl); if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) @@ -218,6 +224,19 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, return Default; } +PrintingPolicy CGDebugInfo::getPrintingPolicy() const { + PrintingPolicy PP = CGM.getContext().getPrintingPolicy(); + + // If we're emitting codeview, it's important to try to match MSVC's naming so + // that visualizers written for MSVC will trigger for our class names. In + // particular, we can't have spaces between arguments of standard templates + // like basic_string and vector. + if (CGM.getCodeGenOpts().EmitCodeView) + PP.MSVCFormatting = true; + + return PP; +} + StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { assert(FD && "Invalid FunctionDecl!"); IdentifierInfo *FII = FD->getIdentifier(); @@ -238,18 +257,16 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - PrintingPolicy Policy(CGM.getLangOpts()); - Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView; if (!UseQualifiedName) FD->printName(OS); else - FD->printQualifiedName(OS, Policy); + FD->printQualifiedName(OS, getPrintingPolicy()); // Add any template specialization args. if (Info) { const TemplateArgumentList *TArgs = Info->TemplateArguments; TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(), - Policy); + getPrintingPolicy()); } // Copy this name on the side and use its reference. @@ -296,7 +313,7 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { if (isa<ClassTemplateSpecializationDecl>(RD)) { SmallString<128> Name; llvm::raw_svector_ostream OS(Name); - RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), + RD->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified*/ false); // Copy this name on the side and use its reference. @@ -483,6 +500,16 @@ void CGDebugInfo::CreateCompileUnit() { llvm::sys::path::append(MainFileDirSS, MainFileName); MainFileName = MainFileDirSS.str(); } + // If the main file name provided is identical to the input file name, and + // if the input file is a preprocessed source, use the module name for + // debug info. The module name comes from the name specified in the first + // linemarker if the input is a preprocessed source. + if (MainFile->getName() == MainFileName && + FrontendOptions::getInputKindForExtension( + MainFile->getName().rsplit('.').second) + .isPreprocessed()) + MainFileName = CGM.getModule().getName().str(); + CSKind = computeChecksum(SM.getMainFileID(), Checksum); } @@ -527,14 +554,16 @@ void CGDebugInfo::CreateCompileUnit() { // Create new compile unit. // FIXME - Eliminate TheCU. + auto &CGOpts = CGM.getCodeGenOpts(); TheCU = DBuilder.createCompileUnit( - LangTag, DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSKind, - Checksum), - Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */, - CGM.getCodeGenOpts().SplitDwarfInlining, - CGM.getCodeGenOpts().DebugInfoForProfiling); + LangTag, + DBuilder.createFile(remapDIPath(MainFileName), + remapDIPath(getCurrentDirname()), CSKind, Checksum), + Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex, + CGOpts.DwarfDebugFlags, RuntimeVers, + CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind, + 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, + CGOpts.GnuPubnames); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -643,6 +672,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Half: case BuiltinType::Float: case BuiltinType::LongDouble: + case BuiltinType::Float16: case BuiltinType::Float128: case BuiltinType::Double: // FIXME: For targets where long double and __float128 have the same size, @@ -803,6 +833,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + if (CGM.getCodeGenOpts().DebugFwdTemplateParams) + if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + DBuilder.replaceArrays(RetTy, llvm::DINodeArray(), + CollectCXXTemplateParams(TSpecial, DefUnit)); ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), std::make_tuple(static_cast<llvm::Metadata *>(RetTy))); @@ -907,12 +941,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - Ty->getTemplateName().print(OS, CGM.getContext().getPrintingPolicy(), + Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); TemplateSpecializationType::PrintTemplateArgumentList( - OS, Ty->template_arguments(), - CGM.getContext().getPrintingPolicy()); + OS, Ty->template_arguments(), getPrintingPolicy()); auto *AliasDecl = cast<TypeAliasTemplateDecl>( Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); @@ -954,7 +987,7 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_BORLAND_pascal; // FIXME: Create new DW_CC_ codes for these calling conventions. - case CC_X86_64Win64: + case CC_Win64: case CC_X86_64SysV: case CC_AAPCS: case CC_AAPCS_VFP: @@ -1039,7 +1072,13 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, assert(SizeInBits > 0 && "found named 0-width bitfield"); uint64_t StorageOffsetInBits = CGM.getContext().toBits(BitFieldInfo.StorageOffset); - uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset; + uint64_t Offset = BitFieldInfo.Offset; + // The bit offsets for big endian machines are reversed for big + // endian target, compensate for that as the DIDerivedType requires + // un-reversed offsets. + if (CGM.getDataLayout().isBigEndian()) + Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset; + uint64_t OffsetInBits = StorageOffsetInBits + Offset; llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); return DBuilder.createBitFieldMemberType( RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits, @@ -1166,13 +1205,13 @@ void CGDebugInfo::CollectRecordNormalField( elements.push_back(FieldType); } -void CGDebugInfo::CollectRecordNestedRecord( - const RecordDecl *RD, SmallVectorImpl<llvm::Metadata *> &elements) { - QualType Ty = CGM.getContext().getTypeDeclType(RD); +void CGDebugInfo::CollectRecordNestedType( + const TypeDecl *TD, SmallVectorImpl<llvm::Metadata *> &elements) { + QualType Ty = CGM.getContext().getTypeDeclType(TD); // Injected class names are not considered nested records. if (isa<InjectedClassNameType>(Ty)) return; - SourceLocation Loc = RD->getLocation(); + SourceLocation Loc = TD->getLocation(); llvm::DIType *nestedType = getOrCreateType(Ty, getOrCreateFile(Loc)); elements.push_back(nestedType); } @@ -1188,9 +1227,9 @@ void CGDebugInfo::CollectRecordFields( else { const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); - // Debug info for nested records is included in the member list only for + // Debug info for nested types is included in the member list only for // CodeView. - bool IncludeNestedRecords = CGM.getCodeGenOpts().EmitCodeView; + bool IncludeNestedTypes = CGM.getCodeGenOpts().EmitCodeView; // Field number for non-static fields. unsigned fieldNo = 0; @@ -1217,10 +1256,12 @@ void CGDebugInfo::CollectRecordFields( // Bump field number for next field. ++fieldNo; - } else if (const auto *nestedRec = dyn_cast<CXXRecordDecl>(I)) - if (IncludeNestedRecords && !nestedRec->isImplicit() && - nestedRec->getDeclContext() == record) - CollectRecordNestedRecord(nestedRec, elements); + } else if (IncludeNestedTypes) { + if (const auto *nestedType = dyn_cast<TypeDecl>(I)) + if (!nestedType->isImplicit() && + nestedType->getDeclContext() == record) + CollectRecordNestedType(nestedType, elements); + } } } @@ -1371,6 +1412,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( ContainingType = RecordTy; } + if (Method->isStatic()) + Flags |= llvm::DINode::FlagStaticMember; if (Method->isImplicit()) Flags |= llvm::DINode::FlagArtificial; Flags |= getAccessFlag(Method->getAccess(), Method->getParent()); @@ -1582,7 +1625,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, QualType T = E->getType(); if (E->isGLValue()) T = CGM.getContext().getLValueReferenceType(T); - llvm::Constant *V = CGM.EmitConstantExpr(E, T); + llvm::Constant *V = ConstantEmitter(CGM).emitAbstract(E, T); assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( @@ -1758,6 +1801,29 @@ static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { return false; } +/// Does a type definition exist in an imported clang module? +static bool isDefinedInClangModule(const RecordDecl *RD) { + // Only definitions that where imported from an AST file come from a module. + if (!RD || !RD->isFromASTFile()) + return false; + // Anonymous entities cannot be addressed. Treat them as not from module. + if (!RD->isExternallyVisible() && RD->getName().empty()) + return false; + if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { + if (!CXXDecl->isCompleteDefinition()) + return false; + auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); + if (TemplateKind != TSK_Undeclared) { + // This is a template, check the origin of the first member. + if (CXXDecl->field_begin() == CXXDecl->field_end()) + return TemplateKind == TSK_ExplicitInstantiationDeclaration; + if (!CXXDecl->field_begin()->isFromASTFile()) + return false; + } + } + return true; +} + void CGDebugInfo::completeClassData(const RecordDecl *RD) { if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) if (CXXRD->isDynamicClass() && @@ -1765,6 +1831,10 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { llvm::GlobalValue::AvailableExternallyLinkage && !isClassOrMethodDLLImport(CXXRD)) return; + + if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) + return; + completeClass(RD); } @@ -1791,35 +1861,16 @@ static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, return false; } -/// Does a type definition exist in an imported clang module? -static bool isDefinedInClangModule(const RecordDecl *RD) { - // Only definitions that where imported from an AST file come from a module. - if (!RD || !RD->isFromASTFile()) - return false; - // Anonymous entities cannot be addressed. Treat them as not from module. - if (!RD->isExternallyVisible() && RD->getName().empty()) - return false; - if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXDecl->isCompleteDefinition()) - return false; - auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); - if (TemplateKind != TSK_Undeclared) { - // This is a template, check the origin of the first member. - if (CXXDecl->field_begin() == CXXDecl->field_end()) - return TemplateKind == TSK_ExplicitInstantiationDeclaration; - if (!CXXDecl->field_begin()->isFromASTFile()) - return false; - } - } - return true; -} - static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) return true; + if (auto *ES = RD->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(RD) == ExternalASTSource::EK_Always) + return true; + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; @@ -2552,11 +2603,17 @@ void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; + completeUnusedClass(SD); +} + +void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { + if (DebugKind <= codegenoptions::DebugLineTablesOnly) + return; - completeClassData(&SD); + completeClassData(&D); // In case this type has no member function definitions being emitted, ensure // it is retained - RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr()); + RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr()); } llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { @@ -2601,7 +2658,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // best to make this behavior a command line or debugger tuning // option. FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager()); - if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) { + if (Module *M = D->getOwningModule()) { // This is a (sub-)module. auto Info = ExternalASTSource::ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); @@ -2769,6 +2826,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { // them distinct if they are ODR-uniqued. if (FullName.empty()) break; + LLVM_FALLTHROUGH; case llvm::dwarf::DW_TAG_structure_type: case llvm::dwarf::DW_TAG_union_type: @@ -2849,7 +2907,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) - FDContext = getOrCreateNameSpace(NSDecl); + FDContext = getOrCreateNamespace(NSDecl); else if (const RecordDecl *RDecl = dyn_cast_or_null<RecordDecl>(FD->getDeclContext())) { llvm::DIScope *Mod = getParentModuleOrNull(RDecl); @@ -3241,7 +3299,7 @@ void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { llvm::DISubprogram *SP = nullptr; if (FI != SPCache.end()) SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second); - if (!SP) + if (!SP || !SP->isDefinition()) SP = getFunctionStub(GD); FnBeginRegionCount.push_back(LexicalBlockStack.size()); LexicalBlockStack.emplace_back(SP); @@ -3251,7 +3309,7 @@ void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) { assert(CurInlinedAt && "unbalanced inline scope stack"); - EmitFunctionEnd(Builder); + EmitFunctionEnd(Builder, nullptr); setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt()); } @@ -3320,7 +3378,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, LexicalBlockStack.pop_back(); } -void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) { +void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) { assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); unsigned RCount = FnBeginRegionCount.back(); assert(RCount <= LexicalBlockStack.size() && "Region stack mismatch"); @@ -3332,6 +3390,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder) { LexicalBlockStack.pop_back(); } FnBeginRegionCount.pop_back(); + + if (Fn && Fn->getSubprogram()) + DBuilder.finalizeSubprogram(Fn->getSubprogram()); } llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, @@ -3450,50 +3511,35 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); AppendAddressSpaceXDeref(AddressSpace, Expr); - // If this is the first argument and it is implicit then - // give it an object pointer flag. - // FIXME: There has to be a better way to do this, but for static - // functions there won't be an implicit param at arg1 and - // otherwise it is 'self' or 'this'. - if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1) - Flags |= llvm::DINode::FlagObjectPointer; - if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) - if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() && - !VD->getType()->isPointerType()) - Expr.push_back(llvm::dwarf::DW_OP_deref); + // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an + // object pointer flag. + if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) { + if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis || + IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + Flags |= llvm::DINode::FlagObjectPointer; + } + // Note: Older versions of clang used to emit byval references with an extra + // DW_OP_deref, because they referenced the IR arg directly instead of + // referencing an alloca. Newer versions of LLVM don't treat allocas + // differently from other function arguments when used in a dbg.declare. auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); - StringRef Name = VD->getName(); if (!Name.empty()) { if (VD->hasAttr<BlocksAttr>()) { + // Here, we need an offset *into* the alloca. CharUnits offset = CharUnits::fromQuantity(32); - Expr.push_back(llvm::dwarf::DW_OP_plus); + Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of __forwarding field offset = CGM.getContext().toCharUnitsFromBits( CGM.getTarget().getPointerWidth(0)); Expr.push_back(offset.getQuantity()); Expr.push_back(llvm::dwarf::DW_OP_deref); - Expr.push_back(llvm::dwarf::DW_OP_plus); + Expr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of x field offset = CGM.getContext().toCharUnitsFromBits(XOffset); Expr.push_back(offset.getQuantity()); - - // Create the descriptor for the variable. - auto *D = ArgNo - ? DBuilder.createParameterVariable(Scope, VD->getName(), - *ArgNo, Unit, Line, Ty) - : DBuilder.createAutoVariable(Scope, VD->getName(), Unit, - Line, Ty, Align); - - // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare( - Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), - Builder.GetInsertBlock()); - return; - } else if (isa<VariableArrayType>(VD->getType())) - Expr.push_back(llvm::dwarf::DW_OP_deref); + } } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. @@ -3582,8 +3628,9 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( // Self is passed along as an implicit non-arg variable in a // block. Mark it as the object pointer. - if (isa<ImplicitParamDecl>(VD) && VD->getName() == "self") - Ty = CreateSelfType(VD->getType(), Ty); + if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) + if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf) + Ty = CreateSelfType(VD->getType(), Ty); // Get location information. unsigned Line = getLineNumber(VD->getLocation()); @@ -3596,19 +3643,18 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( ->getElementOffset(blockInfo.getCapture(VD).getIndex())); SmallVector<int64_t, 9> addr; - if (isa<llvm::AllocaInst>(Storage)) - addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_deref); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); addr.push_back(offset.getQuantity()); if (isByRef) { addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of __forwarding field offset = CGM.getContext().toCharUnitsFromBits(target.getPointerSizeInBits(0)); addr.push_back(offset.getQuantity()); addr.push_back(llvm::dwarf::DW_OP_deref); - addr.push_back(llvm::dwarf::DW_OP_plus); + addr.push_back(llvm::dwarf::DW_OP_plus_uconst); // offset of x field offset = CGM.getContext().toCharUnitsFromBits(XOffset); addr.push_back(offset.getQuantity()); @@ -3623,12 +3669,11 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( // Insert an llvm.dbg.declare into the current block. auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt); + auto *Expr = DBuilder.createExpression(addr); if (InsertPoint) - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, - InsertPoint); + DBuilder.insertDeclare(Storage, D, Expr, DL, InsertPoint); else - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, - Builder.GetInsertBlock()); + DBuilder.insertDeclare(Storage, D, Expr, DL, Builder.GetInsertBlock()); } void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, @@ -3649,9 +3694,9 @@ bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) { } void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *Arg, + StringRef Name, unsigned ArgNo, - llvm::Value *LocalAddr, + llvm::AllocaInst *Alloca, CGBuilderTy &Builder) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); @@ -3783,19 +3828,11 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Create the descriptor for the parameter. auto *debugVar = DBuilder.createParameterVariable( - scope, Arg->getName(), ArgNo, tunit, line, type, + scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags); - if (LocalAddr) { - // Insert an llvm.dbg.value into the current block. - DBuilder.insertDbgValueIntrinsic( - LocalAddr, 0, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope, CurInlinedAt), - Builder.GetInsertBlock()); - } - // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(), + DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(), llvm::DebugLoc::get(line, column, scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3964,10 +4001,10 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || CGM.getCodeGenOpts().DebugExplicitImport) { + auto Loc = UD.getLocation(); DBuilder.createImportedModule( getCurrentContextDescriptor(cast<Decl>(UD.getDeclContext())), - getOrCreateNameSpace(NSDecl), - getLineNumber(UD.getLocation())); + getOrCreateNamespace(NSDecl), getOrCreateFile(Loc), getLineNumber(Loc)); } } @@ -3990,10 +4027,12 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = - getDeclarationOrDefinition(USD.getUnderlyingDecl())) + getDeclarationOrDefinition(USD.getUnderlyingDecl())) { + auto Loc = USD.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target, - getLineNumber(USD.getLocation())); + getOrCreateFile(Loc), getLineNumber(Loc)); + } } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { @@ -4001,10 +4040,11 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Loc = ID.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())), - getOrCreateModuleRef(Info, DebugTypeExtRefs), - getLineNumber(ID.getLocation())); + getOrCreateModuleRef(Info, DebugTypeExtRefs), getOrCreateFile(Loc), + getLineNumber(Loc)); } } @@ -4016,35 +4056,37 @@ CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; + auto Loc = NA.getLocation(); if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), - EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()), - NA.getName()); + EmitNamespaceAlias(*Underlying), getOrCreateFile(Loc), + getLineNumber(Loc), NA.getName()); else R = DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())), - getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())), - getLineNumber(NA.getLocation()), NA.getName()); + getOrCreateNamespace(cast<NamespaceDecl>(NA.getAliasedNamespace())), + getOrCreateFile(Loc), getLineNumber(Loc), NA.getName()); VH.reset(R); return R; } llvm::DINamespace * -CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) { - NSDecl = NSDecl->getCanonicalDecl(); - auto I = NameSpaceCache.find(NSDecl); - if (I != NameSpaceCache.end()) +CGDebugInfo::getOrCreateNamespace(const NamespaceDecl *NSDecl) { + // Don't canonicalize the NamespaceDecl here: The DINamespace will be uniqued + // if necessary, and this way multiple declarations of the same namespace in + // different parent modules stay distinct. + auto I = NamespaceCache.find(NSDecl); + if (I != NamespaceCache.end()) return cast<llvm::DINamespace>(I->second); - unsigned LineNo = getLineNumber(NSDecl->getLocation()); - llvm::DIFile *FileD = getOrCreateFile(NSDecl->getLocation()); llvm::DIScope *Context = getDeclContextDescriptor(NSDecl); - llvm::DINamespace *NS = DBuilder.createNameSpace( - Context, NSDecl->getName(), FileD, LineNo, NSDecl->isInline()); - NameSpaceCache[NSDecl].reset(NS); + // Don't trust the context if it is a DIModule (see comment above). + llvm::DINamespace *NS = + DBuilder.createNameSpace(Context, NSDecl->getName(), NSDecl->isInline()); + NamespaceCache[NSDecl].reset(NS); return NS; } diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index c6d1c66e13..4f7b7f2a0d 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -125,7 +125,7 @@ class CGDebugInfo { /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations) that aren't covered by other more specific caches. llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache; - llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NameSpaceCache; + llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NamespaceCache; llvm::DenseMap<const NamespaceAliasDecl *, llvm::TrackingMDRef> NamespaceAliasCache; llvm::DenseMap<const Decl *, llvm::TypedTrackingMDRef<llvm::DIDerivedType>> @@ -194,8 +194,9 @@ class CGDebugInfo { getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F); /// \return debug info descriptor for vtable. llvm::DIType *getOrCreateVTablePtrType(llvm::DIFile *F); + /// \return namespace descriptor for the given namespace decl. - llvm::DINamespace *getOrCreateNameSpace(const NamespaceDecl *N); + llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N); llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile *F); llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache); @@ -277,8 +278,8 @@ class CGDebugInfo { llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DIType *RecordTy, const RecordDecl *RD); - void CollectRecordNestedRecord(const RecordDecl *RD, - SmallVectorImpl<llvm::Metadata *> &E); + void CollectRecordNestedType(const TypeDecl *RD, + SmallVectorImpl<llvm::Metadata *> &E); void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DICompositeType *RecordTy); @@ -366,7 +367,7 @@ public: void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType); /// Constructs the debug code for exiting a function. - void EmitFunctionEnd(CGBuilderTy &Builder); + void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn); /// Emit metadata to indicate the beginning of a new lexical block /// and push the block onto the stack. @@ -397,8 +398,8 @@ public: /// Emit call to \c llvm.dbg.declare for the block-literal argument /// to a block invocation function. void EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *Arg, unsigned ArgNo, - llvm::Value *LocalAddr, + StringRef Name, unsigned ArgNo, + llvm::AllocaInst *LocalAddr, CGBuilderTy &Builder); /// Emit information about a global variable. @@ -438,6 +439,7 @@ public: void completeClass(const RecordDecl *RD); void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD); + void completeUnusedClass(const CXXRecordDecl &D); /// Create debug info for a macro defined by a #define directive or a macro /// undefined by a #undef directive. @@ -556,6 +558,9 @@ private: unsigned LineNo, StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext); + /// Get the printing policy for producing names for debug info. + PrintingPolicy getPrintingPolicy() const; + /// Get function name for the given FunctionDecl. If the name is /// constructed on demand (e.g., C++ destructor) then the name is /// stored on the side. diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index cfadce5c22..44a6db4750 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -11,14 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" @@ -152,7 +154,14 @@ void CodeGenFunction::EmitDecl(const Decl &D) { /// EmitVarDecl - This method handles emission of any variable declaration /// inside a function, including static vars etc. void CodeGenFunction::EmitVarDecl(const VarDecl &D) { - if (D.isStaticLocal()) { + if (D.hasExternalStorage()) + // Don't emit it now, allow it to be emitted lazily on its first use. + return; + + // Some function-scope variable does not have static storage but still + // needs to be emitted like a static variable, e.g. a function-scope + // variable in constant address space in OpenCL. + if (D.getStorageDuration() != SD_Automatic) { llvm::GlobalValue::LinkageTypes Linkage = CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false); @@ -163,10 +172,6 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return EmitStaticVarDecl(D, Linkage); } - if (D.hasExternalStorage()) - // Don't emit it now, allow it to be emitted lazily on its first use. - return; - if (D.getType().getAddressSpace() == LangAS::opencl_local) return CGM.getOpenCLRuntime().EmitWorkGroupLocalVarDecl(*this, D); @@ -217,8 +222,8 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( Name = getStaticDeclName(*this, D); llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty); - unsigned AddrSpace = - GetGlobalVarAddressSpace(&D, getContext().getTargetAddressSpace(Ty)); + LangAS AS = GetGlobalVarAddressSpace(&D); + unsigned TargetAS = getContext().getTargetAddressSpace(AS); // Local address space cannot have an initializer. llvm::Constant *Init = nullptr; @@ -227,12 +232,9 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( else Init = llvm::UndefValue::get(LTy); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(getModule(), LTy, - Ty.isConstant(getContext()), Linkage, - Init, Name, nullptr, - llvm::GlobalVariable::NotThreadLocal, - AddrSpace); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, + nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); setGlobalVisibility(GV, &D); @@ -250,11 +252,12 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( } // Make sure the result is of the correct type. - unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(Ty); + LangAS ExpectedAS = Ty.getAddressSpace(); llvm::Constant *Addr = GV; - if (AddrSpace != ExpectedAddrSpace) { - llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace); - Addr = llvm::ConstantExpr::getAddrSpaceCast(GV, PTy); + if (AS != ExpectedAS) { + Addr = getTargetCodeGenInfo().performAddrSpaceCast( + *this, GV, AS, ExpectedAS, + LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS))); } setStaticLocalDeclAddress(&D, Addr); @@ -305,7 +308,8 @@ static bool hasNontrivialDestruction(QualType T) { llvm::GlobalVariable * CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *GV) { - llvm::Constant *Init = CGM.EmitConstantInit(D, this); + ConstantEmitter emitter(*this); + llvm::Constant *Init = emitter.tryEmitForInitializer(D); // If constant emission failed, then this should be a C++ static // initializer. @@ -353,6 +357,8 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, GV->setConstant(CGM.isTypeConstant(D.getType(), true)); GV->setInitializer(Init); + emitter.finalize(GV); + if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) { // We have a constant initializer, but a nontrivial destructor. We still // need to perform a guarded "initialization" in order to register the @@ -402,6 +408,13 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, if (D.hasAttr<AnnotateAttr>()) CGM.AddGlobalAnnotations(&D, var); + if (auto *SA = D.getAttr<PragmaClangBSSSectionAttr>()) + var->addAttribute("bss-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangDataSectionAttr>()) + var->addAttribute("data-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>()) + var->addAttribute("rodata-section", SA->getName()); + if (const SectionAttr *SA = D.getAttr<SectionAttr>()) var->setSection(SA->getName()); @@ -685,11 +698,10 @@ void CodeGenFunction::EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, // hand side must be nonnull. SanitizerScope SanScope(this); llvm::Value *IsNotNull = Builder.CreateIsNotNull(RHS); - // FIXME: The runtime shouldn't refer to a 'reference'. llvm::Constant *StaticData[] = { EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(LHS.getType()), - llvm::ConstantInt::get(Int8Ty, 1), - llvm::ConstantInt::get(Int8Ty, TCK_ReferenceBinding)}; + llvm::ConstantInt::get(Int8Ty, 0), // The LogAlignment info is unused. + llvm::ConstantInt::get(Int8Ty, TCK_NonnullAssign)}; EmitCheck({{IsNotNull, SanitizerKind::NullabilityAssign}}, SanitizerHandler::TypeMismatch, StaticData, RHS); } @@ -925,7 +937,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, return nullptr; llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); - Addr = Builder.CreateBitCast(Addr, Int8PtrTy); + Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr}); C->setDoesNotThrow(); @@ -933,7 +945,7 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, } void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { - Addr = Builder.CreateBitCast(Addr, Int8PtrTy); + Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy); llvm::CallInst *C = Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr}); C->setDoesNotThrow(); @@ -944,6 +956,9 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); + assert( + Ty.getAddressSpace() == LangAS::Default || + (Ty.getAddressSpace() == LangAS::opencl_private && getLangOpts().OpenCL)); AutoVarEmission emission(D); @@ -1036,8 +1051,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment); - address.getPointer()->setName(D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1097,10 +1111,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { llvm::Type *llvmTy = ConvertTypeForMem(elementType); // Allocate memory for the array. - llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); - vla->setAlignment(alignment.getQuantity()); - - address = Address(vla, alignment); + address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); } setAddrOfLocalVar(&D, address); @@ -1119,6 +1130,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, address.getPointer()); + // Make sure we call @llvm.lifetime.end. + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, + emission.getAllocatedAddress(), + emission.getSizeForLifetimeMarkers()); + return emission; } @@ -1225,7 +1242,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || D.isConstexpr()) { assert(!capturedByInit && "constant init contains a capturing block?"); - constant = CGM.EmitConstantInit(D, this); + constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); } if (!constant) { @@ -1249,7 +1266,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::ConstantInt::get(IntPtrTy, getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = Int8PtrTy; + llvm::Type *BP = AllocaInt8PtrTy; if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); @@ -1409,13 +1426,6 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; - // Make sure we call @llvm.lifetime.end. This needs to happen - // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) - EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, - emission.getAllocatedAddress(), - emission.getSizeForLifetimeMarkers()); - // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) emitAutoVarTypeCleanup(emission, dtorKind); @@ -1727,17 +1737,19 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, /// Lazily declare the @llvm.lifetime.start intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { - if (LifetimeStartFn) return LifetimeStartFn; + if (LifetimeStartFn) + return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_start); + llvm::Intrinsic::lifetime_start, AllocaInt8PtrTy); return LifetimeStartFn; } /// Lazily declare the @llvm.lifetime.end intrinsic. llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { - if (LifetimeEndFn) return LifetimeEndFn; + if (LifetimeEndFn) + return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), - llvm::Intrinsic::lifetime_end); + llvm::Intrinsic::lifetime_end, AllocaInt8PtrTy); return LifetimeEndFn; } @@ -1852,6 +1864,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, lt = Qualifiers::OCL_ExplicitNone; } + // Load objects passed indirectly. + if (Arg.isIndirect() && !ArgVal) + ArgVal = Builder.CreateLoad(DeclPtr); + if (lt == Qualifiers::OCL_Strong) { if (!isConsumed) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) { diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index f56e182169..e3437eb936 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -18,6 +18,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/Path.h" using namespace clang; @@ -237,7 +238,7 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::Constant *atexit = - CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeSet(), + CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) atexitFn->setDoesNotThrow(); @@ -259,6 +260,43 @@ void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit); } +void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, + llvm::BasicBlock *InitBlock, + llvm::BasicBlock *NoInitBlock, + GuardKind Kind, + const VarDecl *D) { + assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable"); + + // A guess at how many times we will enter the initialization of a + // variable, depending on the kind of variable. + static const uint64_t InitsPerTLSVar = 1024; + static const uint64_t InitsPerLocalVar = 1024 * 1024; + + llvm::MDNode *Weights; + if (Kind == GuardKind::VariableGuard && !D->isLocalVarDecl()) { + // For non-local variables, don't apply any weighting for now. Due to our + // use of COMDATs, we expect there to be at most one initialization of the + // variable per DSO, but we have no way to know how many DSOs will try to + // initialize the variable. + Weights = nullptr; + } else { + uint64_t NumInits; + // FIXME: For the TLS case, collect and use profiling information to + // determine a more accurate brach weight. + if (Kind == GuardKind::TlsGuard || D->getTLSKind()) + NumInits = InitsPerTLSVar; + else + NumInits = InitsPerLocalVar; + + // The probability of us entering the initializer is + // 1 / (total number of times we attempt to initialize the variable). + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + Weights = MDHelper.createBranchWeights(1, NumInits - 1); + } + + Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights); +} + llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI, SourceLocation Loc, bool TLS) { @@ -278,17 +316,25 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( if (!getLangOpts().Exceptions) Fn->setDoesNotThrow(); - if (!isInSanitizerBlacklist(Fn, Loc)) { - if (getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | - SanitizerKind::KernelAddress)) - Fn->addFnAttr(llvm::Attribute::SanitizeAddress); - if (getLangOpts().Sanitize.has(SanitizerKind::Thread)) - Fn->addFnAttr(llvm::Attribute::SanitizeThread); - if (getLangOpts().Sanitize.has(SanitizerKind::Memory)) - Fn->addFnAttr(llvm::Attribute::SanitizeMemory); - if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack)) - Fn->addFnAttr(llvm::Attribute::SafeStack); - } + if (getLangOpts().Sanitize.has(SanitizerKind::Address) && + !isInSanitizerBlacklist(SanitizerKind::Address, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeAddress); + + if (getLangOpts().Sanitize.has(SanitizerKind::KernelAddress) && + !isInSanitizerBlacklist(SanitizerKind::KernelAddress, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeAddress); + + if (getLangOpts().Sanitize.has(SanitizerKind::Thread) && + !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeThread); + + if (getLangOpts().Sanitize.has(SanitizerKind::Memory) && + !isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeMemory); + + if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) && + !isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SafeStack); return Fn; } @@ -449,16 +495,12 @@ CodeGenModule::EmitCXXGlobalInitFunc() { PrioritizedCXXGlobalInits.clear(); } - SmallString<128> FileName; - SourceManager &SM = Context.getSourceManager(); - if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - // Include the filename in the symbol name. Including "sub_" matches gcc and - // makes sure these symbols appear lexicographically behind the symbols with - // priority emitted above. - FileName = llvm::sys::path::filename(MainFile->getName()); - } else { + // Include the filename in the symbol name. Including "sub_" matches gcc and + // makes sure these symbols appear lexicographically behind the symbols with + // priority emitted above. + SmallString<128> FileName = llvm::sys::path::filename(getModule().getName()); + if (FileName.empty()) FileName = "<null>"; - } for (size_t i = 0; i < FileName.size(); ++i) { // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens @@ -539,7 +581,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); ExitBlock = createBasicBlock("exit"); - Builder.CreateCondBr(Uninit, InitBlock, ExitBlock); + EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock, + GuardKind::TlsGuard, nullptr); EmitBlock(InitBlock); // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's @@ -571,9 +614,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, FinishFunction(); } -void CodeGenFunction::GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> > - &DtorsAndObjects) { +void CodeGenFunction::GenerateCXXGlobalDtorsFunc( + llvm::Function *Fn, + const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> + &DtorsAndObjects) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -602,9 +646,9 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray, const VarDecl *VD) { FunctionArgList args; - ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); - args.push_back(&dst); + ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); + args.push_back(&Dst); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 8f999d175c..6a421e81df 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -15,6 +15,7 @@ #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGObjCRuntime.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/Mangle.h" #include "clang/AST/StmtCXX.h" @@ -164,26 +165,27 @@ static const EHPersonality &getCXXPersonality(const llvm::Triple &T, static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, const LangOptions &L) { switch (L.ObjCRuntime.getKind()) { + // In the fragile ABI, just use C++ exception handling and hope + // they're not doing crazy exception mixing. + case ObjCRuntime::FragileMacOSX: + return getCXXPersonality(T, L); + // The ObjC personality defers to the C++ personality for non-ObjC // handlers. Unlike the C++ case, we use the same personality // function on targets using (backend-driven) SJLJ EH. case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: - return EHPersonality::NeXT_ObjC; + return getObjCPersonality(T, L); - // In the fragile ABI, just use C++ exception handling and hope - // they're not doing crazy exception mixing. - case ObjCRuntime::FragileMacOSX: - return getCXXPersonality(T, L); + case ObjCRuntime::GNUstep: + return EHPersonality::GNU_ObjCXX; // The GCC runtime's personality function inherently doesn't support - // mixed EH. Use the C++ personality just to avoid returning null. + // mixed EH. Use the ObjC personality just to avoid returning null. case ObjCRuntime::GCC: - case ObjCRuntime::ObjFW: // XXX: this will change soon - return EHPersonality::GNU_ObjC; - case ObjCRuntime::GNUstep: - return EHPersonality::GNU_ObjCXX; + case ObjCRuntime::ObjFW: + return getObjCPersonality(T, L); } llvm_unreachable("bad runtime kind"); } @@ -224,14 +226,19 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, } const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { - return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(CGF.CurCodeDecl)); + const auto *FD = CGF.CurCodeDecl; + // For outlined finallys and filters, use the SEH personality in case they + // contain more SEH. This mostly only affects finallys. Filters could + // hypothetically use gnu statement expressions to sneak in nested SEH. + FD = FD ? FD : CGF.CurSEHParent; + return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, - llvm::AttributeSet(), /*Local=*/true); + llvm::AttributeList(), /*Local=*/true); } static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, @@ -765,8 +772,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { llvm::BasicBlock *lpad = createBasicBlock("lpad"); EmitBlock(lpad); - llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad( - llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0); + llvm::LandingPadInst *LPadInst = + Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0); llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0); Builder.CreateStore(LPadExn, getExceptionSlot()); @@ -1310,8 +1317,8 @@ llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() { if (!CurFn->hasPersonalityFn()) CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality)); - llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad( - llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr), 0); + llvm::LandingPadInst *LPadInst = + Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty), 0); LPadInst->addClause(getCatchAllValue(*this)); llvm::Value *Exn = nullptr; @@ -1387,8 +1394,7 @@ llvm::BasicBlock *CodeGenFunction::getEHResumeBlock(bool isCleanup) { llvm::Value *Exn = getExceptionFromSlot(); llvm::Value *Sel = getSelectorFromSlot(); - llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), - Sel->getType(), nullptr); + llvm::Type *LPadType = llvm::StructType::get(Exn->getType(), Sel->getType()); llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); @@ -1650,18 +1656,19 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, // parameters. Win32 filters take no parameters. if (IsFilter) { Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, + getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("exception_pointers"), - getContext().VoidPtrTy)); + getContext().VoidPtrTy, ImplicitParamDecl::Other)); } else { Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, + getContext(), /*DC=*/nullptr, StartLoc, &getContext().Idents.get("abnormal_termination"), - getContext().UnsignedCharTy)); + getContext().UnsignedCharTy, ImplicitParamDecl::Other)); } Args.push_back(ImplicitParamDecl::Create( - getContext(), nullptr, StartLoc, - &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy)); + getContext(), /*DC=*/nullptr, StartLoc, + &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy, + ImplicitParamDecl::Other)); } QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy; @@ -1747,7 +1754,7 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF, // }; // int exceptioncode = exception_pointers->ExceptionRecord->ExceptionCode; llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo(); - llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr); + llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy); llvm::Value *Ptrs = Builder.CreateBitCast(SEHInfo, PtrsTy->getPointerTo()); llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0); Rec = Builder.CreateAlignedLoad(Rec, getPointerAlign()); @@ -1800,7 +1807,8 @@ void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { // "catch i8* null". We can't do this on x86 because the filter has to save // the exception code. llvm::Constant *C = - CGM.EmitConstantExpr(Except->getFilterExpr(), getContext().IntTy, this); + ConstantEmitter(*this).tryEmitAbstract(Except->getFilterExpr(), + getContext().IntTy); if (CGM.getTarget().getTriple().getArch() != llvm::Triple::x86 && C && C->isOneValue()) { CatchScope->setCatchAllHandler(0, createBasicBlock("__except")); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index ab4448bf3c..88116f7d81 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -20,6 +20,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" @@ -48,7 +49,7 @@ using namespace CodeGen; llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { unsigned addressSpace = - cast<llvm::PointerType>(value->getType())->getAddressSpace(); + cast<llvm::PointerType>(value->getType())->getAddressSpace(); llvm::PointerType *destType = Int8PtrTy; if (addressSpace) @@ -61,17 +62,42 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + const Twine &Name, + llvm::Value *ArraySize, + bool CastToDefaultAddrSpace) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); -} - -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. + llvm::Value *V = Alloca; + // Alloca always returns a pointer in alloca address space, which may + // be different from the type defined by the language. For example, + // in C++ the auto variables are in the default address space. Therefore + // cast alloca to the default address space when necessary. + if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, + // otherwise alloca is inserted at the current insertion point of the + // builder. + if (!ArraySize) + Builder.SetInsertPoint(AllocaInsertPt); + V = getTargetHooks().performAddrSpaceCast( + *this, V, getASTAllocaAddressSpace(), LangAS::Default, + Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + } + + return Address(V, Align); +} + +/// CreateTempAlloca - This creates an alloca and inserts it into the entry +/// block if \p ArraySize is nullptr, otherwise inserts it at the current +/// insertion point of the builder. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { - return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); + const Twine &Name, + llvm::Value *ArraySize) { + if (ArraySize) + return Builder.CreateAlloca(Ty, ArraySize, Name); + return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), + ArraySize, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -98,14 +124,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { return CreateTempAlloca(ConvertType(Ty), Align, Name); } -Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) { +Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, + bool CastToDefaultAddrSpace) { // FIXME: Should we prefer the preferred type alignment here? - return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, + CastToDefaultAddrSpace); } Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, - const Twine &Name) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name); + const Twine &Name, + bool CastToDefaultAddrSpace) { + return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, + CastToDefaultAddrSpace); } /// EvaluateExprAsBool - Perform the usual unary conversions on the specified @@ -315,9 +345,10 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, } } -static Address -createReferenceTemporary(CodeGenFunction &CGF, - const MaterializeTemporaryExpr *M, const Expr *Inner) { +static Address createReferenceTemporary(CodeGenFunction &CGF, + const MaterializeTemporaryExpr *M, + const Expr *Inner) { + auto &TCG = CGF.getTargetHooks(); switch (M->getStorageDuration()) { case SD_FullExpression: case SD_Automatic: { @@ -329,14 +360,25 @@ createReferenceTemporary(CodeGenFunction &CGF, if (CGF.CGM.getCodeGenOpts().MergeAllConstants && (Ty->isArrayType() || Ty->isRecordType()) && CGF.CGM.isTypeConstant(Ty, true)) - if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) { - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp"); - CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); - GV->setAlignment(alignment.getQuantity()); - // FIXME: Should we put the new global into a COMDAT? - return Address(GV, alignment); + if (auto Init = ConstantEmitter(CGF).tryEmitAbstract(Inner, Ty)) { + if (auto AddrSpace = CGF.getTarget().getConstantAddressSpace()) { + auto AS = AddrSpace.getValue(); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp", nullptr, + llvm::GlobalValue::NotThreadLocal, + CGF.getContext().getTargetAddressSpace(AS)); + CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); + GV->setAlignment(alignment.getQuantity()); + llvm::Constant *C = GV; + if (AS != LangAS::Default) + C = TCG.performAddrSpaceCast( + CGF.CGM, GV, AS, LangAS::Default, + GV->getValueType()->getPointerTo( + CGF.getContext().getTargetAddressSpace(LangAS::Default))); + // FIXME: Should we put the new global into a COMDAT? + return Address(C, alignment); + } } return CGF.CreateMemTemp(Ty, "ref.tmp"); } @@ -415,9 +457,11 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Create and initialize the reference temporary. Address Object = createReferenceTemporary(*this, M, E); - if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) { + if (auto *Var = dyn_cast<llvm::GlobalVariable>( + Object.getPointer()->stripPointerCasts())) { Object = Address(llvm::ConstantExpr::getBitCast( - Var, ConvertTypeForMem(E->getType())->getPointerTo()), + cast<llvm::Constant>(Object.getPointer()), + ConvertTypeForMem(E->getType())->getPointerTo()), Object.getAlignment()); // If the temporary is a global and has a constant initializer or is a // constant temporary that we promoted to a global, we may have already @@ -463,8 +507,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { break; case SubobjectAdjustment::FieldAdjustment: { - LValue LV = MakeAddrLValue(Object, E->getType(), - AlignmentSource::Decl); + LValue LV = MakeAddrLValue(Object, E->getType(), AlignmentSource::Decl); LV = EmitLValueForField(LV, Adjustment.Field); assert(LV.isSimple() && "materialized temporary field is not a simple lvalue"); @@ -525,6 +568,19 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, return Builder.CreateMul(B1, KMul); } +bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) { + return TCK == TCK_DowncastPointer || TCK == TCK_Upcast || + TCK == TCK_UpcastToVirtualBase; +} + +bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + return (RD && RD->hasDefinition() && RD->isDynamicClass()) && + (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || + TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || + TCK == TCK_UpcastToVirtualBase); +} + bool CodeGenFunction::sanitizePerformTypeCheck() const { return SanOpts.has(SanitizerKind::Null) | SanOpts.has(SanitizerKind::Alignment) | @@ -545,27 +601,48 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Ptr->getType()->getPointerAddressSpace()) return; + // Don't check pointers to volatile data. The behavior here is implementation- + // defined. + if (Ty.isVolatileQualified()) + return; + SanitizerScope SanScope(this); SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks; llvm::BasicBlock *Done = nullptr; - bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast || - TCK == TCK_UpcastToVirtualBase; + // Quickly determine whether we have a pointer to an alloca. It's possible + // to skip null checks, and some alignment checks, for these pointers. This + // can reduce compile-time significantly. + auto PtrToAlloca = + dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases()); + + llvm::Value *True = llvm::ConstantInt::getTrue(getLLVMContext()); + llvm::Value *IsNonNull = nullptr; + bool IsGuaranteedNonNull = + SkippedChecks.has(SanitizerKind::Null) || PtrToAlloca; + bool AllowNullPointers = isNullPointerAllowed(TCK); if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) && - !SkippedChecks.has(SanitizerKind::Null)) { + !IsGuaranteedNonNull) { // The glvalue must not be an empty glvalue. - llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr); - - if (AllowNullPointers) { - // When performing pointer casts, it's OK if the value is null. - // Skip the remaining checks in that case. - Done = createBasicBlock("null"); - llvm::BasicBlock *Rest = createBasicBlock("not.null"); - Builder.CreateCondBr(IsNonNull, Rest, Done); - EmitBlock(Rest); - } else { - Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null)); + IsNonNull = Builder.CreateIsNotNull(Ptr); + + // The IR builder can constant-fold the null check if the pointer points to + // a constant. + IsGuaranteedNonNull = IsNonNull == True; + + // Skip the null check if the pointer is known to be non-null. + if (!IsGuaranteedNonNull) { + if (AllowNullPointers) { + // When performing pointer casts, it's OK if the value is null. + // Skip the remaining checks in that case. + Done = createBasicBlock("null"); + llvm::BasicBlock *Rest = createBasicBlock("not.null"); + Builder.CreateCondBr(IsNonNull, Rest, Done); + EmitBlock(Rest); + } else { + Checks.push_back(std::make_pair(IsNonNull, SanitizerKind::Null)); + } } } @@ -581,14 +658,16 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy }; llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys); llvm::Value *Min = Builder.getFalse(); + llvm::Value *NullIsUnknown = Builder.getFalse(); llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy); - llvm::Value *LargeEnough = - Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}), - llvm::ConstantInt::get(IntPtrTy, Size)); + llvm::Value *LargeEnough = Builder.CreateICmpUGE( + Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}), + llvm::ConstantInt::get(IntPtrTy, Size)); Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize)); } uint64_t AlignVal = 0; + llvm::Value *PtrAsInt = nullptr; if (SanOpts.has(SanitizerKind::Alignment) && !SkippedChecks.has(SanitizerKind::Alignment)) { @@ -597,13 +676,15 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); // The glvalue must be suitably aligned. - if (AlignVal) { - llvm::Value *Align = - Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy), - llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); + if (AlignVal > 1 && + (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) { + PtrAsInt = Builder.CreatePtrToInt(Ptr, IntPtrTy); + llvm::Value *Align = Builder.CreateAnd( + PtrAsInt, llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); llvm::Value *Aligned = - Builder.CreateICmpEQ(Align, llvm::ConstantInt::get(IntPtrTy, 0)); - Checks.push_back(std::make_pair(Aligned, SanitizerKind::Alignment)); + Builder.CreateICmpEQ(Align, llvm::ConstantInt::get(IntPtrTy, 0)); + if (Aligned != True) + Checks.push_back(std::make_pair(Aligned, SanitizerKind::Alignment)); } } @@ -615,7 +696,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), llvm::ConstantInt::get(Int8Ty, AlignVal ? llvm::Log2_64(AlignVal) : 1), llvm::ConstantInt::get(Int8Ty, TCK)}; - EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, Ptr); + EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, + PtrAsInt ? PtrAsInt : Ptr); } // If possible, check that the vptr indicates that there is a subobject of @@ -626,13 +708,20 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // The program has undefined behavior if: // -- the [pointer or glvalue] is used to access a non-static data member // or call a non-static member function - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); if (SanOpts.has(SanitizerKind::Vptr) && - !SkippedChecks.has(SanitizerKind::Vptr) && - (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || - TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || - TCK == TCK_UpcastToVirtualBase) && - RD && RD->hasDefinition() && RD->isDynamicClass()) { + !SkippedChecks.has(SanitizerKind::Vptr) && isVptrCheckRequired(TCK, Ty)) { + // Ensure that the pointer is non-null before loading it. If there is no + // compile-time guarantee, reuse the run-time null check or emit a new one. + if (!IsGuaranteedNonNull) { + if (!IsNonNull) + IsNonNull = Builder.CreateIsNotNull(Ptr); + if (!Done) + Done = createBasicBlock("vptr.null"); + llvm::BasicBlock *VptrNotNull = createBasicBlock("vptr.not.null"); + Builder.CreateCondBr(IsNonNull, VptrNotNull, Done); + EmitBlock(VptrNotNull); + } + // Compute a hash of the mangled name of the type. // // FIXME: This is not guaranteed to be deterministic! Move to a @@ -645,7 +734,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Blacklist based on the mangled type. if (!CGM.getContext().getSanitizerBlacklist().isBlacklistedType( - Out.str())) { + SanitizerKind::Vptr, Out.str())) { llvm::hash_code TypeHash = hash_value(Out.str()); // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). @@ -830,7 +919,8 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E, /// EmitPointerWithAlignment - Given an expression of pointer type, try to /// derive a more accurate bound on the alignment of the pointer. Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, - AlignmentSource *Source) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { // We allow this with ObjC object pointers because of fragile ABIs. assert(E->getType()->isPointerType() || E->getType()->isObjCObjectPointerType()); @@ -845,20 +935,35 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Non-converting casts (but not C's implicit conversion from void*). case CK_BitCast: case CK_NoOp: + case CK_AddressSpaceConversion: if (auto PtrTy = CE->getSubExpr()->getType()->getAs<PointerType>()) { if (PtrTy->getPointeeType()->isVoidType()) break; - AlignmentSource InnerSource; - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerSource); - if (Source) *Source = InnerSource; - - // If this is an explicit bitcast, and the source l-value is - // opaque, honor the alignment of the casted-to type. - if (isa<ExplicitCastExpr>(CE) && - InnerSource != AlignmentSource::Decl) { - Addr = Address(Addr.getPointer(), - getNaturalPointeeTypeAlignment(E->getType(), Source)); + LValueBaseInfo InnerBaseInfo; + TBAAAccessInfo InnerTBAAInfo; + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), + &InnerBaseInfo, + &InnerTBAAInfo); + if (BaseInfo) *BaseInfo = InnerBaseInfo; + if (TBAAInfo) *TBAAInfo = InnerTBAAInfo; + + if (isa<ExplicitCastExpr>(CE)) { + LValueBaseInfo TargetTypeBaseInfo; + TBAAAccessInfo TargetTypeTBAAInfo; + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), + &TargetTypeBaseInfo, + &TargetTypeTBAAInfo); + if (TBAAInfo) + *TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo, + TargetTypeTBAAInfo); + // If the source l-value is opaque, honor the alignment of the + // casted-to type. + if (InnerBaseInfo.getAlignmentSource() != AlignmentSource::Decl) { + if (BaseInfo) + BaseInfo->mergeForCast(TargetTypeBaseInfo); + Addr = Address(Addr.getPointer(), Align); + } } if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && @@ -869,19 +974,22 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, CodeGenFunction::CFITCK_UnrelatedCast, CE->getLocStart()); } - - return Builder.CreateBitCast(Addr, ConvertType(E->getType())); + return CE->getCastKind() != CK_AddressSpaceConversion + ? Builder.CreateBitCast(Addr, ConvertType(E->getType())) + : Builder.CreateAddrSpaceCast(Addr, + ConvertType(E->getType())); } break; // Array-to-pointer decay. case CK_ArrayToPointerDecay: - return EmitArrayToPointerDecay(CE->getSubExpr(), Source); + return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo, TBAAInfo); // Derived-to-base conversions. case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), Source); + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo, + TBAAInfo); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return GetAddressOfBaseClass(Addr, Derived, CE->path_begin(), CE->path_end(), @@ -900,7 +1008,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { if (UO->getOpcode() == UO_AddrOf) { LValue LV = EmitLValue(UO->getSubExpr()); - if (Source) *Source = LV.getAlignmentSource(); + if (BaseInfo) *BaseInfo = LV.getBaseInfo(); + if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); return LV.getAddress(); } } @@ -908,7 +1017,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // TODO: conditional operators, comma. // Otherwise, use the alignment of the type. - CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), Source); + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, + TBAAInfo); return Address(EmitScalarExpr(E), Align); } @@ -952,10 +1062,7 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E, E->getType()); } -bool CodeGenFunction::IsDeclRefOrWrappedCXXThis(const Expr *Obj) { - if (isa<DeclRefExpr>(Obj)) - return true; - +bool CodeGenFunction::IsWrappedCXXThis(const Expr *Obj) { const Expr *Base = Obj; while (!isa<CXXThisExpr>(Base)) { // The result of a dynamic_cast can be null. @@ -986,9 +1093,13 @@ LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { LV = EmitLValue(E); if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple()) { SanitizerSet SkippedChecks; - if (const auto *ME = dyn_cast<MemberExpr>(E)) - if (IsDeclRefOrWrappedCXXThis(ME->getBase())) + if (const auto *ME = dyn_cast<MemberExpr>(E)) { + bool IsBaseCXXThis = IsWrappedCXXThis(ME->getBase()); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(ME->getBase())) SkippedChecks.set(SanitizerKind::Null, true); + } EmitTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), E->getType(), LV.getAlignment(), SkippedChecks); } @@ -1076,8 +1187,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { llvm::Value *V = LV.getPointer(); Scope.ForceCleanup({&V}); return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), - getContext(), LV.getAlignmentSource(), - LV.getTBAAInfo()); + getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); } // FIXME: Is it possible to create an ExprWithCleanups that produces a // bitfield lvalue or some other non-simple lvalue? @@ -1133,6 +1243,11 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::MaterializeTemporaryExprClass: return EmitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(E)); + + case Expr::CoawaitExprClass: + return EmitCoawaitLValue(cast<CoawaitExpr>(E)); + case Expr::CoyieldExprClass: + return EmitCoyieldLValue(cast<CoyieldExpr>(E)); } } @@ -1229,7 +1344,8 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { return ConstantEmission(); // Emit as a constant. - llvm::Constant *C = CGM.EmitConstantValue(result.Val, resultType, this); + auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(), + result.Val, resultType); // Make sure we emit a debug reference to the global variable. // This should probably fire even for @@ -1248,13 +1364,30 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { return ConstantEmission::forValue(C); } +static DeclRefExpr *tryToConvertMemberExprToDeclRefExpr(CodeGenFunction &CGF, + const MemberExpr *ME) { + if (auto *VD = dyn_cast<VarDecl>(ME->getMemberDecl())) { + // Try to emit static variable member expressions as DREs. + return DeclRefExpr::Create( + CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, + /*RefersToEnclosingVariableOrCapture=*/false, ME->getExprLoc(), + ME->getType(), ME->getValueKind()); + } + return nullptr; +} + +CodeGenFunction::ConstantEmission +CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) { + if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, ME)) + return tryEmitAsConstant(DRE); + return ConstantEmission(); +} + llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), Loc, lvalue.getAlignmentSource(), - lvalue.getTBAAInfo(), - lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), - lvalue.isNontemporal()); + lvalue.getType(), Loc, lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), lvalue.isNontemporal()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -1338,17 +1471,17 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) return true; + auto &Ctx = getLLVMContext(); SanitizerScope SanScope(this); llvm::Value *Check; --End; if (!Min) { - Check = Builder.CreateICmpULE( - Value, llvm::ConstantInt::get(getLLVMContext(), End)); + Check = Builder.CreateICmpULE(Value, llvm::ConstantInt::get(Ctx, End)); } else { - llvm::Value *Upper = Builder.CreateICmpSLE( - Value, llvm::ConstantInt::get(getLLVMContext(), End)); - llvm::Value *Lower = Builder.CreateICmpSGE( - Value, llvm::ConstantInt::get(getLLVMContext(), Min)); + llvm::Value *Upper = + Builder.CreateICmpSLE(Value, llvm::ConstantInt::get(Ctx, End)); + llvm::Value *Lower = + Builder.CreateICmpSGE(Value, llvm::ConstantInt::get(Ctx, Min)); Check = Builder.CreateAnd(Upper, Lower); } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc), @@ -1363,37 +1496,37 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, - AlignmentSource AlignSource, - llvm::MDNode *TBAAInfo, - QualType TBAABaseType, - uint64_t TBAAOffset, + LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo, bool isNontemporal) { - // For better performance, handle vector loads differently. - if (Ty->isVectorType()) { - const llvm::Type *EltTy = Addr.getElementType(); - - const auto *VTy = cast<llvm::VectorType>(EltTy); - - // Handle vectors of size 3 like size 4 for better performance. - if (VTy->getNumElements() == 3) { - - // Bitcast to vec4 type. - llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(), - 4); - Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); - // Now load value. - llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); - - // Shuffle vector to get vec3. - V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), - {0, 1, 2}, "extractVec"); - return EmitFromMemory(V, Ty); + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // For better performance, handle vector loads differently. + if (Ty->isVectorType()) { + const llvm::Type *EltTy = Addr.getElementType(); + + const auto *VTy = cast<llvm::VectorType>(EltTy); + + // Handle vectors of size 3 like size 4 for better performance. + if (VTy->getNumElements() == 3) { + + // Bitcast to vec4 type. + llvm::VectorType *vec4Ty = + llvm::VectorType::get(VTy->getElementType(), 4); + Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); + // Now load value. + llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); + + // Shuffle vector to get vec3. + V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty), + {0, 1, 2}, "extractVec"); + return EmitFromMemory(V, Ty); + } } } // Atomic operations have to be done on integral types. LValue AtomicLValue = - LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); + LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo); if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) { return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } @@ -1404,13 +1537,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } - if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Load, TBAAPath, - false /*ConvertTypeToTag*/); - } + + CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); if (EmitScalarRangeCheck(Load, Ty, Loc)) { // In order to prevent the optimizer from throwing away the check, don't @@ -1449,37 +1577,35 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, - AlignmentSource AlignSource, - llvm::MDNode *TBAAInfo, - bool isInit, QualType TBAABaseType, - uint64_t TBAAOffset, - bool isNontemporal) { - - // Handle vectors differently to get better performance. - if (Ty->isVectorType()) { - llvm::Type *SrcTy = Value->getType(); - auto *VecTy = cast<llvm::VectorType>(SrcTy); - // Handle vec3 special. - if (VecTy->getNumElements() == 3) { - // Our source is a vec3, do a shuffle vector to make it a vec4. - llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Value = Builder.CreateShuffleVector(Value, - llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); - SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); - } - if (Addr.getElementType() != SrcTy) { - Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo, + bool isInit, bool isNontemporal) { + if (!CGM.getCodeGenOpts().PreserveVec3Type) { + // Handle vectors differently to get better performance. + if (Ty->isVectorType()) { + llvm::Type *SrcTy = Value->getType(); + auto *VecTy = dyn_cast<llvm::VectorType>(SrcTy); + // Handle vec3 special. + if (VecTy && VecTy->getNumElements() == 3) { + // Our source is a vec3, do a shuffle vector to make it a vec4. + llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), + Builder.getInt32(2), + llvm::UndefValue::get(Builder.getInt32Ty())}; + llvm::Value *MaskV = llvm::ConstantVector::get(Mask); + Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), + MaskV, "extractVec"); + SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + } + if (Addr.getElementType() != SrcTy) { + Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + } } } Value = EmitToMemory(Value, Ty); LValue AtomicLValue = - LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); + LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo); if (Ty->isAtomicType() || (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) { EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit); @@ -1493,21 +1619,15 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, llvm::ConstantAsMetadata::get(Builder.getInt32(1))); Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } - if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Store, TBAAPath, - false /*ConvertTypeToTag*/); - } + + CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); } void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), lvalue.getAlignmentSource(), - lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), - lvalue.getTBAAOffset(), lvalue.isNontemporal()); + lvalue.getType(), lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this @@ -2040,35 +2160,45 @@ static LValue EmitThreadPrivateVarDeclLValue( return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } -Address CodeGenFunction::EmitLoadOfReference(Address Addr, - const ReferenceType *RefTy, - AlignmentSource *Source) { - llvm::Value *Ptr = Builder.CreateLoad(Addr); - return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(), - Source, /*forPointee*/ true)); - +Address +CodeGenFunction::EmitLoadOfReference(LValue RefLVal, + LValueBaseInfo *PointeeBaseInfo, + TBAAAccessInfo *PointeeTBAAInfo) { + llvm::LoadInst *Load = Builder.CreateLoad(RefLVal.getAddress(), + RefLVal.isVolatile()); + CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo()); + + CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(), + PointeeBaseInfo, PointeeTBAAInfo, + /* forPointeeType= */ true); + return Address(Load, Align); } -LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, - const ReferenceType *RefTy) { - AlignmentSource Source; - Address Addr = EmitLoadOfReference(RefAddr, RefTy, &Source); - return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source); +LValue CodeGenFunction::EmitLoadOfReferenceLValue(LValue RefLVal) { + LValueBaseInfo PointeeBaseInfo; + TBAAAccessInfo PointeeTBAAInfo; + Address PointeeAddr = EmitLoadOfReference(RefLVal, &PointeeBaseInfo, + &PointeeTBAAInfo); + return MakeAddrLValue(PointeeAddr, RefLVal.getType()->getPointeeType(), + PointeeBaseInfo, PointeeTBAAInfo); } Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, - AlignmentSource *Source) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { llvm::Value *Addr = Builder.CreateLoad(Ptr); - return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source, + return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), + BaseInfo, TBAAInfo, /*forPointeeType=*/true)); } LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, const PointerType *PtrTy) { - AlignmentSource Source; - Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source); - return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo, &TBAAInfo); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo, TBAAInfo); } static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, @@ -2085,17 +2215,15 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Address Addr(V, Alignment); - LValue LV; // Emit reference to the private copy of the variable if it is an OpenMP // threadprivate variable. if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy, E->getExprLoc()); - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy); - } else { - LV = CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); - } + LValue LV = VD->getType()->isReferenceType() ? + CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), + AlignmentSource::Decl) : + CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); setObjCGCLValueClass(CGF.getContext(), E, LV); return LV; } @@ -2128,7 +2256,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E, const FunctionDecl *FD) { llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD); CharUnits Alignment = CGF.getContext().getDeclAlign(FD); - return CGF.MakeAddrLValue(V, E->getType(), Alignment, AlignmentSource::Decl); + return CGF.MakeAddrLValue(V, E->getType(), Alignment, + AlignmentSource::Decl); } static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD, @@ -2183,29 +2312,38 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { VD->isUsableInConstantExpressions(getContext()) && VD->checkInitIsICE() && // Do not emit if it is private OpenMP variable. - !(E->refersToEnclosingVariableOrCapture() && CapturedStmtInfo && - LocalDeclMap.count(VD))) { + !(E->refersToEnclosingVariableOrCapture() && + ((CapturedStmtInfo && + (LocalDeclMap.count(VD->getCanonicalDecl()) || + CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) || + LambdaCaptureFields.lookup(VD->getCanonicalDecl()) || + isa<BlockDecl>(CurCodeDecl)))) { llvm::Constant *Val = - CGM.EmitConstantValue(*VD->evaluateValue(), VD->getType(), this); + ConstantEmitter(*this).emitAbstract(E->getLocation(), + *VD->evaluateValue(), + VD->getType()); assert(Val && "failed to emit reference constant expression"); // FIXME: Eventually we will want to emit vector element references. // Should we be using the alignment of the constant pointer we emitted? - CharUnits Alignment = getNaturalTypeAlignment(E->getType(), nullptr, - /*pointee*/ true); - + CharUnits Alignment = getNaturalTypeAlignment(E->getType(), + /* BaseInfo= */ nullptr, + /* TBAAInfo= */ nullptr, + /* forPointeeType= */ true); return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl); } // Check for captured variables. if (E->refersToEnclosingVariableOrCapture()) { + VD = VD->getCanonicalDecl(); if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); else if (CapturedStmtInfo) { auto I = LocalDeclMap.find(VD); if (I != LocalDeclMap.end()) { - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) - return EmitLoadOfReferenceLValue(I->second, RefTy); + if (VD->getType()->isReferenceType()) + return EmitLoadOfReferenceLValue(I->second, VD->getType(), + AlignmentSource::Decl); return MakeAddrLValue(I->second, T); } LValue CapLVal = @@ -2213,7 +2351,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { CapturedStmtInfo->getContextValue()); return MakeAddrLValue( Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)), - CapLVal.getType(), AlignmentSource::Decl); + CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl), + CapLVal.getTBAAInfo()); } assert(isa<BlockDecl>(CurCodeDecl)); @@ -2274,12 +2413,9 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } // Drill into reference types. - LValue LV; - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - LV = EmitLoadOfReferenceLValue(addr, RefTy); - } else { - LV = MakeAddrLValue(addr, T, AlignmentSource::Decl); - } + LValue LV = VD->getType()->isReferenceType() ? + EmitLoadOfReferenceLValue(addr, VD->getType(), AlignmentSource::Decl) : + MakeAddrLValue(addr, T, AlignmentSource::Decl); bool isLocalStorage = VD->hasLocalStorage(); @@ -2323,9 +2459,11 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { QualType T = E->getSubExpr()->getType()->getPointeeType(); assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); - AlignmentSource AlignSource; - Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &AlignSource); - LValue LV = MakeAddrLValue(Addr, T, AlignSource); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo, + &TBAAInfo); + LValue LV = MakeAddrLValue(Addr, T, BaseInfo, TBAAInfo); LV.getQuals().setAddressSpace(ExprTy.getAddressSpace()); // We should not generate __weak write barrier on indirect reference @@ -2357,7 +2495,8 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { (E->getOpcode() == UO_Real ? emitAddrOfRealComponent(LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); - LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource()); + LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, T)); ElemLV.getQuals().addQualifiers(LV.getQuals()); return ElemLV; } @@ -2467,6 +2606,9 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { llvm::Type *TargetTy = IntPtrTy; + if (V->getType() == TargetTy) + return V; + // Floating-point types which fit into intptr_t are bitcast to integers // and then passed directly (after zero-extension, if necessary). if (V->getType()->isFloatingPointTy()) { @@ -2596,13 +2738,16 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable); bool NeedsAbortSuffix = IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable; + bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime; const SanitizerHandlerInfo &CheckInfo = SanitizerHandlers[CheckHandler]; const StringRef CheckName = CheckInfo.Name; - std::string FnName = - ("__ubsan_handle_" + CheckName + - (CheckInfo.Version ? "_v" + llvm::utostr(CheckInfo.Version) : "") + - (NeedsAbortSuffix ? "_abort" : "")) - .str(); + std::string FnName = "__ubsan_handle_" + CheckName.str(); + if (CheckInfo.Version && !MinimalRuntime) + FnName += "_v" + llvm::utostr(CheckInfo.Version); + if (MinimalRuntime) + FnName += "_minimal"; + if (NeedsAbortSuffix) + FnName += "_abort"; bool MayReturn = !IsFatal || RecoverKind == CheckRecoverableKind::AlwaysRecoverable; @@ -2615,8 +2760,8 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, - llvm::AttributeSet::get(CGF.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, B), + llvm::AttributeList::get(CGF.getLLVMContext(), + llvm::AttributeList::FunctionIndex, B), /*Local=*/true); llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs); if (!MayReturn) { @@ -2689,24 +2834,26 @@ void CodeGenFunction::EmitCheck( // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; - Args.reserve(DynamicArgs.size() + 1); - ArgTypes.reserve(DynamicArgs.size() + 1); - - // Emit handler arguments and create handler function type. - if (!StaticArgs.empty()) { - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); - InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); - } + if (!CGM.getCodeGenOpts().SanitizeMinimalRuntime) { + Args.reserve(DynamicArgs.size() + 1); + ArgTypes.reserve(DynamicArgs.size() + 1); + + // Emit handler arguments and create handler function type. + if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); + ArgTypes.push_back(Int8PtrTy); + } - for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { - Args.push_back(EmitCheckValue(DynamicArgs[i])); - ArgTypes.push_back(IntPtrTy); + for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { + Args.push_back(EmitCheckValue(DynamicArgs[i])); + ArgTypes.push_back(IntPtrTy); + } } llvm::FunctionType *FnType = @@ -2779,6 +2926,24 @@ void CodeGenFunction::EmitCfiSlowPathCheck( EmitBlock(Cont); } +// Emit a stub for __cfi_check function so that the linker knows about this +// symbol in LTO mode. +void CodeGenFunction::EmitCfiCheckStub() { + llvm::Module *M = &CGM.getModule(); + auto &Ctx = M->getContext(); + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false), + llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M); + llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F); + // FIXME: consider emitting an intrinsic call like + // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2) + // which can be lowered in CrossDSOCFI pass to the actual contents of + // __cfi_check. This would allow inlining of __cfi_check calls. + llvm::CallInst::Create( + llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB); + llvm::ReturnInst::Create(Ctx, nullptr, BB); +} + // This function is basically a switch over the CFI failure kind, which is // extracted from CFICheckFailData (1st function argument). Each case is either // llvm.trap or a call to one of the two runtime handlers, based on @@ -2789,10 +2954,10 @@ void CodeGenFunction::EmitCfiSlowPathCheck( void CodeGenFunction::EmitCfiCheckFail() { SanitizerScope SanScope(this); FunctionArgList Args; - ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); - ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr, - getContext().VoidPtrTy); + ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&ArgData); Args.push_back(&ArgAddr); @@ -2820,9 +2985,9 @@ void CodeGenFunction::EmitCfiCheckFail() { EmitTrapCheck(DataIsNotNullPtr); llvm::StructType *SourceLocationTy = - llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr); + llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty); llvm::StructType *CfiCheckFailDataTy = - llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr); + llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy); llvm::Value *V = Builder.CreateConstGEP2_32( CfiCheckFailDataTy, @@ -2891,21 +3056,21 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex, A); + TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); } return TrapCall; } Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, - AlignmentSource *AlignSource) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { assert(E->getType()->isArrayType() && "Array to pointer decay must have array source type!"); // Expressions of array type can't be bitfields or vector elements. LValue LV = EmitLValue(E); Address Addr = LV.getAddress(); - if (AlignSource) *AlignSource = LV.getAlignmentSource(); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. @@ -2920,7 +3085,15 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), "arraydecay"); } + // The result of this decay conversion points to an array element within the + // base lvalue. However, since TBAA currently does not support representing + // accesses to elements of member arrays, we conservatively represent accesses + // to the pointee object as if it had no any base lvalue specified. + // TODO: Support TBAA for member arrays. QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType(); + if (BaseInfo) *BaseInfo = LV.getBaseInfo(); + if (TBAAInfo) *TBAAInfo = CGM.getTBAAAccessInfo(EltType); + return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType)); } @@ -2944,9 +3117,13 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, llvm::Value *ptr, ArrayRef<llvm::Value*> indices, bool inbounds, + bool signedIndices, + SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.Builder.CreateInBoundsGEP(ptr, indices, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, + CodeGenFunction::NotSubtraction, loc, + name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } @@ -2977,8 +3154,9 @@ static QualType getFixedSizeElementType(const ASTContext &ctx, } static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, - ArrayRef<llvm::Value*> indices, + ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, + bool signedIndices, SourceLocation loc, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. #ifndef NDEBUG @@ -2998,8 +3176,8 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); - llvm::Value *eltPtr = - emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, name); + llvm::Value *eltPtr = emitArraySubscriptGEP( + CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); return Address(eltPtr, eltAlign); } @@ -3009,6 +3187,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // in lexical order (this complexity is, sadly, required by C++17). llvm::Value *IdxPre = (E->getLHS() == E->getIdx()) ? EmitScalarExpr(E->getIdx()) : nullptr; + bool SignedIndices = false; auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> llvm::Value * { auto *Idx = IdxPre; if (E->getLHS() != E->getIdx()) { @@ -3018,6 +3197,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, QualType IdxTy = E->getIdx()->getType(); bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + SignedIndices |= IdxSigned; if (SanOpts.has(SanitizerKind::ArrayBounds)) EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed); @@ -3038,9 +3218,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - return LValue::MakeVectorElt(LHS.getAddress(), Idx, - E->getBase()->getType(), - LHS.getAlignmentSource()); + return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), + LHS.getBaseInfo(), TBAAAccessInfo()); } // All the other cases basically behave like simple offsetting. @@ -3052,18 +3231,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Address Addr = EmitExtVectorElementLValue(LV); QualType EltType = LV.getType()->castAs<VectorType>()->getElementType(); - Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true); - return MakeAddrLValue(Addr, EltType, LV.getAlignmentSource()); + Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true, + SignedIndices, E->getExprLoc()); + return MakeAddrLValue(Addr, EltType, LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, EltType)); } - AlignmentSource AlignSource; + LValueBaseInfo EltBaseInfo; + TBAAAccessInfo EltTBAAInfo; Address Addr = Address::invalid(); if (const VariableArrayType *vla = getContext().getAsVariableArrayType(E->getType())) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. - Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); // The element count here is the total number of non-VLA elements. @@ -3080,13 +3262,14 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, } Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + SignedIndices, E->getExprLoc()); } else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){ // Indexing over an interface, as in "NSString *P; P[4];" // Emit the base pointer. - Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT); @@ -3106,7 +3289,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, CharUnits EltAlign = getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize); llvm::Value *EltPtr = - emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false); + emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false, + SignedIndices, E->getExprLoc()); Addr = Address(EltPtr, EltAlign); // Cast back. @@ -3128,22 +3312,22 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, auto *Idx = EmitIdxAfterBase(/*Promote*/true); // Propagate the alignment from the array itself to the result. - Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(), - {CGM.getSize(CharUnits::Zero()), Idx}, - E->getType(), - !getLangOpts().isSignedOverflowDefined()); - AlignSource = ArrayLV.getAlignmentSource(); + Addr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc()); + EltBaseInfo = ArrayLV.getBaseInfo(); + EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType()); } else { // The base must be a pointer; emit it with an estimate of its alignment. - Addr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + SignedIndices, E->getExprLoc()); } - LValue LV = MakeAddrLValue(Addr, E->getType(), AlignSource); - - // TODO: Preserve/extend path TBAA metadata? + LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); if (getLangOpts().ObjC1 && getLangOpts().getGC() != LangOptions::NonGC) { @@ -3154,7 +3338,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, } static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, - AlignmentSource &AlignSource, + LValueBaseInfo &BaseInfo, + TBAAAccessInfo &TBAAInfo, QualType BaseTy, QualType ElTy, bool IsLowerBound) { LValue BaseLVal; @@ -3162,7 +3347,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { Address Addr = BaseLVal.getAddress(); - AlignSource = BaseLVal.getAlignmentSource(); + BaseInfo = BaseLVal.getBaseInfo(); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. @@ -3181,20 +3366,20 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(ElTy)); } - CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource); + LValueBaseInfo TypeBaseInfo; + TBAAAccessInfo TypeTBAAInfo; + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeBaseInfo, + &TypeTBAAInfo); + BaseInfo.mergeForCast(TypeBaseInfo); + TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo); return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); } - return CGF.EmitPointerWithAlignment(Base, &AlignSource); + return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound) { - QualType BaseTy; - if (auto *ASE = - dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts())) - BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE); - else - BaseTy = E->getBase()->getType(); + QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(E->getBase()); QualType ResultExprTy; if (auto *AT = getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); @@ -3288,14 +3473,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, assert(Idx); Address EltPtr = Address::invalid(); - AlignmentSource AlignSource; + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. Address Base = - emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy, - VLA->getElementType(), IsLowerBound); + emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo, + BaseTy, VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. llvm::Value *NumElements = getVLASize(VLA).first; @@ -3308,7 +3494,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, else Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3327,16 +3514,20 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, // Propagate the alignment from the array itself to the result. EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, - ResultExprTy, !getLangOpts().isSignedOverflowDefined()); - AlignSource = ArrayLV.getAlignmentSource(); + ResultExprTy, !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); + BaseInfo = ArrayLV.getBaseInfo(); + TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy); } else { - Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource, - BaseTy, ResultExprTy, IsLowerBound); + Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, + TBAAInfo, BaseTy, ResultExprTy, + IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, - !getLangOpts().isSignedOverflowDefined()); + !getLangOpts().isSignedOverflowDefined(), + /*SignedIndices=*/false, E->getExprLoc()); } - return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource); + return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo); } LValue CodeGenFunction:: @@ -3348,10 +3539,11 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { if (E->isArrow()) { // If it is a pointer to a vector, emit the address and form an lvalue with // it. - AlignmentSource AlignSource; - Address Ptr = EmitPointerWithAlignment(E->getBase(), &AlignSource); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); const PointerType *PT = E->getBase()->getType()->getAs<PointerType>(); - Base = MakeAddrLValue(Ptr, PT->getPointeeType(), AlignSource); + Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, TBAAInfo); Base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), @@ -3382,7 +3574,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, - Base.getAlignmentSource()); + Base.getBaseInfo(), TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -3393,24 +3585,32 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { CElts.push_back(BaseElts->getAggregateElement(Indices[i])); llvm::Constant *CV = llvm::ConstantVector::get(CElts); return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type, - Base.getAlignmentSource()); + Base.getBaseInfo(), TBAAAccessInfo()); } LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { - Expr *BaseExpr = E->getBase(); + if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, E)) { + EmitIgnoredExpr(E->getBase()); + return EmitDeclRefLValue(DRE); + } + Expr *BaseExpr = E->getBase(); // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. LValue BaseLV; if (E->isArrow()) { - AlignmentSource AlignSource; - Address Addr = EmitPointerWithAlignment(BaseExpr, &AlignSource); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); QualType PtrTy = BaseExpr->getType()->getPointeeType(); SanitizerSet SkippedChecks; - if (IsDeclRefOrWrappedCXXThis(BaseExpr)) + bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa<DeclRefExpr>(BaseExpr)) SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, /*Alignment=*/CharUnits::Zero(), SkippedChecks); - BaseLV = MakeAddrLValue(Addr, PtrTy, AlignSource); + BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); @@ -3421,9 +3621,6 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { return LV; } - if (auto *VD = dyn_cast<VarDecl>(ND)) - return EmitGlobalVarDeclLValue(*this, E, VD); - if (const auto *FD = dyn_cast<FunctionDecl>(ND)) return EmitFunctionDeclLValue(*this, E, FD); @@ -3468,10 +3665,28 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName()); } +static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { + const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl(); + if (!RD) + return false; + + if (RD->isDynamicClass()) + return true; + + for (const auto &Base : RD->bases()) + if (hasAnyVptr(Base.getType(), Context)) + return true; + + for (const FieldDecl *Field : RD->fields()) + if (hasAnyVptr(Field->getType(), Context)) + return true; + + return false; +} + LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field) { - AlignmentSource fieldAlignSource = - getFieldAlignmentSource(base.getAlignmentSource()); + LValueBaseInfo BaseInfo = base.getBaseInfo(); if (field->isBitField()) { const CGRecordLayout &RL = @@ -3491,54 +3706,74 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - return LValue::MakeBitfield(Addr, Info, fieldType, fieldAlignSource); + // TODO: Support TBAA for bit fields. + LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); + return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, + TBAAAccessInfo()); } + // Fields of may-alias structures are may-alias themselves. + // FIXME: this should get propagated down through anonymous structs + // and unions. + QualType FieldType = field->getType(); const RecordDecl *rec = field->getParent(); - QualType type = field->getType(); + AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource)); + TBAAAccessInfo FieldTBAAInfo; + if (base.getTBAAInfo().isMayAlias() || + rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) { + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); + } else if (rec->isUnion()) { + // TODO: Support TBAA for unions. + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); + } else { + // If no base type been assigned for the base access, then try to generate + // one for this base lvalue. + FieldTBAAInfo = base.getTBAAInfo(); + if (!FieldTBAAInfo.BaseType) { + FieldTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(base.getType()); + assert(!FieldTBAAInfo.Offset && + "Nonzero offset for an access with no base type!"); + } - bool mayAlias = rec->hasAttr<MayAliasAttr>(); + // Adjust offset to be relative to the base type. + const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(field->getParent()); + unsigned CharWidth = getContext().getCharWidth(); + if (FieldTBAAInfo.BaseType) + FieldTBAAInfo.Offset += + Layout.getFieldOffset(field->getFieldIndex()) / CharWidth; + + // Update the final access type. + FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType); + } Address addr = base.getAddress(); - unsigned cvr = base.getVRQualifiers(); - bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; + unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. - assert(!type->isReferenceType() && "union has reference member"); - // TODO: handle path-aware TBAA for union. - TBAAPath = false; + assert(!FieldType->isReferenceType() && "union has reference member"); + if (CGM.getCodeGenOpts().StrictVTablePointers && + hasAnyVptr(FieldType, getContext())) + // Because unions can easily skip invariant.barriers, we need to add + // a barrier every time CXXRecord field with vptr is referenced. + addr = Address(Builder.CreateInvariantGroupBarrier(addr.getPointer()), + addr.getAlignment()); } else { // For structs, we GEP to the field that the record layout suggests. addr = emitAddrOfFieldStorage(*this, addr, field); // If this is a reference field, load the reference right now. - if (const ReferenceType *refType = type->getAs<ReferenceType>()) { - llvm::LoadInst *load = Builder.CreateLoad(addr, "ref"); - if (cvr & Qualifiers::Volatile) load->setVolatile(true); - - // Loading the reference will disable path-aware TBAA. - TBAAPath = false; - if (CGM.shouldUseTBAA()) { - llvm::MDNode *tbaa; - if (mayAlias) - tbaa = CGM.getTBAAInfo(getContext().CharTy); - else - tbaa = CGM.getTBAAInfo(type); - if (tbaa) - CGM.DecorateInstructionWithTBAA(load, tbaa); - } - - mayAlias = false; - type = refType->getPointeeType(); - - CharUnits alignment = - getNaturalTypeAlignment(type, &fieldAlignSource, /*pointee*/ true); - addr = Address(load, alignment); - - // Qualifiers on the struct don't apply to the referencee, and - // we'll pick up CVR from the actual type later, so reset these - // additional qualifiers now. - cvr = 0; + if (FieldType->isReferenceType()) { + LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, + FieldTBAAInfo); + if (RecordCVR & Qualifiers::Volatile) + RefLVal.getQuals().setVolatile(true); + addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); + + // Qualifiers on the struct don't apply to the referencee. + RecordCVR = 0; + FieldType = FieldType->getPointeeType(); } } @@ -3546,36 +3781,19 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, // for both unions and structs. A union needs a bitcast, a struct element // will need a bitcast if the LLVM type laid out doesn't match the desired // type. - addr = Builder.CreateElementBitCast(addr, - CGM.getTypes().ConvertTypeForMem(type), - field->getName()); + addr = Builder.CreateElementBitCast( + addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); if (field->hasAttr<AnnotateAttr>()) addr = EmitFieldAnnotations(field, addr); - LValue LV = MakeAddrLValue(addr, type, fieldAlignSource); - LV.getQuals().addCVRQualifiers(cvr); - if (TBAAPath) { - const ASTRecordLayout &Layout = - getContext().getASTRecordLayout(field->getParent()); - // Set the base type to be the base type of the base LValue and - // update offset to be relative to the base type. - LV.setTBAABaseType(mayAlias ? getContext().CharTy : base.getTBAABaseType()); - LV.setTBAAOffset(mayAlias ? 0 : base.getTBAAOffset() + - Layout.getFieldOffset(field->getFieldIndex()) / - getContext().getCharWidth()); - } + LValue LV = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); + LV.getQuals().addCVRQualifiers(RecordCVR); // __weak attribute on a field is ignored. if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) LV.getQuals().removeObjCGCAttr(); - // Fields of may_alias structs act like 'char' for TBAA purposes. - // FIXME: this should get propagated down through anonymous structs - // and unions. - if (mayAlias && LV.getTBAAInfo()) - LV.setTBAAInfo(CGM.getTBAAInfo(getContext().CharTy)); - return LV; } @@ -3593,9 +3811,14 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, llvm::Type *llvmType = ConvertTypeForMem(FieldType); V = Builder.CreateElementBitCast(V, llvmType, Field->getName()); - // TODO: access-path TBAA? - auto FieldAlignSource = getFieldAlignmentSource(Base.getAlignmentSource()); - return MakeAddrLValue(V, FieldType, FieldAlignSource); + // TODO: Generate TBAA information that describes this access as a structure + // member access and not just an access to an object of the field's type. This + // should be similar to what we do in EmitLValueForField(). + LValueBaseInfo BaseInfo = Base.getBaseInfo(); + AlignmentSource FieldAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(FieldAlignSource)); + return MakeAddrLValue(V, FieldType, FieldBaseInfo, + CGM.getTBAAInfoForSubobject(Base, FieldType)); } LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ @@ -3706,8 +3929,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { phi->addIncoming(rhs->getPointer(), rhsBlock); Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment())); AlignmentSource alignSource = - std::max(lhs->getAlignmentSource(), rhs->getAlignmentSource()); - return MakeAddrLValue(result, expr->getType(), alignSource); + std::max(lhs->getBaseInfo().getAlignmentSource(), + rhs->getBaseInfo().getAlignmentSource()); + TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForConditionalOperator( + lhs->getTBAAInfo(), rhs->getTBAAInfo()); + return MakeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource), + TBAAInfo); } else { assert((lhs || rhs) && "both operands of glvalue conditional are throw-expressions?"); @@ -3805,7 +4032,11 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { This, DerivedClassDecl, E->path_begin(), E->path_end(), /*NullCheckValue=*/false, E->getExprLoc()); - return MakeAddrLValue(Base, E->getType(), LV.getAlignmentSource()); + // TODO: Support accesses to members of base classes in TBAA. For now, we + // conservatively pretend that the complete object is of the base class + // type. + return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ToUnion: return EmitAggExprToLValue(E); @@ -3832,7 +4063,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_DerivedCast, E->getLocStart()); - return MakeAddrLValue(Derived, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_LValueBitCast: { // This must be a reinterpret_cast (or c-style equivalent). @@ -3848,13 +4080,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_UnrelatedCast, E->getLocStart()); - return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); Address V = Builder.CreateElementBitCast(LV.getAddress(), ConvertType(E->getType())); - return MakeAddrLValue(V, E->getType(), LV.getAlignmentSource()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ZeroToOCLQueue: llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid"); @@ -4023,6 +4257,8 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { RValue RV = EmitAnyExpr(E->getRHS()); LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + if (RV.isScalar()) + EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); EmitStoreThroughLValue(RV, LV); return LV; } @@ -4060,8 +4296,7 @@ LValue CodeGenFunction::EmitCXXConstructLValue(const CXXConstructExpr *E) { && "binding l-value to type which needs a temporary"); AggValueSlot Slot = CreateAggTemp(E->getType()); EmitCXXConstructExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue @@ -4085,16 +4320,14 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { Slot.setExternallyDestructed(); EmitAggExpr(E->getSubExpr(), Slot); EmitCXXTemporary(E->getTemporary(), E->getType(), Slot.getAddress()); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); EmitLambdaExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - AlignmentSource::Decl); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { @@ -4196,10 +4429,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee SanitizerScope SanScope(this); llvm::Constant *FTRTTIConst = CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true); - llvm::Type *PrefixStructTyElems[] = { - PrefixSig->getType(), - FTRTTIConst->getType() - }; + llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty}; llvm::StructType *PrefixStructTy = llvm::StructType::get( CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true); @@ -4220,8 +4450,10 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee EmitBlock(TypeCheck); llvm::Value *CalleeRTTIPtr = Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 1); - llvm::Value *CalleeRTTI = + llvm::Value *CalleeRTTIEncoded = Builder.CreateAlignedLoad(CalleeRTTIPtr, getPointerAlign()); + llvm::Value *CalleeRTTI = + DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded); llvm::Value *CalleeRTTIMatch = Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst); llvm::Constant *StaticData[] = { @@ -4243,7 +4475,12 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee SanitizerScope SanScope(this); EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); - llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); + llvm::Metadata *MD; + if (CGM.getCodeGenOpts().SanitizeCfiICallGeneralizePointers) + MD = CGM.CreateMetadataIdentifierGeneralized(QualType(FnType, 0)); + else + MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CalleePtr = Callee.getFunctionPointer(); @@ -4350,12 +4587,13 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { const MemberPointerType *MPT = E->getRHS()->getType()->getAs<MemberPointerType>(); - AlignmentSource AlignSource; + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; Address MemberAddr = - EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, - &AlignSource); + EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo, + &TBAAInfo); - return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), AlignSource); + return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo, TBAAInfo); } /// Given the address of a temporary variable, produce an r-value of @@ -4418,7 +4656,6 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, if (ov == resultExpr && ov->isRValue() && !forLValue && CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) { CGF.EmitAggExpr(ov->getSourceExpr(), slot); - LValue LV = CGF.MakeAddrLValue(slot.getAddress(), ov->getType(), AlignmentSource::Decl); opaqueData = OVMA::bind(CGF, ov, LV); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 009244784e..1ab8433864 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -111,30 +111,20 @@ public: void VisitGenericSelectionExpr(GenericSelectionExpr *GE) { Visit(GE->getResultExpr()); } + void VisitCoawaitExpr(CoawaitExpr *E) { + CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused); + } + void VisitCoyieldExpr(CoyieldExpr *E) { + CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused); + } + void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); } void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) { return Visit(E->getReplacement()); } // l-values. - void VisitDeclRefExpr(DeclRefExpr *E) { - // For aggregates, we should always be able to emit the variable - // as an l-value unless it's a reference. This is due to the fact - // that we can't actually ever see a normal l2r conversion on an - // aggregate in C++, and in C there's no language standard - // actively preventing us from listing variables in the captures - // list of a block. - if (E->getDecl()->getType()->isReferenceType()) { - if (CodeGenFunction::ConstantEmission result - = CGF.tryEmitAsConstant(E)) { - EmitFinalDestCopy(E->getType(), result.getReferenceLValue(CGF, E)); - return; - } - } - - EmitAggLoadOfLValue(E); - } - + void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); } void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); } void VisitUnaryDeref(UnaryOperator *E) { EmitAggLoadOfLValue(E); } void VisitStringLiteral(StringLiteral *E) { EmitAggLoadOfLValue(E); } @@ -505,12 +495,20 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, currentElement->addIncoming(element, entryBB); // Emit the actual filler expression. - LValue elementLV = - CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType); - if (filler) - EmitInitializationToLValue(filler, elementLV); - else - EmitNullInitializationToLValue(elementLV); + { + // C++1z [class.temporary]p5: + // when a default constructor is called to initialize an element of + // an array with no corresponding initializer [...] the destruction of + // every temporary created in a default argument is sequenced before + // the construction of the next array element, if any + CodeGenFunction::RunCleanupsScope CleanupsScope(CGF); + LValue elementLV = + CGF.MakeAddrLValue(Address(currentElement, elementAlign), elementType); + if (filler) + EmitInitializationToLValue(filler, elementLV); + else + EmitNullInitializationToLValue(elementLV); + } // Move on to the next element. llvm::Value *nextElement = @@ -1267,7 +1265,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Store the initializer into the field. EmitInitializationToLValue(E->getInit(curInitIndex++), LV); } else { - // We're out of initalizers; default-initialize to null + // We're out of initializers; default-initialize to null EmitNullInitializationToLValue(LV); } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 04826916f7..3eb4bd6b90 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -16,6 +16,7 @@ #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "ConstantEmitter.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/CallSite.h" @@ -199,7 +200,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier; const CXXMethodDecl *DevirtualizedMethod = nullptr; - if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) { + if (CanUseVirtualCall && + MD->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) { const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); assert(DevirtualizedMethod); @@ -301,9 +303,14 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CallLoc = CE->getExprLoc(); SanitizerSet SkippedChecks; - if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) - if (IsDeclRefOrWrappedCXXThis(CMCE->getImplicitObjectArgument())) + if (const auto *CMCE = dyn_cast<CXXMemberCallExpr>(CE)) { + auto *IOA = CMCE->getImplicitObjectArgument(); + bool IsImplicitObjectCXXThis = IsWrappedCXXThis(IOA); + if (IsImplicitObjectCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) SkippedChecks.set(SanitizerKind::Null, true); + } EmitTypeCheck( isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall : CodeGenFunction::TCK_MemberCall, @@ -675,8 +682,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // Emit the array size expression. // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. - numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(), - CGF.getContext().getSizeType(), &CGF); + numElements = + ConstantEmitter(CGF).tryEmitAbstract(e->getArraySize(), e->getType()); if (!numElements) numElements = CGF.EmitScalarExpr(e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); @@ -1275,10 +1282,10 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { // FIXME: Add addAttribute to CallSite. if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke)) - CI->addAttribute(llvm::AttributeSet::FunctionIndex, + CI->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke)) - II->addAttribute(llvm::AttributeSet::FunctionIndex, + II->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Builtin); else llvm_unreachable("unexpected kind of call instruction"); @@ -1304,29 +1311,44 @@ RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, llvm_unreachable("predeclared global operator new/delete is missing"); } -static std::pair<bool, bool> -shouldPassSizeAndAlignToUsualDelete(const FunctionProtoType *FPT) { +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + bool DestroyingDelete = false; + bool Size = false; + bool Alignment = false; +}; +} + +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *FD) { + UsualDeleteParams Params; + + const FunctionProtoType *FPT = FD->getType()->castAs<FunctionProtoType>(); auto AI = FPT->param_type_begin(), AE = FPT->param_type_end(); // The first argument is always a void*. ++AI; - // Figure out what other parameters we should be implicitly passing. - bool PassSize = false; - bool PassAlignment = false; + // The next parameter may be a std::destroying_delete_t. + if (FD->isDestroyingOperatorDelete()) { + Params.DestroyingDelete = true; + assert(AI != AE); + ++AI; + } + // Figure out what other parameters we should be implicitly passing. if (AI != AE && (*AI)->isIntegerType()) { - PassSize = true; + Params.Size = true; ++AI; } if (AI != AE && (*AI)->isAlignValT()) { - PassAlignment = true; + Params.Alignment = true; ++AI; } assert(AI == AE && "unexpected usual deallocation function parameter"); - return {PassSize, PassAlignment}; + return Params; } namespace { @@ -1379,25 +1401,27 @@ namespace { OperatorDelete->getType()->getAs<FunctionProtoType>(); CallArgList DeleteArgs; - // The first argument is always a void*. + // The first argument is always a void* (or C* for a destroying operator + // delete for class type C). DeleteArgs.add(Traits::get(CGF, Ptr), FPT->getParamType(0)); // Figure out what other parameters we should be implicitly passing. - bool PassSize = false; - bool PassAlignment = false; + UsualDeleteParams Params; if (NumPlacementArgs) { // A placement deallocation function is implicitly passed an alignment // if the placement allocation function was, but is never passed a size. - PassAlignment = PassAlignmentToPlacementDelete; + Params.Alignment = PassAlignmentToPlacementDelete; } else { // For a non-placement new-expression, 'operator delete' can take a // size and/or an alignment if it has the right parameters. - std::tie(PassSize, PassAlignment) = - shouldPassSizeAndAlignToUsualDelete(FPT); + Params = getUsualDeleteParams(OperatorDelete); } + assert(!Params.DestroyingDelete && + "should not call destroying delete in a new-expression"); + // The second argument can be a std::size_t (for non-placement delete). - if (PassSize) + if (Params.Size) DeleteArgs.add(Traits::get(CGF, AllocSize), CGF.getContext().getSizeType()); @@ -1405,7 +1429,7 @@ namespace { // is an enum whose underlying type is std::size_t. // FIXME: Use the right type as the parameter type. Note that in a call // to operator delete(size_t, ...), we may not have it available. - if (PassAlignment) + if (Params.Alignment) DeleteArgs.add(RValue::get(llvm::ConstantInt::get( CGF.SizeTy, AllocAlign.getQuantity())), CGF.getContext().getSizeType()); @@ -1526,13 +1550,13 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { assert(E->getNumPlacementArgs() == 1); const Expr *arg = *E->placement_arguments().begin(); - AlignmentSource alignSource; - allocation = EmitPointerWithAlignment(arg, &alignSource); + LValueBaseInfo BaseInfo; + allocation = EmitPointerWithAlignment(arg, &BaseInfo); // The pointer expression will, in many cases, be an opaque void*. // In these cases, discard the computed alignment and use the // formal alignment of the allocated type. - if (alignSource != AlignmentSource::Decl) + if (BaseInfo.getAlignmentSource() != AlignmentSource::Decl) allocation = Address(allocation.getPointer(), allocAlign); // Set up allocatorArgs for the call to operator delete if it's not @@ -1653,8 +1677,9 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // Passing pointer through invariant.group.barrier to avoid propagation of // vptrs information which may be included in previous type. + // To not break LTO with different optimizations levels, we do it regardless + // of optimization level. if (CGM.getCodeGenOpts().StrictVTablePointers && - CGM.getCodeGenOpts().OptimizationLevel > 0 && allocator->isReservedGlobalPlacementOperator()) result = Address(Builder.CreateInvariantGroupBarrier(result.getPointer()), result.getAlignment()); @@ -1707,9 +1732,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, CallArgList DeleteArgs; - std::pair<bool, bool> PassSizeAndAlign = - shouldPassSizeAndAlignToUsualDelete(DeleteFTy); - + auto Params = getUsualDeleteParams(DeleteFD); auto ParamTypeIt = DeleteFTy->param_type_begin(); // Pass the pointer itself. @@ -1717,8 +1740,16 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, llvm::Value *DeletePtr = Builder.CreateBitCast(Ptr, ConvertType(ArgTy)); DeleteArgs.add(RValue::get(DeletePtr), ArgTy); + // Pass the std::destroying_delete tag if present. + if (Params.DestroyingDelete) { + QualType DDTag = *ParamTypeIt++; + // Just pass an 'undef'. We expect the tag type to be an empty struct. + auto *V = llvm::UndefValue::get(getTypes().ConvertType(DDTag)); + DeleteArgs.add(RValue::get(V), DDTag); + } + // Pass the size if the delete function has a size_t parameter. - if (PassSizeAndAlign.first) { + if (Params.Size) { QualType SizeType = *ParamTypeIt++; CharUnits DeleteTypeSize = getContext().getTypeSizeInChars(DeleteTy); llvm::Value *Size = llvm::ConstantInt::get(ConvertType(SizeType), @@ -1737,7 +1768,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, } // Pass the alignment if the delete function has an align_val_t parameter. - if (PassSizeAndAlign.second) { + if (Params.Alignment) { QualType AlignValType = *ParamTypeIt++; CharUnits DeleteTypeAlign = getContext().toCharUnitsFromBits( getContext().getTypeAlignIfKnown(DeleteTy)); @@ -1779,6 +1810,21 @@ CodeGenFunction::pushCallObjectDeleteCleanup(const FunctionDecl *OperatorDelete, OperatorDelete, ElementType); } +/// Emit the code for deleting a single object with a destroying operator +/// delete. If the element type has a non-virtual destructor, Ptr has already +/// been converted to the type of the parameter of 'operator delete'. Otherwise +/// Ptr points to an object of the static type. +static void EmitDestroyingObjectDelete(CodeGenFunction &CGF, + const CXXDeleteExpr *DE, Address Ptr, + QualType ElementType) { + auto *Dtor = ElementType->getAsCXXRecordDecl()->getDestructor(); + if (Dtor && Dtor->isVirtual()) + CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, + Dtor); + else + CGF.EmitDeleteCall(DE->getOperatorDelete(), Ptr.getPointer(), ElementType); +} + /// Emit the code for deleting a single object. static void EmitObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE, @@ -1793,6 +1839,9 @@ static void EmitObjectDelete(CodeGenFunction &CGF, DE->getExprLoc(), Ptr.getPointer(), ElementType); + const FunctionDecl *OperatorDelete = DE->getOperatorDelete(); + assert(!OperatorDelete->isDestroyingOperatorDelete()); + // Find the destructor for the type, if applicable. If the // destructor is virtual, we'll just emit the vcall and return. const CXXDestructorDecl *Dtor = nullptr; @@ -1812,7 +1861,6 @@ static void EmitObjectDelete(CodeGenFunction &CGF, // Make sure that we call delete even if the dtor throws. // This doesn't have to a conditional cleanup because we're going // to pop it off in a second. - const FunctionDecl *OperatorDelete = DE->getOperatorDelete(); CGF.EHStack.pushCleanup<CallObjectDelete>(NormalAndEHCleanup, Ptr.getPointer(), OperatorDelete, ElementType); @@ -1924,10 +1972,19 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { Builder.CreateCondBr(IsNull, DeleteEnd, DeleteNotNull); EmitBlock(DeleteNotNull); + QualType DeleteTy = E->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (E->getOperatorDelete()->isDestroyingOperatorDelete()) { + EmitDestroyingObjectDelete(*this, E, Ptr, DeleteTy); + EmitBlock(DeleteEnd); + return; + } + // We might be deleting a pointer to array. If so, GEP down to the // first non-array element. // (this assumes that A(*)[3][7] is converted to [3 x [7 x %A]]*) - QualType DeleteTy = Arg->getType()->getAs<PointerType>()->getPointeeType(); if (DeleteTy->isConstantArrayType()) { llvm::Value *Zero = Builder.getInt32(0); SmallVector<llvm::Value*,8> GEP; diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 22e7fa64db..e860b3045f 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -110,18 +110,32 @@ public: VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { return Visit(PE->getReplacement()); } + ComplexPairTy VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getComplexVal(); + } + ComplexPairTy VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getComplexVal(); + } + ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + + ComplexPairTy emitConstant(const CodeGenFunction::ConstantEmission &Constant, + Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), + E->getExprLoc()); + + llvm::Constant *pair = Constant.getValue(); + return ComplexPairTy(pair->getAggregateElement(0U), + pair->getAggregateElement(1U)); + } // l-values. ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) { - if (CodeGenFunction::ConstantEmission result = CGF.tryEmitAsConstant(E)) { - if (result.isReference()) - return EmitLoadOfLValue(result.getReferenceLValue(CGF, E), - E->getExprLoc()); - - llvm::Constant *pair = result.getValue(); - return ComplexPairTy(pair->getAggregateElement(0U), - pair->getAggregateElement(1U)); - } + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) + return emitConstant(Constant, E); return EmitLoadOfLValue(E); } ComplexPairTy VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { @@ -131,7 +145,14 @@ public: return CGF.EmitObjCMessageExpr(E).getComplexVal(); } ComplexPairTy VisitArraySubscriptExpr(Expr *E) { return EmitLoadOfLValue(E); } - ComplexPairTy VisitMemberExpr(const Expr *E) { return EmitLoadOfLValue(E); } + ComplexPairTy VisitMemberExpr(MemberExpr *ME) { + if (CodeGenFunction::ConstantEmission Constant = + CGF.tryEmitAsConstant(ME)) { + CGF.EmitIgnoredExpr(ME->getBase()); + return emitConstant(Constant, ME); + } + return EmitLoadOfLValue(ME); + } ComplexPairTy VisitOpaqueValueExpr(OpaqueValueExpr *E) { if (E->isGLValue()) return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc()); @@ -754,7 +775,6 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { if (!LHSi) LibCallOp.LHS.second = llvm::Constant::getNullValue(LHSr->getType()); - StringRef LibCallName; switch (LHSr->getType()->getTypeID()) { default: llvm_unreachable("Unsupported floating point type!"); diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 3db15c646f..3e35d4cb9c 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -16,6 +16,7 @@ #include "CGObjCRuntime.h" #include "CGRecordLayout.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" @@ -37,25 +38,26 @@ namespace { class ConstExprEmitter; class ConstStructBuilder { CodeGenModule &CGM; - CodeGenFunction *CGF; + ConstantEmitter &Emitter; bool Packed; CharUnits NextFieldOffsetInChars; CharUnits LLVMStructAlignment; SmallVector<llvm::Constant *, 32> Elements; public: - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CFG, - ConstExprEmitter *Emitter, + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, - InitListExpr *Updater); - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF, - InitListExpr *ILE); - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF, + InitListExpr *Updater, + QualType ValTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, QualType StructTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, const APValue &Value, QualType ValTy); private: - ConstStructBuilder(CodeGenModule &CGM, CodeGenFunction *CGF) - : CGM(CGM), CGF(CGF), Packed(false), + ConstStructBuilder(ConstantEmitter &emitter) + : CGM(emitter.CGM), Emitter(emitter), Packed(false), NextFieldOffsetInChars(CharUnits::Zero()), LLVMStructAlignment(CharUnits::One()) { } @@ -76,7 +78,7 @@ private: bool Build(InitListExpr *ILE); bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base, InitListExpr *Updater); - void Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, + bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); llvm::Constant *Finalize(QualType Ty); @@ -201,7 +203,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, unsigned NewFieldWidth = FieldSize - BitsInPreviousByte; if (CGM.getDataLayout().isBigEndian()) { - Tmp = Tmp.lshr(NewFieldWidth); + Tmp.lshrInPlace(NewFieldWidth); Tmp = Tmp.trunc(BitsInPreviousByte); // We want the remaining high bits. @@ -210,7 +212,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, Tmp = Tmp.trunc(BitsInPreviousByte); // We want the remaining low bits. - FieldValue = FieldValue.lshr(BitsInPreviousByte); + FieldValue.lshrInPlace(BitsInPreviousByte); FieldValue = FieldValue.trunc(NewFieldWidth); } } @@ -273,7 +275,7 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, // We want the low bits. Tmp = FieldValue.trunc(CharWidth); - FieldValue = FieldValue.lshr(CharWidth); + FieldValue.lshrInPlace(CharWidth); } Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp)); @@ -391,10 +393,10 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { // we just use explicit null values for them. llvm::Constant *EltInit; if (ElementNo < ILE->getNumInits()) - EltInit = CGM.EmitConstantExpr(ILE->getInit(ElementNo++), - Field->getType(), CGF); + EltInit = Emitter.tryEmitPrivateForMemory(ILE->getInit(ElementNo++), + Field->getType()); else - EltInit = CGM.EmitNullConstant(Field->getType()); + EltInit = Emitter.emitNullForMemory(Field->getType()); if (!EltInit) return false; @@ -431,7 +433,7 @@ struct BaseInfo { }; } -void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, +bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits Offset) { @@ -486,8 +488,9 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, const APValue &FieldValue = RD->isUnion() ? Val.getUnionValue() : Val.getStructField(FieldNo); llvm::Constant *EltInit = - CGM.EmitConstantValueForMemory(FieldValue, Field->getType(), CGF); - assert(EltInit && "EmitConstantValue can't fail"); + Emitter.tryEmitPrivateForMemory(FieldValue, Field->getType()); + if (!EltInit) + return false; if (!Field->isBitField()) { // Handle non-bitfield members. @@ -498,6 +501,8 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, cast<llvm::ConstantInt>(EltInit)); } } + + return true; } llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { @@ -559,37 +564,37 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { return Result; } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, - ConstExprEmitter *Emitter, +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, - InitListExpr *Updater) { - ConstStructBuilder Builder(CGM, CGF); - if (!Builder.Build(Emitter, Base, Updater)) + InitListExpr *Updater, + QualType ValTy) { + ConstStructBuilder Builder(Emitter); + if (!Builder.Build(ExprEmitter, Base, Updater)) return nullptr; - return Builder.Finalize(Updater->getType()); + return Builder.Finalize(ValTy); } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, - InitListExpr *ILE) { - ConstStructBuilder Builder(CGM, CGF); +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, + QualType ValTy) { + ConstStructBuilder Builder(Emitter); if (!Builder.Build(ILE)) return nullptr; - return Builder.Finalize(ILE->getType()); + return Builder.Finalize(ValTy); } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, const APValue &Val, QualType ValTy) { - ConstStructBuilder Builder(CGM, CGF); + ConstStructBuilder Builder(Emitter); const RecordDecl *RD = ValTy->castAs<RecordType>()->getDecl(); const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD); - Builder.Build(Val, RD, false, CD, CharUnits::Zero()); + if (!Builder.Build(Val, RD, false, CD, CharUnits::Zero())) + return nullptr; return Builder.Finalize(ValTy); } @@ -599,57 +604,86 @@ llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, // ConstExprEmitter //===----------------------------------------------------------------------===// +static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, + CodeGenFunction *CGF, + const CompoundLiteralExpr *E) { + CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType()); + if (llvm::GlobalVariable *Addr = + CGM.getAddrOfConstantCompoundLiteralIfEmitted(E)) + return ConstantAddress(Addr, Align); + + LangAS addressSpace = E->getType().getAddressSpace(); + + ConstantEmitter emitter(CGM, CGF); + llvm::Constant *C = emitter.tryEmitForInitializer(E->getInitializer(), + addressSpace, E->getType()); + if (!C) { + assert(!E->isFileScope() && + "file-scope compound literal did not have constant initializer!"); + return ConstantAddress::invalid(); + } + + auto GV = new llvm::GlobalVariable(CGM.getModule(), C->getType(), + CGM.isTypeConstant(E->getType(), true), + llvm::GlobalValue::InternalLinkage, + C, ".compoundliteral", nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(addressSpace)); + emitter.finalize(GV); + GV->setAlignment(Align.getQuantity()); + CGM.setAddrOfConstantCompoundLiteral(E, GV); + return ConstantAddress(GV, Align); +} + /// This class only needs to handle two cases: /// 1) Literals (this is used by APValue emission to emit literals). /// 2) Arrays, structs and unions (outside C++11 mode, we don't currently /// constant fold these types). class ConstExprEmitter : - public StmtVisitor<ConstExprEmitter, llvm::Constant*> { + public StmtVisitor<ConstExprEmitter, llvm::Constant*, QualType> { CodeGenModule &CGM; - CodeGenFunction *CGF; + ConstantEmitter &Emitter; llvm::LLVMContext &VMContext; public: - ConstExprEmitter(CodeGenModule &cgm, CodeGenFunction *cgf) - : CGM(cgm), CGF(cgf), VMContext(cgm.getLLVMContext()) { + ConstExprEmitter(ConstantEmitter &emitter) + : CGM(emitter.CGM), Emitter(emitter), VMContext(CGM.getLLVMContext()) { } //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// - llvm::Constant *VisitStmt(Stmt *S) { + llvm::Constant *VisitStmt(Stmt *S, QualType T) { return nullptr; } - llvm::Constant *VisitParenExpr(ParenExpr *PE) { - return Visit(PE->getSubExpr()); + llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) { + return Visit(PE->getSubExpr(), T); } llvm::Constant * - VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { - return Visit(PE->getReplacement()); + VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE, + QualType T) { + return Visit(PE->getReplacement(), T); } - llvm::Constant *VisitGenericSelectionExpr(GenericSelectionExpr *GE) { - return Visit(GE->getResultExpr()); + llvm::Constant *VisitGenericSelectionExpr(GenericSelectionExpr *GE, + QualType T) { + return Visit(GE->getResultExpr(), T); } - llvm::Constant *VisitChooseExpr(ChooseExpr *CE) { - return Visit(CE->getChosenSubExpr()); + llvm::Constant *VisitChooseExpr(ChooseExpr *CE, QualType T) { + return Visit(CE->getChosenSubExpr(), T); } - llvm::Constant *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { - return Visit(E->getInitializer()); + llvm::Constant *VisitCompoundLiteralExpr(CompoundLiteralExpr *E, QualType T) { + return Visit(E->getInitializer(), T); } - llvm::Constant *VisitCastExpr(CastExpr* E) { + llvm::Constant *VisitCastExpr(CastExpr *E, QualType destType) { if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E)) - CGM.EmitExplicitCastExprType(ECE, CGF); + CGM.EmitExplicitCastExprType(ECE, Emitter.CGF); Expr *subExpr = E->getSubExpr(); - llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF); - if (!C) return nullptr; - - llvm::Type *destType = ConvertType(E->getType()); switch (E->getCastKind()) { case CK_ToUnion: { @@ -657,14 +691,22 @@ public: assert(E->getType()->isUnionType() && "Destination type is not union type!"); + auto field = E->getTargetUnionField(); + + auto C = Emitter.tryEmitPrivateForMemory(subExpr, field->getType()); + if (!C) return nullptr; + + auto destTy = ConvertType(destType); + if (C->getType() == destTy) return C; + // Build a struct with the union sub-element as the first member, - // and padded to the appropriate size + // and padded to the appropriate size. SmallVector<llvm::Constant*, 2> Elts; SmallVector<llvm::Type*, 2> Types; Elts.push_back(C); Types.push_back(C->getType()); unsigned CurSize = CGM.getDataLayout().getTypeAllocSize(C->getType()); - unsigned TotalSize = CGM.getDataLayout().getTypeAllocSize(destType); + unsigned TotalSize = CGM.getDataLayout().getTypeAllocSize(destTy); assert(CurSize <= TotalSize && "Union size mismatch!"); if (unsigned NumPadBytes = TotalSize - CurSize) { @@ -676,20 +718,26 @@ public: Types.push_back(Ty); } - llvm::StructType* STy = - llvm::StructType::get(C->getType()->getContext(), Types, false); + llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false); return llvm::ConstantStruct::get(STy, Elts); } - case CK_AddressSpaceConversion: - return llvm::ConstantExpr::getAddrSpaceCast(C, destType); + case CK_AddressSpaceConversion: { + auto C = Emitter.tryEmitPrivate(subExpr, subExpr->getType()); + if (!C) return nullptr; + LangAS destAS = E->getType()->getPointeeType().getAddressSpace(); + LangAS srcAS = subExpr->getType()->getPointeeType().getAddressSpace(); + llvm::Type *destTy = ConvertType(E->getType()); + return CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGM, C, srcAS, + destAS, destTy); + } case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: case CK_NoOp: case CK_ConstructorConversion: - return C; + return Visit(subExpr, destType); case CK_IntToOCLSampler: llvm_unreachable("global sampler variables are not generated"); @@ -701,8 +749,11 @@ public: case CK_ReinterpretMemberPointer: case CK_DerivedToBaseMemberPointer: - case CK_BaseToDerivedMemberPointer: + case CK_BaseToDerivedMemberPointer: { + auto C = Emitter.tryEmitPrivate(subExpr, subExpr->getType()); + if (!C) return nullptr; return CGM.getCXXABI().EmitMemberPointerConversion(E, C); + } // These will never be supported. case CK_ObjCObjectLValueCast: @@ -759,27 +810,28 @@ public: llvm_unreachable("Invalid CastKind"); } - llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { - return Visit(DAE->getExpr()); + llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE, QualType T) { + return Visit(DAE->getExpr(), T); } - llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { + llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE, QualType T) { // No need for a DefaultInitExprScope: we don't handle 'this' in a // constant expression. - return Visit(DIE->getExpr()); + return Visit(DIE->getExpr(), T); } - llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) { + llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E, QualType T) { if (!E->cleanupsHaveSideEffects()) - return Visit(E->getSubExpr()); + return Visit(E->getSubExpr(), T); return nullptr; } - llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) { - return Visit(E->GetTemporaryExpr()); + llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, + QualType T) { + return Visit(E->GetTemporaryExpr(), T); } - llvm::Constant *EmitArrayInitialization(InitListExpr *ILE) { + llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ILE->getType())); llvm::Type *ElemTy = AType->getElementType(); @@ -790,13 +842,14 @@ public: // initialise any elements that have not been initialised explicitly unsigned NumInitableElts = std::min(NumInitElements, NumElements); + QualType EltType = CGM.getContext().getAsArrayType(T)->getElementType(); + // Initialize remaining array elements. - // FIXME: This doesn't handle member pointers correctly! llvm::Constant *fillC; if (Expr *filler = ILE->getArrayFiller()) - fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF); + fillC = Emitter.tryEmitAbstractForMemory(filler, EltType); else - fillC = llvm::Constant::getNullValue(ElemTy); + fillC = Emitter.emitNullForMemory(EltType); if (!fillC) return nullptr; @@ -805,13 +858,13 @@ public: return llvm::ConstantAggregateZero::get(AType); // Copy initializer elements. - std::vector<llvm::Constant*> Elts; + SmallVector<llvm::Constant*, 16> Elts; Elts.reserve(NumInitableElts + NumElements); bool RewriteType = false; for (unsigned i = 0; i < NumInitableElts; ++i) { Expr *Init = ILE->getInit(i); - llvm::Constant *C = CGM.EmitConstantExpr(Init, Init->getType(), CGF); + llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType); if (!C) return nullptr; RewriteType |= (C->getType() != ElemTy); @@ -835,33 +888,33 @@ public: return llvm::ConstantArray::get(AType, Elts); } - llvm::Constant *EmitRecordInitialization(InitListExpr *ILE) { - return ConstStructBuilder::BuildStruct(CGM, CGF, ILE); + llvm::Constant *EmitRecordInitialization(InitListExpr *ILE, QualType T) { + return ConstStructBuilder::BuildStruct(Emitter, ILE, T); } - llvm::Constant *VisitImplicitValueInitExpr(ImplicitValueInitExpr* E) { - return CGM.EmitNullConstant(E->getType()); + llvm::Constant *VisitImplicitValueInitExpr(ImplicitValueInitExpr* E, + QualType T) { + return CGM.EmitNullConstant(T); } - llvm::Constant *VisitInitListExpr(InitListExpr *ILE) { + llvm::Constant *VisitInitListExpr(InitListExpr *ILE, QualType T) { if (ILE->isTransparent()) - return Visit(ILE->getInit(0)); + return Visit(ILE->getInit(0), T); if (ILE->getType()->isArrayType()) - return EmitArrayInitialization(ILE); + return EmitArrayInitialization(ILE, T); if (ILE->getType()->isRecordType()) - return EmitRecordInitialization(ILE); + return EmitRecordInitialization(ILE, T); return nullptr; } llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base, - InitListExpr *Updater) { - QualType ExprType = Updater->getType(); - - if (ExprType->isArrayType()) { - llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ExprType)); + InitListExpr *Updater, + QualType destType) { + if (auto destAT = CGM.getContext().getAsArrayType(destType)) { + llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(destType)); llvm::Type *ElemType = AType->getElementType(); unsigned NumInitElements = Updater->getNumInits(); @@ -870,12 +923,12 @@ public: std::vector<llvm::Constant *> Elts; Elts.reserve(NumElements); - if (llvm::ConstantDataArray *DataArray = - dyn_cast<llvm::ConstantDataArray>(Base)) + QualType destElemType = destAT->getElementType(); + + if (auto DataArray = dyn_cast<llvm::ConstantDataArray>(Base)) for (unsigned i = 0; i != NumElements; ++i) Elts.push_back(DataArray->getElementAsConstant(i)); - else if (llvm::ConstantArray *Array = - dyn_cast<llvm::ConstantArray>(Base)) + else if (auto Array = dyn_cast<llvm::ConstantArray>(Base)) for (unsigned i = 0; i != NumElements; ++i) Elts.push_back(Array->getOperand(i)); else @@ -884,7 +937,7 @@ public: llvm::Constant *fillC = nullptr; if (Expr *filler = Updater->getArrayFiller()) if (!isa<NoInitExpr>(filler)) - fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF); + fillC = Emitter.tryEmitAbstractForMemory(filler, destElemType); bool RewriteType = (fillC && fillC->getType() != ElemType); for (unsigned i = 0; i != NumElements; ++i) { @@ -897,9 +950,9 @@ public: else if (!Init || isa<NoInitExpr>(Init)) ; // Do nothing. else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE); + Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE, destElemType); else - Elts[i] = CGM.EmitConstantExpr(Init, Init->getType(), CGF); + Elts[i] = Emitter.tryEmitPrivateForMemory(Init, destElemType); if (!Elts[i]) return nullptr; @@ -919,25 +972,24 @@ public: return llvm::ConstantArray::get(AType, Elts); } - if (ExprType->isRecordType()) - return ConstStructBuilder::BuildStruct(CGM, CGF, this, - dyn_cast<llvm::ConstantStruct>(Base), Updater); + if (destType->isRecordType()) + return ConstStructBuilder::BuildStruct(Emitter, this, + dyn_cast<llvm::ConstantStruct>(Base), Updater, destType); return nullptr; } - llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) { - return EmitDesignatedInitUpdater( - CGM.EmitConstantExpr(E->getBase(), E->getType(), CGF), - E->getUpdater()); + llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E, + QualType destType) { + auto C = Visit(E->getBase(), destType); + if (!C) return nullptr; + return EmitDesignatedInitUpdater(C, E->getUpdater(), destType); } - llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) { + llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E, QualType Ty) { if (!E->getConstructor()->isTrivial()) return nullptr; - QualType Ty = E->getType(); - // FIXME: We should not have to call getBaseElementType here. const RecordType *RT = CGM.getContext().getBaseElementType(Ty)->getAs<RecordType>(); @@ -960,26 +1012,23 @@ public: assert(CGM.getContext().hasSameUnqualifiedType(Ty, Arg->getType()) && "argument to copy ctor is of wrong type"); - return Visit(Arg); + return Visit(Arg, Ty); } return CGM.EmitNullConstant(Ty); } - llvm::Constant *VisitStringLiteral(StringLiteral *E) { + llvm::Constant *VisitStringLiteral(StringLiteral *E, QualType T) { return CGM.GetConstantArrayFromStringLiteral(E); } - llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E) { + llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E, QualType T) { // This must be an @encode initializing an array in a static initializer. // Don't emit it as the address of the string, emit the string data itself // as an inline array. std::string Str; CGM.getContext().getObjCEncodingForType(E->getEncodedType(), Str); - QualType T = E->getType(); - if (T->getTypeClass() == Type::TypeOfExpr) - T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType(); - const ConstantArrayType *CAT = cast<ConstantArrayType>(T); + const ConstantArrayType *CAT = CGM.getContext().getAsConstantArrayType(T); // Resize the string to the right size, adding zeros at the end, or // truncating as needed. @@ -987,151 +1036,19 @@ public: return llvm::ConstantDataArray::getString(VMContext, Str, false); } - llvm::Constant *VisitUnaryExtension(const UnaryOperator *E) { - return Visit(E->getSubExpr()); + llvm::Constant *VisitUnaryExtension(const UnaryOperator *E, QualType T) { + return Visit(E->getSubExpr(), T); } // Utility methods llvm::Type *ConvertType(QualType T) { return CGM.getTypes().ConvertType(T); } - -public: - ConstantAddress EmitLValue(APValue::LValueBase LVBase) { - if (const ValueDecl *Decl = LVBase.dyn_cast<const ValueDecl*>()) { - if (Decl->hasAttr<WeakRefAttr>()) - return CGM.GetWeakRefReference(Decl); - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Decl)) - return ConstantAddress(CGM.GetAddrOfFunction(FD), CharUnits::One()); - if (const VarDecl* VD = dyn_cast<VarDecl>(Decl)) { - // We can never refer to a variable with local storage. - if (!VD->hasLocalStorage()) { - CharUnits Align = CGM.getContext().getDeclAlign(VD); - if (VD->isFileVarDecl() || VD->hasExternalStorage()) - return ConstantAddress(CGM.GetAddrOfGlobalVar(VD), Align); - else if (VD->isLocalVarDecl()) { - auto Ptr = CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); - return ConstantAddress(Ptr, Align); - } - } - } - return ConstantAddress::invalid(); - } - - Expr *E = const_cast<Expr*>(LVBase.get<const Expr*>()); - switch (E->getStmtClass()) { - default: break; - case Expr::CompoundLiteralExprClass: { - CompoundLiteralExpr *CLE = cast<CompoundLiteralExpr>(E); - CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType()); - if (llvm::GlobalVariable *Addr = - CGM.getAddrOfConstantCompoundLiteralIfEmitted(CLE)) - return ConstantAddress(Addr, Align); - - llvm::Constant* C = CGM.EmitConstantExpr(CLE->getInitializer(), - CLE->getType(), CGF); - // FIXME: "Leaked" on failure. - if (!C) return ConstantAddress::invalid(); - - auto GV = new llvm::GlobalVariable(CGM.getModule(), C->getType(), - E->getType().isConstant(CGM.getContext()), - llvm::GlobalValue::InternalLinkage, - C, ".compoundliteral", nullptr, - llvm::GlobalVariable::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(E->getType())); - GV->setAlignment(Align.getQuantity()); - CGM.setAddrOfConstantCompoundLiteral(CLE, GV); - return ConstantAddress(GV, Align); - } - case Expr::StringLiteralClass: - return CGM.GetAddrOfConstantStringFromLiteral(cast<StringLiteral>(E)); - case Expr::ObjCEncodeExprClass: - return CGM.GetAddrOfConstantStringFromObjCEncode(cast<ObjCEncodeExpr>(E)); - case Expr::ObjCStringLiteralClass: { - ObjCStringLiteral* SL = cast<ObjCStringLiteral>(E); - ConstantAddress C = - CGM.getObjCRuntime().GenerateConstantString(SL->getString()); - return C.getElementBitCast(ConvertType(E->getType())); - } - case Expr::PredefinedExprClass: { - unsigned Type = cast<PredefinedExpr>(E)->getIdentType(); - if (CGF) { - LValue Res = CGF->EmitPredefinedLValue(cast<PredefinedExpr>(E)); - return cast<ConstantAddress>(Res.getAddress()); - } else if (Type == PredefinedExpr::PrettyFunction) { - return CGM.GetAddrOfConstantCString("top level", ".tmp"); - } - - return CGM.GetAddrOfConstantCString("", ".tmp"); - } - case Expr::AddrLabelExprClass: { - assert(CGF && "Invalid address of label expression outside function."); - llvm::Constant *Ptr = - CGF->GetAddrOfLabel(cast<AddrLabelExpr>(E)->getLabel()); - Ptr = llvm::ConstantExpr::getBitCast(Ptr, ConvertType(E->getType())); - return ConstantAddress(Ptr, CharUnits::One()); - } - case Expr::CallExprClass: { - CallExpr* CE = cast<CallExpr>(E); - unsigned builtin = CE->getBuiltinCallee(); - if (builtin != - Builtin::BI__builtin___CFStringMakeConstantString && - builtin != - Builtin::BI__builtin___NSStringMakeConstantString) - break; - const Expr *Arg = CE->getArg(0)->IgnoreParenCasts(); - const StringLiteral *Literal = cast<StringLiteral>(Arg); - if (builtin == - Builtin::BI__builtin___NSStringMakeConstantString) { - return CGM.getObjCRuntime().GenerateConstantString(Literal); - } - // FIXME: need to deal with UCN conversion issues. - return CGM.GetAddrOfConstantCFString(Literal); - } - case Expr::BlockExprClass: { - StringRef FunctionName; - if (CGF) - FunctionName = CGF->CurFn->getName(); - else - FunctionName = "global"; - - // This is not really an l-value. - llvm::Constant *Ptr = - CGM.GetAddrOfGlobalBlock(cast<BlockExpr>(E), FunctionName); - return ConstantAddress(Ptr, CGM.getPointerAlign()); - } - case Expr::CXXTypeidExprClass: { - CXXTypeidExpr *Typeid = cast<CXXTypeidExpr>(E); - QualType T; - if (Typeid->isTypeOperand()) - T = Typeid->getTypeOperand(CGM.getContext()); - else - T = Typeid->getExprOperand()->getType(); - return ConstantAddress(CGM.GetAddrOfRTTIDescriptor(T), - CGM.getPointerAlign()); - } - case Expr::CXXUuidofExprClass: { - return CGM.GetAddrOfUuidDescriptor(cast<CXXUuidofExpr>(E)); - } - case Expr::MaterializeTemporaryExprClass: { - MaterializeTemporaryExpr *MTE = cast<MaterializeTemporaryExpr>(E); - assert(MTE->getStorageDuration() == SD_Static); - SmallVector<const Expr *, 2> CommaLHSs; - SmallVector<SubobjectAdjustment, 2> Adjustments; - const Expr *Inner = MTE->GetTemporaryExpr() - ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); - return CGM.GetAddrOfGlobalTemporary(MTE, Inner); - } - } - - return ConstantAddress::invalid(); - } }; } // end anonymous namespace. -bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, +bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, InitListExpr *Updater) { assert(Base && "base expression should not be empty"); @@ -1179,9 +1096,10 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, if (!Init || isa<NoInitExpr>(Init)) ; // Do nothing. else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - EltInit = Emitter->EmitDesignatedInitUpdater(EltInit, ChildILE); + EltInit = ExprEmitter->EmitDesignatedInitUpdater(EltInit, ChildILE, + Field->getType()); else - EltInit = CGM.EmitConstantExpr(Init, Field->getType(), CGF); + EltInit = Emitter.tryEmitPrivateForMemory(Init, Field->getType()); ++ElementNo; @@ -1200,26 +1118,294 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, return true; } -llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, - CodeGenFunction *CGF) { +llvm::Constant *ConstantEmitter::validateAndPopAbstract(llvm::Constant *C, + AbstractState saved) { + Abstract = saved.OldValue; + + assert(saved.OldPlaceholdersSize == PlaceholderAddresses.size() && + "created a placeholder while doing an abstract emission?"); + + // No validation necessary for now. + // No cleanup to do for now. + return C; +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForInitializer(const VarDecl &D) { + auto state = pushAbstract(); + auto C = tryEmitPrivateForVarInit(D); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstract(const Expr *E, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(E, destType); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::emitAbstract(const Expr *E, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(E, destType); + C = validateAndPopAbstract(C, state); + if (!C) { + CGM.Error(E->getExprLoc(), + "internal error: could not emit constant value \"abstractly\""); + C = CGM.EmitNullConstant(destType); + } + return C; +} + +llvm::Constant * +ConstantEmitter::emitAbstract(SourceLocation loc, const APValue &value, + QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + C = validateAndPopAbstract(C, state); + if (!C) { + CGM.Error(loc, + "internal error: could not emit constant value \"abstractly\""); + C = CGM.EmitNullConstant(destType); + } + return C; +} + +llvm::Constant *ConstantEmitter::tryEmitForInitializer(const VarDecl &D) { + initializeNonAbstract(D.getType().getAddressSpace()); + return markIfFailed(tryEmitPrivateForVarInit(D)); +} + +llvm::Constant *ConstantEmitter::tryEmitForInitializer(const Expr *E, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + return markIfFailed(tryEmitPrivateForMemory(E, destType)); +} + +llvm::Constant *ConstantEmitter::emitForInitializer(const APValue &value, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + auto C = tryEmitPrivateForMemory(value, destType); + assert(C && "couldn't emit constant value non-abstractly?"); + return C; +} + +llvm::GlobalValue *ConstantEmitter::getCurrentAddrPrivate() { + assert(!Abstract && "cannot get current address for abstract constant"); + + + + // Make an obviously ill-formed global that should blow up compilation + // if it survives. + auto global = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, true, + llvm::GlobalValue::PrivateLinkage, + /*init*/ nullptr, + /*name*/ "", + /*before*/ nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(DestAddressSpace)); + + PlaceholderAddresses.push_back(std::make_pair(nullptr, global)); + + return global; +} + +void ConstantEmitter::registerCurrentAddrPrivate(llvm::Constant *signal, + llvm::GlobalValue *placeholder) { + assert(!PlaceholderAddresses.empty()); + assert(PlaceholderAddresses.back().first == nullptr); + assert(PlaceholderAddresses.back().second == placeholder); + PlaceholderAddresses.back().first = signal; +} + +namespace { + struct ReplacePlaceholders { + CodeGenModule &CGM; + + /// The base address of the global. + llvm::Constant *Base; + llvm::Type *BaseValueTy = nullptr; + + /// The placeholder addresses that were registered during emission. + llvm::DenseMap<llvm::Constant*, llvm::GlobalVariable*> PlaceholderAddresses; + + /// The locations of the placeholder signals. + llvm::DenseMap<llvm::GlobalVariable*, llvm::Constant*> Locations; + + /// The current index stack. We use a simple unsigned stack because + /// we assume that placeholders will be relatively sparse in the + /// initializer, but we cache the index values we find just in case. + llvm::SmallVector<unsigned, 8> Indices; + llvm::SmallVector<llvm::Constant*, 8> IndexValues; + + ReplacePlaceholders(CodeGenModule &CGM, llvm::Constant *base, + ArrayRef<std::pair<llvm::Constant*, + llvm::GlobalVariable*>> addresses) + : CGM(CGM), Base(base), + PlaceholderAddresses(addresses.begin(), addresses.end()) { + } + + void replaceInInitializer(llvm::Constant *init) { + // Remember the type of the top-most initializer. + BaseValueTy = init->getType(); + + // Initialize the stack. + Indices.push_back(0); + IndexValues.push_back(nullptr); + + // Recurse into the initializer. + findLocations(init); + + // Check invariants. + assert(IndexValues.size() == Indices.size() && "mismatch"); + assert(Indices.size() == 1 && "didn't pop all indices"); + + // Do the replacement; this basically invalidates 'init'. + assert(Locations.size() == PlaceholderAddresses.size() && + "missed a placeholder?"); + + // We're iterating over a hashtable, so this would be a source of + // non-determinism in compiler output *except* that we're just + // messing around with llvm::Constant structures, which never itself + // does anything that should be visible in compiler output. + for (auto &entry : Locations) { + assert(entry.first->getParent() == nullptr && "not a placeholder!"); + entry.first->replaceAllUsesWith(entry.second); + entry.first->eraseFromParent(); + } + } + + private: + void findLocations(llvm::Constant *init) { + // Recurse into aggregates. + if (auto agg = dyn_cast<llvm::ConstantAggregate>(init)) { + for (unsigned i = 0, e = agg->getNumOperands(); i != e; ++i) { + Indices.push_back(i); + IndexValues.push_back(nullptr); + + findLocations(agg->getOperand(i)); + + IndexValues.pop_back(); + Indices.pop_back(); + } + return; + } + + // Otherwise, check for registered constants. + while (true) { + auto it = PlaceholderAddresses.find(init); + if (it != PlaceholderAddresses.end()) { + setLocation(it->second); + break; + } + + // Look through bitcasts or other expressions. + if (auto expr = dyn_cast<llvm::ConstantExpr>(init)) { + init = expr->getOperand(0); + } else { + break; + } + } + } + + void setLocation(llvm::GlobalVariable *placeholder) { + assert(Locations.find(placeholder) == Locations.end() && + "already found location for placeholder!"); + + // Lazily fill in IndexValues with the values from Indices. + // We do this in reverse because we should always have a strict + // prefix of indices from the start. + assert(Indices.size() == IndexValues.size()); + for (size_t i = Indices.size() - 1; i != size_t(-1); --i) { + if (IndexValues[i]) { +#ifndef NDEBUG + for (size_t j = 0; j != i + 1; ++j) { + assert(IndexValues[j] && + isa<llvm::ConstantInt>(IndexValues[j]) && + cast<llvm::ConstantInt>(IndexValues[j])->getZExtValue() + == Indices[j]); + } +#endif + break; + } + + IndexValues[i] = llvm::ConstantInt::get(CGM.Int32Ty, Indices[i]); + } + + // Form a GEP and then bitcast to the placeholder type so that the + // replacement will succeed. + llvm::Constant *location = + llvm::ConstantExpr::getInBoundsGetElementPtr(BaseValueTy, + Base, IndexValues); + location = llvm::ConstantExpr::getBitCast(location, + placeholder->getType()); + + Locations.insert({placeholder, location}); + } + }; +} + +void ConstantEmitter::finalize(llvm::GlobalVariable *global) { + assert(InitializedNonAbstract && + "finalizing emitter that was used for abstract emission?"); + assert(!Finalized && "finalizing emitter multiple times"); + assert(global->getInitializer()); + + // Note that we might also be Failed. + Finalized = true; + + if (!PlaceholderAddresses.empty()) { + ReplacePlaceholders(CGM, global, PlaceholderAddresses) + .replaceInInitializer(global->getInitializer()); + PlaceholderAddresses.clear(); // satisfy + } +} + +ConstantEmitter::~ConstantEmitter() { + assert((!InitializedNonAbstract || Finalized || Failed) && + "not finalized after being initialized for non-abstract emission"); + assert(PlaceholderAddresses.empty() && "unhandled placeholders"); +} + +static QualType getNonMemoryType(CodeGenModule &CGM, QualType type) { + if (auto AT = type->getAs<AtomicType>()) { + return CGM.getContext().getQualifiedType(AT->getValueType(), + type.getQualifiers()); + } + return type; +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { // Make a quick check if variable can be default NULL initialized // and avoid going through rest of code which may do, for c++11, // initialization of memory to all NULLs. if (!D.hasLocalStorage()) { - QualType Ty = D.getType(); - if (Ty->isArrayType()) - Ty = Context.getBaseElementType(Ty); + QualType Ty = CGM.getContext().getBaseElementType(D.getType()); if (Ty->isRecordType()) if (const CXXConstructExpr *E = dyn_cast_or_null<CXXConstructExpr>(D.getInit())) { const CXXConstructorDecl *CD = E->getConstructor(); if (CD->isTrivial() && CD->isDefaultConstructor()) - return EmitNullConstant(D.getType()); + return CGM.EmitNullConstant(D.getType()); } } - - if (const APValue *Value = D.evaluateValue()) - return EmitConstantValueForMemory(*Value, D.getType(), CGF); + + QualType destType = D.getType(); + + // Try to emit the initializer. Note that this can allow some things that + // are not allowed by tryEmitPrivateForMemory alone. + if (auto value = D.evaluateValue()) { + return tryEmitPrivateForMemory(*value, destType); + } // FIXME: Implement C++11 [basic.start.init]p2: if the initializer of a // reference is a constant expression, and the reference binds to a temporary, @@ -1227,42 +1413,95 @@ llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, // incorrectly emit a prvalue constant in this case, and the calling code // interprets that as the (pointer) value of the reference, rather than the // desired value of the referee. - if (D.getType()->isReferenceType()) + if (destType->isReferenceType()) return nullptr; const Expr *E = D.getInit(); assert(E && "No initializer to emit"); - llvm::Constant* C = ConstExprEmitter(*this, CGF).Visit(const_cast<Expr*>(E)); - if (C && C->getType()->isIntegerTy(1)) { - llvm::Type *BoolTy = getTypes().ConvertTypeForMem(E->getType()); - C = llvm::ConstantExpr::getZExt(C, BoolTy); + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = + ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForMemory(const Expr *E, QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(E, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForMemory(const Expr *E, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + llvm::Constant *C = tryEmitPrivate(E, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitPrivate(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, + llvm::Constant *C, + QualType destType) { + // For an _Atomic-qualified constant, we may need to add tail padding. + if (auto AT = destType->getAs<AtomicType>()) { + QualType destValueType = AT->getValueType(); + C = emitForMemory(CGM, C, destValueType); + + uint64_t innerSize = CGM.getContext().getTypeSize(destValueType); + uint64_t outerSize = CGM.getContext().getTypeSize(destType); + if (innerSize == outerSize) + return C; + + assert(innerSize < outerSize && "emitted over-large constant for atomic"); + llvm::Constant *elts[] = { + C, + llvm::ConstantAggregateZero::get( + llvm::ArrayType::get(CGM.Int8Ty, (outerSize - innerSize) / 8)) + }; + return llvm::ConstantStruct::getAnon(elts); + } + + // Zero-extend bool. + if (C->getType()->isIntegerTy(1)) { + llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType); + return llvm::ConstantExpr::getZExt(C, boolTy); } + return C; } -llvm::Constant *CodeGenModule::EmitConstantExpr(const Expr *E, - QualType DestType, - CodeGenFunction *CGF) { +llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, + QualType destType) { Expr::EvalResult Result; bool Success = false; - if (DestType->isReferenceType()) - Success = E->EvaluateAsLValue(Result, Context); + if (destType->isReferenceType()) + Success = E->EvaluateAsLValue(Result, CGM.getContext()); else - Success = E->EvaluateAsRValue(Result, Context); + Success = E->EvaluateAsRValue(Result, CGM.getContext()); - llvm::Constant *C = nullptr; + llvm::Constant *C; if (Success && !Result.HasSideEffects) - C = EmitConstantValue(Result.Val, DestType, CGF); + C = tryEmitPrivate(Result.Val, destType); else - C = ConstExprEmitter(*this, CGF).Visit(const_cast<Expr*>(E)); + C = ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), destType); - if (C && C->getType()->isIntegerTy(1)) { - llvm::Type *BoolTy = getTypes().ConvertTypeForMem(E->getType()); - C = llvm::ConstantExpr::getZExt(C, BoolTy); - } return C; } @@ -1270,123 +1509,339 @@ llvm::Constant *CodeGenModule::getNullPointer(llvm::PointerType *T, QualType QT) return getTargetCodeGenInfo().getNullPointer(*this, T, QT); } -llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, - QualType DestType, - CodeGenFunction *CGF) { - // For an _Atomic-qualified constant, we may need to add tail padding. - if (auto *AT = DestType->getAs<AtomicType>()) { - QualType InnerType = AT->getValueType(); - auto *Inner = EmitConstantValue(Value, InnerType, CGF); - - uint64_t InnerSize = Context.getTypeSize(InnerType); - uint64_t OuterSize = Context.getTypeSize(DestType); - if (InnerSize == OuterSize) - return Inner; - - assert(InnerSize < OuterSize && "emitted over-large constant for atomic"); - llvm::Constant *Elts[] = { - Inner, - llvm::ConstantAggregateZero::get( - llvm::ArrayType::get(Int8Ty, (OuterSize - InnerSize) / 8)) - }; - return llvm::ConstantStruct::getAnon(Elts); - } +namespace { +/// A struct which can be used to peephole certain kinds of finalization +/// that normally happen during l-value emission. +struct ConstantLValue { + llvm::Constant *Value; + bool HasOffsetApplied; + + /*implicit*/ ConstantLValue(llvm::Constant *value, + bool hasOffsetApplied = false) + : Value(value), HasOffsetApplied(false) {} + + /*implicit*/ ConstantLValue(ConstantAddress address) + : ConstantLValue(address.getPointer()) {} +}; - switch (Value.getKind()) { - case APValue::Uninitialized: - llvm_unreachable("Constant expressions should be initialized."); - case APValue::LValue: { - llvm::Type *DestTy = getTypes().ConvertTypeForMem(DestType); - llvm::Constant *Offset = - llvm::ConstantInt::get(Int64Ty, Value.getLValueOffset().getQuantity()); - - llvm::Constant *C = nullptr; - - if (APValue::LValueBase LVBase = Value.getLValueBase()) { - // An array can be represented as an lvalue referring to the base. - if (isa<llvm::ArrayType>(DestTy)) { - assert(Offset->isNullValue() && "offset on array initializer"); - return ConstExprEmitter(*this, CGF).Visit( - const_cast<Expr*>(LVBase.get<const Expr*>())); - } +/// A helper class for emitting constant l-values. +class ConstantLValueEmitter : public ConstStmtVisitor<ConstantLValueEmitter, + ConstantLValue> { + CodeGenModule &CGM; + ConstantEmitter &Emitter; + const APValue &Value; + QualType DestType; - C = ConstExprEmitter(*this, CGF).EmitLValue(LVBase).getPointer(); + // Befriend StmtVisitorBase so that we don't have to expose Visit*. + friend StmtVisitorBase; - // Apply offset if necessary. - if (!Offset->isNullValue()) { - unsigned AS = C->getType()->getPointerAddressSpace(); - llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); - llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy); - Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, Casted, Offset); - C = llvm::ConstantExpr::getPointerCast(Casted, C->getType()); - } +public: + ConstantLValueEmitter(ConstantEmitter &emitter, const APValue &value, + QualType destType) + : CGM(emitter.CGM), Emitter(emitter), Value(value), DestType(destType) {} - // Convert to the appropriate type; this could be an lvalue for - // an integer. - if (isa<llvm::PointerType>(DestTy)) - return llvm::ConstantExpr::getPointerCast(C, DestTy); + llvm::Constant *tryEmit(); - return llvm::ConstantExpr::getPtrToInt(C, DestTy); - } else { - C = Offset; - - // Convert to the appropriate type; this could be an lvalue for - // an integer. - if (auto PT = dyn_cast<llvm::PointerType>(DestTy)) { - if (Value.isNullPointer()) - return getNullPointer(PT, DestType); - // Convert the integer to a pointer-sized integer before converting it - // to a pointer. - C = llvm::ConstantExpr::getIntegerCast( - C, getDataLayout().getIntPtrType(DestTy), - /*isSigned=*/false); - return llvm::ConstantExpr::getIntToPtr(C, DestTy); - } +private: + llvm::Constant *tryEmitAbsolute(llvm::Type *destTy); + ConstantLValue tryEmitBase(const APValue::LValueBase &base); + + ConstantLValue VisitStmt(const Stmt *S) { return nullptr; } + ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); + ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); + ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); + ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); + ConstantLValue VisitAddrLabelExpr(const AddrLabelExpr *E); + ConstantLValue VisitCallExpr(const CallExpr *E); + ConstantLValue VisitBlockExpr(const BlockExpr *E); + ConstantLValue VisitCXXTypeidExpr(const CXXTypeidExpr *E); + ConstantLValue VisitCXXUuidofExpr(const CXXUuidofExpr *E); + ConstantLValue VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *E); + + bool hasNonZeroOffset() const { + return !Value.getLValueOffset().isZero(); + } - // If the types don't match this should only be a truncate. - if (C->getType() != DestTy) - return llvm::ConstantExpr::getTrunc(C, DestTy); + /// Return the value offset. + llvm::Constant *getOffset() { + return llvm::ConstantInt::get(CGM.Int64Ty, + Value.getLValueOffset().getQuantity()); + } + /// Apply the value offset to the given constant. + llvm::Constant *applyOffset(llvm::Constant *C) { + if (!hasNonZeroOffset()) return C; + + llvm::Type *origPtrTy = C->getType(); + unsigned AS = origPtrTy->getPointerAddressSpace(); + llvm::Type *charPtrTy = CGM.Int8Ty->getPointerTo(AS); + C = llvm::ConstantExpr::getBitCast(C, charPtrTy); + C = llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset()); + C = llvm::ConstantExpr::getPointerCast(C, origPtrTy); + return C; + } +}; + +} + +llvm::Constant *ConstantLValueEmitter::tryEmit() { + const APValue::LValueBase &base = Value.getLValueBase(); + + // Certain special array initializers are represented in APValue + // as l-values referring to the base expression which generates the + // array. This happens with e.g. string literals. These should + // probably just get their own representation kind in APValue. + if (DestType->isArrayType()) { + assert(!hasNonZeroOffset() && "offset on array initializer"); + auto expr = const_cast<Expr*>(base.get<const Expr*>()); + return ConstExprEmitter(Emitter).Visit(expr, DestType); + } + + // Otherwise, the destination type should be a pointer or reference + // type, but it might also be a cast thereof. + // + // FIXME: the chain of casts required should be reflected in the APValue. + // We need this in order to correctly handle things like a ptrtoint of a + // non-zero null pointer and addrspace casts that aren't trivially + // represented in LLVM IR. + auto destTy = CGM.getTypes().ConvertTypeForMem(DestType); + assert(isa<llvm::IntegerType>(destTy) || isa<llvm::PointerType>(destTy)); + + // If there's no base at all, this is a null or absolute pointer, + // possibly cast back to an integer type. + if (!base) { + return tryEmitAbsolute(destTy); + } + + // Otherwise, try to emit the base. + ConstantLValue result = tryEmitBase(base); + + // If that failed, we're done. + llvm::Constant *value = result.Value; + if (!value) return nullptr; + + // Apply the offset if necessary and not already done. + if (!result.HasOffsetApplied) { + value = applyOffset(value); + } + + // Convert to the appropriate type; this could be an lvalue for + // an integer. FIXME: performAddrSpaceCast + if (isa<llvm::PointerType>(destTy)) + return llvm::ConstantExpr::getPointerCast(value, destTy); + + return llvm::ConstantExpr::getPtrToInt(value, destTy); +} + +/// Try to emit an absolute l-value, such as a null pointer or an integer +/// bitcast to pointer type. +llvm::Constant * +ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) { + auto offset = getOffset(); + + // If we're producing a pointer, this is easy. + if (auto destPtrTy = cast<llvm::PointerType>(destTy)) { + if (Value.isNullPointer()) { + // FIXME: integer offsets from non-zero null pointers. + return CGM.getNullPointer(destPtrTy, DestType); } + + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + // FIXME: signedness depends on the original integer type. + auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); + llvm::Constant *C = offset; + C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, + /*isSigned*/ false); + C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); + return C; + } + + // Otherwise, we're basically returning an integer constant. + + // FIXME: this does the wrong thing with ptrtoint of a null pointer, + // but since we don't know the original pointer type, there's not much + // we can do about it. + + auto C = getOffset(); + C = llvm::ConstantExpr::getIntegerCast(C, destTy, /*isSigned*/ false); + return C; +} + +ConstantLValue +ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { + // Handle values. + if (const ValueDecl *D = base.dyn_cast<const ValueDecl*>()) { + if (D->hasAttr<WeakRefAttr>()) + return CGM.GetWeakRefReference(D).getPointer(); + + if (auto FD = dyn_cast<FunctionDecl>(D)) + return CGM.GetAddrOfFunction(FD); + + if (auto VD = dyn_cast<VarDecl>(D)) { + // We can never refer to a variable with local storage. + if (!VD->hasLocalStorage()) { + if (VD->isFileVarDecl() || VD->hasExternalStorage()) + return CGM.GetAddrOfGlobalVar(VD); + + if (VD->isLocalVarDecl()) { + return CGM.getOrCreateStaticVarDecl( + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + } + } + } + + return nullptr; + } + + // Otherwise, it must be an expression. + return Visit(base.get<const Expr*>()); +} + +ConstantLValue +ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { + return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E); +} + +ConstantLValue +ConstantLValueEmitter::VisitStringLiteral(const StringLiteral *E) { + return CGM.GetAddrOfConstantStringFromLiteral(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { + return CGM.GetAddrOfConstantStringFromObjCEncode(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { + auto C = CGM.getObjCRuntime().GenerateConstantString(E->getString()); + return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(E->getType())); +} + +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + if (auto CGF = Emitter.CGF) { + LValue Res = CGF->EmitPredefinedLValue(E); + return cast<ConstantAddress>(Res.getAddress()); + } + + auto kind = E->getIdentType(); + if (kind == PredefinedExpr::PrettyFunction) { + return CGM.GetAddrOfConstantCString("top level", ".tmp"); + } + + return CGM.GetAddrOfConstantCString("", ".tmp"); +} + +ConstantLValue +ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) { + assert(Emitter.CGF && "Invalid address of label expression outside function"); + llvm::Constant *Ptr = Emitter.CGF->GetAddrOfLabel(E->getLabel()); + Ptr = llvm::ConstantExpr::getBitCast(Ptr, + CGM.getTypes().ConvertType(E->getType())); + return Ptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitCallExpr(const CallExpr *E) { + unsigned builtin = E->getBuiltinCallee(); + if (builtin != Builtin::BI__builtin___CFStringMakeConstantString && + builtin != Builtin::BI__builtin___NSStringMakeConstantString) + return nullptr; + + auto literal = cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts()); + if (builtin == Builtin::BI__builtin___NSStringMakeConstantString) { + return CGM.getObjCRuntime().GenerateConstantString(literal); + } else { + // FIXME: need to deal with UCN conversion issues. + return CGM.GetAddrOfConstantCFString(literal); } +} + +ConstantLValue +ConstantLValueEmitter::VisitBlockExpr(const BlockExpr *E) { + StringRef functionName; + if (auto CGF = Emitter.CGF) + functionName = CGF->CurFn->getName(); + else + functionName = "global"; + + return CGM.GetAddrOfGlobalBlock(E, functionName); +} + +ConstantLValue +ConstantLValueEmitter::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { + QualType T; + if (E->isTypeOperand()) + T = E->getTypeOperand(CGM.getContext()); + else + T = E->getExprOperand()->getType(); + return CGM.GetAddrOfRTTIDescriptor(T); +} + +ConstantLValue +ConstantLValueEmitter::VisitCXXUuidofExpr(const CXXUuidofExpr *E) { + return CGM.GetAddrOfUuidDescriptor(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *E) { + assert(E->getStorageDuration() == SD_Static); + SmallVector<const Expr *, 2> CommaLHSs; + SmallVector<SubobjectAdjustment, 2> Adjustments; + const Expr *Inner = E->GetTemporaryExpr() + ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); + return CGM.GetAddrOfGlobalTemporary(E, Inner); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, + QualType DestType) { + switch (Value.getKind()) { + case APValue::Uninitialized: + llvm_unreachable("Constant expressions should be initialized."); + case APValue::LValue: + return ConstantLValueEmitter(*this, Value, DestType).tryEmit(); case APValue::Int: - return llvm::ConstantInt::get(VMContext, Value.getInt()); + return llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getInt()); case APValue::ComplexInt: { llvm::Constant *Complex[2]; - Complex[0] = llvm::ConstantInt::get(VMContext, + Complex[0] = llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getComplexIntReal()); - Complex[1] = llvm::ConstantInt::get(VMContext, + Complex[1] = llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getComplexIntImag()); // FIXME: the target may want to specify that this is packed. - llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(), - Complex[1]->getType(), - nullptr); + llvm::StructType *STy = + llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType()); return llvm::ConstantStruct::get(STy, Complex); } case APValue::Float: { const llvm::APFloat &Init = Value.getFloat(); if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() && - !Context.getLangOpts().NativeHalfType && - !Context.getLangOpts().HalfArgsAndReturns) - return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt()); + !CGM.getContext().getLangOpts().NativeHalfType && + !CGM.getContext().getLangOpts().HalfArgsAndReturns) + return llvm::ConstantInt::get(CGM.getLLVMContext(), + Init.bitcastToAPInt()); else - return llvm::ConstantFP::get(VMContext, Init); + return llvm::ConstantFP::get(CGM.getLLVMContext(), Init); } case APValue::ComplexFloat: { llvm::Constant *Complex[2]; - Complex[0] = llvm::ConstantFP::get(VMContext, + Complex[0] = llvm::ConstantFP::get(CGM.getLLVMContext(), Value.getComplexFloatReal()); - Complex[1] = llvm::ConstantFP::get(VMContext, + Complex[1] = llvm::ConstantFP::get(CGM.getLLVMContext(), Value.getComplexFloatImag()); // FIXME: the target may want to specify that this is packed. - llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(), - Complex[1]->getType(), - nullptr); + llvm::StructType *STy = + llvm::StructType::get(Complex[0]->getType(), Complex[1]->getType()); return llvm::ConstantStruct::get(STy, Complex); } case APValue::Vector: { @@ -1396,9 +1851,9 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, for (unsigned I = 0; I != NumElts; ++I) { const APValue &Elt = Value.getVectorElt(I); if (Elt.isInt()) - Inits[I] = llvm::ConstantInt::get(VMContext, Elt.getInt()); + Inits[I] = llvm::ConstantInt::get(CGM.getLLVMContext(), Elt.getInt()); else if (Elt.isFloat()) - Inits[I] = llvm::ConstantFP::get(VMContext, Elt.getFloat()); + Inits[I] = llvm::ConstantFP::get(CGM.getLLVMContext(), Elt.getFloat()); else llvm_unreachable("unsupported vector element type"); } @@ -1407,13 +1862,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, case APValue::AddrLabelDiff: { const AddrLabelExpr *LHSExpr = Value.getAddrLabelDiffLHS(); const AddrLabelExpr *RHSExpr = Value.getAddrLabelDiffRHS(); - llvm::Constant *LHS = EmitConstantExpr(LHSExpr, LHSExpr->getType(), CGF); - llvm::Constant *RHS = EmitConstantExpr(RHSExpr, RHSExpr->getType(), CGF); + llvm::Constant *LHS = tryEmitPrivate(LHSExpr, LHSExpr->getType()); + llvm::Constant *RHS = tryEmitPrivate(RHSExpr, RHSExpr->getType()); + if (!LHS || !RHS) return nullptr; // Compute difference - llvm::Type *ResultType = getTypes().ConvertType(DestType); - LHS = llvm::ConstantExpr::getPtrToInt(LHS, IntPtrTy); - RHS = llvm::ConstantExpr::getPtrToInt(RHS, IntPtrTy); + llvm::Type *ResultType = CGM.getTypes().ConvertType(DestType); + LHS = llvm::ConstantExpr::getPtrToInt(LHS, CGM.IntPtrTy); + RHS = llvm::ConstantExpr::getPtrToInt(RHS, CGM.IntPtrTy); llvm::Constant *AddrLabelDiff = llvm::ConstantExpr::getSub(LHS, RHS); // LLVM is a bit sensitive about the exact format of the @@ -1423,21 +1879,21 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, } case APValue::Struct: case APValue::Union: - return ConstStructBuilder::BuildStruct(*this, CGF, Value, DestType); + return ConstStructBuilder::BuildStruct(*this, Value, DestType); case APValue::Array: { - const ArrayType *CAT = Context.getAsArrayType(DestType); + const ArrayType *CAT = CGM.getContext().getAsArrayType(DestType); unsigned NumElements = Value.getArraySize(); unsigned NumInitElts = Value.getArrayInitializedElts(); // Emit array filler, if there is one. llvm::Constant *Filler = nullptr; if (Value.hasArrayFiller()) - Filler = EmitConstantValueForMemory(Value.getArrayFiller(), - CAT->getElementType(), CGF); + Filler = tryEmitAbstractForMemory(Value.getArrayFiller(), + CAT->getElementType()); // Emit initializer elements. llvm::Type *CommonElementType = - getTypes().ConvertType(CAT->getElementType()); + CGM.getTypes().ConvertType(CAT->getElementType()); // Try to use a ConstantAggregateZero if we can. if (Filler && Filler->isNullValue() && !NumInitElts) { @@ -1446,15 +1902,21 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, return llvm::ConstantAggregateZero::get(AType); } - std::vector<llvm::Constant*> Elts; + SmallVector<llvm::Constant*, 16> Elts; Elts.reserve(NumElements); for (unsigned I = 0; I < NumElements; ++I) { llvm::Constant *C = Filler; - if (I < NumInitElts) - C = EmitConstantValueForMemory(Value.getArrayInitializedElt(I), - CAT->getElementType(), CGF); - else - assert(Filler && "Missing filler for implicit elements of initializer"); + if (I < NumInitElts) { + C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I), + CAT->getElementType()); + } else if (!Filler) { + assert(Value.hasArrayFiller() && + "Missing filler for implicit elements of initializer"); + C = tryEmitPrivateForMemory(Value.getArrayFiller(), + CAT->getElementType()); + } + if (!C) return nullptr; + if (I == 0) CommonElementType = C->getType(); else if (C->getType() != CommonElementType) @@ -1468,7 +1930,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, Types.reserve(NumElements); for (unsigned i = 0, e = Elts.size(); i < e; ++i) Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = llvm::StructType::get(VMContext, Types, true); + llvm::StructType *SType = + llvm::StructType::get(CGM.getLLVMContext(), Types, true); return llvm::ConstantStruct::get(SType, Elts); } @@ -1477,23 +1940,11 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, return llvm::ConstantArray::get(AType, Elts); } case APValue::MemberPointer: - return getCXXABI().EmitMemberPointer(Value, DestType); + return CGM.getCXXABI().EmitMemberPointer(Value, DestType); } llvm_unreachable("Unknown APValue kind"); } -llvm::Constant * -CodeGenModule::EmitConstantValueForMemory(const APValue &Value, - QualType DestType, - CodeGenFunction *CGF) { - llvm::Constant *C = EmitConstantValue(Value, DestType, CGF); - if (C->getType()->isIntegerTy(1)) { - llvm::Type *BoolTy = getTypes().ConvertTypeForMem(DestType); - C = llvm::ConstantExpr::getZExt(C, BoolTy); - } - return C; -} - llvm::GlobalVariable *CodeGenModule::getAddrOfConstantCompoundLiteralIfEmitted( const CompoundLiteralExpr *E) { return EmittedCompoundLiterals.lookup(E); @@ -1509,7 +1960,7 @@ void CodeGenModule::setAddrOfConstantCompoundLiteral( ConstantAddress CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) { assert(E->isFileScope() && "not a file-scope compound literal expr"); - return ConstExprEmitter(*this, nullptr).EmitLValue(E); + return tryEmitGlobalCompoundLiteral(*this, nullptr, E); } llvm::Constant * @@ -1631,6 +2082,11 @@ static llvm::Constant *EmitNullConstantForBase(CodeGenModule &CGM, return EmitNullConstant(CGM, base, /*asCompleteObject=*/false); } +llvm::Constant *ConstantEmitter::emitNullForMemory(CodeGenModule &CGM, + QualType T) { + return emitForMemory(CGM, CGM.EmitNullConstant(T), T); +} + llvm::Constant *CodeGenModule::EmitNullConstant(QualType T) { if (T->getAs<PointerType>()) return getNullPointer( @@ -1645,7 +2101,8 @@ llvm::Constant *CodeGenModule::EmitNullConstant(QualType T) { QualType ElementTy = CAT->getElementType(); - llvm::Constant *Element = EmitNullConstant(ElementTy); + llvm::Constant *Element = + ConstantEmitter::emitNullForMemory(*this, ElementTy); unsigned NumElements = CAT->getSize().getZExtValue(); SmallVector<llvm::Constant *, 8> Array(NumElements, Element); return llvm::ConstantArray::get(ATy, Array); diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 67f821f2f0..7c11103617 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -44,13 +45,85 @@ using llvm::Value; //===----------------------------------------------------------------------===// namespace { + +/// Determine whether the given binary operation may overflow. +/// Sets \p Result to the value of the operation for BO_Add, BO_Sub, BO_Mul, +/// and signed BO_{Div,Rem}. For these opcodes, and for unsigned BO_{Div,Rem}, +/// the returned overflow check is precise. The returned value is 'true' for +/// all other opcodes, to be conservative. +bool mayHaveIntegerOverflow(llvm::ConstantInt *LHS, llvm::ConstantInt *RHS, + BinaryOperator::Opcode Opcode, bool Signed, + llvm::APInt &Result) { + // Assume overflow is possible, unless we can prove otherwise. + bool Overflow = true; + const auto &LHSAP = LHS->getValue(); + const auto &RHSAP = RHS->getValue(); + if (Opcode == BO_Add) { + if (Signed) + Result = LHSAP.sadd_ov(RHSAP, Overflow); + else + Result = LHSAP.uadd_ov(RHSAP, Overflow); + } else if (Opcode == BO_Sub) { + if (Signed) + Result = LHSAP.ssub_ov(RHSAP, Overflow); + else + Result = LHSAP.usub_ov(RHSAP, Overflow); + } else if (Opcode == BO_Mul) { + if (Signed) + Result = LHSAP.smul_ov(RHSAP, Overflow); + else + Result = LHSAP.umul_ov(RHSAP, Overflow); + } else if (Opcode == BO_Div || Opcode == BO_Rem) { + if (Signed && !RHS->isZero()) + Result = LHSAP.sdiv_ov(RHSAP, Overflow); + else + return false; + } + return Overflow; +} + struct BinOpInfo { Value *LHS; Value *RHS; QualType Ty; // Computation Type. BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform - bool FPContractable; + FPOptions FPFeatures; const Expr *E; // Entire expr, for error unsupported. May not be binop. + + /// Check if the binop can result in integer overflow. + bool mayHaveIntegerOverflow() const { + // Without constant input, we can't rule out overflow. + auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS); + auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS); + if (!LHSCI || !RHSCI) + return true; + + llvm::APInt Result; + return ::mayHaveIntegerOverflow( + LHSCI, RHSCI, Opcode, Ty->hasSignedIntegerRepresentation(), Result); + } + + /// Check if the binop computes a division or a remainder. + bool isDivremOp() const { + return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign || + Opcode == BO_RemAssign; + } + + /// Check if the binop can result in an integer division by zero. + bool mayHaveIntegerDivisionByZero() const { + if (isDivremOp()) + if (auto *CI = dyn_cast<llvm::ConstantInt>(RHS)) + return CI->isZero(); + return true; + } + + /// Check if the binop can result in a float division by zero. + bool mayHaveFloatDivisionByZero() const { + if (isDivremOp()) + if (auto *CFP = dyn_cast<llvm::ConstantFP>(RHS)) + return CFP->isZero(); + return true; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -85,9 +158,17 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { assert((isa<UnaryOperator>(Op.E) || isa<BinaryOperator>(Op.E)) && "Expected a unary or binary operator"); + // If the binop has constant inputs and we can prove there is no overflow, + // we can elide the overflow check. + if (!Op.mayHaveIntegerOverflow()) + return true; + + // If a unary op has a widened operand, the op cannot overflow. if (const auto *UO = dyn_cast<UnaryOperator>(Op.E)) return IsWidenedIntegerOp(Ctx, UO->getSubExpr()); + // We usually don't need overflow checks for binops with widened operands. + // Multiplication with promoted unsigned operands is a special case. const auto *BO = cast<BinaryOperator>(Op.E); auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); if (!OptionalLHSTy) @@ -100,19 +181,35 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { QualType LHSTy = *OptionalLHSTy; QualType RHSTy = *OptionalRHSTy; - // We usually don't need overflow checks for binary operations with widened - // operands. Multiplication with promoted unsigned operands is a special case. + // This is the simple case: binops without unsigned multiplication, and with + // widened operands. No overflow check is needed here. if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) || !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType()) return true; - // The overflow check can be skipped if either one of the unpromoted types - // are less than half the size of the promoted type. + // For unsigned multiplication the overflow check can be elided if either one + // of the unpromoted types are less than half the size of the promoted type. unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType()); return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize || (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; } +/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions. +static void updateFastMathFlags(llvm::FastMathFlags &FMF, + FPOptions FPFeatures) { + FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement()); +} + +/// Propagate fast-math flags from \p Op to the instruction in \p V. +static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) { + if (auto *I = dyn_cast<llvm::Instruction>(V)) { + llvm::FastMathFlags FMF = I->getFastMathFlags(); + updateFastMathFlags(FMF, Op.FPFeatures); + I->setFastMathFlags(FMF); + } + return V; +} + class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, Value*> { CodeGenFunction &CGF; @@ -276,6 +373,15 @@ public: Value *VisitGenericSelectionExpr(GenericSelectionExpr *GE) { return Visit(GE->getResultExpr()); } + Value *VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.EmitCoawaitExpr(*S).getScalarVal(); + } + Value *VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.EmitCoyieldExpr(*S).getScalarVal(); + } + Value *VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } // Leaves. Value *VisitIntegerLiteral(const IntegerLiteral *E) { @@ -322,14 +428,19 @@ public: return CGF.getOpaqueRValueMapping(E).getScalarVal(); } + Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant, + Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), + E->getExprLoc()); + return Constant.getValue(); + } + // l-values. Value *VisitDeclRefExpr(DeclRefExpr *E) { - if (CodeGenFunction::ConstantEmission result = CGF.tryEmitAsConstant(E)) { - if (result.isReference()) - return EmitLoadOfLValue(result.getReferenceLValue(CGF, E), - E->getExprLoc()); - return result.getValue(); - } + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) + return emitConstant(Constant, E); return EmitLoadOfLValue(E); } @@ -544,8 +655,10 @@ public: !CanElideOverflowCheck(CGF.getContext(), Ops)) return EmitOverflowCheckedBinOp(Ops); - if (Ops.LHS->getType()->isFPOrFPVectorTy()) - return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + if (Ops.LHS->getType()->isFPOrFPVectorTy()) { + Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); + return propagateFMFlags(V, Ops); + } return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); } /// Create a binary op that checks for overflow. @@ -901,10 +1014,42 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, return Builder.CreateVectorSplat(NumElements, Src, "splat"); } - // Allow bitcast from vector to integer/fp of the same size. - if (isa<llvm::VectorType>(SrcTy) || - isa<llvm::VectorType>(DstTy)) - return Builder.CreateBitCast(Src, DstTy, "conv"); + if (isa<llvm::VectorType>(SrcTy) || isa<llvm::VectorType>(DstTy)) { + // Allow bitcast from vector to integer/fp of the same size. + unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); + unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + if (SrcSize == DstSize) + return Builder.CreateBitCast(Src, DstTy, "conv"); + + // Conversions between vectors of different sizes are not allowed except + // when vectors of half are involved. Operations on storage-only half + // vectors require promoting half vector operands to float vectors and + // truncating the result, which is either an int or float vector, to a + // short or half vector. + + // Source and destination are both expected to be vectors. + llvm::Type *SrcElementTy = SrcTy->getVectorElementType(); + llvm::Type *DstElementTy = DstTy->getVectorElementType(); + (void)DstElementTy; + + assert(((SrcElementTy->isIntegerTy() && + DstElementTy->isIntegerTy()) || + (SrcElementTy->isFloatingPointTy() && + DstElementTy->isFloatingPointTy())) && + "unexpected conversion between a floating-point vector and an " + "integer vector"); + + // Truncate an i32 vector to an i16 vector. + if (SrcElementTy->isIntegerTy()) + return Builder.CreateIntCast(Src, DstTy, false, "conv"); + + // Truncate a float vector to a half vector. + if (SrcSize > DstSize) + return Builder.CreateFPTrunc(Src, DstTy, "conv"); + + // Promote a half vector to a float vector. + return Builder.CreateFPExt(Src, DstTy, "conv"); + } // Finally, we have the arithmetic types: real int/float. Value *Res = nullptr; @@ -1191,13 +1336,15 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { } Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { - if (E->isArrow()) - CGF.EmitScalarExpr(E->getBase()); - else - EmitLValue(E->getBase()); - return Builder.getInt(Value); + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) { + CGF.EmitIgnoredExpr(E->getBase()); + return emitConstant(Constant, E); + } else { + llvm::APSInt Value; + if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { + CGF.EmitIgnoredExpr(E->getBase()); + return Builder.getInt(Value); + } } return EmitLoadOfLValue(E); @@ -1486,10 +1633,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. - auto *Src = Visit(E); - return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGF, Src, - E->getType(), - DestTy); + return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGF, Visit(E), E->getType()->getPointeeType().getAddressSpace(), + DestTy->getPointeeType().getAddressSpace(), ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1671,7 +1817,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_IntToOCLSampler: - return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF);
+ return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF); } // end of switch @@ -1709,7 +1855,7 @@ static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E, BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false); BinOp.Ty = E->getType(); BinOp.Opcode = IsInc ? BO_Add : BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return BinOp; } @@ -1744,6 +1890,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *input; int amount = (isInc ? 1 : -1); + bool isSubtraction = !isInc; if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { type = atomicTy->getValueType(); @@ -1833,7 +1980,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = Builder.CreateInBoundsGEP(value, numElts, "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP( + value, numElts, /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1843,7 +1992,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. @@ -1852,7 +2003,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = Builder.CreateInBoundsGEP(value, amt, "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. @@ -1933,7 +2086,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = Builder.CreateInBoundsGEP(value, sizeValue, "incdec.objptr"); + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, + /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -1975,7 +2130,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; - BinOp.FPContractable = false; + // FIXME: once UnaryOperator carries FPFeatures, copy it here. BinOp.E = E; return EmitSub(BinOp); } @@ -2196,7 +2351,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { Result.RHS = Visit(E->getRHS()); Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); - Result.FPContractable = E->isFPContractable(); + Result.FPFeatures = E->getFPFeatures(); Result.E = E; return Result; } @@ -2216,7 +2371,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.RHS = Visit(E->getRHS()); OpInfo.Ty = E->getComputationResultType(); OpInfo.Opcode = E->getOpcode(); - OpInfo.FPContractable = E->isFPContractable(); + OpInfo.FPFeatures = E->getFPFeatures(); OpInfo.E = E; // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); @@ -2350,7 +2505,8 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( const auto *BO = cast<BinaryOperator>(Ops.E); if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) && Ops.Ty->hasSignedIntegerRepresentation() && - !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS())) { + !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS()) && + Ops.mayHaveIntegerOverflow()) { llvm::IntegerType *Ty = cast<llvm::IntegerType>(Zero->getType()); llvm::Value *IntMin = @@ -2373,11 +2529,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { CodeGenFunction::SanitizerScope SanScope(&CGF); if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && - Ops.Ty->isIntegerType()) { + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); } else if (CGF.SanOpts.has(SanitizerKind::FloatDivideByZero) && - Ops.Ty->isRealFloatingType()) { + Ops.Ty->isRealFloatingType() && + Ops.mayHaveFloatDivisionByZero()) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); llvm::Value *NonZero = Builder.CreateFCmpUNE(Ops.RHS, Zero); EmitBinOpCheck(std::make_pair(NonZero, SanitizerKind::FloatDivideByZero), @@ -2412,7 +2570,8 @@ Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && - Ops.Ty->isIntegerType()) { + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); @@ -2455,6 +2614,7 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { if (isSigned) OpID |= 1; + CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Type *opTy = CGF.CGM.getTypes().ConvertType(Ops.Ty); llvm::Function *intrinsic = CGF.CGM.getIntrinsic(IID, opTy); @@ -2470,7 +2630,6 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { // If the signed-integer-overflow sanitizer is enabled, emit a call to its // runtime. Otherwise, this is a -ftrapv check, so just emit a trap. if (!isSigned || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) { - CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *NotOverflow = Builder.CreateNot(overflow); SanitizerMask Kind = isSigned ? SanitizerKind::SignedIntegerOverflow : SanitizerKind::UnsignedIntegerOverflow; @@ -2546,13 +2705,38 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, std::swap(pointerOperand, indexOperand); } + bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); + unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); auto PtrTy = cast<llvm::PointerType>(pointer->getType()); + + // Some versions of glibc and gcc use idioms (particularly in their malloc + // routines) that add a pointer-sized integer (known to be a pointer value) + // to a null pointer in order to cast the value back to an integer or as + // part of a pointer alignment algorithm. This is undefined behavior, but + // we'd like to be able to compile programs that use it. + // + // Normally, we'd generate a GEP with a null-pointer base here in response + // to that code, but it's also UB to dereference a pointer created that + // way. Instead (as an acknowledged hack to tolerate the idiom) we will + // generate a direct cast of the integer value to a pointer. + // + // The idiom (p = nullptr + N) is not met if any of the following are true: + // + // The operation is subtraction. + // The index is not pointer-sized. + // The pointer type is not byte-sized. + // + if (BinaryOperator::isNullPointerArithmeticExtension(CGF.getContext(), + op.Opcode, + expr->getLHS(), + expr->getRHS())) + return CGF.Builder.CreateIntToPtr(index, pointer->getType()); + if (width != DL.getTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. - bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned, "idx.ext"); } @@ -2596,7 +2780,9 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr"); } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + pointer = + CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, + op.E->getExprLoc(), "add.ptr"); } return pointer; } @@ -2613,7 +2799,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.Builder.CreateInBoundsGEP(pointer, index, "add.ptr"); + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, + op.E->getExprLoc(), "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and @@ -2663,12 +2850,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, "Only fadd/fsub can be the root of an fmuladd."); // Check whether this op is marked as fusable. - if (!op.FPContractable) - return nullptr; - - // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is - // either disabled, or handled entirely by the LLVM backend). - if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On) + if (!op.FPFeatures.allowFPContractWithinStatement()) return nullptr; // We have a potentially fusable op. Look for a mul on one of the operands. @@ -2691,7 +2873,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isPointerTy() || op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ false); + return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction); if (op.Ty->isSignedIntegerOrEnumerationType()) { switch (CGF.getLangOpts().getSignedOverflowBehavior()) { @@ -2718,7 +2900,8 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); + Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add"); + return propagateFMFlags(V, op); } return Builder.CreateAdd(op.LHS, op.RHS, "add"); @@ -2751,7 +2934,8 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) return FMulAdd; - return Builder.CreateFSub(op.LHS, op.RHS, "sub"); + Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub"); + return propagateFMFlags(V, op); } return Builder.CreateSub(op.LHS, op.RHS, "sub"); @@ -2760,7 +2944,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // If the RHS is not a pointer, then we have normal pointer // arithmetic. if (!op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ true); + return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction); // Otherwise, this is a pointer subtraction. @@ -2936,16 +3120,25 @@ static llvm::Intrinsic::ID GetIntrinsic(IntrinsicType IT, return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequh_p : llvm::Intrinsic::ppc_altivec_vcmpgtsh_p; case BuiltinType::UInt: - case BuiltinType::ULong: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequw_p : llvm::Intrinsic::ppc_altivec_vcmpgtuw_p; case BuiltinType::Int: - case BuiltinType::Long: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequw_p : llvm::Intrinsic::ppc_altivec_vcmpgtsw_p; + case BuiltinType::ULong: + case BuiltinType::ULongLong: + return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequd_p : + llvm::Intrinsic::ppc_altivec_vcmpgtud_p; + case BuiltinType::Long: + case BuiltinType::LongLong: + return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequd_p : + llvm::Intrinsic::ppc_altivec_vcmpgtsd_p; case BuiltinType::Float: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpeqfp_p : llvm::Intrinsic::ppc_altivec_vcmpgtfp_p; + case BuiltinType::Double: + return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_vsx_xvcmpeqdp_p : + llvm::Intrinsic::ppc_vsx_xvcmpgtdp_p; } } @@ -3030,6 +3223,16 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Value *CR6Param = Builder.getInt32(CR6); llvm::Function *F = CGF.CGM.getIntrinsic(ID); Result = Builder.CreateCall(F, {CR6Param, FirstVecArg, SecondVecArg}); + + // The result type of intrinsic may not be same as E->getType(). + // If E->getType() is not BoolTy, EmitScalarConversion will do the + // conversion work. If E->getType() is BoolTy, EmitScalarConversion will + // do nothing, if ResultTy is not i1 at the same time, it will cause + // crash later. + llvm::IntegerType *ResultTy = cast<llvm::IntegerType>(Result->getType()); + if (ResultTy->getBitWidth() > 1 && + E->getType() == CGF.getContext().BoolTy) + Result = Builder.CreateTrunc(Result, Builder.getInt1Ty()); return EmitScalarConversion(Result, CGF.getContext().BoolTy, E->getType(), E->getExprLoc()); } @@ -3589,8 +3792,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); + + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); + } + Src->setName("astype"); return Src; } @@ -3599,9 +3806,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // to vec4 if the original type is not vec4, then a shuffle vector to // get a vec3. if (NumElementsSrc != 3 && NumElementsDst == 3) { - auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - Vec4Ty); + if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + Vec4Ty); + } + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); Src->setName("astype"); return Src; @@ -3724,3 +3934,136 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( llvm_unreachable("Unhandled compound assignment operator"); } + +Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, + ArrayRef<Value *> IdxList, + bool SignedIndices, + bool IsSubtraction, + SourceLocation Loc, + const Twine &Name) { + Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + // If the GEP has already been reduced to a constant, leave it be. + if (isa<llvm::Constant>(GEPVal)) + return GEPVal; + + // Only check for overflows in the default address space. + if (GEPVal->getType()->getPointerAddressSpace()) + return GEPVal; + + auto *GEP = cast<llvm::GEPOperator>(GEPVal); + assert(GEP->isInBounds() && "Expected inbounds GEP"); + + SanitizerScope SanScope(this); + auto &VMContext = getLLVMContext(); + const auto &DL = CGM.getDataLayout(); + auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); + + // Grab references to the signed add/mul overflow intrinsics for intptr_t. + auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); + auto *SAddIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::sadd_with_overflow, IntPtrTy); + auto *SMulIntrinsic = + CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); + + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; + // The offset overflow flag - true if the total offset overflows. + llvm::Value *OffsetOverflows = Builder.getFalse(); + + /// Return the result of the given binary operation. + auto eval = [&](BinaryOperator::Opcode Opcode, llvm::Value *LHS, + llvm::Value *RHS) -> llvm::Value * { + assert((Opcode == BO_Add || Opcode == BO_Mul) && "Can't eval binop"); + + // If the operands are constants, return a constant result. + if (auto *LHSCI = dyn_cast<llvm::ConstantInt>(LHS)) { + if (auto *RHSCI = dyn_cast<llvm::ConstantInt>(RHS)) { + llvm::APInt N; + bool HasOverflow = mayHaveIntegerOverflow(LHSCI, RHSCI, Opcode, + /*Signed=*/true, N); + if (HasOverflow) + OffsetOverflows = Builder.getTrue(); + return llvm::ConstantInt::get(VMContext, N); + } + } + + // Otherwise, compute the result with checked arithmetic. + auto *ResultAndOverflow = Builder.CreateCall( + (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS}); + OffsetOverflows = Builder.CreateOr( + Builder.CreateExtractValue(ResultAndOverflow, 1), OffsetOverflows); + return Builder.CreateExtractValue(ResultAndOverflow, 0); + }; + + // Determine the total byte offset by looking at each GEP operand. + for (auto GTI = llvm::gep_type_begin(GEP), GTE = llvm::gep_type_end(GEP); + GTI != GTE; ++GTI) { + llvm::Value *LocalOffset; + auto *Index = GTI.getOperand(); + // Compute the local offset contributed by this indexing step: + if (auto *STy = GTI.getStructTypeOrNull()) { + // For struct indexing, the local offset is the byte position of the + // specified field. + unsigned FieldNo = cast<llvm::ConstantInt>(Index)->getZExtValue(); + LocalOffset = llvm::ConstantInt::get( + IntPtrTy, DL.getStructLayout(STy)->getElementOffset(FieldNo)); + } else { + // Otherwise this is array-like indexing. The local offset is the index + // multiplied by the element size. + auto *ElementSize = llvm::ConstantInt::get( + IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType())); + auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true); + LocalOffset = eval(BO_Mul, ElementSize, IndexS); + } + + // If this is the first offset, set it as the total offset. Otherwise, add + // the local offset into the running total. + if (!TotalOffset || TotalOffset == Zero) + TotalOffset = LocalOffset; + else + TotalOffset = eval(BO_Add, TotalOffset, LocalOffset); + } + + // Common case: if the total offset is zero, don't emit a check. + if (TotalOffset == Zero) + return GEPVal; + + // Now that we've computed the total offset, add it to the base pointer (with + // wrapping semantics). + auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); + auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset); + + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. + llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); + if (SignedIndices) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd( + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), + NoOffsetOverflow); + } else if (!SignedIndices && !IsSubtraction) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } else { + auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); + } + + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; + // Pass the computed GEP to the runtime to avoid emitting poisoned arguments. + llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; + EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow), + SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); + + return GEPVal; +} diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index a67131af5d..f26263d947 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -117,10 +117,24 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E); if (!ALE) DLE = cast<ObjCDictionaryLiteral>(E); - - // Compute the type of the array we're initializing. + + // Optimize empty collections by referencing constants, when available. uint64_t NumElements = ALE ? ALE->getNumElements() : DLE->getNumElements(); + if (NumElements == 0 && CGM.getLangOpts().ObjCRuntime.hasEmptyCollections()) { + StringRef ConstantName = ALE ? "__NSArray0__" : "__NSDictionary0__"; + QualType IdTy(CGM.getContext().getObjCIdType()); + llvm::Constant *Constant = + CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName); + LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy); + llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart()); + cast<llvm::LoadInst>(Ptr)->setMetadata( + CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(getLLVMContext(), None)); + return Builder.CreateBitCast(Ptr, ConvertType(E->getType())); + } + + // Compute the type of the array we're initializing. llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()), NumElements); QualType ElementType = Context.getObjCIdType().withConst(); @@ -1316,7 +1330,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, BinaryOperator assign(&ivarRef, finalArg, BO_Assign, ivarRef.getType(), VK_RValue, OK_Ordinary, - SourceLocation(), false); + SourceLocation(), FPOptions()); EmitStmt(&assign); } @@ -1469,6 +1483,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin()); + RunCleanupsScope ForScope(*this); + // The local variable comes into scope immediately. AutoVarEmission variable = AutoVarEmission::invalid(); if (const DeclStmt *SD = dyn_cast<DeclStmt>(S.getElement())) @@ -1499,8 +1515,6 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); - RunCleanupsScope ForScope(*this); - // Emit the collection pointer. In ARC, we do a retain. llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1532,16 +1546,15 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ getContext().getPointerType(ItemsTy)); // The third argument is the capacity of that temporary array. - llvm::Type *UnsignedLongLTy = ConvertType(getContext().UnsignedLongTy); - llvm::Constant *Count = llvm::ConstantInt::get(UnsignedLongLTy, NumItems); - Args.add(RValue::get(Count), getContext().UnsignedLongTy); + llvm::Type *NSUIntegerTy = ConvertType(getContext().getNSUIntegerType()); + llvm::Constant *Count = llvm::ConstantInt::get(NSUIntegerTy, NumItems); + Args.add(RValue::get(Count), getContext().getNSUIntegerType()); // Start the enumeration. RValue CountRV = - CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), - getContext().UnsignedLongTy, - FastEnumSel, - Collection, Args); + CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), + getContext().getNSUIntegerType(), + FastEnumSel, Collection, Args); // The initial number of objects that were returned in the buffer. llvm::Value *initialBufferLimit = CountRV.getScalarVal(); @@ -1549,7 +1562,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::BasicBlock *EmptyBB = createBasicBlock("forcoll.empty"); llvm::BasicBlock *LoopInitBB = createBasicBlock("forcoll.loopinit"); - llvm::Value *zero = llvm::Constant::getNullValue(UnsignedLongLTy); + llvm::Value *zero = llvm::Constant::getNullValue(NSUIntegerTy); // If the limit pointer was zero to begin with, the collection is // empty; skip all this. Set the branch weight assuming this has the same @@ -1581,11 +1594,11 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ EmitBlock(LoopBodyBB); // The current index into the buffer. - llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index"); + llvm::PHINode *index = Builder.CreatePHI(NSUIntegerTy, 3, "forcoll.index"); index->addIncoming(zero, LoopInitBB); // The current buffer size. - llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count"); + llvm::PHINode *count = Builder.CreatePHI(NSUIntegerTy, 3, "forcoll.count"); count->addIncoming(initialBufferLimit, LoopInitBB); incrementProfileCounter(&S); @@ -1695,8 +1708,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::BasicBlock *FetchMoreBB = createBasicBlock("forcoll.refetch"); // First we check in the local buffer. - llvm::Value *indexPlusOne - = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1)); + llvm::Value *indexPlusOne = + Builder.CreateAdd(index, llvm::ConstantInt::get(NSUIntegerTy, 1)); // If we haven't overrun the buffer yet, we can continue. // Set the branch weights based on the simplifying assumption that this is @@ -1713,10 +1726,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ EmitBlock(FetchMoreBB); CountRV = - CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), - getContext().UnsignedLongTy, - FastEnumSel, - Collection, Args); + CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), + getContext().getNSUIntegerType(), + FastEnumSel, Collection, Args); // If we got a zero count, we're done. llvm::Value *refetchCount = CountRV.getScalarVal(); @@ -1836,12 +1848,8 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, F->addFnAttr(llvm::Attribute::NonLazyBind); } - if (IsForwarding(Name)) { - llvm::AttrBuilder B; - B.addAttribute(llvm::Attribute::Returned); - - F->arg_begin()->addAttr(llvm::AttributeSet::get(F->getContext(), 1, B)); - } + if (IsForwarding(Name)) + F->arg_begin()->addAttr(llvm::Attribute::Returned); } return RTF; @@ -2405,6 +2413,12 @@ void CodeGenFunction::destroyARCWeak(CodeGenFunction &CGF, CGF.EmitARCDestroyWeak(addr); } +void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, + QualType type) { + llvm::Value *value = CGF.Builder.CreateLoad(addr); + CGF.EmitARCIntrinsicUse(value); +} + namespace { struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup { llvm::Value *Token; @@ -3230,10 +3244,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + DestTy, ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + SrcTy, ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -3249,12 +3265,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( StartFunction(FD, C.VoidTy, Fn, FI, args); - DeclRefExpr DstExpr(&dstDecl, false, DestTy, + DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation()); - DeclRefExpr SrcExpr(&srcDecl, false, SrcTy, + DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation()); @@ -3263,7 +3279,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(), - VK_LValue, SourceLocation(), false); + VK_LValue, SourceLocation(), FPOptions()); EmitStmt(&TheCall); @@ -3311,10 +3327,12 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( SrcTy = C.getPointerType(SrcTy); FunctionArgList args; - ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,DestTy); - args.push_back(&dstDecl); - ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); - args.push_back(&srcDecl); + ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + DestTy, ImplicitParamDecl::Other); + args.push_back(&DstDecl); + ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, + SrcTy, ImplicitParamDecl::Other); + args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); @@ -3329,7 +3347,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( StartFunction(FD, C.VoidTy, Fn, FI, args); - DeclRefExpr SrcExpr(&srcDecl, false, SrcTy, + DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), @@ -3355,7 +3373,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( CXXConstExpr->getConstructionKind(), SourceRange()); - DeclRefExpr DstExpr(&dstDecl, false, DestTy, + DeclRefExpr DstExpr(&DstDecl, false, DestTy, VK_RValue, SourceLocation()); RValue DV = EmitAnyExpr(&DstExpr); @@ -3416,4 +3434,37 @@ CodeGenFunction::EmitBuiltinAvailable(ArrayRef<llvm::Value *> Args) { return Builder.CreateICmpNE(CallRes, llvm::Constant::getNullValue(Int32Ty)); } +void CodeGenModule::emitAtAvailableLinkGuard() { + if (!IsOSVersionAtLeastFn) + return; + // @available requires CoreFoundation only on Darwin. + if (!Target.getTriple().isOSDarwin()) + return; + // Add -framework CoreFoundation to the linker commands. We still want to + // emit the core foundation reference down below because otherwise if + // CoreFoundation is not used in the code, the linker won't link the + // framework. + auto &Context = getLLVMContext(); + llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, "CoreFoundation")}; + LinkerOptionsMetadata.push_back(llvm::MDNode::get(Context, Args)); + // Emit a reference to a symbol from CoreFoundation to ensure that + // CoreFoundation is linked into the final binary. + llvm::FunctionType *FTy = + llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); + llvm::Constant *CFFunc = + CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); + + llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); + llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework")); + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); +} + CGObjCRuntime::~CGObjCRuntime() {} diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 9f6ccb4b5d..c8b8be7f45 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -34,7 +34,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Compiler.h" -#include <cstdarg> using namespace clang; using namespace CodeGen; @@ -58,18 +57,19 @@ public: /// Initialises the lazy function with the name, return type, and the types /// of the arguments. - LLVM_END_WITH_NULL - void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, ...) { + template <typename... Tys> + void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, + Tys *... Types) { CGM = Mod; FunctionName = name; Function = nullptr; - std::vector<llvm::Type *> ArgTys; - va_list Args; - va_start(Args, RetTy); - while (llvm::Type *ArgTy = va_arg(Args, llvm::Type *)) - ArgTys.push_back(ArgTy); - va_end(Args); - FTy = llvm::FunctionType::get(RetTy, ArgTys, false); + if(sizeof...(Tys)) { + SmallVector<llvm::Type *, 8> ArgTys({Types...}); + FTy = llvm::FunctionType::get(RetTy, ArgTys, false); + } + else { + FTy = llvm::FunctionType::get(RetTy, None, false); + } } llvm::FunctionType *getType() { return FTy; } @@ -603,11 +603,10 @@ protected: public: CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, - nullptr); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); } }; @@ -663,7 +662,7 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. - LookupFn->setDoesNotCapture(1); + LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), @@ -702,52 +701,51 @@ class CGObjCGNUstep : public CGObjCGNU { CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; - llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy, - PtrTy, PtrTy, IntTy, IMPTy, nullptr); + llvm::StructType *SlotStructTy = + llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy); SlotTy = llvm::PointerType::getUnqual(SlotStructTy); // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, - SelectorTy, IdTy, nullptr); + SelectorTy, IdTy); // Slot_t objc_msg_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); // If we're in ObjC++ mode, then we want to make if (CGM.getLangOpts().CPlusPlus) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) - EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, nullptr); + EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); // void __cxa_end_catch(void) - ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr); + ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, - PtrTy, nullptr); + PtrTy); } else if (R.getVersion() >= VersionTuple(1, 7)) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // id objc_begin_catch(void *e) - EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr); + EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy); // void objc_end_catch(void) - ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr); + ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) - ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, - PtrTy, nullptr); + ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy, PtrTy); } llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, - SelectorTy, IdTy, PtrDiffTy, nullptr); + SelectorTy, IdTy, PtrDiffTy); SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy, - IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy", - VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr); + VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); // void objc_setCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, nullptr); + PtrTy, PtrTy); // void objc_getCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, - PtrTy, PtrTy, nullptr); + PtrTy, PtrTy); } llvm::Constant *GetCppAtomicObjectGetFunction() override { @@ -849,14 +847,14 @@ protected: public: CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) { // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy, - SelectorTy, nullptr); + SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); + PtrToObjCSuperTy, SelectorTy); } }; } // end anonymous namespace @@ -945,35 +943,34 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } PtrToIdTy = llvm::PointerType::getUnqual(IdTy); - ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr); + ObjCSuperTy = llvm::StructType::get(IdTy, IdTy); PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy); llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void objc_exception_throw(id); - ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); - ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr); + ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); + ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy); // int objc_sync_enter(id); - SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr); + SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy); // int objc_sync_exit(id); - SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr); + SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy); // void objc_enumerationMutation (id) - EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, - IdTy, nullptr); + EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, IdTy); // id objc_getProperty(id, SEL, ptrdiff_t, BOOL) GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy, - PtrDiffTy, BoolTy, nullptr); + PtrDiffTy, BoolTy); // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL) SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy, - PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr); + PtrDiffTy, IdTy, BoolTy, BoolTy); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) - GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, nullptr); + GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, + PtrDiffTy, BoolTy, BoolTy); // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL) - SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, - PtrDiffTy, BoolTy, BoolTy, nullptr); + SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, + PtrDiffTy, BoolTy, BoolTy); // IMP type llvm::Type *IMPArgs[] = { IdTy, SelectorTy }; @@ -997,21 +994,19 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, // Get functions needed in GC mode // id objc_assign_ivar(id, id, ptrdiff_t); - IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy, - nullptr); + IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy); // id objc_assign_strongCast (id, id*) StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy, - PtrToIdTy, nullptr); + PtrToIdTy); // id objc_assign_global(id, id*); - GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy, - nullptr); + GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy); // id objc_assign_weak(id, id*); - WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, nullptr); + WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy); // id objc_read_weak(id*); - WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr); + WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy); // void *objc_memmove_collectable(void*, void *, size_t); MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy, - SizeTy, nullptr); + SizeTy); } } @@ -1317,7 +1312,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } } // Cast the pointer to a simplified version of the class structure - llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy, nullptr); + llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy); ReceiverClass = Builder.CreateBitCast(ReceiverClass, llvm::PointerType::getUnqual(CastTy)); // Get the superclass pointer @@ -1326,8 +1321,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, ReceiverClass = Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign()); // Construct the structure used to look up the IMP - llvm::StructType *ObjCSuperTy = llvm::StructType::get( - Receiver->getType(), IdTy, nullptr); + llvm::StructType *ObjCSuperTy = + llvm::StructType::get(Receiver->getType(), IdTy); // FIXME: Is this really supposed to be a dynamic alloca? Address ObjCSuper = Address(Builder.CreateAlloca(ObjCSuperTy), @@ -1565,11 +1560,8 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, IvarList.addInt(IntTy, (int)IvarNames.size()); // Get the ivar structure type. - llvm::StructType *ObjCIvarTy = llvm::StructType::get( - PtrToInt8Ty, - PtrToInt8Ty, - IntTy, - nullptr); + llvm::StructType *ObjCIvarTy = + llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, IntTy); // Array of ivar structures. auto Ivars = IvarList.beginArray(ObjCIvarTy); @@ -1611,7 +1603,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( // anyway; the classes will still work with the GNU runtime, they will just // be ignored. llvm::StructType *ClassTy = llvm::StructType::get( - PtrToInt8Ty, // isa + PtrToInt8Ty, // isa PtrToInt8Ty, // super_class PtrToInt8Ty, // name LongTy, // version @@ -1620,18 +1612,18 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure( IVars->getType(), // ivars Methods->getType(), // methods // These are all filled in by the runtime, so we pretend - PtrTy, // dtable - PtrTy, // subclass_list - PtrTy, // sibling_class - PtrTy, // protocols - PtrTy, // gc_object_type + PtrTy, // dtable + PtrTy, // subclass_list + PtrTy, // sibling_class + PtrTy, // protocols + PtrTy, // gc_object_type // New ABI: LongTy, // abi_version IvarOffsets->getType(), // ivar_offsets Properties->getType(), // properties IntPtrTy, // strong_pointers - IntPtrTy, // weak_pointers - nullptr); + IntPtrTy // weak_pointers + ); ConstantInitBuilder Builder(CGM); auto Elements = Builder.beginStruct(ClassTy); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 29b45e325b..992da81409 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -64,13 +64,11 @@ private: // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, - params, true), - "objc_msgSend", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(ObjectPtrTy, params, true), "objc_msgSend", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } /// void objc_msgSend_stret (id, SEL, ...) @@ -107,8 +105,8 @@ private: llvm::Constant *getMessageSendFp2retFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext); - llvm::Type *resultType = - llvm::StructType::get(longDoubleType, longDoubleType, nullptr); + llvm::Type *resultType = + llvm::StructType::get(longDoubleType, longDoubleType); return CGM.CreateRuntimeFunction(llvm::FunctionType::get(resultType, params, true), @@ -310,7 +308,7 @@ public: SmallVector<CanQualType,5> Params; Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); - Params.push_back(Ctx.LongTy); + Params.push_back(Ctx.getSizeType()); Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = @@ -589,13 +587,11 @@ public: llvm::Constant *getSetJmpFn() { // This is specifically the prototype for x86. llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; - return - CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, - params, false), - "_setjmp", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NonLazyBind)); + return CGM.CreateRuntimeFunction( + llvm::FunctionType::get(CGM.Int32Ty, params, false), "_setjmp", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NonLazyBind)); } public: @@ -890,7 +886,7 @@ protected: /// Cached reference to the class for constant strings. This value has type /// int * but is actually an Obj-C class pointer. - llvm::WeakVH ConstantStringClassRef; + llvm::WeakTrackingVH ConstantStringClassRef; /// \brief The LLVM type corresponding to NSConstantString. llvm::StructType *NSConstantStringType = nullptr; @@ -1008,6 +1004,8 @@ protected: const ObjCInterfaceDecl *ID, ObjCCommonTypesHelper &ObjCTypes); + std::string GetSectionName(StringRef Section, StringRef MachOAttributes); + public: /// CreateMetadataVar - Create a global variable with internal /// linkage for use by the Objective-C runtime. @@ -1680,7 +1678,10 @@ struct NullReturnState { /// Complete the null-return operation. It is valid to call this /// regardless of whether 'init' has been called. - RValue complete(CodeGenFunction &CGF, RValue result, QualType resultType, + RValue complete(CodeGenFunction &CGF, + ReturnValueSlot returnSlot, + RValue result, + QualType resultType, const CallArgList &CallArgs, const ObjCMethodDecl *Method) { // If we never had to do a null-check, just use the raw result. @@ -1747,7 +1748,8 @@ struct NullReturnState { // memory or (2) agg values in registers. if (result.isAggregate()) { assert(result.isAggregate() && "null init of non-aggregate result?"); - CGF.EmitNullInitialization(result.getAggregateAddress(), resultType); + if (!returnSlot.isUnused()) + CGF.EmitNullInitialization(result.getAggregateAddress(), resultType); if (contBB) CGF.EmitBlock(contBB); return result; } @@ -2119,11 +2121,11 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, } } - NullReturnState nullReturn; + bool RequiresNullCheck = false; llvm::Constant *Fn = nullptr; if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { - if (ReceiverCanBeNull) nullReturn.init(CGF, Arg0); + if (ReceiverCanBeNull) RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) : ObjCTypes.getSendStretFn(IsSuper); } else if (CGM.ReturnTypeUsesFPRet(ResultType)) { @@ -2136,23 +2138,30 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, // arm64 uses objc_msgSend for stret methods and yet null receiver check // must be made for it. if (ReceiverCanBeNull && CGM.ReturnTypeUsesSRet(MSI.CallInfo)) - nullReturn.init(CGF, Arg0); + RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendFn2(IsSuper) : ObjCTypes.getSendFn(IsSuper); } + // We don't need to emit a null check to zero out an indirect result if the + // result is ignored. + if (Return.isUnused()) + RequiresNullCheck = false; + // Emit a null-check if there's a consumed argument other than the receiver. - bool RequiresNullCheck = false; - if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) { + if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) { for (const auto *ParamDecl : Method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { - if (!nullReturn.NullBB) - nullReturn.init(CGF, Arg0); RequiresNullCheck = true; break; } } } + + NullReturnState nullReturn; + if (RequiresNullCheck) { + nullReturn.init(CGF, Arg0); + } llvm::Instruction *CallSite; Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType); @@ -2166,7 +2175,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, llvm::CallSite(CallSite).setDoesNotReturn(); } - return nullReturn.complete(CGF, rvalue, ResultType, CallArgs, + return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs, RequiresNullCheck ? Method : nullptr); } @@ -4790,6 +4799,27 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, /* *** Private Interface *** */ +std::string CGObjCCommonMac::GetSectionName(StringRef Section, + StringRef MachOAttributes) { + switch (CGM.getTriple().getObjectFormat()) { + default: + llvm_unreachable("unexpected object file format"); + case llvm::Triple::MachO: { + if (MachOAttributes.empty()) + return ("__DATA," + Section).str(); + return ("__DATA," + Section + "," + MachOAttributes).str(); + } + case llvm::Triple::ELF: + assert(Section.substr(0, 2) == "__" && + "expected the name to begin with __"); + return Section.substr(2).str(); + case llvm::Triple::COFF: + assert(Section.substr(0, 2) == "__" && + "expected the name to begin with __"); + return ("." + Section.substr(2) + "$B").str(); + } +} + /// EmitImageInfo - Emit the image info marker used to encode some module /// level information. /// @@ -4813,9 +4843,10 @@ enum ImageInfoFlags { void CGObjCCommonMac::EmitImageInfo() { unsigned version = 0; // Version is unused? - const char *Section = (ObjCABI == 1) ? - "__OBJC, __image_info,regular" : - "__DATA, __objc_imageinfo, regular, no_dead_strip"; + std::string Section = + (ObjCABI == 1) + ? "__OBJC,__image_info,regular" + : GetSectionName("__objc_imageinfo", "regular,no_dead_strip"); // Generate module-level named metadata to convey this information to the // linker and code-gen. @@ -4826,7 +4857,7 @@ void CGObjCCommonMac::EmitImageInfo() { Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Version", version); Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section", - llvm::MDString::get(VMContext,Section)); + llvm::MDString::get(VMContext, Section)); if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { // Non-GC overrides those files which specify GC. @@ -5053,6 +5084,11 @@ void IvarLayoutBuilder::visitField(const FieldDecl *field, // Drill down into arrays. uint64_t numElts = 1; + if (auto arrayType = CGM.getContext().getAsIncompleteArrayType(fieldType)) { + numElts = 0; + fieldType = arrayType->getElementType(); + } + // Unlike incomplete arrays, constant arrays can be nested. while (auto arrayType = CGM.getContext().getAsConstantArrayType(fieldType)) { numElts *= arrayType->getSize().getZExtValue(); fieldType = arrayType->getElementType(); @@ -5510,17 +5546,15 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // char *name; // char *attributes; // } - PropertyTy = llvm::StructType::create("struct._prop_t", - Int8PtrTy, Int8PtrTy, nullptr); + PropertyTy = llvm::StructType::create("struct._prop_t", Int8PtrTy, Int8PtrTy); // struct _prop_list_t { // uint32_t entsize; // sizeof(struct _prop_t) // uint32_t count_of_properties; // struct _prop_t prop_list[count_of_properties]; // } - PropertyListTy = - llvm::StructType::create("struct._prop_list_t", IntTy, IntTy, - llvm::ArrayType::get(PropertyTy, 0), nullptr); + PropertyListTy = llvm::StructType::create( + "struct._prop_list_t", IntTy, IntTy, llvm::ArrayType::get(PropertyTy, 0)); // struct _prop_list_t * PropertyListPtrTy = llvm::PointerType::getUnqual(PropertyListTy); @@ -5529,9 +5563,8 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // char *method_type; // char *_imp; // } - MethodTy = llvm::StructType::create("struct._objc_method", - SelectorPtrTy, Int8PtrTy, Int8PtrTy, - nullptr); + MethodTy = llvm::StructType::create("struct._objc_method", SelectorPtrTy, + Int8PtrTy, Int8PtrTy); // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); @@ -5544,17 +5577,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // SEL name; // char *types; // } - MethodDescriptionTy = - llvm::StructType::create("struct._objc_method_description", - SelectorPtrTy, Int8PtrTy, nullptr); + MethodDescriptionTy = llvm::StructType::create( + "struct._objc_method_description", SelectorPtrTy, Int8PtrTy); // struct _objc_method_description_list { // int count; // struct _objc_method_description[1]; // } - MethodDescriptionListTy = llvm::StructType::create( - "struct._objc_method_description_list", IntTy, - llvm::ArrayType::get(MethodDescriptionTy, 0), nullptr); + MethodDescriptionListTy = + llvm::StructType::create("struct._objc_method_description_list", IntTy, + llvm::ArrayType::get(MethodDescriptionTy, 0)); // struct _objc_method_description_list * MethodDescriptionListPtrTy = @@ -5570,11 +5602,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // const char ** extendedMethodTypes; // struct _objc_property_list *class_properties; // } - ProtocolExtensionTy = - llvm::StructType::create("struct._objc_protocol_extension", - IntTy, MethodDescriptionListPtrTy, - MethodDescriptionListPtrTy, PropertyListPtrTy, - Int8PtrPtrTy, PropertyListPtrTy, nullptr); + ProtocolExtensionTy = llvm::StructType::create( + "struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy, + MethodDescriptionListPtrTy, PropertyListPtrTy, Int8PtrPtrTy, + PropertyListPtrTy); // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); @@ -5586,10 +5617,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) ProtocolListTy = llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), - LongTy, - llvm::ArrayType::get(ProtocolTy, 0), - nullptr); + ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy, + llvm::ArrayType::get(ProtocolTy, 0)); // struct _objc_protocol { // struct _objc_protocol_extension *isa; @@ -5600,9 +5629,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // } ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy, llvm::PointerType::getUnqual(ProtocolListTy), - MethodDescriptionListPtrTy, - MethodDescriptionListPtrTy, - nullptr); + MethodDescriptionListPtrTy, MethodDescriptionListPtrTy); // struct _objc_protocol_list * ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy); @@ -5616,8 +5643,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *ivar_type; // int ivar_offset; // } - IvarTy = llvm::StructType::create("struct._objc_ivar", - Int8PtrTy, Int8PtrTy, IntTy, nullptr); + IvarTy = llvm::StructType::create("struct._objc_ivar", Int8PtrTy, Int8PtrTy, + IntTy); // struct _objc_ivar_list * IvarListTy = @@ -5630,9 +5657,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) MethodListPtrTy = llvm::PointerType::getUnqual(MethodListTy); // struct _objc_class_extension * - ClassExtensionTy = - llvm::StructType::create("struct._objc_class_extension", - IntTy, Int8PtrTy, PropertyListPtrTy, nullptr); + ClassExtensionTy = llvm::StructType::create( + "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy); ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy); ClassTy = llvm::StructType::create(VMContext, "struct._objc_class"); @@ -5652,18 +5678,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_class_ext *ext; // }; ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy), - llvm::PointerType::getUnqual(ClassTy), - Int8PtrTy, - LongTy, - LongTy, - LongTy, - IvarListPtrTy, - MethodListPtrTy, - CachePtrTy, - ProtocolListPtrTy, - Int8PtrTy, - ClassExtensionPtrTy, - nullptr); + llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy, + LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, + ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy); ClassPtrTy = llvm::PointerType::getUnqual(ClassTy); @@ -5677,12 +5694,10 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_property_list *instance_properties;// category's @property // struct _objc_property_list *class_properties; // } - CategoryTy = - llvm::StructType::create("struct._objc_category", - Int8PtrTy, Int8PtrTy, MethodListPtrTy, - MethodListPtrTy, ProtocolListPtrTy, - IntTy, PropertyListPtrTy, PropertyListPtrTy, - nullptr); + CategoryTy = llvm::StructType::create( + "struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy, + MethodListPtrTy, ProtocolListPtrTy, IntTy, PropertyListPtrTy, + PropertyListPtrTy); // Global metadata structures @@ -5693,10 +5708,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // short cat_def_cnt; // char *defs[cls_def_cnt + cat_def_cnt]; // } - SymtabTy = - llvm::StructType::create("struct._objc_symtab", - LongTy, SelectorPtrTy, ShortTy, ShortTy, - llvm::ArrayType::get(Int8PtrTy, 0), nullptr); + SymtabTy = llvm::StructType::create("struct._objc_symtab", LongTy, + SelectorPtrTy, ShortTy, ShortTy, + llvm::ArrayType::get(Int8PtrTy, 0)); SymtabPtrTy = llvm::PointerType::getUnqual(SymtabTy); // struct _objc_module { @@ -5705,10 +5719,8 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *name; // struct _objc_symtab* symtab; // } - ModuleTy = - llvm::StructType::create("struct._objc_module", - LongTy, LongTy, Int8PtrTy, SymtabPtrTy, nullptr); - + ModuleTy = llvm::StructType::create("struct._objc_module", LongTy, LongTy, + Int8PtrTy, SymtabPtrTy); // FIXME: This is the size of the setjmp buffer and should be target // specific. 18 is what's used on 32-bit X86. @@ -5717,10 +5729,9 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // Exceptions llvm::Type *StackPtrTy = llvm::ArrayType::get(CGM.Int8PtrTy, 4); - ExceptionDataTy = - llvm::StructType::create("struct._objc_exception_data", - llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize), - StackPtrTy, nullptr); + ExceptionDataTy = llvm::StructType::create( + "struct._objc_exception_data", + llvm::ArrayType::get(CGM.Int32Ty, SetJmpBufferSize), StackPtrTy); } ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm) @@ -5731,8 +5742,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _objc_method method_list[method_count]; // } MethodListnfABITy = - llvm::StructType::create("struct.__method_list_t", IntTy, IntTy, - llvm::ArrayType::get(MethodTy, 0), nullptr); + llvm::StructType::create("struct.__method_list_t", IntTy, IntTy, + llvm::ArrayType::get(MethodTy, 0)); // struct method_list_t * MethodListnfABIPtrTy = llvm::PointerType::getUnqual(MethodListnfABITy); @@ -5756,14 +5767,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul ProtocolListnfABITy = llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolnfABITy = - llvm::StructType::create("struct._protocol_t", ObjectPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListnfABITy), - MethodListnfABIPtrTy, MethodListnfABIPtrTy, - MethodListnfABIPtrTy, MethodListnfABIPtrTy, - PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, - Int8PtrTy, PropertyListPtrTy, - nullptr); + ProtocolnfABITy = llvm::StructType::create( + "struct._protocol_t", ObjectPtrTy, Int8PtrTy, + llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy, + MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, + PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy, + PropertyListPtrTy); // struct _protocol_t* ProtocolnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolnfABITy); @@ -5773,8 +5782,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _protocol_t *[protocol_count]; // } ProtocolListnfABITy->setBody(LongTy, - llvm::ArrayType::get(ProtocolnfABIPtrTy, 0), - nullptr); + llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)); // struct _objc_protocol_list* ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy); @@ -5788,7 +5796,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // } IvarnfABITy = llvm::StructType::create( "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy), - Int8PtrTy, Int8PtrTy, IntTy, IntTy, nullptr); + Int8PtrTy, Int8PtrTy, IntTy, IntTy); // struct _ivar_list_t { // uint32 entsize; // sizeof(struct _ivar_t) @@ -5796,8 +5804,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct _iver_t list[count]; // } IvarListnfABITy = - llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy, - llvm::ArrayType::get(IvarnfABITy, 0), nullptr); + llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy, + llvm::ArrayType::get(IvarnfABITy, 0)); IvarListnfABIPtrTy = llvm::PointerType::getUnqual(IvarListnfABITy); @@ -5816,13 +5824,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // } // FIXME. Add 'reserved' field in 64bit abi mode! - ClassRonfABITy = llvm::StructType::create("struct._class_ro_t", - IntTy, IntTy, IntTy, Int8PtrTy, - Int8PtrTy, MethodListnfABIPtrTy, - ProtocolListnfABIPtrTy, - IvarListnfABIPtrTy, - Int8PtrTy, PropertyListPtrTy, - nullptr); + ClassRonfABITy = llvm::StructType::create( + "struct._class_ro_t", IntTy, IntTy, IntTy, Int8PtrTy, Int8PtrTy, + MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, IvarListnfABIPtrTy, + Int8PtrTy, PropertyListPtrTy); // ImpnfABITy - LLVM for id (*)(id, SEL, ...) llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; @@ -5839,11 +5844,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t"); ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy), - llvm::PointerType::getUnqual(ClassnfABITy), - CachePtrTy, + llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy, llvm::PointerType::getUnqual(ImpnfABITy), - llvm::PointerType::getUnqual(ClassRonfABITy), - nullptr); + llvm::PointerType::getUnqual(ClassRonfABITy)); // LLVM for struct _class_t * ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy); @@ -5858,15 +5861,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _prop_list_t * const class_properties; // const uint32_t size; // } - CategorynfABITy = llvm::StructType::create("struct._category_t", - Int8PtrTy, ClassnfABIPtrTy, - MethodListnfABIPtrTy, - MethodListnfABIPtrTy, - ProtocolListnfABIPtrTy, - PropertyListPtrTy, - PropertyListPtrTy, - IntTy, - nullptr); + CategorynfABITy = llvm::StructType::create( + "struct._category_t", Int8PtrTy, ClassnfABIPtrTy, MethodListnfABIPtrTy, + MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy, + PropertyListPtrTy, IntTy); // New types for nonfragile abi messaging. CodeGen::CodeGenTypes &Types = CGM.getTypes(); @@ -5903,9 +5901,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // SUPER_IMP messenger; // SEL name; // }; - SuperMessageRefTy = - llvm::StructType::create("struct._super_message_ref_t", - ImpnfABITy, SelectorPtrTy, nullptr); + SuperMessageRefTy = llvm::StructType::create("struct._super_message_ref_t", + ImpnfABITy, SelectorPtrTy); // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t* SuperMessageRefPtrTy = llvm::PointerType::getUnqual(SuperMessageRefTy); @@ -5916,10 +5913,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const char* name; // c++ typeinfo string // Class cls; // }; - EHTypeTy = - llvm::StructType::create("struct._objc_typeinfo", - llvm::PointerType::getUnqual(Int8PtrTy), - Int8PtrTy, ClassnfABIPtrTy, nullptr); + EHTypeTy = llvm::StructType::create("struct._objc_typeinfo", + llvm::PointerType::getUnqual(Int8PtrTy), + Int8PtrTy, ClassnfABIPtrTy); EHTypePtrTy = llvm::PointerType::getUnqual(EHTypeTy); } @@ -5974,17 +5970,21 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() { } AddModuleClassList(DefinedClasses, "OBJC_LABEL_CLASS_$", - "__DATA, __objc_classlist, regular, no_dead_strip"); + GetSectionName("__objc_classlist", + "regular,no_dead_strip")); AddModuleClassList(DefinedNonLazyClasses, "OBJC_LABEL_NONLAZY_CLASS_$", - "__DATA, __objc_nlclslist, regular, no_dead_strip"); + GetSectionName("__objc_nlclslist", + "regular,no_dead_strip")); // Build list of all implemented category addresses in array // L_OBJC_LABEL_CATEGORY_$. AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$", - "__DATA, __objc_catlist, regular, no_dead_strip"); + GetSectionName("__objc_catlist", + "regular,no_dead_strip")); AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$", - "__DATA, __objc_nlcatlist, regular, no_dead_strip"); + GetSectionName("__objc_nlcatlist", + "regular,no_dead_strip")); EmitImageInfo(); } @@ -6397,15 +6397,15 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, llvm::GlobalVariable *PTGV = CGM.getModule().getGlobalVariable(ProtocolName); if (PTGV) return CGF.Builder.CreateAlignedLoad(PTGV, Align); - PTGV = new llvm::GlobalVariable( - CGM.getModule(), - Init->getType(), false, - llvm::GlobalValue::WeakAnyLinkage, - Init, - ProtocolName); - PTGV->setSection("__DATA, __objc_protorefs, coalesced, no_dead_strip"); + PTGV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, + llvm::GlobalValue::WeakAnyLinkage, Init, + ProtocolName); + PTGV->setSection(GetSectionName("__objc_protorefs", + "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); PTGV->setAlignment(Align.getQuantity()); + if (!CGM.getTriple().isOSBinFormatMachO()) + PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName)); CGM.addCompilerUsedGlobal(PTGV); return CGF.Builder.CreateAlignedLoad(PTGV, Align); } @@ -6620,10 +6620,14 @@ CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, Ivar->getAccessControl() == ObjCIvarDecl::Private || Ivar->getAccessControl() == ObjCIvarDecl::Package; - if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) - IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - else if (ID->hasAttr<DLLImportAttr>()) - IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + const ObjCInterfaceDecl *ContainingID = Ivar->getContainingInterface(); + + if (ContainingID->hasAttr<DLLImportAttr>()) + IvarOffsetGV + ->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else if (ContainingID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) + IvarOffsetGV + ->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } } return IvarOffsetGV; @@ -6862,8 +6866,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef)); PTGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)); - if (CGM.getTriple().isOSBinFormatMachO()) - PTGV->setSection("__DATA, __objc_protolist, coalesced, no_dead_strip"); + PTGV->setSection(GetSectionName("__objc_protolist", + "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.addCompilerUsedGlobal(PTGV); return Entry; @@ -7059,7 +7063,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, /*constant*/ false, llvm::GlobalValue::WeakAnyLinkage); messageRef->setVisibility(llvm::GlobalValue::HiddenVisibility); - messageRef->setSection("__DATA, __objc_msgrefs, coalesced"); + messageRef->setSection(GetSectionName("__objc_msgrefs", "coalesced")); } bool requiresnullCheck = false; @@ -7089,7 +7093,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, CGCallee callee(CGCalleeInfo(), calleePtr); RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args); - return nullReturn.complete(CGF, result, resultType, formalArgs, + return nullReturn.complete(CGF, returnSlot, result, resultType, formalArgs, requiresnullCheck ? method : nullptr); } @@ -7170,7 +7174,8 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, ClassGV, "OBJC_CLASSLIST_REFERENCES_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_classrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_classrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } return CGF.Builder.CreateAlignedLoad(Entry, Align); @@ -7204,7 +7209,8 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_superrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } return CGF.Builder.CreateAlignedLoad(Entry, Align); @@ -7226,7 +7232,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getQuantity()); - Entry->setSection("__DATA, __objc_superrefs, regular, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_superrefs", + "regular,no_dead_strip")); CGM.addCompilerUsedGlobal(Entry); } @@ -7322,7 +7329,8 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, false, llvm::GlobalValue::PrivateLinkage, Casted, "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); - Entry->setSection("__DATA, __objc_selrefs, literal_pointers, no_dead_strip"); + Entry->setSection(GetSectionName("__objc_selrefs", + "literal_pointers,no_dead_strip")); Entry->setAlignment(Align.getQuantity()); CGM.addCompilerUsedGlobal(Entry); } diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 3da7ed230e..2f886fd82c 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -26,61 +26,27 @@ using namespace clang; using namespace CodeGen; -static uint64_t LookupFieldBitOffset(CodeGen::CodeGenModule &CGM, - const ObjCInterfaceDecl *OID, - const ObjCImplementationDecl *ID, - const ObjCIvarDecl *Ivar) { - const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); - - // FIXME: We should eliminate the need to have ObjCImplementationDecl passed - // in here; it should never be necessary because that should be the lexical - // decl context for the ivar. - - // If we know have an implementation (and the ivar is in it) then - // look up in the implementation layout. - const ASTRecordLayout *RL; - if (ID && declaresSameEntity(ID->getClassInterface(), Container)) - RL = &CGM.getContext().getASTObjCImplementationLayout(ID); - else - RL = &CGM.getContext().getASTObjCInterfaceLayout(Container); - - // Compute field index. - // - // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is - // implemented. This should be fixed to get the information from the layout - // directly. - unsigned Index = 0; - - for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin(); - IVD; IVD = IVD->getNextIvar()) { - if (Ivar == IVD) - break; - ++Index; - } - assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!"); - - return RL->getFieldOffset(Index); -} - uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM, const ObjCInterfaceDecl *OID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) / - CGM.getContext().getCharWidth(); + return CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar) / + CGM.getContext().getCharWidth(); } uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM, const ObjCImplementationDecl *OID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, OID->getClassInterface(), OID, Ivar) / - CGM.getContext().getCharWidth(); + return CGM.getContext().lookupFieldBitOffset(OID->getClassInterface(), OID, + Ivar) / + CGM.getContext().getCharWidth(); } unsigned CGObjCRuntime::ComputeBitfieldBitOffset( CodeGen::CodeGenModule &CGM, const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - return LookupFieldBitOffset(CGM, ID, ID->getImplementation(), Ivar); + return CGM.getContext().lookupFieldBitOffset(ID, ID->getImplementation(), + Ivar); } LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, @@ -90,7 +56,11 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, unsigned CVRQualifiers, llvm::Value *Offset) { // Compute (type*) ( (char *) BaseValue + Offset) - QualType IvarTy = Ivar->getType().withCVRQualifiers(CVRQualifiers); + QualType InterfaceTy{OID->getTypeForDecl(), 0}; + QualType ObjectPtrTy = + CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy); + QualType IvarTy = + Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers); llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy); llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy); V = CGF.Builder.CreateInBoundsGEP(V, Offset, "add.ptr"); @@ -115,7 +85,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, // Note, there is a subtle invariant here: we can only call this routine on // non-synthesized ivars but we may be called for synthesized ivars. However, // a synthesized ivar can never be a bit-field, so this is safe. - uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar); + uint64_t FieldBitOffset = + CGF.CGM.getContext().lookupFieldBitOffset(OID, nullptr, Ivar); uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); @@ -138,7 +109,9 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, Addr = CGF.Builder.CreateElementBitCast(Addr, llvm::Type::getIntNTy(CGF.getLLVMContext(), Info->StorageSize)); - return LValue::MakeBitfield(Addr, *Info, IvarTy, AlignmentSource::Decl); + return LValue::MakeBitfield(Addr, *Info, IvarTy, + LValueBaseInfo(AlignmentSource::Decl), + TBAAAccessInfo()); } namespace { diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index db02c631c9..4d637615d2 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -16,6 +16,7 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include <assert.h> @@ -35,8 +36,8 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { "Not an OpenCL specific type!"); llvm::LLVMContext& Ctx = CGM.getLLVMContext(); - uint32_t ImgAddrSpc = CGM.getContext().getTargetAddressSpace( - CGM.getTarget().getOpenCLImageAddrSpace()); + uint32_t AddrSpc = CGM.getContext().getTargetAddressSpace( + CGM.getTarget().getOpenCLTypeAddrSpace(T)); switch (cast<BuiltinType>(T)->getKind()) { default: llvm_unreachable("Unexpected opencl builtin type!"); @@ -45,29 +46,29 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::Id: \ return llvm::PointerType::get( \ llvm::StructType::create(Ctx, "opencl." #ImgType "_" #Suffix "_t"), \ - ImgAddrSpc); + AddrSpc); #include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: - return getSamplerType(); + return getSamplerType(T); case BuiltinType::OCLEvent: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.event_t"), 0); + return llvm::PointerType::get( + llvm::StructType::create(Ctx, "opencl.event_t"), AddrSpc); case BuiltinType::OCLClkEvent: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.clk_event_t"), 0); + llvm::StructType::create(Ctx, "opencl.clk_event_t"), AddrSpc); case BuiltinType::OCLQueue: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.queue_t"), 0); + llvm::StructType::create(Ctx, "opencl.queue_t"), AddrSpc); case BuiltinType::OCLReserveID: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); + llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc); } } -llvm::Type *CGOpenCLRuntime::getPipeType() { +llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { if (!PipeTy){ - uint32_t PipeAddrSpc = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace( + CGM.getTarget().getOpenCLTypeAddrSpace(T)); PipeTy = llvm::PointerType::get(llvm::StructType::create( CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc); } @@ -75,12 +76,12 @@ llvm::Type *CGOpenCLRuntime::getPipeType() { return PipeTy; } -llvm::PointerType *CGOpenCLRuntime::getSamplerType() { +llvm::PointerType *CGOpenCLRuntime::getSamplerType(const Type *T) { if (!SamplerTy) SamplerTy = llvm::PointerType::get(llvm::StructType::create( CGM.getLLVMContext(), "opencl.sampler_t"), CGM.getContext().getTargetAddressSpace( - LangAS::opencl_constant)); + CGM.getTarget().getOpenCLTypeAddrSpace(T))); return SamplerTy; } @@ -103,3 +104,45 @@ llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) { .getQuantity(); return llvm::ConstantInt::get(Int32Ty, TypeSize, false); } + +llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { + assert(CGM.getLangOpts().OpenCL); + return llvm::IntegerType::getInt8PtrTy( + CGM.getLLVMContext(), + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); +} + +CGOpenCLRuntime::EnqueuedBlockInfo +CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } + auto *Block = cast<BlockExpr>(E); + + // The same block literal may be enqueued multiple times. Cache it if + // possible. + auto Loc = EnqueuedBlockMap.find(Block); + if (Loc != EnqueuedBlockMap.end()) { + return Loc->second; + } + + // Emit block literal as a common block expression and get the block invoke + // function. + llvm::Function *Invoke; + auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke); + auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel( + CGF, Invoke, V->stripPointerCasts()); + + // The common part of the post-processing of the kernel goes here. + F->addFnAttr(llvm::Attribute::NoUnwind); + F->setCallingConv( + CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel)); + EnqueuedBlockInfo Info{F, V}; + EnqueuedBlockMap[Block] = Info; + return Info; +} diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index ee3cb3dda0..ead303d1d0 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -17,11 +17,13 @@ #define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H #include "clang/AST/Type.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" namespace clang { +class Expr; class VarDecl; namespace CodeGen { @@ -35,6 +37,14 @@ protected: llvm::Type *PipeTy; llvm::PointerType *SamplerTy; + /// Structure for enqueued block information. + struct EnqueuedBlockInfo { + llvm::Function *Kernel; /// Enqueued block kernel. + llvm::Value *BlockArg; /// The first argument to enqueued block kernel. + }; + /// Maps block expression to block information. + llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap; + public: CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr), SamplerTy(nullptr) {} @@ -48,9 +58,9 @@ public: virtual llvm::Type *convertOpenCLSpecificType(const Type *T); - virtual llvm::Type *getPipeType(); + virtual llvm::Type *getPipeType(const PipeType *T); - llvm::PointerType *getSamplerType(); + llvm::PointerType *getSamplerType(const Type *T); // \brief Returnes a value which indicates the size in bytes of the pipe // element. @@ -59,6 +69,13 @@ public: // \brief Returnes a value which indicates the alignment in bytes of the pipe // element. virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg); + + /// \return __generic void* type. + llvm::PointerType *getGenericVoidPointerType(); + + /// \return enqueued block information for enqueued block. + EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, + const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index ae9cf9dc59..35929af95e 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -264,6 +265,13 @@ public: return nullptr; } + /// \brief Get an LValue for the current ThreadID variable. + LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + return OuterRegionInfo->getThreadIDVariableLValue(CGF); + llvm_unreachable("No LValue for inlined OpenMP construct"); + } + /// \brief Get the name of the capture helper. StringRef getHelperName() const override { if (auto *OuterRegionInfo = getOldCSI()) @@ -420,7 +428,7 @@ public: /// \brief Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h -enum OpenMPLocationFlags { +enum OpenMPLocationFlags : unsigned { /// \brief Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, /// \brief Use c-style ident structure. @@ -436,7 +444,14 @@ enum OpenMPLocationFlags { /// \brief Implicit barrier in 'sections' directive. OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, /// \brief Implicit barrier in 'single' directive. - OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, + /// Call of __kmp_for_static_init for static loop. + OMP_IDENT_WORK_LOOP = 0x200, + /// Call of __kmp_for_static_init for sections. + OMP_IDENT_WORK_SECTIONS = 0x400, + /// Call of __kmp_for_static_init for distribute. + OMP_IDENT_WORK_DISTRIBUTE = 0x800, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; /// \brief Describes ident structure that describes a source location. @@ -643,6 +658,12 @@ enum OpenMPRTLFunction { // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); OMPRTL__kmpc_doacross_wait, + // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + OMPRTL__kmpc_task_reduction_init, + // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + OMPRTL__kmpc_task_reduction_get_th_data, // // Offloading related calls @@ -697,6 +718,409 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { } } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. +static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast<CallExpr>(InitOp); + auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, bool EmitDeclareReductionInit, + const Expr *Init, + const OMPDeclareReductionDecl *DRD, + Address SrcAddr = Address::invalid()) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + if (EmitDeclareReductionInit) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { + return CGF.EmitOMPSharedLValue(E); +} + +LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, + const Expr *E) { + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) + return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + return LValue(); +} + +void ReductionCodeGen::emitAggregateInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + bool EmitDeclareReductionInit = + DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); + EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), + EmitDeclareReductionInit, + EmitDeclareReductionInit ? ClausesData[N].ReductionOp + : PrivateVD->getInit(), + DRD, SharedLVal.getAddress()); +} + +ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps) { + ClausesData.reserve(Shareds.size()); + SharedAddresses.reserve(Shareds.size()); + Sizes.reserve(Shareds.size()); + BaseDecls.reserve(Shareds.size()); + auto IPriv = Privates.begin(); + auto IRed = ReductionOps.begin(); + for (const auto *Ref : Shareds) { + ClausesData.emplace_back(Ref, *IPriv, *IRed); + std::advance(IPriv, 1); + std::advance(IRed, 1); + } +} + +void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && + "Number of generated lvalues must be exactly N."); + LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); + LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); + SharedAddresses.emplace_back(First, Second); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + if (!PrivateType->isVariablyModifiedType()) { + Sizes.emplace_back( + CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()), + nullptr); + return; + } + llvm::Value *Size; + llvm::Value *SizeInChars; + llvm::Type *ElemType = + cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) + ->getElementType(); + auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); + if (AsArraySection) { + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), + SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); + } else { + SizeInChars = CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()); + Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); + } + Sizes.emplace_back(SizeInChars, Size); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, + llvm::Value *Size) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + if (!PrivateType->isVariablyModifiedType()) { + assert(!Size && !Sizes[N].second && + "Size should be nullptr for non-variably modified reduction " + "items."); + return; + } + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast<OpaqueValueExpr>( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { + assert(SharedAddresses.size() > N && "No variable was generated"); + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + QualType PrivateType = PrivateVD->getType(); + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + QualType SharedType = SharedAddresses[N].first.getType(); + SharedLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.ConvertTypeForMem(SharedType)), + SharedType, SharedAddresses[N].first.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); + if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, + PrivateAddr, SharedLVal.getAddress(), + SharedLVal.getType()); + } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) { + CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, + PrivateVD->getType().getQualifiers(), + /*IsInitializer=*/false); + } +} + +bool ReductionCodeGen::needCleanups(unsigned N) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + return DTorKind != QualType::DK_none; +} + +void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + if (needCleanups(N)) { + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); + } +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs<PointerType>()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.ConvertTypeForMem(ElTy)), + BaseLV.getType(), BaseLV.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); +} + +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + llvm::Type *BaseLVType, CharUnits BaseLVAlignment, + llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLVType; + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLVAlignment); +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + const VarDecl *OrigVD = nullptr; + if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast<DeclRefExpr>(Base); + OrigVD = cast<VarDecl>(DE->getDecl()); + } + if (OrigVD) { + BaseDecls.emplace_back(OrigVD); + auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue BaseLValue = + loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), + OriginalBaseLValue); + llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( + BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + llvm::Value *Ptr = + CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + return castToBase(CGF, OrigVD->getType(), + SharedAddresses[N].first.getType(), + OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAlignment(), Ptr); + } + BaseDecls.emplace_back( + cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); + return PrivateAddr; +} + +bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + return DRD && DRD->getInitializer(); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -728,7 +1152,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, - CGM.Int8PtrTy /* psource */, nullptr); + CGM.Int8PtrTy /* psource */); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -747,9 +1171,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), - /*Id=*/nullptr, PtrTy); + /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), - /*Id=*/nullptr, PtrTy); + /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); auto &FnInfo = @@ -760,6 +1184,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. @@ -777,7 +1202,14 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, .getAddress(); }); (void)Scope.Privatize(); - CGF.EmitIgnoredExpr(CombinerInitializer); + if (!IsCombiner && Out->hasInit() && + !CGF.isTrivialInitializer(Out->getInit())) { + CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), + Out->getType().getQualifiers(), + /*IsInitializer=*/true); + } + if (CombinerInitializer) + CGF.EmitIgnoredExpr(CombinerInitializer); Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; @@ -803,7 +1235,10 @@ void CGOpenMPRuntime::emitUserDefinedReduction( Orig = &C.Idents.get("omp_orig"); } Initializer = emitCombinerOrInitializer( - CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + CGM, D->getType(), + D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init + : nullptr, + cast<VarDecl>(D->lookup(Orig).front()), cast<VarDecl>(D->lookup(Priv).front()), /*IsCombiner=*/false); } @@ -1015,19 +1450,22 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (ThreadID != nullptr) return ThreadID; } - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); - // If value loaded in entry block, cache it and use it everywhere in - // function. - if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.ThreadID = ThreadID; + // If exceptions are enabled, do not use parameter to avoid possible crash. + if (!CGF.getInvokeDest()) { + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + // If value loaded in entry block, cache it and use it everywhere in + // function. + if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; + } + return ThreadID; } - return ThreadID; } } @@ -1553,6 +1991,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } + case OMPRTL__kmpc_task_reduction_init: { + // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); + break; + } + case OMPRTL__kmpc_task_reduction_get_th_data: { + // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -1807,8 +2265,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1838,8 +2296,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // of the variable VD CodeGenFunction DtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1903,6 +2361,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name) { + llvm::Twine VarName(Name, ".artificial."); + llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); + llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, SourceLocation()), + getThreadID(CGF, SourceLocation()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), + CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, + /*IsSigned=*/false), + getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + return Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VarLVType->getPointerTo(/*AddrSpace=*/0)), + CGM.getPointerAlign()); +} + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { @@ -1991,7 +2470,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; @@ -2190,10 +2669,8 @@ static llvm::Value *emitCopyprivateCopyFunction( auto &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); - ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); + ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -2466,16 +2943,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, return Schedule | Modifier; } -void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, - SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, llvm::Value *UB, - llvm::Value *Chunk) { +void CGOpenMPRuntime::emitForDispatchInit( + CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, + bool Ordered, const DispatchRTInput &DispatchValues) { if (!CGF.HaveInsertPoint()) return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + OpenMPSchedType Schedule = getRuntimeSchedule( + ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && @@ -2486,14 +2961,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. - if (Chunk == nullptr) - Chunk = CGF.Builder.getIntN(IVSize, 1); + llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk + : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; @@ -2504,87 +2979,101 @@ static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, - unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, - Address ST, llvm::Value *Chunk) { + const CGOpenMPRuntime::StaticRTInput &Values) { if (!CGF.HaveInsertPoint()) - return; - - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static || - Schedule == OMP_dist_sch_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || - Schedule == OMP_dist_sch_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. - Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, M1, M2)), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(ForStaticInitFunction, Args); + return; + + assert(!Values.Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + llvm::Value *Chunk = Values.Chunk; + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(Values.IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, + ThreadId, + CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + M2)), // Schedule type + Values.IL.getPointer(), // &isLastIter + Values.LB.getPointer(), // &LB + Values.UB.getPointer(), // &UB + Values.ST.getPointer(), // &Stride + CGF.Builder.getIntN(Values.IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); } void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, - Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + const StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule( + ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); + assert(isOpenMPWorksharingDirective(DKind) && + "Expected loop-based or sections-based directive."); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, + isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, - ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, - Ordered, IL, LB, UB, ST, Chunk); + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); } void CGOpenMPRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + OpenMPDistScheduleClauseKind SchedKind, + const CGOpenMPRuntime::StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); + auto *UpdatedLocation = + emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, - OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, - UB, ST, Chunk); + OMPC_SCHEDULE_MODIFIER_unknown, Values); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc, + OpenMPDirectiveKind DKind) { if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, + isOpenMPDistributeDirective(DKind) + ? OMP_IDENT_WORK_DISTRIBUTE + : isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), + getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -2702,6 +3191,8 @@ enum KmpTaskTFields { KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, + /// (Taskloops only) Reduction data. + KmpTaskTReductions, }; } // anonymous namespace @@ -2786,8 +3277,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, const RegionCodeGenTy &Codegen) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&DummyPtr); CodeGenFunction CGF(CGM); @@ -2890,21 +3380,34 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // descriptor, so we can reuse the logic that emits Ctors and Dtors. auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), - IdentInfo, C.CharTy); + IdentInfo, C.CharTy, ImplicitParamDecl::Other); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), + Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); + if (CGM.supportsCOMDAT()) { + // It is sufficient to call registration function only once, so create a + // COMDAT group for registration/unregistration functions and associated + // data. That would reduce startup time and code size. Registration + // function serves as a COMDAT group key. + auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); + RegFn->setComdat(ComdatKey); + UnRegFn->setComdat(ComdatKey); + DeviceImages->setComdat(ComdatKey); + Desc->setComdat(ComdatKey); + } return RegFn; } @@ -2974,7 +3477,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create the offloading info metadata node. llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - // Auxiliar methods to create metadata values and strings. + // Auxiliary methods to create metadata values and strings. auto getMDInt = [&](unsigned v) { return llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); @@ -3241,6 +3744,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; + // void * reductions; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); @@ -3264,6 +3768,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); } RD->completeDefinition(); return RD; @@ -3294,7 +3799,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, -/// tt->shareds); +/// tt->reductions, tt->shareds); /// return 0; /// } /// \endcode @@ -3307,10 +3812,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, llvm::Value *TaskPrivatesMap) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); - ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, - KmpTaskTWithPrivatesPtrQTy.withRestrict()); + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); auto &TaskEntryFnInfo = @@ -3321,7 +3827,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ".omp_task_entry.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, @@ -3379,14 +3884,19 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); + auto RLVal = CGF.EmitLValueForField(Base, *RFI); + auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); + CallArgs.push_back(RParam); } CallArgs.push_back(SharedsParam); - CGF.EmitCallOrInvoke(TaskFunction, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, + CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); @@ -3401,10 +3911,11 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, QualType KmpTaskTWithPrivatesQTy) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); - ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, - KmpTaskTWithPrivatesPtrQTy.withRestrict()); + ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; @@ -3460,36 +3971,40 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.getPointerType(PrivatesQTy).withConst().withRestrict()); + C.getPointerType(PrivatesQTy).withConst().withRestrict(), + ImplicitParamDecl::Other); Args.push_back(&TaskPrivatesArg); llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; unsigned Counter = 1; for (auto *E: PrivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E : FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E: LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( - C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) - .withConst() - .withRestrict())); + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict(), + ImplicitParamDecl::Other)); auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3504,6 +4019,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); + TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); @@ -3567,7 +4083,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), AlignmentSource::Decl); + SharedRefLValue.getType(), + LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. @@ -3577,7 +4095,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue.getAddress(), Type); } else { // Initialize firstprivate array using element-by-element - // intialization. + // initialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, @@ -3646,12 +4164,14 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { auto &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); - ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); - ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, C.IntTy); + ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy, + ImplicitParamDecl::Other); + ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy, + ImplicitParamDecl::Other); + ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); @@ -3761,9 +4281,20 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). - if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( - CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { + if (SavedKmpTaskloopTQTy.isNull()) { + SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + } + KmpTaskTQTy = SavedKmpTaskloopTQTy; + } else { + assert(D.getDirectiveKind() == OMPD_task && + "Expected taskloop or task directive"); + if (SavedKmpTaskTQTy.isNull()) { + SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); + } + KmpTaskTQTy = SavedKmpTaskTQTy; } auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. @@ -3780,9 +4311,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Emit initial values for private copies (if any). llvm::Value *TaskPrivatesMap = nullptr; auto *TaskPrivatesMapTy = - std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), - 3) - ->getType(); + std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( @@ -4053,8 +4582,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, - PrePostActionTy &) { + NumDependencies, &DepWaitTaskArgs, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, @@ -4065,11 +4594,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); - auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, + OutlinedFnArgs); }; // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, @@ -4137,11 +4667,27 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { - UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), - UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), - llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + UpLoc, + ThreadID, + Result.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getNullValue( + CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -4150,10 +4696,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), - Result.TaskDupFn - ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, - CGF.VoidPtrTy) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } @@ -4265,10 +4810,8 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); - ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, - C.VoidPtrTy); + ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -4659,6 +5202,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } +/// Generates unique name for artificial threadprivate variables. +/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> +static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, + unsigned N) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << "." << Loc.getRawEncoding() << "_" << N; + return Out.str(); +} + +/// Emits reduction initializer function: +/// \code +/// void @.red_init(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// store <type> <init>, <type>* %0 +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_init.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + LValue SharedLVal; + // If initializer uses initializer from declare reduction construct, emit a + // pointer to the address of the original reduction item (reuired by reduction + // initializer) + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = + CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); + } else { + SharedLVal = CGF.MakeNaturalAlignAddrLValue( + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + CGM.getContext().VoidPtrTy); + } + // Emit the initializer: + // %0 = bitcast void* %arg to <type>* + // store <type> <init>, <type>* %0 + RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, + [](CodeGenFunction &) { return false; }); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction combiner function: +/// \code +/// void @.red_comb(void* %arg0, void* %arg1) { +/// %lhs = bitcast void* %arg0 to <type>* +/// %rhs = bitcast void* %arg1 to <type>* +/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) +/// store <type> %2, <type>* %lhs +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N, + const Expr *ReductionOp, + const Expr *LHS, const Expr *RHS, + const Expr *PrivateRef) { + auto &C = CGM.getContext(); + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + FunctionArgList Args; + ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&ParamInOut); + Args.emplace_back(&ParamIn); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_comb.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Remap lhs and rhs variables to the addresses of the function arguments. + // %lhs = bitcast void* %arg0 to <type>* + // %rhs = bitcast void* %arg1 to <type>* + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); + }); + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + return CGF.Builder.CreateElementBitCast( + PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); + }); + PrivateScope.Privatize(); + // Emit the combiner body: + // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) + // store <type> %2, <type>* %lhs + CGM.getOpenMPRuntime().emitSingleReductionCombiner( + CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), + cast<DeclRefExpr>(RHS)); + CGF.FinishFunction(); + return Fn; +} + +/// Emits reduction finalizer function: +/// \code +/// void @.red_fini(void* %arg) { +/// %0 = bitcast void* %arg to <type>* +/// <destroy>(<type>* %0) +/// ret void +/// } +/// \endcode +static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, + SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N) { + if (!RCG.needCleanups(N)) + return nullptr; + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.emplace_back(&Param); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + ".red_fini.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + Address PrivateAddr = CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&Param), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + llvm::Value *Size = nullptr; + // If the size of the reduction item is non-constant, load it from global + // threadprivate variable. + if (RCG.getSizes(N).second) { + Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + Size = + CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), SourceLocation()); + } + RCG.emitAggregateType(CGF, N, Size); + // Emit the finalizer body: + // <destroy>(<type>* %0) + RCG.emitCleanups(CGF, N, PrivateAddr); + CGF.FinishFunction(); + return Fn; +} + +llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) + return nullptr; + + // Build typedef struct: + // kmp_task_red_input { + // void *reduce_shar; // shared reduction item + // size_t reduce_size; // size of data item + // void *reduce_init; // data initialization routine + // void *reduce_fini; // data finalization routine + // void *reduce_comb; // data combiner routine + // kmp_task_red_flags_t flags; // flags for additional info from compiler + // } kmp_task_red_input_t; + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RD->startDefinition(); + const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *FlagsFD = addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); + RD->completeDefinition(); + QualType RDType = C.getRecordType(RD); + unsigned Size = Data.ReductionVars.size(); + llvm::APInt ArraySize(/*numBits=*/64, Size); + QualType ArrayRDType = C.getConstantArrayType( + RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + // kmp_task_red_input_t .rd_input.[Size]; + Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { + // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; + llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), + llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; + llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( + TaskRedInput.getPointer(), Idxs, + /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, + ".rd_input.gep."); + LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + // ElemLVal.reduce_shar = &Shareds[Cnt]; + LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); + RCG.emitSharedLValue(CGF, Cnt); + llvm::Value *CastedShared = + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + RCG.emitAggregateType(CGF, Cnt); + llvm::Value *SizeValInChars; + llvm::Value *SizeVal; + std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); + // We use delayed creation/initialization for VLAs, array sections and + // custom reduction initializations. It is required because runtime does not + // provide the way to pass the sizes of VLAs/array sections to + // initializer/combiner/finalizer functions and does not pass the pointer to + // original reduction item to the initializer. Instead threadprivate global + // variables are used to store these values and use them in the functions. + bool DelayedCreation = !!SizeVal; + SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, + /*isSigned=*/false); + LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); + CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); + // ElemLVal.reduce_init = init; + LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); + llvm::Value *InitAddr = + CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + CGF.EmitStoreOfScalar(InitAddr, InitLVal); + DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); + // ElemLVal.reduce_fini = fini; + LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); + llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); + llvm::Value *FiniAddr = Fini + ? CGF.EmitCastToVoidPtr(Fini) + : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); + // ElemLVal.reduce_comb = comb; + LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); + llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], + RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + CGF.EmitStoreOfScalar(CombAddr, CombLVal); + // ElemLVal.flags = 0; + LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); + if (DelayedCreation) { + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + FlagsLVal); + } else + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + } + // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void + // *data); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), + CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); +} + +void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + auto Sizes = RCG.getSizes(N); + // Emit threadprivate global variable if the type is non-constant + // (Sizes.second = nullptr). + if (Sizes.second) { + llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, + /*isSigned=*/false); + Address SizeAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().getSizeType(), + generateUniqueName("reduction_size", Loc, N)); + CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); + } + // Store address of the original reduction item if custom initializer is used. + if (RCG.usesReductionInitializer(N)) { + Address SharedAddr = getAddrOfArtificialThreadPrivate( + CGF, CGM.getContext().VoidPtrTy, + generateUniqueName("reduction", Loc, N)); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + SharedAddr, /*IsVolatile=*/false); + } +} + +Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void + // *d); + llvm::Value *Args[] = { + CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), + CGM.VoidPtrTy)}; + return Address( + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + SharedLVal.getAlignment()); +} + void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) @@ -5101,6 +5991,8 @@ public: OMP_MAP_PRIVATE_PTR = 0x80, /// \brief Pass the element to the device by value. OMP_MAP_PRIVATE_VAL = 0x100, + /// Implicit map + OMP_MAP_IMPLICIT = 0x200, }; /// Class that associates information with a base pointer to be passed to the @@ -5271,7 +6163,7 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - bool IsFirstComponentList) const { + bool IsFirstComponentList, bool IsImplicit) const { // The following summarizes what has to be generated for each map and the // types bellow. The generated information is expressed in this order: @@ -5406,8 +6298,7 @@ private: } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) - .getPointer(); + BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); // If the variable is a pointer and is being dereferenced (i.e. is not // the last component), the base has to be the pointer itself, not its @@ -5426,6 +6317,7 @@ private: } } + unsigned DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; for (; I != CE; ++I) { auto Next = std::next(I); @@ -5460,7 +6352,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); + llvm::Value *LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); auto *Size = getExprTypeSize(I->getAssociatedExpression()); // If we have a member expression and the current component is a @@ -5475,9 +6368,11 @@ private: BasePointers.push_back(BP); Pointers.push_back(RefAddr); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(getMapTypeBits( - /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, - !IsExpressionFirstInfo, IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | + getMapTypeBits( + /*MapType*/ OMPC_MAP_alloc, + /*MapTypeModifier=*/OMPC_MAP_unknown, + !IsExpressionFirstInfo, IsCaptureFirstInfo)); IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; // The reference will be the next base address. @@ -5492,9 +6387,9 @@ private: // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - Types.push_back(getMapTypeBits(MapType, MapTypeModifier, - !IsExpressionFirstInfo, - IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, + !IsExpressionFirstInfo, + IsCaptureFirstInfo)); // If we have a final array section, we are done with this expression. if (IsFinalArraySection) @@ -5506,7 +6401,6 @@ private: IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; - continue; } } } @@ -5568,26 +6462,25 @@ public: RPK_MemberReference, }; OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - OpenMPMapClauseKind MapTypeModifier; - ReturnPointerKind ReturnDevicePointer; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ReturnPointerKind ReturnDevicePointer = RPK_None; + bool IsImplicit = false; - MapInfo() - : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), - ReturnDevicePointer(RPK_None) {} + MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, - ReturnPointerKind ReturnDevicePointer) + ReturnPointerKind ReturnDevicePointer, bool IsImplicit) : Components(Components), MapType(MapType), MapTypeModifier(MapTypeModifier), - ReturnDevicePointer(ReturnDevicePointer) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. - llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; // Helper function to fill the information map for the different supported // clauses. @@ -5595,25 +6488,29 @@ public: const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, - MapInfo::ReturnPointerKind ReturnDevicePointer) { + MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); + Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, + IsImplicit); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the @@ -5674,9 +6571,9 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = BasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, - L.Components, BasePointers, Pointers, - Sizes, Types, IsFirstComponentList); + this->generateInfoForComponentList( + L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, + Sizes, Types, IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -5740,7 +6637,8 @@ public: for (auto L : It->second) { generateInfoForComponentList( /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, - BasePointers, Pointers, Sizes, Types, IsFirstComponentList); + BasePointers, Pointers, Sizes, Types, IsFirstComponentList, + /*IsImplicit=*/false); IsFirstComponentList = false; } return; @@ -5760,9 +6658,9 @@ public: "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), - L.second, BasePointers, Pointers, Sizes, - Types, IsFirstComponentList); + generateInfoForComponentList( + C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); IsFirstComponentList = false; } @@ -5911,16 +6809,11 @@ emitOffloadingArrays(CodeGenFunction &CGF, for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { llvm::Value *BPVal = *BasePointers[i]; - if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); - else { - assert(BPVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); - } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.BasePointersArray, 0, i); + BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); @@ -5929,16 +6822,11 @@ emitOffloadingArrays(CodeGenFunction &CGF, Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); llvm::Value *PVal = Pointers[i]; - if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); - else { - assert(PVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); - } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.PointersArray, 0, i); + P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); @@ -6001,8 +6889,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, assert(OutlinedFn && "Invalid outlined function!"); - auto &Ctx = CGF.getContext(); - // Fill up the arrays with all the captured variables. MappableExprsHandler::MapValuesArrayTy KernelArgs; MappableExprsHandler::MapBaseValuesArrayTy BasePointers; @@ -6024,9 +6910,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { - StringRef Name; - QualType Ty; - CurBasePointers.clear(); CurPointers.clear(); CurSizes.clear(); @@ -6067,19 +6950,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } - // Keep track on whether the host function has to be executed. - auto OffloadErrorQType = - Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); - auto OffloadError = CGF.MakeAddrLValue( - CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), - OffloadErrorQType); - CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), - OffloadError); - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFnID, OffloadError, - &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFn, OutlinedFnID, &D, + &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. TargetDataInfo Info; @@ -6170,13 +7044,26 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadingArgs); } - CGF.EmitStoreOfScalar(Return, OffloadError); + // Check the error code and execute the host version if required. + llvm::BasicBlock *OffloadFailedBlock = + CGF.createBasicBlock("omp_offload.failed"); + llvm::BasicBlock *OffloadContBlock = + CGF.createBasicBlock("omp_offload.cont"); + llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); + CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); + + CGF.EmitBlock(OffloadFailedBlock); + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + CGF.EmitBranch(OffloadContBlock); + + CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), - OffloadError); + auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, + KernelArgs); }; // If we have a target function ID it means that we need to support @@ -6194,19 +7081,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, RegionCodeGenTy ElseRCG(ElseGen); ElseRCG(CGF); } - - // Check the error code and execute the host version if required. - auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); - auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); - auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); - auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); - CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - - CGF.EmitBlock(OffloadFailedBlock); - CGF.Builder.CreateCall(OutlinedFn, KernelArgs); - CGF.EmitBranch(OffloadContBlock); - - CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -6311,7 +7185,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { } } - // If we are in target mode we do not emit any global (declare target is not + // If we are in target mode, we do not emit any global (declare target is not // implemented yet). Therefore we signal that GD was processed in this case. return true; } @@ -6923,3 +7797,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGF.EmitRuntimeCall(RTLFn, Args); } +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args, + SourceLocation Loc) const { + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + + if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { + if (Fn->doesNotThrow()) { + CGF.EmitNounwindRuntimeCall(Fn, Args); + return; + } + } + CGF.EmitRuntimeCall(Callee, Args); +} + +void CGOpenMPRuntime::emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); + emitCall(CGF, OutlinedFn, Args, Loc); +} + +Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + return CGF.GetAddrOfLocalVar(NativeParam); +} diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index dc62f2b6d5..94a1438413 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -96,15 +96,106 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> FirstprivateInits; SmallVector<const Expr *, 4> LastprivateVars; SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<const Expr *, 4> ReductionVars; + SmallVector<const Expr *, 4> ReductionCopies; + SmallVector<const Expr *, 4> ReductionOps; SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + llvm::Value *Reductions = nullptr; unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; }; +/// Class intended to support codegen of all kind of the reduction clauses. +class ReductionCodeGen { +private: + /// Data required for codegen of reduction clauses. + struct ReductionData { + /// Reference to the original shared item. + const Expr *Ref = nullptr; + /// Helper expression for generation of private copy. + const Expr *Private = nullptr; + /// Helper expression for generation reduction operation. + const Expr *ReductionOp = nullptr; + ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) + : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + }; + /// List of reduction-based clauses. + SmallVector<ReductionData, 4> ClausesData; + + /// List of addresses of original shared variables/expressions. + SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses; + /// Sizes of the reduction items in chars. + SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes; + /// Base declarations for the reduction items. + SmallVector<const VarDecl *, 4> BaseDecls; + + /// Emits lvalue for shared expresion. + LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); + /// Emits upper bound for shared expression (if array section). + LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); + /// Performs aggregate initialization. + /// \param N Number of reduction item in the common list. + /// \param PrivateAddr Address of the corresponding private item. + /// \param SharedLVal Address of the original shared variable. + /// \param DRD Declare reduction construct used for reduction item. + void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr, LValue SharedLVal, + const OMPDeclareReductionDecl *DRD); + +public: + ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> ReductionOps); + /// Emits lvalue for a reduction item. + /// \param N Number of the reduction item. + void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + void emitAggregateType(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variable-modified type, if required. + /// \param N Number of the reduction item. + /// \param Size Size of the type in chars. + void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size); + /// Performs initialization of the private copy for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + /// \param DefaultInit Default initialization sequence that should be + /// performed if no reduction specific initialization is found. + /// \param SharedLVal Address of the original shared variable. + void + emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, + LValue SharedLVal, + llvm::function_ref<bool(CodeGenFunction &)> DefaultInit); + /// Returns true if the private copy requires cleanups. + bool needCleanups(unsigned N); + /// Emits cleanup code for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr); + /// Adjusts \p PrivatedAddr for using instead of the original variable + /// address in normal operations. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr); + /// Returns LValue for the reduction item. + LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns the size of the reduction item (in chars and total number of + /// elements in the item), or nullptr, if the size is a constant. + std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const { + return Sizes[N]; + } + /// Returns the base declaration of the reduction item. + const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } + /// Returns true if the initialization of the reduction item uses initializer + /// from declare reduction construct. + bool usesReductionInitializer(unsigned N) const; +}; + class CGOpenMPRuntime { protected: CodeGenModule &CGM; @@ -121,7 +212,7 @@ protected: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Lambda codegen specific to an accelerator device. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, @@ -159,6 +250,11 @@ protected: // virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } + /// Emits \p Callee function call with arguments \p Args with location \p Loc. + void emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args = llvm::None, + SourceLocation Loc = SourceLocation()) const; + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -222,6 +318,10 @@ private: /// deconstructors of firstprivate C++ objects */ /// } kmp_task_t; QualType KmpTaskTQTy; + /// Saved kmp_task_t for task directive. + QualType SavedKmpTaskTQTy; + /// Saved kmp_task_t for taskloop-based directive. + QualType SavedKmpTaskloopTQTy; /// \brief Type typedef struct kmp_depend_info { /// kmp_intptr_t base_addr; /// size_t len; @@ -672,16 +772,26 @@ public: /// virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; - virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, bool Ordered, - llvm::Value *UB, - llvm::Value *Chunk = nullptr); + /// struct with the values to be passed to the dispatch runtime function + struct DispatchRTInput { + /// Loop lower bound + llvm::Value *LB = nullptr; + /// Loop upper bound + llvm::Value *UB = nullptr; + /// Chunk size specified using 'schedule' clause (nullptr if chunk + /// was not specified) + llvm::Value *Chunk = nullptr; + DispatchRTInput() = default; + DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk) + : LB(LB), UB(UB), Chunk(Chunk) {} + }; - /// \brief Call the appropriate runtime routine to initialize it before start + /// Call the appropriate runtime routine to initialize it before start /// of loop. - /// - /// Depending on the loop schedule, it is nesessary to call some runtime + + /// This is used for non static scheduled types and when the ordered + /// clause is present on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime /// routine before start of the OpenMP loop to get the loop upper / lower /// bounds \a LB and \a UB and stride \a ST. /// @@ -689,49 +799,76 @@ public: /// \param Loc Clang source location. /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param Ordered true if loop is ordered, false otherwise. - /// \param IL Address of the output variable in which the flag of the - /// last iteration is returned. - /// \param LB Address of the output variable in which the lower iteration - /// number is returned. - /// \param UB Address of the output variable in which the upper iteration - /// number is returned. - /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. - /// \param Chunk Value of the chunk for the static_chunked scheduled loop. + /// \param DispatchValues struct containing llvm values for lower bound, upper + /// bound, and chunk expression. /// For the default (nullptr) value, the chunk 1 will be used. /// + virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, bool Ordered, + const DispatchRTInput &DispatchValues); + + /// Struct with the values to be passed to the static runtime function + struct StaticRTInput { + /// Size of the iteration variable in bits. + unsigned IVSize = 0; + /// Sign of the iteration variable. + bool IVSigned = false; + /// true if loop is ordered, false otherwise. + bool Ordered = false; + /// Address of the output variable in which the flag of the last iteration + /// is returned. + Address IL = Address::invalid(); + /// Address of the output variable in which the lower iteration number is + /// returned. + Address LB = Address::invalid(); + /// Address of the output variable in which the upper iteration number is + /// returned. + Address UB = Address::invalid(); + /// Address of the output variable in which the stride value is returned + /// necessary to generated the static_chunked scheduled loop. + Address ST = Address::invalid(); + /// Value of the chunk for the static_chunked scheduled loop. For the + /// default (nullptr) value, the chunk 1 will be used. + llvm::Value *Chunk = nullptr; + StaticRTInput(unsigned IVSize, bool IVSigned, bool Ordered, Address IL, + Address LB, Address UB, Address ST, + llvm::Value *Chunk = nullptr) + : IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB), + UB(UB), ST(ST), Chunk(Chunk) {} + }; + /// \brief Call the appropriate runtime routine to initialize it before start + /// of loop. + /// + /// This is used only in case of static schedule, when the user did not + /// specify a ordered clause on the loop construct. + /// Depending on the loop schedule, it is necessary to call some runtime + /// routine before start of the OpenMP loop to get the loop upper / lower + /// bounds LB and UB and stride ST. + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param DKind Kind of the directive. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. + /// \param Values Input arguments for the construct. + /// virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, bool Ordered, - Address IL, Address LB, Address UB, Address ST, - llvm::Value *Chunk = nullptr); + const StaticRTInput &Values); /// /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. - /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. - /// \param Ordered true if loop is ordered, false otherwise. - /// \param IL Address of the output variable in which the flag of the - /// last iteration is returned. - /// \param LB Address of the output variable in which the lower iteration - /// number is returned. - /// \param UB Address of the output variable in which the upper iteration - /// number is returned. - /// \param ST Address of the output variable in which the stride value is - /// returned nesessary to generated the static_chunked scheduled loop. - /// \param Chunk Value of the chunk for the static_chunked scheduled loop. - /// For the default (nullptr) value, the chunk 1 will be used. + /// \param Values Input arguments for the construct. /// - virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + virtual void emitDistributeStaticInit(CodeGenFunction &CGF, + SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, - unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, - Address UB, Address ST, - llvm::Value *Chunk = nullptr); + const StaticRTInput &Values); /// \brief Call the appropriate runtime routine to notify that we finished /// iteration of the ordered loop with the dynamic scheduling. @@ -739,7 +876,7 @@ public: /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, @@ -750,15 +887,17 @@ public: /// /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. + /// \param DKind Kind of the directive for which the static finish is emitted. /// - virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc); + virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind); /// Call __kmpc_dispatch_next( /// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, /// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, /// kmp_int[32|64] *p_stride); /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. + /// \param IVSigned Sign of the iteration variable. /// \param IL Address of the output variable in which the flag of the /// last iteration is returned. /// \param LB Address of the output variable in which the lower iteration @@ -810,6 +949,14 @@ public: SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); + /// Creates artificial threadprivate variable with name \p Name and type \p + /// VarType. + /// \param VarType Type of the artificial threadprivate variable. + /// \param Name Name of the artificial threadprivate variable. + virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, + QualType VarType, + StringRef Name); + /// \brief Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, @@ -968,6 +1115,51 @@ public: ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options); + /// Emit a code for initialization of task reduction clause. Next code + /// should be emitted for reduction: + /// \code + /// + /// _task_red_item_t red_data[n]; + /// ... + /// red_data[i].shar = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// \endcode + /// + /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. + /// \param Data Additional data for task generation like tiedness, final + /// state, list of privates, reductions etc. + virtual llvm::Value *emitTaskReductionInit(CodeGenFunction &CGF, + SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + const OMPTaskDataTy &Data); + + /// Required to resolve existing problems in the runtime. Emits threadprivate + /// variables to store the size of the VLAs/array sections for + /// initializer/combiner/finalizer functions + emits threadprivate variable to + /// store the pointer to the original reduction item for the custom + /// initializer defined by declare reduction construct. + /// \param RCG Allows to reuse an existing data for the reductions. + /// \param N Reduction item for which fixups must be emitted. + virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, + ReductionCodeGen &RCG, unsigned N); + + /// Get the address of `void *` type of the privatue copy of the reduction + /// item specified by the \p SharedLVal. + /// \param ReductionsPtr Pointer to the reduction data returned by the + /// emitTaskReductionInit function. + /// \param SharedLVal Address of the original reduction item. + virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal); + /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); @@ -995,7 +1187,7 @@ public: /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. /// \param CodeGen Code generation sequence for the \a D directive. - /// An oulined function may not be an entry if, e.g. the if clause always + /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, @@ -1034,7 +1226,7 @@ public: virtual bool emitTargetGlobalVariable(GlobalDecl GD); /// \brief Emit the global \a GD if it is meaningful for the target. Returns - /// if it was emitted succesfully. + /// if it was emitted successfully. /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); @@ -1146,6 +1338,30 @@ public: /// \param C 'depend' clause with 'sink|source' dependency kind. virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C); + + /// Translates the native parameter of outlined function if this is required + /// for target. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + virtual const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + return NativeParam; + } + + /// Gets the address of the native argument basing on the address of the + /// target-specific parameter. + /// \param NativeParam Parameter itself. + /// \param TargetParam Corresponding target-specific parameter. + virtual Address getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const; + + /// Emits call of the outlined function with the provided arguments, + /// translating these arguments to correct target-specific arguments. + virtual void + emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args = llvm::None) const; }; } // namespace CodeGen diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index c3391d087b..884bcc1c2d 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -150,20 +150,18 @@ enum NamedBarrier : unsigned { /// Get the GPU warp size. static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - llvm::None, "nvptx_warp_size"); + "nvptx_warp_size"); } /// Get the id of the current thread on the GPU. static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), - llvm::None, "nvptx_tid"); + "nvptx_tid"); } /// Get the id of the warp in the block. @@ -185,17 +183,15 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { /// Get the maximum number of threads in a block of the GPU. static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), - llvm::None, "nvptx_num_threads"); + "nvptx_num_threads"); } /// Get barrier to synchronize all threads in a block. static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - Bld.CreateCall(llvm::Intrinsic::getDeclaration( + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } @@ -205,9 +201,9 @@ static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, llvm::Value *NumThreads) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; - Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(), - llvm::Intrinsic::nvvm_barrier), - Args); + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier), + Args); } /// Synchronize all GPU threads in a block. @@ -345,7 +341,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); + emitCall(CGF, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); @@ -555,7 +551,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0)); llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()}; - CGF.EmitCallOrInvoke(Fn, FnArgs); + emitCall(CGF, Fn, FnArgs); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -861,6 +857,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); return OutlinedFun; @@ -882,7 +879,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } void CGOpenMPRuntimeNVPTX::emitParallelCall( @@ -931,10 +928,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( auto *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; - auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; @@ -943,7 +940,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(Fn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); }; RegionCodeGenTy RCG(CodeGen); @@ -979,7 +976,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } /// This function creates calls to one of two shuffle functions to copy @@ -1243,32 +1240,27 @@ static void emitReductionListCopy( /// local = local @ remote /// else /// local = remote -llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, - llvm::Value *ReduceFn) { +static llvm::Value * +emitReduceScratchpadFunction(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); // Destination of the copy. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // Base address of the scratchpad array, with each element storing a // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // A source index into the scratchpad array. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, Int32Ty); + ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); // Row width of an element in the scratchpad array, typically // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, Int32Ty); + ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); // If should_reduce == 1, then it's load AND reduce, // If should_reduce == 0 (or otherwise), then it only loads (+ copy). // The latter case is used for initialization. - ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, Int32Ty); + ImplicitParamDecl ShouldReduceArg(C, Int32Ty, ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); @@ -1372,28 +1364,24 @@ llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, /// for elem in Reduce List: /// scratchpad[elem_id][index] = elem /// -llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy) { +static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, + ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); // Source of the copy. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // Base address of the scratchpad array, with each element storing a // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ScratchPadArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // A destination index into the scratchpad array, typically the team // identifier. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, Int32Ty); + ImplicitParamDecl IndexArg(C, Int32Ty, ImplicitParamDecl::Other); // Row width of an element in the scratchpad array, typically // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, Int32Ty); + ImplicitParamDecl WidthArg(C, Int32Ty, ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); @@ -1474,13 +1462,12 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // ReduceList: thread local Reduce list. // At the stage of the computation when this function is called, partially // aggregated values reside in the first lane of every active warp. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // NumWarps: number of warps active in the parallel region. This could // be smaller than 32 (max warps in a CTA) for partial block reduction. - ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, - C.getIntTypeForBitwidth(32, /* Signed */ true)); + ImplicitParamDecl NumWarpsArg(C, + C.getIntTypeForBitwidth(32, /* Signed */ true), + ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&NumWarpsArg); @@ -1722,17 +1709,14 @@ emitShuffleAndReduceFunction(CodeGenModule &CGM, auto &C = CGM.getContext(); // Thread local Reduce list used to host the values of data to be reduced. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.VoidPtrTy); + ImplicitParamDecl ReduceListArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); // Current lane id; could be logical. - ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.ShortTy); + ImplicitParamDecl LaneIDArg(C, C.ShortTy, ImplicitParamDecl::Other); // Offset of the remote source lane relative to the current lane. - ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.ShortTy); + ImplicitParamDecl RemoteLaneOffsetArg(C, C.ShortTy, + ImplicitParamDecl::Other); // Algorithm version. This is expected to be known at compile time. - ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, SourceLocation(), - /*Id=*/nullptr, C.ShortTy); + ImplicitParamDecl AlgoVerArg(C, C.ShortTy, ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&ReduceListArg); Args.push_back(&LaneIDArg); @@ -2250,3 +2234,86 @@ void CGOpenMPRuntimeNVPTX::emitReduction( CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } + +const VarDecl * +CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + if (!NativeParam->getType()->isReferenceType()) + return NativeParam; + QualType ArgType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(ArgType); + QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) { + if (Attr->getCaptureKind() == OMPC_map) { + PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, + LangAS::opencl_global); + } + } + ArgType = CGM.getContext().getPointerType(PointeeTy); + QC.addRestrict(); + enum { NVPTX_local_addr = 5 }; + QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); + ArgType = QC.apply(CGM.getContext(), ArgType); + return ImplicitParamDecl::Create( + CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), + NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); +} + +Address +CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + assert(NativeParam != TargetParam && + NativeParam->getType()->isReferenceType() && + "Native arg must not be the same as target arg."); + Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam); + QualType NativeParamType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(NativeParamType); + QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + unsigned NativePointeeAddrSpace = + CGF.getContext().getTargetAddressSpace(NativePointeeTy); + QualType TargetTy = TargetParam->getType(); + llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( + LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); + // First cast to generic. + TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( + /*AddrSpace=*/0)); + // Cast from generic to native address space. + TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( + NativePointeeAddrSpace)); + Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType); + CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false, + NativeParamType); + return NativeParamAddr; +} + +void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args) const { + SmallVector<llvm::Value *, 4> TargetArgs; + TargetArgs.reserve(Args.size()); + auto *FnType = + cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType()); + for (unsigned I = 0, E = Args.size(); I < E; ++I) { + if (FnType->isVarArg() && FnType->getNumParams() <= I) { + TargetArgs.append(std::next(Args.begin(), I), Args.end()); + break; + } + llvm::Type *TargetType = FnType->getParamType(I); + llvm::Value *NativeArg = Args[I]; + if (!TargetType->isPointerTy()) { + TargetArgs.emplace_back(NativeArg); + continue; + } + llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo( + /*AddrSpace=*/0)); + TargetArgs.emplace_back( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); + } + CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs); +} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index ae25e94759..a9d6386e6b 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -268,6 +268,26 @@ public: /// \return Specified function. llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + /// Translates the native parameter of outlined function if this is required + /// for target. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const override; + + /// Gets the address of the native argument basing on the address of the + /// target-specific parameter. + /// \param NativeParam Parameter itself. + /// \param TargetParam Corresponding target-specific parameter. + Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, + const VarDecl *TargetParam) const override; + + /// Emits call of the outlined function with the provided arguments, + /// translating these arguments to correct target-specific arguments. + void emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args = llvm::None) const override; + /// Target codegen is specialized based on two programming models: the /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd' /// model for constructs like 'target parallel' that support it. diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 7d530a278f..1644ab4c07 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -403,6 +403,27 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, } return; } + + // Check if current Field is better as a single field run. When current field + // has legal integer width, and its bitfield offset is naturally aligned, it + // is better to make the bitfield a separate storage component so as it can be + // accessed directly with lower cost. + auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) { + if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses) + return false; + unsigned Width = Field->getBitWidthValue(Context); + if (!DataLayout.isLegalInteger(Width)) + return false; + // Make sure Field is natually aligned if it is treated as an IType integer. + if (getFieldBitOffset(*Field) % + Context.toBits(getAlignment(getIntNType(Width))) != + 0) + return false; + return true; + }; + + // The start field is better as a single field run. + bool StartFieldAsSingleRun = false; for (;;) { // Check to see if we need to start a new run. if (Run == FieldEnd) { @@ -414,17 +435,28 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Run = Field; StartBitOffset = getFieldBitOffset(*Field); Tail = StartBitOffset + Field->getBitWidthValue(Context); + StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run); } ++Field; continue; } - // Add bitfields to the run as long as they qualify. - if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 && + + // If the start field of a new run is better as a single run, or + // if current field is better as a single run, or + // if current field has zero width bitfield, or + // if the offset of current field is inconsistent with the offset of + // previous field plus its offset, + // skip the block below and go ahead to emit the storage. + // Otherwise, try to add bitfields to the run. + if (!StartFieldAsSingleRun && Field != FieldEnd && + !IsBetterAsSingleFieldRun(Field) && + Field->getBitWidthValue(Context) != 0 && Tail == getFieldBitOffset(*Field)) { Tail += Field->getBitWidthValue(Context); ++Field; continue; } + // We've hit a break-point in the run and need to emit a storage field. llvm::Type *Type = getIntNType(Tail - StartBitOffset); // Add the storage member to the record and set the bitfield info for all of @@ -435,6 +467,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), MemberInfo::Field, nullptr, *Run)); Run = FieldEnd; + StartFieldAsSingleRun = false; } } diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 9c92367410..6a78865fc9 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -45,7 +45,7 @@ void CodeGenFunction::EmitStopPoint(const Stmt *S) { } } -void CodeGenFunction::EmitStmt(const Stmt *S) { +void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { assert(S && "Null statement?"); PGO.setCurrentStmt(S); @@ -131,16 +131,16 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::IndirectGotoStmtClass: EmitIndirectGotoStmt(cast<IndirectGotoStmt>(*S)); break; - case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break; - case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S)); break; - case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S)); break; - case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S)); break; + case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break; + case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S), Attrs); break; + case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S), Attrs); break; + case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S), Attrs); break; - case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break; + case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break; - case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break; - case Stmt::GCCAsmStmtClass: // Intentional fall-through. - case Stmt::MSAsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break; + case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break; + case Stmt::GCCAsmStmtClass: // Intentional fall-through. + case Stmt::MSAsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break; case Stmt::CoroutineBodyStmtClass: EmitCoroutineBody(cast<CoroutineBodyStmt>(*S)); break; @@ -178,7 +178,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { EmitCXXTryStmt(cast<CXXTryStmt>(*S)); break; case Stmt::CXXForRangeStmtClass: - EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S)); + EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S), Attrs); break; case Stmt::SEHTryStmtClass: EmitSEHTryStmt(cast<SEHTryStmt>(*S)); @@ -555,23 +555,7 @@ void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { } void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { - const Stmt *SubStmt = S.getSubStmt(); - switch (SubStmt->getStmtClass()) { - case Stmt::DoStmtClass: - EmitDoStmt(cast<DoStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::ForStmtClass: - EmitForStmt(cast<ForStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::WhileStmtClass: - EmitWhileStmt(cast<WhileStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::CXXForRangeStmtClass: - EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*SubStmt), S.getAttrs()); - break; - default: - EmitStmt(SubStmt); - } + EmitStmt(S.getSubStmt(), S.getAttrs()); } void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { @@ -1024,6 +1008,18 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { /// if the function returns void, or may be missing one if the function returns /// non-void. Fun stuff :). void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { + if (requiresReturnValueCheck()) { + llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart()); + auto *SLocPtr = + new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false, + llvm::GlobalVariable::PrivateLinkage, SLoc); + SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr); + assert(ReturnLocation.isValid() && "No valid return location"); + Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy), + ReturnLocation); + } + // Returning from an outlined SEH helper is UB, and we already warn on it. if (IsOutlinedSEHHelper) { Builder.CreateUnreachable(); @@ -1166,7 +1162,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { if (Rem) Rem--; SwitchInsn->addCase(Builder.getInt(LHS), CaseDest); - LHS++; + ++LHS; } return; } @@ -2127,16 +2123,16 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); llvm::CallInst *Result = Builder.CreateCall(IA, Args); - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); // Attach readnone and readonly attributes. if (!HasSideEffect) { if (ReadNone) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone); else if (ReadOnly) - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadOnly); } @@ -2157,7 +2153,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Conservatively, mark all inline asm blocks in CUDA as convergent // (meaning, they may call an intrinsically convergent op, such as bar.sync, // and so can't have certain optimizations applied around them). - Result->addAttribute(llvm::AttributeSet::FunctionIndex, + Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Convergent); } @@ -2198,7 +2194,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize)); Tmp = Builder.CreateTrunc(Tmp, TruncTy); } else if (TruncTy->isIntegerTy()) { - Tmp = Builder.CreateTrunc(Tmp, TruncTy); + Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); } else if (TruncTy->isVectorTy()) { Tmp = Builder.CreateBitCast(Tmp, TruncTy); } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 2f64da2912..df7f7802d3 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -65,6 +65,8 @@ public: for (auto &C : CS->captures()) { if (C.capturesVariable() || C.capturesVariableByCopy()) { auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); DeclRefExpr DRE(const_cast<VarDecl *>(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -87,7 +89,8 @@ public: class OMPParallelScope final : public OMPLexicalScope { bool EmitPreInitStmt(const OMPExecutableDirective &S) { OpenMPDirectiveKind Kind = S.getDirectiveKind(); - return !isOpenMPTargetExecutionDirective(Kind) && + return !(isOpenMPTargetExecutionDirective(Kind) || + isOpenMPLoopBoundSharingDirective(Kind)) && isOpenMPParallelDirective(Kind); } @@ -135,6 +138,22 @@ public: } // namespace +LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { + if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { + if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { + OrigVD = OrigVD->getCanonicalDecl(); + bool IsCaptured = + LambdaCaptureFields.lookup(OrigVD) || + (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || + (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, + OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); + return EmitLValue(&DRE); + } + } + return EmitLValue(E); +} + llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { auto &C = getContext(); llvm::Value *Size = nullptr; @@ -227,21 +246,67 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, return TmpAddr; } -llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { - assert( - CapturedStmtInfo && - "CapturedStmtInfo should be set when generating the captured function"); - const CapturedDecl *CD = S.getCapturedDecl(); - const RecordDecl *RD = S.getCapturedRecordDecl(); +static QualType getCanonicalParamType(ASTContext &C, QualType T) { + if (T->isLValueReferenceType()) { + return C.getLValueReferenceType( + getCanonicalParamType(C, T.getNonReferenceType()), + /*SpelledAsLValue=*/false); + } + if (T->isPointerType()) + return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + if (auto *A = T->getAsArrayTypeUnsafe()) { + if (auto *VLA = dyn_cast<VariableArrayType>(A)) + return getCanonicalParamType(C, VLA->getElementType()); + else if (!A->isVariablyModifiedType()) + return C.getCanonicalType(T); + } + return C.getCanonicalParamType(T); +} + +namespace { + /// Contains required data for proper outlined function codegen. + struct FunctionOptions { + /// Captured statement for which the function is generated. + const CapturedStmt *S = nullptr; + /// true if cast to/from UIntPtr is required for variables captured by + /// value. + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA + /// sizes. + const bool RegisterCastedArgsOnly = false; + /// Name of the generated function. + const StringRef FunctionName; + explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, + bool RegisterCastedArgsOnly, + StringRef FunctionName) + : S(S), UIntPtrCastRequired(UIntPtrCastRequired), + RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), + FunctionName(FunctionName) {} + }; +} + +static llvm::Function *emitOutlinedFunctionPrologue( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> + &LocalAddrs, + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> + &VLASizes, + llvm::Value *&CXXThisValue, const FunctionOptions &FO) { + const CapturedDecl *CD = FO.S->getCapturedDecl(); + const RecordDecl *RD = FO.S->getCapturedRecordDecl(); assert(CD->hasBody() && "missing CapturedDecl body"); + CXXThisValue = nullptr; // Build the argument list. + CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); - FunctionArgList Args; + FunctionArgList TargetArgs; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); - auto I = S.captures().begin(); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + auto I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; @@ -253,124 +318,216 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // deal with pointers. We can pass in the same way the VLA type sizes to the // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || - I->capturesVariableArrayType()) - ArgType = Ctx.getUIntPtrType(); + I->capturesVariableArrayType()) { + if (FO.UIntPtrCastRequired) + ArgType = Ctx.getUIntPtrType(); + } if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); } else if (I->capturesThis()) - II = &getContext().Idents.get("this"); + II = &Ctx.Idents.get("this"); else { assert(I->capturesVariableArrayType()); - II = &getContext().Idents.get("vla"); - } - if (ArgType->isVariablyModifiedType()) { - bool IsReference = ArgType->isLValueReferenceType(); - ArgType = - getContext().getCanonicalParamType(ArgType.getNonReferenceType()); - if (IsReference && !ArgType->isPointerType()) { - ArgType = getContext().getLValueReferenceType( - ArgType, /*SpelledAsLValue=*/false); - } + II = &Ctx.Idents.get("vla"); } - Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, - FD->getLocation(), II, ArgType)); + if (ArgType->isVariablyModifiedType()) + ArgType = getCanonicalParamType(Ctx, ArgType); + auto *Arg = + ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, + ArgType, ImplicitParamDecl::Other); + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Function *F = llvm::Function::Create( - FuncLLVMTy, llvm::GlobalValue::InternalLinkage, - CapturedStmtInfo->getHelperName(), &CGM.getModule()); + llvm::Function *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) - F->addFnAttr(llvm::Attribute::NoUnwind); + F->setDoesNotThrow(); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), - CD->getBody()->getLocStart()); + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, + FO.S->getLocStart(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); - I = S.captures().begin(); + I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. + Address LocalAddr(Address::invalid()); + if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { + LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], + TargetArgs[Cnt]); + } else { + LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); + } // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. if (CurVD->getType()->isReferenceType()) { - Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(), - ".materialized_ref"); - EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false, - CurVD->getType()); + Address RefAddr = CGF.CreateMemTemp( + CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); + CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, + /*Volatile=*/false, CurVD->getType()); LocalAddr = RefAddr; } - setAddrOfLocalVar(CurVD, LocalAddr); + if (!FO.RegisterCastedArgsOnly) + LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); ++Cnt; ++I; continue; } - LValue ArgLVal = - MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), - AlignmentSource::Decl); + LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), + AlignmentSource::Decl); if (FD->hasCapturedVLAType()) { - LValue CastedArgLVal = - MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal), - FD->getType(), AlignmentSource::Decl); + if (FO.UIntPtrCastRequired) { + ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), + Args[Cnt]->getName(), + ArgLVal), + FD->getType(), AlignmentSource::Decl); + } auto *ExprArg = - EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); + CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); - VLASizeMap[VAT->getSizeExpr()] = ExprArg; + VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); } else if (I->capturesVariable()) { auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = EmitLoadOfReference( - ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); - ArgAddr = EmitLoadOfPointer( + ArgAddr = CGF.EmitLoadOfPointer( ArgAddr, ArgLVal.getType()->castAs<PointerType>()); } } - setAddrOfLocalVar( - Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); + if (!FO.RegisterCastedArgsOnly) { + LocalAddrs.insert( + {Args[Cnt], + {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); + } } else if (I->capturesVariableByCopy()) { assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal, - VarTy->isReferenceType())); + LocalAddrs.insert( + {Args[Cnt], + {Var, + FO.UIntPtrCastRequired + ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), + ArgLVal, VarTy->isReferenceType()) + : ArgLVal.getAddress()}}); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); - CXXThisValue = - EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) + .getScalarVal(); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); } ++Cnt; ++I; } + return F; +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. + bool NeedWrapperFunction = + getDebugInfo() && + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; + FunctionArgList Args; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << CapturedStmtInfo->getHelperName(); + if (NeedWrapperFunction) + Out << "_debug__"; + FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + Out.str()); + llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, + VLASizes, CXXThisValue, FO); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + setAddrOfLocalVar(LocalAddrPair.second.first, + LocalAddrPair.second.second); + } + } + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - - return F; + if (!NeedWrapperFunction) + return F; + + FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, + /*RegisterCastedArgsOnly=*/true, + CapturedStmtInfo->getHelperName()); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + Args.clear(); + LocalAddrs.clear(); + VLASizes.clear(); + llvm::Function *WrapperF = + emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, + WrapperCGF.CXXThisValue, WrapperFO); + llvm::SmallVector<llvm::Value *, 4> CallArgs; + for (const auto *Arg : Args) { + llvm::Value *CallArg; + auto I = LocalAddrs.find(Arg); + if (I != LocalAddrs.end()) { + LValue LV = WrapperCGF.MakeAddrLValue( + I->second.second, + I->second.first ? I->second.first->getType() : Arg->getType(), + AlignmentSource::Decl); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } else { + auto EI = VLASizes.find(Arg); + if (EI != VLASizes.end()) + CallArg = EI->second.second; + else { + LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), + AlignmentSource::Decl); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } + } + CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); + } + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), + F, CallArgs); + WrapperCGF.FinishFunction(); + return WrapperF; } //===----------------------------------------------------------------------===// @@ -437,156 +594,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. -static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = - dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair<llvm::Function *, llvm::Function *> Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. - auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. - CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -939,253 +946,109 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getAlignmentSource()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. - // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate( - LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + QualType Type = PrivateVD->getType(); + bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); + if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || + isa<ArraySubscriptExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; } } @@ -1243,10 +1106,20 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) { +namespace { +/// Codegen lambda for appending distribute lower and upper bounds to outlined +/// parallel function. This is necessary for combined constructs such as +/// 'distribute parallel for' +typedef llvm::function_ref<void(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &)> + CodeGenBoundParametersTy; +} // anonymous namespace + +static void emitCommonOMPParallelDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1273,11 +1146,20 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OMPParallelScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk + // lower and upper bounds with the pragma 'for' chunking mechanism. + // The following lambda takes care of appending the lower and upper bound + // parameters when necessary + CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } +static void emitEmptyBoundParameters(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &) {} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1298,7 +1180,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1375,12 +1258,14 @@ void CodeGenFunction::EmitOMPInnerLoop( EmitBlock(LoopExit.getBlock()); } -void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { +bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { if (!HaveInsertPoint()) - return; + return false; // Emit inits for the linear variables. + bool HasLinears = false; for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { for (auto *Init : C->inits()) { + HasLinears = true; auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { AutoVarEmission Emission = EmitAutoVarAlloca(*VD); @@ -1405,6 +1290,7 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { EmitIgnoredExpr(CS); } } + return HasLinears; } void CodeGenFunction::EmitOMPLinearClauseFinal( @@ -1643,6 +1529,13 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } +static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); +} + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1687,7 +1580,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitOMPSimdInit(S); emitAlignedClause(CGF, S); - CGF.EmitOMPLinearClauseInit(S); + (void)CGF.EmitOMPLinearClauseInit(S); { OMPPrivateScope LoopScope(CGF); CGF.EmitOMPPrivateLoopCounters(S, LoopScope); @@ -1725,9 +1618,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPOuterLoop( + bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, + CodeGenFunction::OMPPrivateScope &LoopScope, + const CodeGenFunction::OMPLoopArguments &LoopArgs, + const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, + const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1745,15 +1641,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) - EmitIgnoredExpr(S.getEnsureUpperBound()); + // UB = min(UB, GlobalUB) or + // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. + // 'distribute parallel for') + EmitIgnoredExpr(LoopArgs.EUB); // IV = LB - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // IV < UB - BoolCondVal = EvaluateExprAsBool(S.getCond()); + BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, - LB, UB, ST); + BoolCondVal = + RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1773,7 +1672,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1787,24 +1686,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( - CGF, Loc, IVSize, IVSigned); - } - }); + + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(S.getNextLowerBound()); - EmitIgnoredExpr(S.getNextUpperBound()); + EmitIgnoredExpr(LoopArgs.NextLB); + EmitIgnoredExpr(LoopArgs.NextUB); } EmitBranch(CondBlock); @@ -1815,7 +1717,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // Tell the runtime we are done. auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { if (!DynamicOrOrdered) - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } @@ -1823,7 +1726,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1832,7 +1736,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - /*Chunked=*/Chunk != nullptr)) && + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -1890,22 +1794,48 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); + llvm::Value *LBVal = DispatchBounds.first; + llvm::Value *UBVal = DispatchBounds.second; + CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, + LoopArgs.Chunk}; RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, UBVal, Chunk); + IVSigned, Ordered, DipatchRTInputValues); } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, IL, LB, UB, ST, Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); } - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, - ST, IL, Chunk); + auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, + const unsigned IVSize, + const bool IVSigned) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, + IVSigned); + } + }; + + OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); + OuterLoopArgs.IncExpr = S.getInc(); + OuterLoopArgs.Init = S.getInit(); + OuterLoopArgs.Cond = S.getCond(); + OuterLoopArgs.NextLB = S.getNextLowerBound(); + OuterLoopArgs.NextUB = S.getNextUpperBound(); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, + emitOMPLoopBodyWithStopPoint, CodeGenOrdered); } +static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, + const unsigned IVSize, const bool IVSigned) {} + void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent) { auto &RT = CGM.getOpenMPRuntime(); @@ -1918,26 +1848,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL, LB, UB, ST, Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, + LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); + + // for combined 'distribute' and 'for' the increment expression of distribute + // is store in DistInc. For 'distribute' alone, it is in Inc. + Expr *IncExpr; + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) + IncExpr = S.getDistInc(); + else + IncExpr = S.getInc(); + + // this routine is shared by 'omp distribute parallel for' and + // 'omp distribute': select the right EUB expression depending on the + // directive + OMPLoopArguments OuterLoopArgs; + OuterLoopArgs.LB = LoopArgs.LB; + OuterLoopArgs.UB = LoopArgs.UB; + OuterLoopArgs.ST = LoopArgs.ST; + OuterLoopArgs.IL = LoopArgs.IL; + OuterLoopArgs.Chunk = LoopArgs.Chunk; + OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound(); + OuterLoopArgs.IncExpr = IncExpr; + OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit(); + OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextLowerBound() + : S.getNextLowerBound(); + OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextUpperBound() + : S.getNextUpperBound(); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, + LoopScope, OuterLoopArgs, CodeGenLoopContent, + emitEmptyOrdered); +} + +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +static std::pair<LValue, LValue> +emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + + // When composing 'distribute' with 'for' (e.g. as in 'distribute + // parallel for') we need to use the 'distribute' + // chunk lower and upper bounds rather than the whole loop iteration + // space. These are parameters to the outlined function for 'parallel' + // and we copy the bounds of the previous schedule into the + // the current ones. + LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); + LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); + llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); + PrevLBVal = CGF.EmitScalarConversion( + PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); + PrevUBVal = CGF.EmitScalarConversion( + PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + + CGF.EmitStoreOfScalar(PrevLBVal, LB); + CGF.EmitStoreOfScalar(PrevUBVal, UB); + + return {LB, UB}; +} + +/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then +/// we need to use the LB and UB expressions generated by the worksharing +/// code generation support, whereas in non combined situations we would +/// just emit 0 and the LastIteration expression +/// This function is necessary due to the difference of the LB and UB +/// types for the RT emission routines for 'for_static_init' and +/// 'for_dispatch_init' +static std::pair<llvm::Value *, llvm::Value *> +emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + // when implementing a dynamic schedule for a 'for' combined with a + // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop + // is not normalized as each team only executes its own assigned + // distribute chunk + QualType IteratorTy = IVExpr->getType(); + llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + return {LBVal, UBVal}; +} + +static void emitDistributeParallelForDistributeInnerBoundParams( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { + const auto &Dir = cast<OMPLoopDirective>(S); + LValue LB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); + auto LBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(LBCast); + LValue UB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); + + auto UBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(UBCast); +} + +static void +emitInnerParallelForWhenCombined(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), + emitDistributeParallelForInnerBounds, + emitDistributeParallelForDispatchBounds); + }; - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, - S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); + emitCommonOMPParallelDirective( + CGF, S, OMPD_for, CGInlinedWorksharingLoop, + emitDistributeParallelForDistributeInnerBoundParams); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + /*HasCancel=*/false); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -1987,18 +2050,6 @@ void CodeGenFunction::EmitOMPTargetSimdDirective( }); } -void CodeGenFunction::EmitOMPTeamsDistributeDirective( - const OMPTeamsDistributeDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( const OMPTeamsDistributeSimdDirective &S) { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2037,6 +2088,7 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( const OMPTargetTeamsDistributeDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2047,6 +2099,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( const OMPTargetTeamsDistributeParallelForDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2057,6 +2110,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2067,6 +2121,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2075,14 +2130,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } -/// \brief Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2095,7 +2142,10 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +bool CodeGenFunction::EmitOMPWorksharingLoop( + const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2143,12 +2193,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { llvm::DenseSet<const Expr *> EmittedFinals; emitAlignedClause(*this, S); - EmitOMPLinearClauseInit(S); + bool HasLinears = EmitOMPLinearClauseInit(S); // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); + LValue LB = Bounds.first; + LValue UB = Bounds.second; LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2157,7 +2207,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit 'then' code. { OMPPrivateScope LoopScope(*this); - if (EmitOMPFirstprivateClause(S, LoopScope)) { + if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables and post-update of // lastprivate variables. @@ -2202,10 +2252,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); @@ -2223,7 +2274,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { @@ -2234,9 +2286,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. + const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), + ST.getAddress(), IL.getAddress(), + Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + LoopArguments, CGDispatchBounds); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2274,12 +2328,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { return HasLastprivateClause; } +/// The following two functions generate expressions for the loop lower +/// and upper bounds in case of static and dynamic (dispatch) schedule +/// of the associated 'for' or 'distribute' loop. +static std::pair<LValue, LValue> +emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + return {LB, UB}; +} + +/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not +/// consider the lower and upper bound expressions generated by the +/// worksharing loop support, but we use 0 and the iteration space size as +/// constants +static std::pair<llvm::Value *, llvm::Value *> +emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); + llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); + llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); + return {LBVal, UBVal}; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2297,7 +2381,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2346,8 +2432,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), - /*fpContractable=*/false); + OK_Ordinary, S.getLocStart(), FPOptions()); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, S.getLocStart()); @@ -2404,10 +2489,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // Emit static non-chunked loop. OpenMPScheduleTy ScheduleKind; ScheduleKind.Schedule = OMPC_SCHEDULE_static; + CGOpenMPRuntime::StaticRTInput StaticInit( + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, - /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); auto *MinUBGlobalUB = CGF.Builder.CreateSelect( @@ -2420,7 +2506,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { [](CodeGenFunction &) {}); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); @@ -2549,9 +2636,11 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2559,9 +2648,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2571,7 +2662,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -2655,16 +2747,38 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++ID; } } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2696,7 +2810,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.EmitRuntimeCall(CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); for (auto &&Pair : LastprivateDstsOrigs) { auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE( @@ -2714,7 +2829,85 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } + // Privatize all private variables except for in_reduction items. (void)Scope.Privatize(); + SmallVector<const Expr *, 4> InRedVars; + SmallVector<const Expr *, 4> InRedPrivs; + SmallVector<const Expr *, 4> InRedOps; + SmallVector<const Expr *, 4> TaskgroupDescriptors; + for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ITD = C->taskgroup_descriptors().begin(); + for (const auto *Ref : C->varlists()) { + InRedVars.emplace_back(Ref); + InRedPrivs.emplace_back(*IPriv); + InRedOps.emplace_back(*IRed); + TaskgroupDescriptors.emplace_back(*ITD); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ITD, 1); + } + } + // Privatize in_reduction items here, because taskgroup descriptors must be + // privatized earlier. + OMPPrivateScope InRedScope(CGF); + if (!InRedVars.empty()) { + ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + // The taskgroup descriptor variable is always implicit firstprivate and + // privatized already during procoessing of the firstprivates. + llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( + CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = Address( + CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } + (void)InRedScope.Privatize(); Action.Enter(CGF); BodyGen(CGF); @@ -2773,6 +2966,35 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + if (const Expr *E = S.getReductionRef()) { + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + llvm::Value *ReductionDesc = + CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), + LHSs, RHSs, Data); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + CGF.EmitVarDecl(*VD); + CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), + /*Volatile=*/false, E->getType()); + } CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2789,7 +3011,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, + Expr *IncExpr) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2830,10 +3054,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit 'then' code. { // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + LValue LB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedLowerBoundVariable() + : S.getLowerBoundVariable()))); + LValue UB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedUpperBoundVariable() + : S.getUpperBoundVariable()))); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2878,33 +3109,46 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // league. The size of the chunks is unspecified in this case. if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr)) { + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit()); + + Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + + // for distribute alone, codegen // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + // when combined with 'for' (e.g. as in 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getLocStart()); + RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + const OMPLoopArguments LoopArguments = { + LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), + Chunk}; + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, + CodeGenLoop); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -2926,7 +3170,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S); + + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -2957,7 +3202,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); - CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + OutlinedFn, CapturedVars); } else { Action.Enter(CGF); CGF.EmitStmt( @@ -3353,6 +3599,8 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: + case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -3596,6 +3844,28 @@ void CodeGenFunction::EmitOMPTargetTeamsDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } +void CodeGenFunction::EmitOMPTeamsDistributeDirective( + const OMPTeamsDistributeDirective &S) { + + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), @@ -3835,7 +4105,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -3864,7 +4135,14 @@ void CodeGenFunction::EmitOMPTargetParallelDirective( void CodeGenFunction::EmitOMPTargetParallelForDirective( const OMPTargetParallelForDirective &S) { - // TODO: codegen for target parallel for. + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_target_parallel_for, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } /// Emit a helper variable and return corresponding lvalue. @@ -4001,7 +4279,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + if (Data.Nogroup) + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + }, + S.getLocStart()); + } } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 1907b8b402..67388ee208 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -14,11 +14,12 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> @@ -122,6 +123,33 @@ static RValue PerformReturnAdjustment(CodeGenFunction &CGF, return RValue::get(ReturnValue); } +/// This function clones a function's DISubprogram node and enters it into +/// a value map with the intent that the map can be utilized by the cloner +/// to short-circuit Metadata node mapping. +/// Furthermore, the function resolves any DILocalVariable nodes referenced +/// by dbg.value intrinsics so they can be properly mapped during cloning. +static void resolveTopLevelMetadata(llvm::Function *Fn, + llvm::ValueToValueMapTy &VMap) { + // Clone the DISubprogram node and put it into the Value map. + auto *DIS = Fn->getSubprogram(); + if (!DIS) + return; + auto *NewDIS = DIS->replaceWithDistinct(DIS->clone()); + VMap.MD()[DIS].reset(NewDIS); + + // Find all llvm.dbg.declare intrinsics and resolve the DILocalVariable nodes + // they are referencing. + for (auto &BB : Fn->getBasicBlockList()) { + for (auto &I : BB) { + if (auto *DII = dyn_cast<llvm::DbgInfoIntrinsic>(&I)) { + auto *DILocal = DII->getVariable(); + if (!DILocal->isResolved()) + DILocal->resolve(); + } + } + } +} + // This function does roughly the same thing as GenerateThunk, but in a // very different way, so that va_start and va_end work correctly. // FIXME: This function assumes "this" is the first non-sret LLVM argument of @@ -154,6 +182,10 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, // Clone to thunk. llvm::ValueToValueMapTy VMap; + + // We are cloning a function while some Metadata nodes are still unresolved. + // Ensure that the value mapper does not encounter any of them. + resolveTopLevelMetadata(BaseFn, VMap); llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap); Fn->replaceAllUsesWith(NewFn); NewFn->takeName(Fn); @@ -379,12 +411,9 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, // Apply the standard set of call attributes. unsigned CallingConv; - CodeGen::AttributeListType AttributeList; - CGM.ConstructAttributeList(CalleePtr->getName(), - *CurFnInfo, MD, AttributeList, + llvm::AttributeList Attrs; + CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs, CallingConv, /*AttrOnCallSite=*/true); - llvm::AttributeSet Attrs = - llvm::AttributeSet::get(getLLVMContext(), AttributeList); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -904,6 +933,8 @@ void CodeGenModule::EmitDeferredVTables() { for (const CXXRecordDecl *RD : DeferredVTables) if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD)) VTables.GenerateClassData(RD); + else if (shouldOpportunisticallyEmitVTables()) + OpportunisticVTables.push_back(RD); assert(savedSize == DeferredVTables.size() && "deferred extra vtables during vtable emission?"); diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index 53a376df64..7d07ea4516 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -20,6 +20,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/Type.h" #include "Address.h" +#include "CodeGenTBAA.h" namespace llvm { class Constant; @@ -146,6 +147,20 @@ static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) { return AlignmentSource::Decl; } +class LValueBaseInfo { + AlignmentSource AlignSource; + +public: + explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type) + : AlignSource(Source) {} + AlignmentSource getAlignmentSource() const { return AlignSource; } + void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; } + + void mergeForCast(const LValueBaseInfo &Info) { + setAlignmentSource(Info.getAlignmentSource()); + } +}; + /// LValue - This represents an lvalue references. Because C/C++ allow /// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a /// bitrange. @@ -200,7 +215,8 @@ class LValue { // to make the default bitfield pattern all-zeroes. bool ImpreciseLifetime : 1; - unsigned AlignSource : 2; + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; // This flag shows if a nontemporal load/stores should be used when accessing // this lvalue. @@ -208,18 +224,9 @@ class LValue { Expr *BaseIvarExp; - /// Used by struct-path-aware TBAA. - QualType TBAABaseType; - /// Offset relative to the base type. - uint64_t TBAAOffset; - - /// TBAAInfo - TBAA information to attach to dereferences of this LValue. - llvm::MDNode *TBAAInfo; - private: - void Initialize(QualType Type, Qualifiers Quals, - CharUnits Alignment, AlignmentSource AlignSource, - llvm::MDNode *TBAAInfo = nullptr) { + void Initialize(QualType Type, Qualifiers Quals, CharUnits Alignment, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { assert((!Alignment.isZero() || Type->isIncompleteType()) && "initializing l-value with zero alignment!"); this->Type = Type; @@ -227,7 +234,8 @@ private: this->Alignment = Alignment.getQuantity(); assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); - this->AlignSource = unsigned(AlignSource); + this->BaseInfo = BaseInfo; + this->TBAAInfo = TBAAInfo; // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; @@ -235,11 +243,6 @@ private: this->Nontemporal = false; this->ThreadLocalRef = false; this->BaseIvarExp = nullptr; - - // Initialize fields for TBAA. - this->TBAABaseType = Type; - this->TBAAOffset = 0; - this->TBAAInfo = TBAAInfo; } public: @@ -299,29 +302,19 @@ public: Expr *getBaseIvarExp() const { return BaseIvarExp; } void setBaseIvarExp(Expr *V) { BaseIvarExp = V; } - QualType getTBAABaseType() const { return TBAABaseType; } - void setTBAABaseType(QualType T) { TBAABaseType = T; } - - uint64_t getTBAAOffset() const { return TBAAOffset; } - void setTBAAOffset(uint64_t O) { TBAAOffset = O; } - - llvm::MDNode *getTBAAInfo() const { return TBAAInfo; } - void setTBAAInfo(llvm::MDNode *N) { TBAAInfo = N; } + TBAAAccessInfo getTBAAInfo() const { return TBAAInfo; } + void setTBAAInfo(TBAAAccessInfo Info) { TBAAInfo = Info; } const Qualifiers &getQuals() const { return Quals; } Qualifiers &getQuals() { return Quals; } - unsigned getAddressSpace() const { return Quals.getAddressSpace(); } + LangAS getAddressSpace() const { return Quals.getAddressSpace(); } CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); } void setAlignment(CharUnits A) { Alignment = A.getQuantity(); } - AlignmentSource getAlignmentSource() const { - return AlignmentSource(AlignSource); - } - void setAlignmentSource(AlignmentSource Source) { - AlignSource = unsigned(Source); - } + LValueBaseInfo getBaseInfo() const { return BaseInfo; } + void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } // simple lvalue llvm::Value *getPointer() const { @@ -368,10 +361,8 @@ public: // global register lvalue llvm::Value *getGlobalReg() const { assert(isGlobalReg()); return V; } - static LValue MakeAddr(Address address, QualType type, - ASTContext &Context, - AlignmentSource alignSource, - llvm::MDNode *TBAAInfo = nullptr) { + static LValue MakeAddr(Address address, QualType type, ASTContext &Context, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { Qualifiers qs = type.getQualifiers(); qs.setObjCGCAttr(Context.getObjCGCAttrKind(type)); @@ -379,29 +370,31 @@ public: R.LVType = Simple; assert(address.getPointer()->getType()->isPointerTy()); R.V = address.getPointer(); - R.Initialize(type, qs, address.getAlignment(), alignSource, TBAAInfo); + R.Initialize(type, qs, address.getAlignment(), BaseInfo, TBAAInfo); return R; } static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx, - QualType type, AlignmentSource alignSource) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = VectorElt; R.V = vecAddress.getPointer(); R.VectorIdx = Idx; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - alignSource); + BaseInfo, TBAAInfo); return R; } static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts, - QualType type, AlignmentSource alignSource) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = ExtVectorElt; R.V = vecAddress.getPointer(); R.VectorElts = Elts; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - alignSource); + BaseInfo, TBAAInfo); return R; } @@ -411,15 +404,15 @@ public: /// bit-field refers to. /// \param Info - The information describing how to perform the bit-field /// access. - static LValue MakeBitfield(Address Addr, - const CGBitFieldInfo &Info, - QualType type, - AlignmentSource alignSource) { + static LValue MakeBitfield(Address Addr, const CGBitFieldInfo &Info, + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = BitField; R.V = Addr.getPointer(); R.BitFieldInfo = &Info; - R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), alignSource); + R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo, + TBAAInfo); return R; } @@ -428,7 +421,7 @@ public: R.LVType = GlobalReg; R.V = Reg.getPointer(); R.Initialize(type, type.getQualifiers(), Reg.getAlignment(), - AlignmentSource::Decl); + LValueBaseInfo(AlignmentSource::Decl), TBAAAccessInfo()); return R; } diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index 166f44f816..c152291b15 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenABITypes.h" +#include "CGRecordLayout.h" #include "CodeGenModule.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" @@ -64,3 +65,25 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, info, {}, args); } + +llvm::FunctionType * +CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) { + assert(FD != nullptr && "Expected a non-null function declaration!"); + llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD); + + if (auto FT = dyn_cast<llvm::FunctionType>(T)) + return FT; + + return nullptr; +} + +llvm::Type * +CodeGen::convertTypeForMemory(CodeGenModule &CGM, QualType T) { + return CGM.getTypes().ConvertTypeForMem(T); +} + +unsigned CodeGen::getLLVMFieldNumber(CodeGenModule &CGM, + const RecordDecl *RD, + const FieldDecl *FD) { + return CGM.getTypes().getCGRecordLayout(RD).getLLVMFieldNo(FD); +} diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index b864069dc6..6ca69d63cd 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -46,6 +46,38 @@ using namespace clang; using namespace llvm; namespace clang { + class BackendConsumer; + class ClangDiagnosticHandler final : public DiagnosticHandler { + public: + ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon) + : CodeGenOpts(CGOpts), BackendCon(BCon) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override; + + bool isAnalysisRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkAnalysisPattern && + CodeGenOpts.OptimizationRemarkAnalysisPattern->match(PassName)); + } + bool isMissedOptRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkMissedPattern && + CodeGenOpts.OptimizationRemarkMissedPattern->match(PassName)); + } + bool isPassedOptRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkPattern && + CodeGenOpts.OptimizationRemarkPattern->match(PassName)); + } + + bool isAnyRemarkEnabled() const override { + return (CodeGenOpts.OptimizationRemarkAnalysisPattern || + CodeGenOpts.OptimizationRemarkMissedPattern || + CodeGenOpts.OptimizationRemarkPattern); + } + + private: + const CodeGenOptions &CodeGenOpts; + BackendConsumer *BackendCon; + }; + class BackendConsumer : public ASTConsumer { using LinkModule = CodeGenAction::LinkModule; @@ -224,18 +256,20 @@ namespace clang { void *OldContext = Ctx.getInlineAsmDiagnosticContext(); Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this); - LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler = + std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = Ctx.getDiagnosticHandler(); - void *OldDiagnosticContext = Ctx.getDiagnosticContext(); - Ctx.setDiagnosticHandler(DiagnosticHandler, this); - Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); - - std::unique_ptr<llvm::tool_output_file> OptRecordFile; + Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>( + CodeGenOpts, this)); + Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); + if (CodeGenOpts.DiagnosticsHotnessThreshold != 0) + Ctx.setDiagnosticsHotnessThreshold( + CodeGenOpts.DiagnosticsHotnessThreshold); + + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile; if (!CodeGenOpts.OptRecordFile.empty()) { std::error_code EC; - OptRecordFile = - llvm::make_unique<llvm::tool_output_file>(CodeGenOpts.OptRecordFile, - EC, sys::fs::F_None); + OptRecordFile = llvm::make_unique<llvm::ToolOutputFile>( + CodeGenOpts.OptRecordFile, EC, sys::fs::F_None); if (EC) { Diags.Report(diag::err_cannot_open_file) << CodeGenOpts.OptRecordFile << EC.message(); @@ -246,7 +280,7 @@ namespace clang { llvm::make_unique<yaml::Output>(OptRecordFile->os())); if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) - Ctx.setDiagnosticHotnessRequested(true); + Ctx.setDiagnosticsHotnessRequested(true); } // Link each LinkModule into our module. @@ -261,7 +295,7 @@ namespace clang { Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); - Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext); + Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); if (OptRecordFile) OptRecordFile->keep(); @@ -296,11 +330,6 @@ namespace clang { ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); } - static void DiagnosticHandler(const llvm::DiagnosticInfo &DI, - void *Context) { - ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI); - } - /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc @@ -340,6 +369,11 @@ namespace clang { void BackendConsumer::anchor() {} } +bool ClangDiagnosticHandler::handleDiagnostics(const DiagnosticInfo &DI) { + BackendCon->DiagnosticHandlerImpl(DI); + return true; +} + /// ConvertBackendLocation - Convert a location in a temporary llvm::SourceMgr /// buffer to be a valid FullSourceLoc. static FullSourceLoc ConvertBackendLocation(const llvm::SMDiagnostic &D, @@ -399,6 +433,8 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; + case llvm::SourceMgr::DK_Remark: + llvm_unreachable("remarks unexpected"); } // If this problem has clang-level source location information, report the // issue in the source with a note showing the instantiated @@ -548,9 +584,9 @@ void BackendConsumer::UnsupportedDiagHandler( StringRef Filename; unsigned Line, Column; - bool BadDebugInfo; - FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, - Line, Column); + bool BadDebugInfo = false; + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); @@ -572,8 +608,8 @@ void BackendConsumer::EmitOptimizationMessage( StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; - FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, - Line, Column); + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); std::string Msg; raw_string_ostream MsgStream(Msg); @@ -597,6 +633,10 @@ void BackendConsumer::EmitOptimizationMessage( void BackendConsumer::OptimizationRemarkHandler( const llvm::DiagnosticInfoOptimizationBase &D) { + // Without hotness information, don't show noisy remarks. + if (D.isVerbose() && !D.getHotness()) + return; + if (D.isPassed()) { // Optimization remarks are active only if the -Rpass flag has a regular // expression that matches the name of the pass name in \p D. @@ -881,6 +921,8 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; + case llvm::SourceMgr::DK_Remark: + llvm_unreachable("remarks unexpected"); } Diags->Report(DiagID).AddString("cannot compile inline asm"); @@ -944,7 +986,7 @@ std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. - if (getCurrentFileKind() == IK_LLVM_IR) { + if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); std::unique_ptr<raw_pwrite_stream> OS = diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 11660b4f41..234d1a2849 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -22,6 +22,7 @@ #include "CodeGenPGO.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" @@ -45,15 +46,15 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, if (CGOpts.DisableLifetimeMarkers) return false; - // Asan uses markers for use-after-scope checks. - if (CGOpts.SanitizeAddressUseAfterScope) - return true; - // Disable lifetime markers in msan builds. // FIXME: Remove this when msan works with lifetime markers. if (LangOpts.Sanitize.has(SanitizerKind::Memory)) return false; + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + // For now, only in optimized builds. return CGOpts.OptimizationLevel != 0; } @@ -117,25 +118,32 @@ CodeGenFunction::~CodeGenFunction() { } CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, - AlignmentSource *Source) { - return getNaturalTypeAlignment(T->getPointeeType(), Source, - /*forPointee*/ true); + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { + return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, + /* forPointeeType= */ true); } CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, - AlignmentSource *Source, + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo, bool forPointeeType) { + if (TBAAInfo) + *TBAAInfo = CGM.getTBAAAccessInfo(T); + // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. if (auto TT = T->getAs<TypedefType>()) { if (auto Align = TT->getDecl()->getMaxAlignment()) { - if (Source) *Source = AlignmentSource::AttributedType; + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); return getContext().toCharUnitsFromBits(Align); } } - if (Source) *Source = AlignmentSource::Type; + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type); CharUnits Alignment; if (T->isIncompleteType()) { @@ -165,19 +173,22 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, } LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { - AlignmentSource AlignSource; - CharUnits Alignment = getNaturalTypeAlignment(T, &AlignSource); - return LValue::MakeAddr(Address(V, Alignment), T, getContext(), AlignSource, - CGM.getTBAAInfo(T)); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo); + return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo, + TBAAInfo); } /// Given a value of type T* that may not be to a complete object, /// construct an l-value with the natural pointee alignment of T. LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) { - AlignmentSource AlignSource; - CharUnits Align = getNaturalTypeAlignment(T, &AlignSource, /*pointee*/ true); - return MakeAddrLValue(Address(V, Align), T, AlignSource); + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo, + /* forPointeeType= */ true); + return MakeAddrLValue(Address(V, Align), T, BaseInfo, TBAAInfo); } @@ -346,7 +357,7 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Emit debug descriptor for function end. if (CGDebugInfo *DI = getDebugInfo()) - DI->EmitFunctionEnd(Builder); + DI->EmitFunctionEnd(Builder, CurFn); // Reset the debug location to that of the simple 'return' expression, if any // rather than that of the end of the function's scope '}'. @@ -426,6 +437,43 @@ bool CodeGenFunction::ShouldXRayInstrumentFunction() const { return CGM.getCodeGenOpts().XRayInstrumentFunctions; } +llvm::Constant * +CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F, + llvm::Constant *Addr) { + // Addresses stored in prologue data can't require run-time fixups and must + // be PC-relative. Run-time fixups are undesirable because they necessitate + // writable text segments, which are unsafe. And absolute addresses are + // undesirable because they break PIE mode. + + // Add a layer of indirection through a private global. Taking its address + // won't result in a run-time fixup, even if Addr has linkonce_odr linkage. + auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(), + /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Addr); + + // Create a PC-relative address. + auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy); + auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy); + auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt); + return (IntPtrTy == Int32Ty) + ? PCRelAsInt + : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty); +} + +llvm::Value * +CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, + llvm::Value *EncodedAddr) { + // Reconstruct the address of the global. + auto *PCRelAsInt = Builder.CreateSExt(EncodedAddr, IntPtrTy); + auto *FuncAsInt = Builder.CreatePtrToInt(F, IntPtrTy, "func_addr.int"); + auto *GOTAsInt = Builder.CreateAdd(PCRelAsInt, FuncAsInt, "global_addr.int"); + auto *GOTAddr = Builder.CreateIntToPtr(GOTAsInt, Int8PtrPtrTy, "global_addr"); + + // Load the original pointer through the global. + return Builder.CreateLoad(Address(GOTAddr, getPointerAlign()), + "decoded_addr"); +} + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. @@ -477,8 +525,8 @@ static void removeImageAccessQualifier(std::string& TyName) { // for example in clGetKernelArgInfo() implementation between the address // spaces with targets without unique mapping to the OpenCL address spaces // (basically all single AS CPUs). -static unsigned ArgInfoAddressSpace(unsigned LangAS) { - switch (LangAS) { +static unsigned ArgInfoAddressSpace(LangAS AS) { + switch (AS) { case LangAS::opencl_global: return 1; case LangAS::opencl_constant: return 2; case LangAS::opencl_local: return 3; @@ -610,11 +658,6 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName)); - // Get argument type qualifiers: - if (ty.isConstQualified()) - typeQuals = "const"; - if (ty.isVolatileQualified()) - typeQuals += typeQuals.empty() ? "volatile" : " volatile"; if (isPipe) typeQuals = "pipe"; } @@ -623,7 +666,10 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Get image and pipe access qualifier: if (ty->isImageType()|| ty->isPipeType()) { - const OpenCLAccessAttr *A = parm->getAttr<OpenCLAccessAttr>(); + const Decl *PDecl = parm; + if (auto *TD = dyn_cast<TypedefType>(ty)) + PDecl = TD->getDecl(); + const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); else if (A && A->isReadWrite()) @@ -663,34 +709,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { - QualType hintQTy = A->getTypeHint(); - const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>(); - bool isSignedInteger = - hintQTy->isSignedIntegerType() || - (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); - llvm::Metadata *attrMDArgs[] = { + QualType HintQTy = A->getTypeHint(); + const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>(); + bool IsSignedInteger = + HintQTy->isSignedIntegerType() || + (HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType()); + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(llvm::UndefValue::get( CGM.getTypes().ConvertType(A->getTypeHint()))), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), - llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))}; - Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs)); + llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))}; + Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs)); + } + + if (const OpenCLIntelReqdSubGroupSizeAttr *A = + FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); } } @@ -715,6 +769,35 @@ static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { Fn->removeFnAttr(llvm::Attribute::SanitizeThread); } +static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { + auto *MD = dyn_cast_or_null<CXXMethodDecl>(D); + if (!MD || !MD->getDeclName().getAsIdentifierInfo() || + !MD->getDeclName().getAsIdentifierInfo()->isStr("allocate") || + (MD->getNumParams() != 1 && MD->getNumParams() != 2)) + return false; + + if (MD->parameters()[0]->getType().getCanonicalType() != Ctx.getSizeType()) + return false; + + if (MD->getNumParams() == 2) { + auto *PT = MD->parameters()[1]->getType()->getAs<PointerType>(); + if (!PT || !PT->isVoidPointerType() || + !PT->getPointeeType().isConstQualified()) + return false; + } + + return true; +} + +/// Return the UBSan prologue signature for \p FD if one is available. +static llvm::Constant *getPrologueSignature(CodeGenModule &CGM, + const FunctionDecl *FD) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) + if (!MD->isStatic()) + return nullptr; + return CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM); +} + void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, @@ -738,8 +821,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); - if (CGM.isInSanitizerBlacklist(Fn, Loc)) - SanOpts.clear(); + // If this function has been blacklisted for any of the enabled sanitizers, + // disable the sanitizer for the function. + do { +#define SANITIZER(NAME, ID) \ + if (SanOpts.empty()) \ + break; \ + if (SanOpts.has(SanitizerKind::ID)) \ + if (CGM.isInSanitizerBlacklist(SanitizerKind::ID, Fn, Loc)) \ + SanOpts.set(SanitizerKind::ID, false); + +#include "clang/Basic/Sanitizers.def" +#undef SANITIZER + } while (0); if (D) { // Apply the no_sanitize* attributes to SanOpts. @@ -774,6 +868,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, } } + // Ignore unrelated casts in STL allocate() since the allocator must cast + // from void* to T* before object initialization completes. Don't match on the + // namespace because not all allocators are in std:: + if (D && SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + if (matchesStlAllocatorFn(D, getContext())) + SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast; + } + // Apply xray attributes to the function (as a string, for now) if (D && ShouldXRayInstrumentFunction()) { if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { @@ -786,9 +888,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, llvm::utostr(LogArgs->getArgumentCount())); } } else { - Fn->addFnAttr( - "xray-instruction-threshold", - llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + if (!CGM.imbueXRayAttrs(Fn, Loc)) + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); } } @@ -800,6 +903,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr("no-jump-tables", llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables)); + // Add profile-sample-accurate value. + if (CGM.getCodeGenOpts().ProfileSampleAccurate) + Fn->addFnAttr("profile-sample-accurate"); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -810,11 +917,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // prologue data. if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (llvm::Constant *PrologueSig = - CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { + if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) { llvm::Constant *FTRTTIConst = CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true); - llvm::Constant *PrologueStructElems[] = { PrologueSig, FTRTTIConst }; + llvm::Constant *FTRTTIConstEncoded = + EncodeAddrForUseInPrologue(Fn, FTRTTIConst); + llvm::Constant *PrologueStructElems[] = {PrologueSig, + FTRTTIConstEncoded}; llvm::Constant *PrologueStructConst = llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true); Fn->setPrologueData(PrologueStructConst); @@ -854,6 +963,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Builder.SetInsertPoint(EntryBB); + // If we're checking the return value, allocate space for a pointer to a + // precise source location of the checked return statement. + if (requiresReturnValueCheck()) { + ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr"); + InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy)); + } + // Emit subprogram debug descriptor. if (CGDebugInfo *DI = getDebugInfo()) { // Reconstruct the type from the argument list so that implicit parameters, @@ -881,8 +997,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (CGM.getCodeGenOpts().InstrumentForProfiling) { if (CGM.getCodeGenOpts().CallFEntry) Fn->addFnAttr("fentry-call", "true"); - else - Fn->addFnAttr("counting-function", getTarget().getMCountName()); + else { + if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) + Fn->addFnAttr("counting-function", getTarget().getMCountName()); + } } if (RetTy->isVoidType()) { @@ -967,13 +1085,25 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, CXXThisValue = CXXABIThisValue; } - // Null-check the 'this' pointer once per function, if it's available. - if (CXXThisValue) { + // Check the 'this' pointer once per function, if it's available. + if (CXXABIThisValue) { SanitizerSet SkippedChecks; - SkippedChecks.set(SanitizerKind::Alignment, true); SkippedChecks.set(SanitizerKind::ObjectSize, true); - EmitTypeCheck(TCK_Load, Loc, CXXThisValue, MD->getThisType(getContext()), - /*Alignment=*/CharUnits::Zero(), SkippedChecks); + QualType ThisTy = MD->getThisType(getContext()); + + // If this is the call operator of a lambda with no capture-default, it + // may have a static invoker function, which may call this operator with + // a null 'this' pointer. + if (isLambdaCallOperator(MD) && + cast<CXXRecordDecl>(MD->getParent())->getLambdaCaptureDefault() == + LCD_None) + SkippedChecks.set(SanitizerKind::Null, true); + + EmitTypeCheck(isa<CXXConstructorDecl>(MD) ? TCK_ConstructorCall + : TCK_MemberCall, + Loc, CXXABIThisValue, ThisTy, + getContext().getTypeAlignInChars(ThisTy->getPointeeType()), + SkippedChecks); } } @@ -1076,10 +1206,9 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, if (!Param->hasAttr<PassObjectSizeAttr>()) continue; - IdentifierInfo *NoID = nullptr; auto *Implicit = ImplicitParamDecl::Create( - getContext(), Param->getDeclContext(), Param->getLocation(), NoID, - getContext().getSizeType()); + getContext(), Param->getDeclContext(), Param->getLocation(), + /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other); SizeArguments[Param] = Implicit; Args.push_back(Implicit); } @@ -1158,16 +1287,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, !getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>()) CGM.getCUDARuntime().emitDeviceStub(*this, Args); - else if (isa<CXXConversionDecl>(FD) && - cast<CXXConversionDecl>(FD)->isLambdaToBlockPointerConversion()) { - // The lambda conversion to block pointer is special; the semantics can't be - // expressed in the AST, so IRGen needs to special-case it. - EmitLambdaToBlockPointerBody(Args); - } else if (isa<CXXMethodDecl>(FD) && - cast<CXXMethodDecl>(FD)->isLambdaStaticInvoker()) { + else if (isa<CXXMethodDecl>(FD) && + cast<CXXMethodDecl>(FD)->isLambdaStaticInvoker()) { // The lambda static invoker function is special, because it forwards or // clones the body of the function call operator (but is actually static). - EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD)); + EmitLambdaStaticInvokeBody(cast<CXXMethodDecl>(FD)); } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) && (cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator() || cast<CXXMethodDecl>(FD)->isMoveAssignmentOperator())) { diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 2db8a18cef..e1034c129d 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -76,6 +76,10 @@ class ObjCAtThrowStmt; class ObjCAtSynchronizedStmt; class ObjCAutoreleasePoolStmt; +namespace analyze_os_log { +class OSLogBufferLayout; +} + namespace CodeGen { class CodeGenTypes; class CGCallee; @@ -111,13 +115,17 @@ enum TypeEvaluationKind { SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0) \ SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0) \ SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0) \ + SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \ SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \ SANITIZER_CHECK(MissingReturn, missing_return, 0) \ SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \ SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \ + SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \ + SANITIZER_CHECK(NullabilityReturn, nullability_return, 1) \ SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \ - SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \ + SANITIZER_CHECK(NonnullReturn, nonnull_return, 1) \ SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \ + SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \ SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \ SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \ SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \ @@ -173,6 +181,25 @@ public: // because of jumps. VarBypassDetector Bypasses; + // CodeGen lambda for loops and support for ordered clause + typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &, + JumpDest)> + CodeGenLoopTy; + typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation, + const unsigned, const bool)> + CodeGenOrderedTy; + + // Codegen lambda for loop bounds in worksharing loop constructs + typedef llvm::function_ref<std::pair<LValue, LValue>( + CodeGenFunction &, const OMPExecutableDirective &S)> + CodeGenLoopBoundsTy; + + // Codegen lambda for loop bounds in dispatch-based loop implementation + typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>( + CodeGenFunction &, const OMPExecutableDirective &S, Address LB, + Address UB)> + CodeGenDispatchBoundsTy; + /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -240,9 +267,9 @@ public: if (I->capturesThis()) CXXThisFieldDecl = *Field; else if (I->capturesVariable()) - CaptureFields[I->getCapturedVar()] = *Field; + CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field; else if (I->capturesVariableByCopy()) - CaptureFields[I->getCapturedVar()] = *Field; + CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field; } } @@ -256,7 +283,7 @@ public: /// \brief Lookup the captured field decl for a variable. virtual const FieldDecl *lookup(const VarDecl *VD) const { - return CaptureFields.lookup(VD); + return CaptureFields.lookup(VD->getCanonicalDecl()); } bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; } @@ -686,6 +713,7 @@ public: llvm::function_ref<Address()> PrivateGen) { assert(PerformCleanup && "adding private to dead scope"); + LocalVD = LocalVD->getCanonicalDecl(); // Only save it once. if (SavedLocals.count(LocalVD)) return false; @@ -738,6 +766,7 @@ public: /// Checks if the global variable is captured in current function. bool isGlobalVarCaptured(const VarDecl *VD) const { + VD = VD->getCanonicalDecl(); return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0; } @@ -1094,7 +1123,7 @@ private: auto IP = CGF.Builder.saveAndClearIP(); CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); CodeGen(CGF); - CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.EmitBranch(Stack.back().ContBlock.getBlock()); CGF.Builder.restoreIP(IP); Stack.back().HasBeenEmitted = true; } @@ -1385,6 +1414,17 @@ private: return RetValNullabilityPrecondition; } + /// Used to store precise source locations for return statements by the + /// runtime return value checks. + Address ReturnLocation = Address::invalid(); + + /// Check if the return value of this function requires sanitization. + bool requiresReturnValueCheck() const { + return requiresReturnValueNullabilityCheck() || + (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && + CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>()); + } + llvm::BasicBlock *TerminateLandingPad; llvm::BasicBlock *TerminateHandler; llvm::BasicBlock *TrapBB; @@ -1392,16 +1432,8 @@ private: /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; - /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. - /// In the kernel metadata node, reference the kernel function and metadata - /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): - /// - A node for the vec_type_hint(<type>) qualifier contains string - /// "vec_type_hint", an undefined value of the <type> data type, - /// and a Boolean that is true if the <type> is integer and signed. - /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string - /// "work_group_size_hint", and three 32-bit integers X, Y and Z. - /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string - /// "reqd_work_group_size", and three 32-bit integers X, Y and Z. + /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to + /// the function metadata. void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); @@ -1454,6 +1486,9 @@ public: const TargetInfo &getTarget() const { return Target; } llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); } + const TargetCodeGenInfo &getTargetHooks() const { + return CGM.getTargetCodeGenInfo(); + } //===--------------------------------------------------------------------===// // Cleanups @@ -1549,13 +1584,21 @@ public: // Block Bits //===--------------------------------------------------------------------===// - llvm::Value *EmitBlockLiteral(const BlockExpr *); + /// Emit block literal. + /// \return an LLVM value which is a pointer to a struct which contains + /// information about the block, including the block invoke function, the + /// captured variables, etc. + /// \param InvokeF will contain the block invoke function if it is not + /// nullptr. + llvm::Value *EmitBlockLiteral(const BlockExpr *, + llvm::Function **InvokeF = nullptr); static void destroyBlockInfos(CGBlockInfo *info); llvm::Function *GenerateBlockFunction(GlobalDecl GD, const CGBlockInfo &Info, const DeclMapTy &ldm, - bool IsLambdaConversionToBlock); + bool IsLambdaConversionToBlock, + bool BuildGlobalBlock); llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); @@ -1614,10 +1657,9 @@ public: void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, CallArgList &CallArgs); - void EmitLambdaToBlockPointerBody(FunctionArgList &Args); void EmitLambdaBlockInvokeBody(); void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); - void EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD); + void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); void EmitAsanPrologueOrEpilogue(bool Prologue); /// \brief Emit the unified return block, trying to avoid its emission when @@ -1724,11 +1766,6 @@ public: llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset); - /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given - /// expr can be devirtualized. - bool CanDevirtualizeMemberFunctionCall(const Expr *Base, - const CXXMethodDecl *MD); - /// EnterDtorCleanups - Enter the cleanups necessary to complete the /// given phase of destruction for a destructor. The end result /// should call destructors on members and base classes in reverse @@ -1751,6 +1788,15 @@ public: /// EmitMCountInstrumentation - Emit call to .mcount. void EmitMCountInstrumentation(); + /// Encode an address into a form suitable for use in a function prologue. + llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F, + llvm::Constant *Addr); + + /// Decode an address used in a function prologue, encoded by \c + /// EncodeAddrForUseInPrologue. + llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F, + llvm::Value *EncodedAddr); + /// EmitFunctionProlog - Emit the target specific LLVM code to load the /// arguments for the given function. This is also responsible for naming the /// LLVM function arguments. @@ -1764,7 +1810,7 @@ public: SourceLocation EndLoc); /// Emit a test that checks if the return value \p RV is nonnull. - void EmitReturnValueCheck(llvm::Value *RV, SourceLocation EndLoc); + void EmitReturnValueCheck(llvm::Value *RV); /// EmitStartEHSpec - Emit the start of the exception spec. void EmitStartEHSpec(const Decl *D); @@ -1793,8 +1839,7 @@ public: /// TypeOfSelfObject - Return type of object that this self represents. QualType TypeOfSelfObject(); - /// hasAggregateLLVMType - Return true if the specified AST type will map into - /// an aggregate LLVM type or is void. + /// getEvaluationKind - Return the TypeEvaluationKind of QualType \c T. static TypeEvaluationKind getEvaluationKind(QualType T); static bool hasScalarEvaluationKind(QualType T) { @@ -1873,40 +1918,85 @@ public: //===--------------------------------------------------------------------===// LValue MakeAddrLValue(Address Addr, QualType T, - AlignmentSource AlignSource = AlignmentSource::Type) { - return LValue::MakeAddr(Addr, T, getContext(), AlignSource, - CGM.getTBAAInfo(T)); + AlignmentSource Source = AlignmentSource::Type) { + return LValue::MakeAddr(Addr, T, getContext(), LValueBaseInfo(Source), + CGM.getTBAAAccessInfo(T)); + } + + LValue MakeAddrLValue(Address Addr, QualType T, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { + return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, TBAAInfo); } LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, - AlignmentSource AlignSource = AlignmentSource::Type) { + AlignmentSource Source = AlignmentSource::Type) { return LValue::MakeAddr(Address(V, Alignment), T, getContext(), - AlignSource, CGM.getTBAAInfo(T)); + LValueBaseInfo(Source), CGM.getTBAAAccessInfo(T)); + } + + LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { + return LValue::MakeAddr(Address(V, Alignment), T, getContext(), + BaseInfo, TBAAInfo); } LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T); LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T); CharUnits getNaturalTypeAlignment(QualType T, - AlignmentSource *Source = nullptr, + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr, bool forPointeeType = false); CharUnits getNaturalPointeeTypeAlignment(QualType T, - AlignmentSource *Source = nullptr); - - Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy, - AlignmentSource *Source = nullptr); - LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); + + Address EmitLoadOfReference(LValue RefLVal, + LValueBaseInfo *PointeeBaseInfo = nullptr, + TBAAAccessInfo *PointeeTBAAInfo = nullptr); + LValue EmitLoadOfReferenceLValue(LValue RefLVal); + LValue EmitLoadOfReferenceLValue(Address RefAddr, QualType RefTy, + AlignmentSource Source = + AlignmentSource::Type) { + LValue RefLVal = MakeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source), + CGM.getTBAAAccessInfo(RefTy)); + return EmitLoadOfReferenceLValue(RefLVal); + } Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); - /// CreateTempAlloca - This creates a alloca and inserts it into the entry - /// block. The caller is responsible for setting an appropriate alignment on + /// CreateTempAlloca - This creates an alloca and inserts it into the entry + /// block if \p ArraySize is nullptr, otherwise inserts it at the current + /// insertion point of the builder. The caller is responsible for setting an + /// appropriate alignment on /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// + /// \p ArraySize is the number of array elements to be allocated if it + /// is not nullptr. + /// + /// LangAS::Default is the address space of pointers to local variables and + /// temporaries, as exposed in the source language. In certain + /// configurations, this is not the same as the alloca address space, and a + /// cast is needed to lift the pointer from the alloca AS into + /// LangAS::Default. This can happen when the target uses a restricted + /// address space for the stack but the source language requires + /// LangAS::Default to be a generic address space. The latter condition is + /// common for most programming languages; OpenCL is an exception in that + /// LangAS::Default is the private address space, which naturally maps + /// to the stack. + /// + /// Because the address of a temporary is often exposed to the program in + /// various ways, this function will perform the cast by default. The cast + /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is + /// more efficient if the caller knows that the address will not be exposed. + llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, - const Twine &Name = "tmp"); + const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr, + bool CastToDefaultAddrSpace = true); /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. @@ -1941,9 +2031,12 @@ public: Address CreateIRTemp(QualType T, const Twine &Name = "tmp"); /// CreateMemTemp - Create a temporary memory object of the given type, with - /// appropriate alignment. - Address CreateMemTemp(QualType T, const Twine &Name = "tmp"); - Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp"); + /// appropriate alignment. Cast it to the default address space if + /// \p CastToDefaultAddrSpace is true. + Address CreateMemTemp(QualType T, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); + Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); /// CreateAggTemp - Create a temporary memory object for the given /// aggregate type. @@ -1984,7 +2077,7 @@ public: /// pointer to a char. Address EmitMSVAListRef(const Expr *E); - /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will + /// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will /// always be accessible even if no aggregate location is provided. RValue EmitAnyExprToTemp(const Expr *E); @@ -2075,9 +2168,8 @@ public: llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L); llvm::BasicBlock *GetIndirectGotoBlock(); - /// Check if \p E is a reference, or a C++ "this" pointer wrapped in value- - /// preserving casts. - static bool IsDeclRefOrWrappedCXXThis(const Expr *E); + /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts. + static bool IsWrappedCXXThis(const Expr *E); /// EmitNullInitialization - Generate code to set a value of the given type to /// null, If the type contains data member pointers, they will be initialized @@ -2290,9 +2382,17 @@ public: TCK_Upcast, /// Checking the operand of a cast to a virtual base object. Must be an /// object within its lifetime. - TCK_UpcastToVirtualBase + TCK_UpcastToVirtualBase, + /// Checking the value assigned to a _Nonnull pointer. Must not be null. + TCK_NonnullAssign }; + /// Determine whether the pointer type check \p TCK permits null pointers. + static bool isNullPointerAllowed(TypeCheckKind TCK); + + /// Determine whether the pointer type check \p TCK requires a vptr check. + static bool isVptrCheckRequired(TypeCheckKind TCK, QualType Ty); + /// \brief Whether any type-checking sanitizers are enabled. If \c false, /// calls to EmitTypeCheck can be skipped. bool sanitizePerformTypeCheck() const; @@ -2461,6 +2561,12 @@ public: PeepholeProtection protectFromPeepholes(RValue rvalue); void unprotectFromPeepholes(PeepholeProtection protection); + void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment, + llvm::Value *OffsetValue = nullptr) { + Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, + OffsetValue); + } + //===--------------------------------------------------------------------===// // Statement Emission //===--------------------------------------------------------------------===// @@ -2474,7 +2580,7 @@ public: /// This function may clear the current insertion point; callers should use /// EnsureInsertPoint if they wish to subsequently generate code without first /// calling EmitBlock, EmitBranch, or EmitStmt. - void EmitStmt(const Stmt *S); + void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = None); /// EmitSimpleStmt - Try to emit a "simple" statement which does not /// necessarily require an insertion point or debug information; typically @@ -2524,6 +2630,14 @@ public: void EmitCoroutineBody(const CoroutineBodyStmt &S); void EmitCoreturnStmt(const CoreturnStmt &S); + RValue EmitCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + LValue EmitCoawaitLValue(const CoawaitExpr *E); + RValue EmitCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + LValue EmitCoyieldLValue(const CoyieldExpr *E); RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID); void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); @@ -2695,7 +2809,9 @@ public: /// and initializes them with the values according to OpenMP standard. /// /// \param D Directive (possibly) with the 'linear' clause. - void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + /// \return true if at least one linear variable is found that should be + /// initialized with the value of the original variable, false otherwise. + bool EmitOMPLinearClauseInit(const OMPLoopDirective &D); typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, llvm::Value * /*OutlinedFn*/, @@ -2741,7 +2857,6 @@ public: void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); - void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); void EmitOMPDistributeParallelForSimdDirective( @@ -2798,32 +2913,84 @@ public: void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope); + /// Helper for the OpenMP loop directives. + void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); + + /// \brief Emit code for the worksharing loop-based directive. + /// \return true, if this construct has any lastprivate clause, false - + /// otherwise. + bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds); + + /// Emits the lvalue for the expression with possibly captured variable. + LValue EmitOMPSharedLValue(const Expr *E); + private: - /// Helpers for blocks - llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); + /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not + /// nullptr. It should be called without \p InvokeF if the caller does not + /// need invoke function to be returned. + llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info, + llvm::Function **InvokeF = nullptr); /// Helpers for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); - /// \brief Emit code for the worksharing loop-based directive. - /// \return true, if this construct has any lastprivate clause, false - - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); - void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + + void EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); + + /// struct with the values to be passed to the OpenMP loop-related functions + struct OMPLoopArguments { + /// loop lower bound + Address LB = Address::invalid(); + /// loop upper bound + Address UB = Address::invalid(); + /// loop stride + Address ST = Address::invalid(); + /// isLastIteration argument for runtime functions + Address IL = Address::invalid(); + /// Chunk value generated by sema + llvm::Value *Chunk = nullptr; + /// EnsureUpperBound + Expr *EUB = nullptr; + /// IncrementExpression + Expr *IncExpr = nullptr; + /// Loop initialization + Expr *Init = nullptr; + /// Loop exit condition + Expr *Cond = nullptr; + /// Update of LB after a whole chunk has been executed + Expr *NextLB = nullptr; + /// Update of UB after a whole chunk has been executed + Expr *NextUB = nullptr; + OMPLoopArguments() = default; + OMPLoopArguments(Address LB, Address UB, Address ST, Address IL, + llvm::Value *Chunk = nullptr, Expr *EUB = nullptr, + Expr *IncExpr = nullptr, Expr *Init = nullptr, + Expr *Cond = nullptr, Expr *NextLB = nullptr, + Expr *NextUB = nullptr) + : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB), + IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB), + NextUB(NextUB) {} + }; + void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoop, + const CodeGenOrderedTy &CodeGenOrdered); void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, Address LB, - Address UB, Address ST, Address IL, - llvm::Value *Chunk); - void EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + OMPPrivateScope &LoopScope, bool Ordered, + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds); + void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind, + const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, + const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent); /// \brief Emit code for sections directive. void EmitSections(const OMPExecutableDirective &S); @@ -2921,11 +3088,15 @@ public: /// the LLVM value representation. llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, - AlignmentSource AlignSource = - AlignmentSource::Type, - llvm::MDNode *TBAAInfo = nullptr, - QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0, + AlignmentSource Source = AlignmentSource::Type, + bool isNontemporal = false) { + return EmitLoadOfScalar(Addr, Volatile, Ty, Loc, LValueBaseInfo(Source), + CGM.getTBAAAccessInfo(Ty), isNontemporal); + } + + llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, + SourceLocation Loc, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo, bool isNontemporal = false); /// EmitLoadOfScalar - Load a scalar value from an address, taking @@ -2939,10 +3110,16 @@ public: /// the LLVM value representation. void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, - AlignmentSource AlignSource = AlignmentSource::Type, - llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, - QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0, bool isNontemporal = false); + AlignmentSource Source = AlignmentSource::Type, + bool isInit = false, bool isNontemporal = false) { + EmitStoreOfScalar(Value, Addr, Volatile, Ty, LValueBaseInfo(Source), + CGM.getTBAAAccessInfo(Ty), isInit, isNontemporal); + } + + void EmitStoreOfScalar(llvm::Value *Value, Address Addr, + bool Volatile, QualType Ty, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, + bool isInit = false, bool isNontemporal = false); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to @@ -3012,7 +3189,8 @@ public: RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc); Address EmitArrayToPointerDecay(const Expr *Array, - AlignmentSource *AlignSource = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); class ConstantEmission { llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference; @@ -3045,6 +3223,7 @@ public: }; ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr); + ConstantEmission tryEmitAsConstant(const MemberExpr *ME); RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e, AggValueSlot slot = AggValueSlot::ignored()); @@ -3153,7 +3332,8 @@ public: Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, - AlignmentSource *AlignSource = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); @@ -3172,6 +3352,13 @@ public: unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); + /// Emit IR for __builtin_os_log_format. + RValue emitBuiltinOSLogFormat(const CallExpr &E); + + llvm::Function *generateBuiltinOSLogHelperFunction( + const analyze_os_log::OSLogBufferLayout &Layout, + CharUnits BufferAlignment); + RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call @@ -3290,6 +3477,7 @@ public: static Destroyer destroyARCStrongImprecise; static Destroyer destroyARCStrongPrecise; static Destroyer destroyARCWeak; + static Destroyer emitARCIntrinsicUse; void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); llvm::Value *EmitObjCAutoreleasePoolPush(); @@ -3383,6 +3571,14 @@ public: void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit); + enum class GuardKind { VariableGuard, TlsGuard }; + + /// Emit a branch to select whether or not to perform guarded initialization. + void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, + llvm::BasicBlock *InitBlock, + llvm::BasicBlock *NoInitBlock, + GuardKind Kind, const VarDecl *D); + /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. void GenerateCXXGlobalInitFunc(llvm::Function *Fn, @@ -3391,9 +3587,10 @@ public: /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. - void GenerateCXXGlobalDtorsFunc(llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakVH, - llvm::Constant*> > &DtorsAndObjects); + void GenerateCXXGlobalDtorsFunc( + llvm::Function *Fn, + const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> + &DtorsAndObjects); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, @@ -3475,6 +3672,33 @@ public: /// nonnull, if \p LHS is marked _Nonnull. void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// An enumeration which makes it easier to specify whether or not an + /// operation is a subtraction. + enum { NotSubtraction = false, IsSubtraction = true }; + + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to + /// detect undefined behavior when the pointer overflow sanitizer is enabled. + /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. + llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, + ArrayRef<llvm::Value *> IdxList, + bool SignedIndices, + bool IsSubtraction, + SourceLocation Loc, + const Twine &Name = ""); + + /// Specifies which type of sanitizer check to apply when handling a + /// particular builtin. + enum BuiltinCheckKind { + BCK_CTZPassedZero, + BCK_CLZPassedZero, + }; + + /// Emits an argument for a call to a builtin. If the builtin sanitizer is + /// enabled, a runtime check specified by \p Kind is also emitted. + llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind); + /// \brief Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); @@ -3508,6 +3732,9 @@ public: /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); + /// \brief Emit a stub for the cross-DSO CFI check function. + void EmitCfiCheckStub(); + /// \brief Emit a cross-DSO CFI failure handling function. void EmitCfiCheckFail(); @@ -3667,39 +3894,32 @@ public: unsigned ParamsToSkip = 0, EvaluationOrder Order = EvaluationOrder::Default); - /// EmitPointerWithAlignment - Given an expression with a pointer - /// type, emit the value and compute our best estimate of the - /// alignment of the pointee. + /// EmitPointerWithAlignment - Given an expression with a pointer type, + /// emit the value and compute our best estimate of the alignment of the + /// pointee. /// - /// Note that this function will conservatively fall back on the type - /// when it doesn't + /// \param BaseInfo - If non-null, this will be initialized with + /// information about the source of the alignment and the may-alias + /// attribute. Note that this function will conservatively fall back on + /// the type when it doesn't recognize the expression and may-alias will + /// be set to false. /// - /// \param Source - If non-null, this will be initialized with - /// information about the source of the alignment. Note that this - /// function will conservatively fall back on the type when it - /// doesn't recognize the expression, which means that sometimes - /// - /// a worst-case One - /// reasonable way to use this information is when there's a - /// language guarantee that the pointer must be aligned to some - /// stricter value, and we're simply trying to ensure that - /// sufficiently obvious uses of under-aligned objects don't get - /// miscompiled; for example, a placement new into the address of - /// a local variable. In such a case, it's quite reasonable to - /// just ignore the returned alignment when it isn't from an - /// explicit source. + /// One reasonable way to use this information is when there's a language + /// guarantee that the pointer must be aligned to some stricter value, and + /// we're simply trying to ensure that sufficiently obvious uses of under- + /// aligned objects don't get miscompiled; for example, a placement new + /// into the address of a local variable. In such a case, it's quite + /// reasonable to just ignore the returned alignment when it isn't from an + /// explicit source. Address EmitPointerWithAlignment(const Expr *Addr, - AlignmentSource *Source = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); private: QualType getVarArgType(const Expr *Arg); - const TargetCodeGenInfo &getTargetHooks() const { - return CGM.getTargetCodeGenInfo(); - } - void EmitDeclMetadata(); BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType, @@ -3708,6 +3928,11 @@ private: void AddObjCARCExceptionMetadata(llvm::Instruction *Inst); llvm::Value *GetValueForARMHint(unsigned BuiltinID); + llvm::Value *EmitX86CpuIs(const CallExpr *E); + llvm::Value *EmitX86CpuIs(StringRef CPUStr); + llvm::Value *EmitX86CpuSupports(const CallExpr *E); + llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); + llvm::Value *EmitX86CpuInit(); }; /// Helper class with most of the code for saving a value for a diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index a3920e1add..b2a18a03f2 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -23,7 +23,7 @@ #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" -#include "CodeGenTBAA.h" +#include "ConstantEmitter.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -45,6 +45,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -59,6 +60,11 @@ using namespace clang; using namespace CodeGen; +static llvm::cl::opt<bool> LimitedCoverage( + "limited-coverage-experimental", llvm::cl::ZeroOrMore, + llvm::cl::desc("Emit limited coverage mapping information (experimental)"), + llvm::cl::init(false)); + static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { @@ -111,6 +117,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = Int8Ty->getPointerTo(0); Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + AllocaInt8PtrTy = Int8Ty->getPointerTo( + M.getDataLayout().getAllocaAddrSpace()); + ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC(); @@ -367,13 +376,18 @@ void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, if (MainFile.empty()) MainFile = "<stdin>"; Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; - } else - Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Missing - << Mismatched; + } else { + if (Mismatched > 0) + Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched; + + if (Missing > 0) + Diags.Report(diag::warn_profile_data_missing) << Visited << Missing; + } } void CodeGenModule::Release() { EmitDeferred(); + EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); checkAliases(); @@ -392,8 +406,11 @@ void CodeGenModule::Release() { } if (OpenMPRuntime) if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) - AddGlobalCtor(OpenMPRegistrationFunction, 0); + OpenMPRuntime->emitRegistrationFunction()) { + auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? + OpenMPRegistrationFunction : nullptr; + AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); + } if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) @@ -406,8 +423,11 @@ void CodeGenModule::Release() { EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); - if (CodeGenOpts.SanitizeCfiCrossDso) + if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); + CodeGenFunction(*this).EmitCfiCheckStub(); + } + emitAtAvailableLinkGuard(); emitLLVMUsed(); if (SanStats) SanStats->finish(); @@ -416,6 +436,12 @@ void CodeGenModule::Release() { (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } + + // Record mregparm value now so it is visible through rest of codegen. + if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) + getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", + CodeGenOpts.NumRegisterParameters); + if (CodeGenOpts.DwarfVersion) { // We actually want the latest version when there are conflicts. // We can change from Warning to Latest if such mode is supported. @@ -450,17 +476,17 @@ void CodeGenModule::Release() { llvm::DEBUG_METADATA_VERSION); // We need to record the widths of enums and wchar_t, so that we can generate - // the correct build attributes in the ARM backend. + // the correct build attributes in the ARM backend. wchar_size is also used by + // TargetLibraryInfo. + uint64_t WCharWidth = + Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); + getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); + llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch(); if ( Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb) { - // Width of wchar_t in bytes - uint64_t WCharWidth = - Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); - getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); - // The minimum width of an enum in bytes uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4; getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); @@ -479,6 +505,26 @@ void CodeGenModule::Release() { LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); } + // Emit OpenCL specific module metadata: OpenCL/SPIR version. + if (LangOpts.OpenCL) { + EmitOpenCLMetadata(); + // Emit SPIR version. + if (getTriple().getArch() == llvm::Triple::spir || + getTriple().getArch() == llvm::Triple::spir64) { + // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the + // opencl.spir.version named metadata. + llvm::Metadata *SPIRVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, LangOpts.OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))}; + llvm::NamedMDNode *SPIRVerMD = + TheModule.getOrInsertNamedMetadata("opencl.spir.version"); + llvm::LLVMContext &Ctx = TheModule.getContext(); + SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); + } + } + if (uint32_t PLevel = Context.getLangOpts().PICLevel) { assert(PLevel < 3 && "Invalid PIC Level"); getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel)); @@ -502,6 +548,20 @@ void CodeGenModule::Release() { EmitTargetMetadata(); } +void CodeGenModule::EmitOpenCLMetadata() { + // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the + // opencl.ocl.version named metadata node. + llvm::Metadata *OCLVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, LangOpts.OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))}; + llvm::NamedMDNode *OCLVerMD = + TheModule.getOrInsertNamedMetadata("opencl.ocl.version"); + llvm::LLVMContext &Ctx = TheModule.getContext(); + OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +} + void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. Types.UpdateCompletedType(TD); @@ -512,16 +572,20 @@ void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { Types.RefreshTypeCacheForClass(RD); } -llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) { +llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { if (!TBAA) return nullptr; - return TBAA->getTBAAInfo(QTy); + return TBAA->getTypeInfo(QTy); } -llvm::MDNode *CodeGenModule::getTBAAInfoForVTablePtr() { +TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { + return TBAAAccessInfo(getTBAATypeInfo(AccessType)); +} + +TBAAAccessInfo CodeGenModule::getTBAAVTablePtrAccessInfo() { if (!TBAA) - return nullptr; - return TBAA->getTBAAInfoForVTablePtr(); + return TBAAAccessInfo(); + return TBAA->getVTablePtrAccessInfo(); } llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { @@ -530,36 +594,43 @@ llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { return TBAA->getTBAAStructInfo(QTy); } -llvm::MDNode *CodeGenModule::getTBAAStructTagInfo(QualType BaseTy, - llvm::MDNode *AccessN, - uint64_t O) { +llvm::MDNode *CodeGenModule::getTBAABaseTypeInfo(QualType QTy) { if (!TBAA) return nullptr; - return TBAA->getTBAAStructTagInfo(BaseTy, AccessN, O); + return TBAA->getBaseTypeInfo(QTy); +} + +llvm::MDNode *CodeGenModule::getTBAAAccessTagInfo(TBAAAccessInfo Info) { + if (!TBAA) + return nullptr; + return TBAA->getAccessTagInfo(Info); +} + +TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo) { + if (!TBAA) + return TBAAAccessInfo(); + return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo); +} + +TBAAAccessInfo +CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB) { + if (!TBAA) + return TBAAAccessInfo(); + return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB); } -/// Decorate the instruction with a TBAA tag. For both scalar TBAA -/// and struct-path aware TBAA, the tag has the same format: -/// base type, access type and offset. -/// When ConvertTypeToTag is true, we create a tag based on the scalar type. void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, - llvm::MDNode *TBAAInfo, - bool ConvertTypeToTag) { - if (ConvertTypeToTag && TBAA) - Inst->setMetadata(llvm::LLVMContext::MD_tbaa, - TBAA->getTBAAScalarTagInfo(TBAAInfo)); - else - Inst->setMetadata(llvm::LLVMContext::MD_tbaa, TBAAInfo); + TBAAAccessInfo TBAAInfo) { + if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) + Inst->setMetadata(llvm::LLVMContext::MD_tbaa, Tag); } void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { - llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - auto *MetaDataNode = dyn_cast<llvm::MDNode>(MD); - // Check if we have to wrap MDString in MDNode. - if (!MetaDataNode) - MetaDataNode = llvm::MDNode::get(getLLVMContext(), MD); - I->setMetadata(llvm::LLVMContext::MD_invariant_group, MetaDataNode); + I->setMetadata(llvm::LLVMContext::MD_invariant_group, + llvm::MDNode::get(getLLVMContext(), {})); } void CodeGenModule::Error(SourceLocation loc, StringRef message) { @@ -655,9 +726,9 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { } } - StringRef &FoundStr = MangledDeclNames[CanonicalGD]; - if (!FoundStr.empty()) - return FoundStr; + auto FoundName = MangledDeclNames.find(CanonicalGD); + if (FoundName != MangledDeclNames.end()) + return FoundName->second; const auto *ND = cast<NamedDecl>(GD.getDecl()); SmallString<256> Buffer; @@ -688,7 +759,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // Keep the first result in the case of a mangling collision. auto Result = Manglings.insert(std::make_pair(Str, GD)); - return FoundStr = Result.first->first(); + return MangledDeclNames[CanonicalGD] = Result.first->first(); } StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD, @@ -740,7 +811,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( - Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr); + Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy); // Construct the constructor and destructor arrays. ConstantInitBuilder builder(*this); @@ -830,10 +901,9 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, const CGFunctionInfo &Info, llvm::Function *F) { unsigned CallingConv; - AttributeListType AttributeList; - ConstructAttributeList(F->getName(), Info, D, AttributeList, CallingConv, - false); - F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList)); + llvm::AttributeList PAL; + ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false); + F->setAttributes(PAL); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -882,14 +952,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), - llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); return; } - if (D->hasAttr<OptimizeNoneAttr>()) { + // Track whether we need to add the optnone LLVM attribute, + // starting with the default for this optimization level. + bool ShouldAddOptNone = + !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // We can't add optnone in the following cases, it won't pass the verifier. + ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); + ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>(); + + if (ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. @@ -943,7 +1019,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // function. if (!D->hasAttr<OptimizeNoneAttr>()) { if (D->hasAttr<ColdAttr>()) { - B.addAttribute(llvm::Attribute::OptimizeForSize); + if (!ShouldAddOptNone) + B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } @@ -951,9 +1028,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get( - F->getContext(), llvm::AttributeSet::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -999,11 +1074,27 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO) { SetCommonAttributes(D, GO); - if (D) + if (D) { + if (auto *GV = dyn_cast<llvm::GlobalVariable>(GO)) { + if (auto *SA = D->getAttr<PragmaClangBSSSectionAttr>()) + GV->addAttribute("bss-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangDataSectionAttr>()) + GV->addAttribute("data-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>()) + GV->addAttribute("rodata-section", SA->getName()); + } + + if (auto *F = dyn_cast<llvm::Function>(GO)) { + if (auto *SA = D->getAttr<PragmaClangTextSectionAttr>()) + if (!D->getAttr<SectionAttr>()) + F->addFnAttr("implicit-section-name", SA->getName()); + } + if (const SectionAttr *SA = D->getAttr<SectionAttr>()) GO->setSection(SA->getName()); + } - getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); + getTargetCodeGenInfo().setTargetAttributes(D, GO, *this, ForDefinition); } void CodeGenModule::SetInternalFunctionAttributes(const Decl *D, @@ -1021,7 +1112,7 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); - if (LV.getLinkage() != ExternalLinkage) { + if (!isExternallyVisible(LV.getLinkage())) { // Don't set internal linkage on declarations. } else { if (ND->hasAttr<DLLImportAttr>()) { @@ -1029,7 +1120,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else if (ND->hasAttr<DLLExportAttr>()) { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) { // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. @@ -1062,6 +1152,7 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); + F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); // Emit a hash-based bit set entry for cross-DSO calls. if (CodeGenOpts.SanitizeCfiCrossDso) @@ -1071,7 +1162,9 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, - bool IsThunk) { + bool IsThunk, + ForDefinition_t IsForDefinition) { + if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) { // If this is an intrinsic function, set the function's attributes // to the intrinsic's attributes. @@ -1081,8 +1174,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, const auto *FD = cast<FunctionDecl>(GD.getDecl()); - if (!IsIncompleteFunction) + if (!IsIncompleteFunction) { SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F); + // Setup target-specific attributes. + if (!IsForDefinition) + getTargetCodeGenInfo().setTargetAttributes(FD, F, *this, + NotForDefinition); + } // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with @@ -1101,13 +1199,17 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, setLinkageAndVisibilityForGV(F, FD); + if (FD->getAttr<PragmaClangTextSectionAttr>()) { + F->addFnAttr("implicit-section-name"); + } + if (const SectionAttr *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. - F->addAttribute(llvm::AttributeSet::FunctionIndex, + F->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoBuiltin); // A sane operator new returns a non-aliasing pointer. @@ -1116,7 +1218,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, auto Kind = FD->getDeclName().getCXXOverloadedOperator(); if (getCodeGenOpts().AssumeSaneOperatorNew && (Kind == OO_New || Kind == OO_Array_New)) - F->addAttribute(llvm::AttributeSet::ReturnIndex, + F->addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias); } @@ -1145,7 +1247,7 @@ void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) { } static void emitUsed(CodeGenModule &CGM, StringRef Name, - std::vector<llvm::WeakVH> &List) { + std::vector<llvm::WeakTrackingVH> &List) { // Don't create llvm.used if there is no need. if (List.empty()) return; @@ -1197,7 +1299,7 @@ void CodeGenModule::AddDependentLib(StringRef Lib) { /// \brief Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, - SmallVectorImpl<llvm::Metadata *> &Metadata, + SmallVectorImpl<llvm::MDNode *> &Metadata, llvm::SmallPtrSet<Module *, 16> &Visited) { // Import this module's parent. if (Mod->Parent && Visited.insert(Mod->Parent).second) { @@ -1285,7 +1387,7 @@ void CodeGenModule::EmitModuleLinkOptions() { // Add link options for all of the imported modules in reverse topological // order. We don't do anything to try to order import link flags with respect // to linker options inserted by things like #pragma comment(). - SmallVector<llvm::Metadata *, 16> MetadataArgs; + SmallVector<llvm::MDNode *, 16> MetadataArgs; Visited.clear(); for (Module *M : LinkModules) if (Visited.insert(M).second) @@ -1294,9 +1396,9 @@ void CodeGenModule::EmitModuleLinkOptions() { LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end()); // Add the linker options metadata flag. - getModule().addModuleFlag(llvm::Module::AppendUnique, "Linker Options", - llvm::MDNode::get(getLLVMContext(), - LinkerOptionsMetadata)); + auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options"); + for (auto *MD : LinkerOptionsMetadata) + NMD->addOperand(MD); } void CodeGenModule::EmitDeferred() { @@ -1319,13 +1421,10 @@ void CodeGenModule::EmitDeferred() { // Grab the list of decls to emit. If EmitGlobalDefinition schedules more // work, it will not interfere with this. - std::vector<DeferredGlobal> CurDeclsToEmit; + std::vector<GlobalDecl> CurDeclsToEmit; CurDeclsToEmit.swap(DeferredDeclsToEmit); - for (DeferredGlobal &G : CurDeclsToEmit) { - GlobalDecl D = G.GD; - G.GV = nullptr; - + for (GlobalDecl &D : CurDeclsToEmit) { // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but @@ -1364,6 +1463,24 @@ void CodeGenModule::EmitDeferred() { } } +void CodeGenModule::EmitVTablesOpportunistically() { + // Try to emit external vtables as available_externally if they have emitted + // all inlined virtual functions. It runs after EmitDeferred() and therefore + // is not allowed to create new references to things that need to be emitted + // lazily. Note that it also uses fact that we eagerly emitting RTTI. + + assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables()) + && "Only emit opportunistic vtables with optimizations"); + + for (const CXXRecordDecl *RD : OpportunisticVTables) { + assert(getVTables().isVTableExternal(RD) && + "This queue should only contain external vtables"); + if (getCXXABI().canSpeculativelyEmitVTable(RD)) + VTables.GenerateClassData(RD); + } + OpportunisticVTables.clear(); +} + void CodeGenModule::EmitGlobalAnnotations() { if (Annotations.empty()) return; @@ -1435,20 +1552,21 @@ void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D, Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation())); } -bool CodeGenModule::isInSanitizerBlacklist(llvm::Function *Fn, +bool CodeGenModule::isInSanitizerBlacklist(SanitizerMask Kind, + llvm::Function *Fn, SourceLocation Loc) const { const auto &SanitizerBL = getContext().getSanitizerBlacklist(); // Blacklist by function name. - if (SanitizerBL.isBlacklistedFunction(Fn->getName())) + if (SanitizerBL.isBlacklistedFunction(Kind, Fn->getName())) return true; // Blacklist by location. if (Loc.isValid()) - return SanitizerBL.isBlacklistedLocation(Loc); + return SanitizerBL.isBlacklistedLocation(Kind, Loc); // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. auto &SM = Context.getSourceManager(); if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - return SanitizerBL.isBlacklistedFile(MainFile->getName()); + return SanitizerBL.isBlacklistedFile(Kind, MainFile->getName()); } return false; } @@ -1457,13 +1575,14 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category) const { // For now globals can be blacklisted only in ASan and KASan. - if (!LangOpts.Sanitize.hasOneOf( - SanitizerKind::Address | SanitizerKind::KernelAddress)) + const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask & + (SanitizerKind::Address | SanitizerKind::KernelAddress); + if (!EnabledAsanMask) return false; const auto &SanitizerBL = getContext().getSanitizerBlacklist(); - if (SanitizerBL.isBlacklistedGlobal(GV->getName(), Category)) + if (SanitizerBL.isBlacklistedGlobal(EnabledAsanMask, GV->getName(), Category)) return true; - if (SanitizerBL.isBlacklistedLocation(Loc, Category)) + if (SanitizerBL.isBlacklistedLocation(EnabledAsanMask, Loc, Category)) return true; // Check global type. if (!Ty.isNull()) { @@ -1475,13 +1594,41 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, // We allow to blacklist only record types (classes, structs etc.) if (Ty->isRecordType()) { std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy()); - if (SanitizerBL.isBlacklistedType(TypeStr, Category)) + if (SanitizerBL.isBlacklistedType(EnabledAsanMask, TypeStr, Category)) return true; } } return false; } +bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category) const { + if (!LangOpts.XRayInstrument) + return false; + const auto &XRayFilter = getContext().getXRayFilter(); + using ImbueAttr = XRayFunctionFilter::ImbueAttribute; + auto Attr = XRayFunctionFilter::ImbueAttribute::NONE; + if (Loc.isValid()) + Attr = XRayFilter.shouldImbueLocation(Loc, Category); + if (Attr == ImbueAttr::NONE) + Attr = XRayFilter.shouldImbueFunction(Fn->getName()); + switch (Attr) { + case ImbueAttr::NONE: + return false; + case ImbueAttr::ALWAYS: + Fn->addFnAttr("function-instrument", "xray-always"); + break; + case ImbueAttr::ALWAYS_ARG1: + Fn->addFnAttr("function-instrument", "xray-always"); + Fn->addFnAttr("xray-log-args", "1"); + break; + case ImbueAttr::NEVER: + Fn->addFnAttr("function-instrument", "xray-never"); + break; + } + return true; +} + bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) @@ -1678,13 +1825,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } StringRef MangledName = getMangledName(GD); - if (llvm::GlobalValue *GV = GetGlobalValue(MangledName)) { + if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. - addDeferredDeclToEmit(GV, GD); + addDeferredDeclToEmit(GD); } else if (MustBeEmitted(Global)) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); - addDeferredDeclToEmit(/*GV=*/nullptr, GD); + addDeferredDeclToEmit(GD); } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -1856,6 +2003,10 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { return !isTriviallyRecursive(F); } +bool CodeGenModule::shouldOpportunisticallyEmitVTables() { + return CodeGenOpts.OptimizationLevel > 0; +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -1904,13 +2055,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. -llvm::Constant * -CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, - llvm::Type *Ty, - GlobalDecl GD, bool ForVTable, - bool DontDefer, bool IsThunk, - llvm::AttributeSet ExtraAttrs, - ForDefinition_t IsForDefinition) { +llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, + bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, + ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); // Lookup the entry, lazily creating it if necessary. @@ -1999,13 +2147,11 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, assert(F->getName() == MangledName && "name was uniqued!"); if (D) - SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); - if (ExtraAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) { - llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeSet::FunctionIndex); - F->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(VMContext, - llvm::AttributeSet::FunctionIndex, - B)); + SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk, + IsForDefinition); + if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { + llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (!DontDefer) { @@ -2015,7 +2161,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, if (D && isa<CXXDestructorDecl>(D) && getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D), GD.getDtorType())) - addDeferredDeclToEmit(F, GD); + addDeferredDeclToEmit(GD); // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end @@ -2025,7 +2171,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, // Move the potentially referenced deferred decl to the // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). - addDeferredDeclToEmit(F, DDI->second); + addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're @@ -2045,7 +2191,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, FD = FD->getPreviousDecl()) { if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) { if (FD->doesThisDeclarationHaveABody()) { - addDeferredDeclToEmit(F, GD.getWithDecl(FD)); + addDeferredDeclToEmit(GD.getWithDecl(FD)); break; } } @@ -2080,7 +2226,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, - /*IsThunk=*/false, llvm::AttributeSet(), + /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); } @@ -2126,7 +2272,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// type and name. llvm::Constant * CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeSet ExtraAttrs, + llvm::AttributeList ExtraAttrs, bool Local) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, @@ -2154,9 +2300,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, /// CreateBuiltinFunction - Create a new builtin function with the specified /// type and name. llvm::Constant * -CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, - StringRef Name, - llvm::AttributeSet ExtraAttrs) { +CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, + llvm::AttributeList ExtraAttrs) { llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); @@ -2247,11 +2392,13 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return llvm::ConstantExpr::getBitCast(Entry, Ty); } - unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); + auto AddrSpace = GetGlobalVarAddressSpace(D); + auto TargetAddrSpace = getContext().getTargetAddressSpace(AddrSpace); + auto *GV = new llvm::GlobalVariable( getModule(), Ty->getElementType(), false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, - llvm::GlobalVariable::NotThreadLocal, AddrSpace); + llvm::GlobalVariable::NotThreadLocal, TargetAddrSpace); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. @@ -2274,7 +2421,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). - addDeferredDeclToEmit(GV, DDI->second); + addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); } @@ -2300,16 +2447,70 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, EmitGlobalVarDefinition(D); } + // Emit section information for extern variables. + if (D->hasExternalStorage()) { + if (const SectionAttr *SA = D->getAttr<SectionAttr>()) + GV->setSection(SA->getName()); + } + // Handle XCore specific ABI requirements. if (getTriple().getArch() == llvm::Triple::xcore && D->getLanguageLinkage() == CLanguageLinkage && D->getType().isConstant(Context) && isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) GV->setSection(".cp.rodata"); + + // Check if we a have a const declaration with an initializer, we may be + // able to emit it as available_externally to expose it's value to the + // optimizer. + if (Context.getLangOpts().CPlusPlus && GV->hasExternalLinkage() && + D->getType().isConstQualified() && !GV->hasInitializer() && + !D->hasDefinition() && D->hasInit() && !D->hasAttr<DLLImportAttr>()) { + const auto *Record = + Context.getBaseElementType(D->getType())->getAsCXXRecordDecl(); + bool HasMutableFields = Record && Record->hasMutableFields(); + if (!HasMutableFields) { + const VarDecl *InitDecl; + const Expr *InitExpr = D->getAnyInitializer(InitDecl); + if (InitExpr) { + ConstantEmitter emitter(*this); + llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl); + if (Init) { + auto *InitType = Init->getType(); + if (GV->getType()->getElementType() != InitType) { + // The type of the initializer does not match the definition. + // This happens when an initializer has a different type from + // the type of the global (because of padding at the end of a + // structure for instance). + GV->setName(StringRef()); + // Make a new global with the correct type, this is now guaranteed + // to work. + auto *NewGV = cast<llvm::GlobalVariable>( + GetAddrOfGlobalVar(D, InitType, IsForDefinition)); + + // Erase the old global, since it is no longer used. + cast<llvm::GlobalValue>(GV)->eraseFromParent(); + GV = NewGV; + } else { + GV->setInitializer(Init); + GV->setConstant(true); + GV->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); + } + emitter.finalize(GV); + } + } + } + } } - if (AddrSpace != Ty->getAddressSpace()) - return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty); + LangAS ExpectedAS = + D ? D->getType().getAddressSpace() + : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); + assert(getContext().getTargetAddressSpace(ExpectedAS) == + Ty->getPointerAddressSpace()); + if (AddrSpace != ExpectedAS) + return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, + ExpectedAS, Ty); return GV; } @@ -2443,18 +2644,27 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { getDataLayout().getTypeStoreSizeInBits(Ty)); } -unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D, - unsigned AddrSpace) { - if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) { - if (D->hasAttr<CUDAConstantAttr>()) - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant); - else if (D->hasAttr<CUDASharedAttr>()) - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared); +LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { + LangAS AddrSpace = LangAS::Default; + if (LangOpts.OpenCL) { + AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global; + assert(AddrSpace == LangAS::opencl_global || + AddrSpace == LangAS::opencl_constant || + AddrSpace == LangAS::opencl_local || + AddrSpace >= LangAS::FirstTargetAddressSpace); + return AddrSpace; + } + + if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { + if (D && D->hasAttr<CUDAConstantAttr>()) + return LangAS::cuda_constant; + else if (D && D->hasAttr<CUDASharedAttr>()) + return LangAS::cuda_shared; else - AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device); + return LangAS::cuda_device; } - return AddrSpace; + return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } template<typename SomeDecl> @@ -2539,6 +2749,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); + Optional<ConstantEmitter> emitter; + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization // as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. @@ -2559,7 +2771,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, Init = EmitNullConstant(D->getType()); } else { initializedGlobalDecl = GlobalDecl(D); - Init = EmitConstantInit(*InitDecl); + emitter.emplace(*this); + Init = emitter->tryEmitForInitializer(*InitDecl); if (!Init) { QualType T = InitExpr->getType(); @@ -2607,10 +2820,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). - if (!GV || - GV->getType()->getElementType() != InitType || + if (!GV || GV->getType()->getElementType() != InitType || GV->getType()->getAddressSpace() != - GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) { + getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); @@ -2673,7 +2885,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, Linkage = llvm::GlobalValue::InternalLinkage; } } + GV->setInitializer(Init); + if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && @@ -2762,6 +2976,14 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, if (D->hasAttr<SectionAttr>()) return true; + // A variable cannot be both common and exist in a section. + // We dont try to determine which is the right section in the front-end. + // If no specialized section name is applicable, it will resort to default. + if (D->hasAttr<PragmaClangBSSSectionAttr>() || + D->hasAttr<PragmaClangDataSectionAttr>() || + D->hasAttr<PragmaClangRodataSectionAttr>()) + return true; + // Thread local vars aren't considered common linkage. if (D->getTLSKind()) return true; @@ -2908,14 +3130,8 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, continue; // Get the call site's attribute list. - SmallVector<llvm::AttributeSet, 8> newAttrs; - llvm::AttributeSet oldAttrs = callSite.getAttributes(); - - // Collect any return attributes from the call. - if (oldAttrs.hasAttributes(llvm::AttributeSet::ReturnIndex)) - newAttrs.push_back( - llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getRetAttributes())); + SmallVector<llvm::AttributeSet, 8> newArgAttrs; + llvm::AttributeList oldAttrs = callSite.getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); @@ -2925,27 +3141,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; - for (llvm::Function::arg_iterator ai = newFn->arg_begin(), - ae = newFn->arg_end(); ai != ae; ++ai, ++argNo) { - if (callSite.getArgument(argNo)->getType() != ai->getType()) { + for (llvm::Argument &A : newFn->args()) { + if (callSite.getArgument(argNo)->getType() != A.getType()) { dontTransform = true; break; } // Add any parameter attributes. - if (oldAttrs.hasAttributes(argNo + 1)) - newAttrs. - push_back(llvm:: - AttributeSet::get(newFn->getContext(), - oldAttrs.getParamAttributes(argNo + 1))); + newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); + argNo++; } if (dontTransform) continue; - if (oldAttrs.hasAttributes(llvm::AttributeSet::FunctionIndex)) - newAttrs.push_back(llvm::AttributeSet::get(newFn->getContext(), - oldAttrs.getFnAttributes())); - // Okay, we can transform this. Create the new call instruction and copy // over the required information. newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); @@ -2969,8 +3177,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite.getInstruction()); - newCall.setAttributes( - llvm::AttributeSet::get(newFn->getContext(), newAttrs)); + newCall.setAttributes(llvm::AttributeList::get( + newFn->getContext(), oldAttrs.getFnAttributes(), + oldAttrs.getRetAttributes(), newArgAttrs)); newCall.setCallingConv(callSite.getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. @@ -3364,6 +3573,10 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { return ConstantAddress(GV, Alignment); } +bool CodeGenModule::getExpressionLocationsEnabled() const { + return !CodeGenOpts.EmitCodeView || CodeGenOpts.DebugColumnInfo; +} + QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); @@ -3594,12 +3807,18 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( !EvalResult.hasSideEffects()) Value = &EvalResult.Val; + LangAS AddrSpace = + VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); + + Optional<ConstantEmitter> emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. - InitialValue = EmitConstantValue(*Value, MaterializedType, nullptr); + emitter.emplace(*this); + InitialValue = emitter->emitForInitializer(*Value, AddrSpace, + MaterializedType); Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value); Type = InitialValue->getType(); } else { @@ -3624,20 +3843,25 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( Linkage = llvm::GlobalVariable::InternalLinkage; } } - unsigned AddrSpace = GetGlobalVarAddressSpace( - VD, getContext().getTargetAddressSpace(MaterializedType)); + auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), - /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, - AddrSpace); + /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); + if (emitter) emitter->finalize(GV); setGlobalVisibility(GV, VD); GV->setAlignment(Align.getQuantity()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) setTLSMode(GV, *VD); - MaterializedGlobalTemporaryMap[E] = GV; - return ConstantAddress(GV, Align); + llvm::Constant *CV = GV; + if (AddrSpace != LangAS::Default) + CV = getTargetCodeGenInfo().performAddrSpaceCast( + *this, GV, AddrSpace, LangAS::Default, + Type->getPointerTo( + getContext().getTargetAddressSpace(LangAS::Default))); + MaterializedGlobalTemporaryMap[E] = CV; + return ConstantAddress(CV, Align); } /// EmitObjCPropertyImplementations - Emit information for synthesized @@ -3779,11 +4003,16 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { AddDeferredUnusedCoverageMapping(D); break; + case Decl::CXXDeductionGuide: + // Function-like, but does not result in code emission. + break; + case Decl::Var: case Decl::Decomposition: // Skip variable templates if (cast<VarDecl>(D)->getDescribedVarTemplate()) return; + LLVM_FALLTHROUGH; case Decl::VarTemplateSpecialization: EmitGlobal(cast<VarDecl>(D)); if (auto *DD = dyn_cast<DecompositionDecl>(D)) @@ -3802,6 +4031,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitDeclContext(cast<NamespaceDecl>(D)); break; case Decl::CXXRecord: + if (DebugInfo) { + if (auto *ES = D->getASTContext().getExternalSource()) + if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) + DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D)); + } // Emit any static data members, they may be definitions. for (auto *I : cast<CXXRecordDecl>(D)->decls()) if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) @@ -4017,6 +4251,9 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { case Decl::CXXDestructor: { if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody()) return; + SourceManager &SM = getContext().getSourceManager(); + if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getLocStart())) + return; auto I = DeferredEmptyCoverageMappingDecls.find(D); if (I == DeferredEmptyCoverageMappingDecls.end()) DeferredEmptyCoverageMappingDecls[D] = true; @@ -4307,6 +4544,60 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return InternalId; } +// Generalize pointer types to a void pointer with the qualifiers of the +// originally pointed-to type, e.g. 'const char *' and 'char * const *' +// generalize to 'const void *' while 'char *' and 'const char **' generalize to +// 'void *'. +static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { + if (!Ty->isPointerType()) + return Ty; + + return Ctx.getPointerType( + QualType(Ctx.VoidTy).withCVRQualifiers( + Ty->getPointeeType().getCVRQualifiers())); +} + +// Apply type generalization to a FunctionType's return and argument types +static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { + if (auto *FnType = Ty->getAs<FunctionProtoType>()) { + SmallVector<QualType, 8> GeneralizedParams; + for (auto &Param : FnType->param_types()) + GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); + + return Ctx.getFunctionType( + GeneralizeType(Ctx, FnType->getReturnType()), + GeneralizedParams, FnType->getExtProtoInfo()); + } + + if (auto *FnType = Ty->getAs<FunctionNoProtoType>()) + return Ctx.getFunctionNoProtoType( + GeneralizeType(Ctx, FnType->getReturnType())); + + llvm_unreachable("Encountered unknown FunctionType"); +} + +llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { + T = GeneralizeFunctionType(getContext(), T); + + llvm::Metadata *&InternalId = GeneralizedMetadataIdMap[T.getCanonicalType()]; + if (InternalId) + return InternalId; + + if (isExternallyVisible(T->getLinkage())) { + std::string OutName; + llvm::raw_string_ostream Out(OutName); + getCXXABI().getMangleContext().mangleTypeName(T, Out); + Out << ".generalized"; + + InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); + } else { + InternalId = llvm::MDNode::getDistinct(getLLVMContext(), + llvm::ArrayRef<llvm::Metadata *>()); + } + + return InternalId; +} + /// Returns whether this module needs the "all-vtables" type identifier. bool CodeGenModule::NeedAllVtablesTypeId() const { // Returns true if at least one of vtable-based CFI checkers is enabled and @@ -4348,20 +4639,30 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, // If we have a TargetAttr build up the feature map based on that. TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + ParsedAttr.Features.erase( + llvm::remove_if(ParsedAttr.Features, + [&](const std::string &Feat) { + return !Target.isValidFeatureName( + StringRef{Feat}.substr(1)); + }), + ParsedAttr.Features.end()); + // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - ParsedAttr.first.insert(ParsedAttr.first.begin(), + ParsedAttr.Features.insert(ParsedAttr.Features.begin(), Target.getTargetOpts().FeaturesAsWritten.begin(), Target.getTargetOpts().FeaturesAsWritten.end()); - if (ParsedAttr.second != "") - TargetCPU = ParsedAttr.second; + if (ParsedAttr.Architecture != "" && + Target.isValidCPUName(ParsedAttr.Architecture)) + TargetCPU = ParsedAttr.Architecture; // Now populate the feature map, first with the TargetCPU which is either // the default or a new one from the target attribute string. Then we'll use // the passed in features (FeaturesAsWritten) along with the new ones from // the attribute. - Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, ParsedAttr.first); + Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, + ParsedAttr.Features); } else { Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Target.getTargetOpts().Features); @@ -4377,8 +4678,8 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() { llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { - llvm::Constant *C = EmitConstantExpr(E, E->getType(), &CGF); - auto SamplerT = getOpenCLRuntime().getSamplerType(); + llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); + auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 8faebe1ea7..7a47c576c0 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SanitizerBlacklist.h" +#include "clang/Basic/XRayLists.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -314,14 +315,9 @@ private: /// This is a list of deferred decls which we have seen that *are* actually /// referenced. These get code generated when the module is done. - struct DeferredGlobal { - DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {} - llvm::TrackingVH<llvm::GlobalValue> GV; - GlobalDecl GD; - }; - std::vector<DeferredGlobal> DeferredDeclsToEmit; - void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) { - DeferredDeclsToEmit.emplace_back(GV, GD); + std::vector<GlobalDecl> DeferredDeclsToEmit; + void addDeferredDeclToEmit(GlobalDecl GD) { + DeferredDeclsToEmit.emplace_back(GD); } /// List of alias we have emitted. Used to make sure that what they point to @@ -345,11 +341,14 @@ private: /// A queue of (optional) vtables to consider emitting. std::vector<const CXXRecordDecl*> DeferredVTables; + /// A queue of (optional) vtables that may be emitted opportunistically. + std::vector<const CXXRecordDecl *> OpportunisticVTables; + /// List of global values which are required to be present in the object file; /// bitcast to i8*. This is used for forcing visibility of symbols which may /// otherwise be optimized out. - std::vector<llvm::WeakVH> LLVMUsed; - std::vector<llvm::WeakVH> LLVMCompilerUsed; + std::vector<llvm::WeakTrackingVH> LLVMUsed; + std::vector<llvm::WeakTrackingVH> LLVMCompilerUsed; /// Store the list of global constructors and their respective priorities to /// be emitted when the translation unit is complete. @@ -420,7 +419,7 @@ private: SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. - std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors; + std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors; /// \brief The complete set of modules that has been imported. llvm::SetVector<clang::Module *> ImportedModules; @@ -430,14 +429,14 @@ private: llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers; /// \brief A vector of metadata strings. - SmallVector<llvm::Metadata *, 16> LinkerOptionsMetadata; + SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata; /// @name Cache for Objective-C runtime types /// @{ /// Cached reference to the class for constant strings. This value has type /// int * but is actually an Obj-C class pointer. - llvm::WeakVH CFConstantStringClassRef; + llvm::WeakTrackingVH CFConstantStringClassRef; /// \brief The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. @@ -454,7 +453,7 @@ private: bool isTriviallyRecursive(const FunctionDecl *F); bool shouldEmitFunction(GlobalDecl GD); - + bool shouldOpportunisticallyEmitVTables(); /// Map used to be sure we don't emit the same CompoundLiteral twice. llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *> EmittedCompoundLiterals; @@ -498,7 +497,9 @@ private: /// Mapping from canonical types to their metadata identifiers. We need to /// maintain this mapping because identifiers may be formed from distinct /// MDNodes. - llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap; + typedef llvm::DenseMap<QualType, llvm::Metadata *> MetadataTypeMap; + MetadataTypeMap MetadataIdMap; + MetadataTypeMap GeneralizedMetadataIdMap; public: CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, @@ -514,6 +515,9 @@ public: /// Finalize LLVM code generation. void Release(); + /// Return true if we should emit location information for expressions. + bool getExpressionLocationsEnabled() const; + /// Return a reference to the configured Objective-C runtime. CGObjCRuntime &getObjCRuntime() { if (!ObjCRuntime) createObjCRuntime(); @@ -650,25 +654,53 @@ public: CtorList &getGlobalCtors() { return GlobalCtors; } CtorList &getGlobalDtors() { return GlobalDtors; } - llvm::MDNode *getTBAAInfo(QualType QTy); - llvm::MDNode *getTBAAInfoForVTablePtr(); + /// getTBAATypeInfo - Get metadata used to describe accesses to objects of + /// the given type. + llvm::MDNode *getTBAATypeInfo(QualType QTy); + + /// getTBAAAccessInfo - Get TBAA information that describes an access to + /// an object of the given type. + TBAAAccessInfo getTBAAAccessInfo(QualType AccessType); + + /// getTBAAVTablePtrAccessInfo - Get the TBAA information that describes an + /// access to a virtual table pointer. + TBAAAccessInfo getTBAAVTablePtrAccessInfo(); + llvm::MDNode *getTBAAStructInfo(QualType QTy); - /// Return the path-aware tag for given base type, access node and offset. - llvm::MDNode *getTBAAStructTagInfo(QualType BaseTy, llvm::MDNode *AccessN, - uint64_t O); + + /// getTBAABaseTypeInfo - Get metadata that describes the given base access + /// type. Return null if the type is not suitable for use in TBAA access tags. + llvm::MDNode *getTBAABaseTypeInfo(QualType QTy); + + /// getTBAAAccessTagInfo - Get TBAA tag for a given memory access. + llvm::MDNode *getTBAAAccessTagInfo(TBAAAccessInfo Info); + + /// mergeTBAAInfoForCast - Get merged TBAA information for the purposes of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); + + /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the + /// purposes of conditional operator. + TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB); + + /// getTBAAInfoForSubobject - Get TBAA information for an access with a given + /// base lvalue. + TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) { + if (Base.getTBAAInfo().isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); + return getTBAAAccessInfo(AccessType); + } bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); bool isPaddedAtomicType(QualType type); bool isPaddedAtomicType(const AtomicType *type); - /// Decorate the instruction with a TBAA tag. For scalar TBAA, the tag - /// is the same as the type. For struct-path aware TBAA, the tag - /// is different from the type: base type, access type and offset. - /// When ConvertTypeToTag is true, we create a tag based on the scalar type. + /// DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag. void DecorateInstructionWithTBAA(llvm::Instruction *Inst, - llvm::MDNode *TBAAInfo, - bool ConvertTypeToTag = true); + TBAAAccessInfo TBAAInfo); /// Adds !invariant.barrier !tag to instruction void DecorateInstructionWithInvariantGroup(llvm::Instruction *I, @@ -711,11 +743,15 @@ public: SourceLocation Loc = SourceLocation(), bool TLS = false); - /// Return the address space of the underlying global variable for D, as + /// Return the AST address space of the underlying global variable for D, as /// determined by its declaration. Normally this is the same as the address /// space of D's type, but in CUDA, address spaces are associated with - /// declarations, not types. - unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); + /// declarations, not types. If D is nullptr, return the default address + /// space for global variable. + /// + /// For languages without explicit address spaces, if D has default address + /// space, target-specific global or constant address space may be returned. + LangAS GetGlobalVarAddressSpace(const VarDecl *D); /// Return the llvm::Constant for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created @@ -910,14 +946,13 @@ public: /// Create a new runtime function with the specified type and name. llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false); /// Create a new compiler builtin function with the specified type and name. - llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty, - StringRef Name, - llvm::AttributeSet ExtraAttrs = - llvm::AttributeSet()); + llvm::Constant * + CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList()); /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -940,27 +975,6 @@ public: llvm::Constant *getMemberPointerConstant(const UnaryOperator *e); - /// Try to emit the initializer for the given declaration as a constant; - /// returns 0 if the expression cannot be emitted as a constant. - llvm::Constant *EmitConstantInit(const VarDecl &D, - CodeGenFunction *CGF = nullptr); - - /// Try to emit the given expression as a constant; returns 0 if the - /// expression cannot be emitted as a constant. - llvm::Constant *EmitConstantExpr(const Expr *E, QualType DestType, - CodeGenFunction *CGF = nullptr); - - /// Emit the given constant value as a constant, in the type's scalar - /// representation. - llvm::Constant *EmitConstantValue(const APValue &Value, QualType DestType, - CodeGenFunction *CGF = nullptr); - - /// Emit the given constant value as a constant, in the type's memory - /// representation. - llvm::Constant *EmitConstantValueForMemory(const APValue &Value, - QualType DestType, - CodeGenFunction *CGF = nullptr); - /// \brief Emit type info if type of an expression is a variably modified /// type. Also emit proper debug info for cast types. void EmitExplicitCastExprType(const ExplicitCastExpr *E, @@ -1020,11 +1034,12 @@ public: /// \param CalleeInfo - The callee information these attributes are being /// constructed for. If valid, the attributes applied to this decl may /// contribute to the function attributes and calling convention. - /// \param PAL [out] - On return, the attribute list to use. + /// \param Attrs [out] - On return, the attribute list to use. /// \param CallingConv [out] - On return, the LLVM calling convention to use. void ConstructAttributeList(StringRef Name, const CGFunctionInfo &Info, - CGCalleeInfo CalleeInfo, AttributeListType &PAL, - unsigned &CallingConv, bool AttrOnCallSite); + CGCalleeInfo CalleeInfo, + llvm::AttributeList &Attrs, unsigned &CallingConv, + bool AttrOnCallSite); /// Adds attributes to F according to our CodeGenOptions and LangOptions, as /// though we had emitted it ourselves. We remove any attributes on F that @@ -1059,13 +1074,14 @@ public: void RefreshTypeCacheForClass(const CXXRecordDecl *Class); - /// \brief Appends Opts to the "Linker Options" metadata value. + /// \brief Appends Opts to the "llvm.linker.options" metadata value. void AppendLinkerOptions(StringRef Opts); /// \brief Appends a detect mismatch command to the linker options. void AddDetectMismatch(StringRef Name, StringRef Value); - /// \brief Appends a dependent lib to the "Linker Options" metadata value. + /// \brief Appends a dependent lib to the "llvm.linker.options" metadata + /// value. void AddDependentLib(StringRef Lib); llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); @@ -1120,12 +1136,19 @@ public: /// annotations are emitted during finalization of the LLVM code. void AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV); - bool isInSanitizerBlacklist(llvm::Function *Fn, SourceLocation Loc) const; + bool isInSanitizerBlacklist(SanitizerMask Kind, llvm::Function *Fn, + SourceLocation Loc) const; bool isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category = StringRef()) const; + /// Imbue XRay attributes to a function, applying the always/never attribute + /// lists in the process. Returns true if we did imbue attributes this way, + /// false otherwise. + bool imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, + StringRef Category = StringRef()) const; + SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); } @@ -1138,8 +1161,7 @@ public: /// are emitted lazily. void EmitGlobal(GlobalDecl D); - bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target, - bool InEveryTU); + bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); /// Set attributes for a global definition. @@ -1189,6 +1211,11 @@ public: /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); + /// Create a metadata identifier for the generalization of the given type. + /// This may either be an MDString (for external identifiers) or a distinct + /// unnamed MDNode (for internal identifiers). + llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T); + /// Create and attach type metadata to the given function. void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); @@ -1199,7 +1226,7 @@ public: void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD); - /// \breif Get the declaration of std::terminate for the platform. + /// \brief Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); @@ -1213,12 +1240,11 @@ public: llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT); private: - llvm::Constant * - GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, - bool ForVTable, bool DontDefer = false, - bool IsThunk = false, - llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), - ForDefinition_t IsForDefinition = NotForDefinition); + llvm::Constant *GetOrCreateLLVMFunction( + StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, + bool DontDefer = false, bool IsThunk = false, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + ForDefinition_t IsForDefinition = NotForDefinition); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, @@ -1230,7 +1256,8 @@ private: /// Set function attributes for a function declaration. void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, - bool IsIncompleteFunction, bool IsThunk); + bool IsIncompleteFunction, bool IsThunk, + ForDefinition_t IsForDefinition); void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); @@ -1277,6 +1304,12 @@ private: /// Emit any needed decls for which code generation was deferred. void EmitDeferred(); + /// Try to emit external vtables as available_externally if they have emitted + /// all inlined virtual functions. It runs after EmitDeferred() and therefore + /// is not allowed to create new references to things that need to be emitted + /// lazily. + void EmitVTablesOpportunistically(); + /// Call replaceAllUsesWith on all pairs in Replacements. void applyReplacements(); @@ -1288,6 +1321,10 @@ private: /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); + /// Emit a dummy function that reference a CoreFoundation symbol when + /// @available is used on Darwin. + void emitAtAvailableLinkGuard(); + /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); @@ -1306,6 +1343,9 @@ private: /// Emits target specific Metadata for global declarations. void EmitTargetMetadata(); + /// Emits OpenCL specific Metadata e.g. OpenCL version. + void EmitOpenCLMetadata(); + /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and /// .gcda files in a way that persists in .bc files. void EmitCoverageFile(); @@ -1333,6 +1373,7 @@ private: bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs); }; + } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index 90711b5479..c3d66c1dab 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -612,11 +612,14 @@ uint64_t PGOHash::finalize() { llvm::MD5::MD5Result Result; MD5.final(Result); using namespace llvm::support; - return endian::read<uint64_t, little, unaligned>(Result); + return Result.low(); } void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { const Decl *D = GD.getDecl(); + if (!D->hasBody()) + return; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) @@ -666,7 +669,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { } bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { - if (SkipCoverageMapping) + if (!D->getBody()) return true; // Don't map the functions in system headers. diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 0026df570b..0759e65388 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -40,14 +40,11 @@ private: std::unique_ptr<llvm::InstrProfRecord> ProfRecord; std::vector<uint64_t> RegionCounts; uint64_t CurrentRegionCount; - /// \brief A flag that is set to true when this function doesn't need - /// to have coverage mapping data. - bool SkipCoverageMapping; public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), - FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {} + : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), + CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index 04224e7267..66fdb5f799 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -88,8 +88,26 @@ static bool TypeHasMayAlias(QualType QTy) { return false; } -llvm::MDNode * -CodeGenTBAA::getTBAAInfo(QualType QTy) { +/// Check if the given type is a valid base type to be used in access tags. +static bool isValidBaseType(QualType QTy) { + if (QTy->isReferenceType()) + return false; + if (const RecordType *TTy = QTy->getAs<RecordType>()) { + const RecordDecl *RD = TTy->getDecl()->getDefinition(); + // Incomplete types are not valid base access types. + if (!RD) + return false; + if (RD->hasFlexibleArrayMember()) + return false; + // RD can be struct, union, class, interface or enum. + // For now, we only handle struct and class. + if (RD->isStruct() || RD->isClass()) + return true; + } + return false; +} + +llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) return nullptr; @@ -99,8 +117,16 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { if (TypeHasMayAlias(QTy)) return getChar(); - const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); + // We need this function to not fall back to returning the "omnipotent char" + // type node for aggregate and union types. Otherwise, any dereference of an + // aggregate will result into the may-alias access descriptor, meaning all + // subsequent accesses to direct and indirect members of that aggregate will + // be considered may-alias too. + // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function. + if (isValidBaseType(QTy)) + return getBaseTypeInfo(QTy); + const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); if (llvm::MDNode *N = MetadataCache[Ty]) return N; @@ -120,15 +146,15 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { // Unsigned types can alias their corresponding signed types. case BuiltinType::UShort: - return getTBAAInfo(Context.ShortTy); + return getTypeInfo(Context.ShortTy); case BuiltinType::UInt: - return getTBAAInfo(Context.IntTy); + return getTypeInfo(Context.IntTy); case BuiltinType::ULong: - return getTBAAInfo(Context.LongTy); + return getTypeInfo(Context.LongTy); case BuiltinType::ULongLong: - return getTBAAInfo(Context.LongLongTy); + return getTypeInfo(Context.LongLongTy); case BuiltinType::UInt128: - return getTBAAInfo(Context.Int128Ty); + return getTypeInfo(Context.Int128Ty); // Treat all other builtin types as distinct types. This includes // treating wchar_t, char16_t, and char32_t as distinct from their @@ -139,10 +165,16 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { } } - // Handle pointers. + // C++1z [basic.lval]p10: "If a program attempts to access the stored value of + // an object through a glvalue of other than one of the following types the + // behavior is undefined: [...] a char, unsigned char, or std::byte type." + if (Ty->isStdByteType()) + return MetadataCache[Ty] = getChar(); + + // Handle pointers and references. // TODO: Implement C++'s type "similarity" and consider dis-"similar" // pointers distinct. - if (Ty->isPointerType()) + if (Ty->isPointerType() || Ty->isReferenceType()) return MetadataCache[Ty] = createTBAAScalarType("any pointer", getChar()); @@ -166,8 +198,8 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { return MetadataCache[Ty] = getChar(); } -llvm::MDNode *CodeGenTBAA::getTBAAInfoForVTablePtr() { - return createTBAAScalarType("vtable pointer", getRoot()); +TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo() { + return TBAAAccessInfo(createTBAAScalarType("vtable pointer", getRoot())); } bool @@ -206,8 +238,8 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset, /* Otherwise, treat whatever it is as a field. */ uint64_t Offset = BaseOffset; uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); - llvm::MDNode *TBAAInfo = MayAlias ? getChar() : getTBAAInfo(QTy); - llvm::MDNode *TBAATag = getTBAAScalarTagInfo(TBAAInfo); + llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); + llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType)); Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); return true; } @@ -227,26 +259,12 @@ CodeGenTBAA::getTBAAStructInfo(QualType QTy) { return StructMetadataCache[Ty] = nullptr; } -/// Check if the given type can be handled by path-aware TBAA. -static bool isTBAAPathStruct(QualType QTy) { - if (const RecordType *TTy = QTy->getAs<RecordType>()) { - const RecordDecl *RD = TTy->getDecl()->getDefinition(); - if (RD->hasFlexibleArrayMember()) - return false; - // RD can be struct, union, class, interface or enum. - // For now, we only handle struct and class. - if (RD->isStruct() || RD->isClass()) - return true; - } - return false; -} +llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { + if (!isValidBaseType(QTy)) + return nullptr; -llvm::MDNode * -CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); - assert(isTBAAPathStruct(QTy)); - - if (llvm::MDNode *N = StructTypeMetadataCache[Ty]) + if (llvm::MDNode *N = BaseTypeMetadataCache[Ty]) return N; if (const RecordType *TTy = QTy->getAs<RecordType>()) { @@ -258,13 +276,10 @@ CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { QualType FieldQTy = i->getType(); - llvm::MDNode *FieldNode; - if (isTBAAPathStruct(FieldQTy)) - FieldNode = getTBAAStructTypeInfo(FieldQTy); - else - FieldNode = getTBAAInfo(FieldQTy); + llvm::MDNode *FieldNode = isValidBaseType(FieldQTy) ? + getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); if (!FieldNode) - return StructTypeMetadataCache[Ty] = nullptr; + return BaseTypeMetadataCache[Ty] = nullptr; Fields.push_back(std::make_pair( FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth())); } @@ -278,46 +293,56 @@ CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { OutName = RD->getName(); } // Create the struct type node with a vector of pairs (offset, type). - return StructTypeMetadataCache[Ty] = + return BaseTypeMetadataCache[Ty] = MDHelper.createTBAAStructTypeNode(OutName, Fields); } - return StructMetadataCache[Ty] = nullptr; + return BaseTypeMetadataCache[Ty] = nullptr; } -/// Return a TBAA tag node for both scalar TBAA and struct-path aware TBAA. -llvm::MDNode * -CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, - uint64_t Offset) { - if (!AccessNode) +llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { + if (Info.isMayAlias()) + Info = TBAAAccessInfo(getChar()); + + if (!Info.AccessType) return nullptr; if (!CodeGenOpts.StructPathTBAA) - return getTBAAScalarTagInfo(AccessNode); + Info = TBAAAccessInfo(Info.AccessType); - const Type *BTy = Context.getCanonicalType(BaseQTy).getTypePtr(); - TBAAPathTag PathTag = TBAAPathTag(BTy, AccessNode, Offset); - if (llvm::MDNode *N = StructTagMetadataCache[PathTag]) + llvm::MDNode *&N = AccessTagMetadataCache[Info]; + if (N) return N; - llvm::MDNode *BNode = nullptr; - if (isTBAAPathStruct(BaseQTy)) - BNode = getTBAAStructTypeInfo(BaseQTy); - if (!BNode) - return StructTagMetadataCache[PathTag] = - MDHelper.createTBAAStructTagNode(AccessNode, AccessNode, 0); + if (!Info.BaseType) { + Info.BaseType = Info.AccessType; + assert(!Info.Offset && "Nonzero offset for an access with no base type!"); + } + return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType, + Info.Offset); +} - return StructTagMetadataCache[PathTag] = - MDHelper.createTBAAStructTagNode(BNode, AccessNode, Offset); +TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo) { + if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); + return TargetInfo; } -llvm::MDNode * -CodeGenTBAA::getTBAAScalarTagInfo(llvm::MDNode *AccessNode) { - if (!AccessNode) - return nullptr; - if (llvm::MDNode *N = ScalarTagMetadataCache[AccessNode]) - return N; +TBAAAccessInfo +CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB) { + if (InfoA == InfoB) + return InfoA; + + if (!InfoA || !InfoB) + return TBAAAccessInfo(); + + if (InfoA.isMayAlias() || InfoB.isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); - return ScalarTagMetadataCache[AccessNode] = - MDHelper.createTBAAStructTagNode(AccessNode, AccessNode, 0); + // TODO: Implement the rest of the logic here. For example, two accesses + // with same final access types result in an access to an object of that final + // access type regardless of their base types. + return TBAAAccessInfo::getMayAliasInfo(); } diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index ddb063d9e8..e4f459ce93 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -30,15 +30,72 @@ namespace clang { class Type; namespace CodeGen { - class CGRecordLayout; +class CGRecordLayout; - struct TBAAPathTag { - TBAAPathTag(const Type *B, const llvm::MDNode *A, uint64_t O) - : BaseT(B), AccessN(A), Offset(O) {} - const Type *BaseT; - const llvm::MDNode *AccessN; - uint64_t Offset; - }; +// TBAAAccessKind - A kind of TBAA memory access descriptor. +enum class TBAAAccessKind : unsigned { + Ordinary, + MayAlias, +}; + +// TBAAAccessInfo - Describes a memory access in terms of TBAA. +struct TBAAAccessInfo { + TBAAAccessInfo(TBAAAccessKind Kind, llvm::MDNode *BaseType, + llvm::MDNode *AccessType, uint64_t Offset) + : Kind(Kind), BaseType(BaseType), AccessType(AccessType), Offset(Offset) + {} + + TBAAAccessInfo(llvm::MDNode *BaseType, llvm::MDNode *AccessType, + uint64_t Offset) + : TBAAAccessInfo(TBAAAccessKind::Ordinary, BaseType, AccessType, Offset) + {} + + explicit TBAAAccessInfo(llvm::MDNode *AccessType) + : TBAAAccessInfo(/* BaseType= */ nullptr, AccessType, /* Offset= */ 0) + {} + + TBAAAccessInfo() + : TBAAAccessInfo(/* AccessType= */ nullptr) + {} + + static TBAAAccessInfo getMayAliasInfo() { + return TBAAAccessInfo(TBAAAccessKind::MayAlias, /* BaseType= */ nullptr, + /* AccessType= */ nullptr, /* Offset= */ 0); + } + + bool isMayAlias() const { return Kind == TBAAAccessKind::MayAlias; } + + bool operator==(const TBAAAccessInfo &Other) const { + return Kind == Other.Kind && + BaseType == Other.BaseType && + AccessType == Other.AccessType && + Offset == Other.Offset; + } + + bool operator!=(const TBAAAccessInfo &Other) const { + return !(*this == Other); + } + + explicit operator bool() const { + return *this != TBAAAccessInfo(); + } + + /// Kind - The kind of the access descriptor. + TBAAAccessKind Kind; + + /// BaseType - The base/leading access type. May be null if this access + /// descriptor represents an access that is not considered to be an access + /// to an aggregate or union member. + llvm::MDNode *BaseType; + + /// AccessType - The final access type. May be null if there is no TBAA + /// information available about this access. + llvm::MDNode *AccessType; + + /// Offset - The byte offset of the final access within the base one. Must be + /// zero if the base access type is not specified. + uint64_t Offset; +}; /// CodeGenTBAA - This class organizes the cross-module state that is used /// while lowering AST types to LLVM types. @@ -54,12 +111,10 @@ class CodeGenTBAA { /// MetadataCache - This maps clang::Types to scalar llvm::MDNodes describing /// them. llvm::DenseMap<const Type *, llvm::MDNode *> MetadataCache; - /// This maps clang::Types to a struct node in the type DAG. - llvm::DenseMap<const Type *, llvm::MDNode *> StructTypeMetadataCache; - /// This maps TBAAPathTags to a tag node. - llvm::DenseMap<TBAAPathTag, llvm::MDNode *> StructTagMetadataCache; - /// This maps a scalar type to a scalar tag node. - llvm::DenseMap<const llvm::MDNode *, llvm::MDNode *> ScalarTagMetadataCache; + /// This maps clang::Types to a base access type in the type DAG. + llvm::DenseMap<const Type *, llvm::MDNode *> BaseTypeMetadataCache; + /// This maps TBAA access descriptors to tag nodes. + llvm::DenseMap<TBAAAccessInfo, llvm::MDNode *> AccessTagMetadataCache; /// StructMetadataCache - This maps clang::Types to llvm::MDNodes describing /// them for struct assignments. @@ -95,27 +150,34 @@ public: MangleContext &MContext); ~CodeGenTBAA(); - /// getTBAAInfo - Get the TBAA MDNode to be used for a dereference - /// of the given type. - llvm::MDNode *getTBAAInfo(QualType QTy); + /// getTypeInfo - Get metadata used to describe accesses to objects of the + /// given type. + llvm::MDNode *getTypeInfo(QualType QTy); - /// getTBAAInfoForVTablePtr - Get the TBAA MDNode to be used for a - /// dereference of a vtable pointer. - llvm::MDNode *getTBAAInfoForVTablePtr(); + /// getVTablePtrAccessInfo - Get the TBAA information that describes an + /// access to a virtual table pointer. + TBAAAccessInfo getVTablePtrAccessInfo(); /// getTBAAStructInfo - Get the TBAAStruct MDNode to be used for a memcpy of /// the given type. llvm::MDNode *getTBAAStructInfo(QualType QTy); - /// Get the MDNode in the type DAG for given struct type QType. - llvm::MDNode *getTBAAStructTypeInfo(QualType QType); - /// Get the tag MDNode for a given base type, the actual scalar access MDNode - /// and offset into the base type. - llvm::MDNode *getTBAAStructTagInfo(QualType BaseQType, - llvm::MDNode *AccessNode, uint64_t Offset); + /// getBaseTypeInfo - Get metadata that describes the given base access type. + /// Return null if the type is not suitable for use in TBAA access tags. + llvm::MDNode *getBaseTypeInfo(QualType QTy); + + /// getAccessTagInfo - Get TBAA tag for a given memory access. + llvm::MDNode *getAccessTagInfo(TBAAAccessInfo Info); + + /// mergeTBAAInfoForCast - Get merged TBAA information for the purpose of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); - /// Get the scalar tag MDNode for a given scalar type. - llvm::MDNode *getTBAAScalarTagInfo(llvm::MDNode *AccessNode); + /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the + /// purpose of conditional operator. + TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB); }; } // end namespace CodeGen @@ -123,32 +185,36 @@ public: namespace llvm { -template<> struct DenseMapInfo<clang::CodeGen::TBAAPathTag> { - static clang::CodeGen::TBAAPathTag getEmptyKey() { - return clang::CodeGen::TBAAPathTag( - DenseMapInfo<const clang::Type *>::getEmptyKey(), - DenseMapInfo<const MDNode *>::getEmptyKey(), +template<> struct DenseMapInfo<clang::CodeGen::TBAAAccessInfo> { + static clang::CodeGen::TBAAAccessInfo getEmptyKey() { + unsigned UnsignedKey = DenseMapInfo<unsigned>::getEmptyKey(); + return clang::CodeGen::TBAAAccessInfo( + static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey), + DenseMapInfo<MDNode *>::getEmptyKey(), + DenseMapInfo<MDNode *>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()); } - static clang::CodeGen::TBAAPathTag getTombstoneKey() { - return clang::CodeGen::TBAAPathTag( - DenseMapInfo<const clang::Type *>::getTombstoneKey(), - DenseMapInfo<const MDNode *>::getTombstoneKey(), + static clang::CodeGen::TBAAAccessInfo getTombstoneKey() { + unsigned UnsignedKey = DenseMapInfo<unsigned>::getTombstoneKey(); + return clang::CodeGen::TBAAAccessInfo( + static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey), + DenseMapInfo<MDNode *>::getTombstoneKey(), + DenseMapInfo<MDNode *>::getTombstoneKey(), DenseMapInfo<uint64_t>::getTombstoneKey()); } - static unsigned getHashValue(const clang::CodeGen::TBAAPathTag &Val) { - return DenseMapInfo<const clang::Type *>::getHashValue(Val.BaseT) ^ - DenseMapInfo<const MDNode *>::getHashValue(Val.AccessN) ^ + static unsigned getHashValue(const clang::CodeGen::TBAAAccessInfo &Val) { + auto KindValue = static_cast<unsigned>(Val.Kind); + return DenseMapInfo<unsigned>::getHashValue(KindValue) ^ + DenseMapInfo<MDNode *>::getHashValue(Val.BaseType) ^ + DenseMapInfo<MDNode *>::getHashValue(Val.AccessType) ^ DenseMapInfo<uint64_t>::getHashValue(Val.Offset); } - static bool isEqual(const clang::CodeGen::TBAAPathTag &LHS, - const clang::CodeGen::TBAAPathTag &RHS) { - return LHS.BaseT == RHS.BaseT && - LHS.AccessN == RHS.AccessN && - LHS.Offset == RHS.Offset; + static bool isEqual(const clang::CodeGen::TBAAAccessInfo &LHS, + const clang::CodeGen::TBAAAccessInfo &RHS) { + return LHS == RHS; } }; diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h index 47e26bcaa1..2af7b30eaf 100644 --- a/lib/CodeGen/CodeGenTypeCache.h +++ b/lib/CodeGen/CodeGenTypeCache.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CODEGENTYPECACHE_H #include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" #include "llvm/IR/CallingConv.h" namespace llvm { @@ -60,6 +61,12 @@ struct CodeGenTypeCache { llvm::PointerType *Int8PtrPtrTy; }; + /// void* in alloca address space + union { + llvm::PointerType *AllocaVoidPtrTy; + llvm::PointerType *AllocaInt8PtrTy; + }; + /// The size and alignment of the builtin C type 'int'. This comes /// up enough in various ABI lowering tasks to be worth pre-computing. union { @@ -88,6 +95,8 @@ struct CodeGenTypeCache { unsigned char SizeAlignInBytes; }; + LangAS ASTAllocaAddressSpace; + CharUnits getSizeSize() const { return CharUnits::fromQuantity(SizeSizeInBytes); } @@ -105,6 +114,8 @@ struct CodeGenTypeCache { llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; } llvm::CallingConv::ID BuiltinCC; llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; } + + LangAS getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index dc24b2040f..80f2a31ac0 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -44,6 +44,10 @@ CodeGenTypes::~CodeGenTypes() { delete &*I++; } +const CodeGenOptions &CodeGenTypes::getCodeGenOpts() const { + return CGM.getCodeGenOpts(); +} + void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, llvm::StructType *Ty, StringRef suffix) { @@ -439,6 +443,12 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { static_cast<unsigned>(Context.getTypeSize(T))); break; + case BuiltinType::Float16: + ResultType = + getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), + /* UseNativeHalf = */ true); + break; + case BuiltinType::Half: // Half FP can either be storage-only (lowered to i16) or native. ResultType = @@ -490,7 +500,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm_unreachable("Unexpected undeduced type!"); case Type::Complex: { llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType()); - ResultType = llvm::StructType::get(EltTy, EltTy, nullptr); + ResultType = llvm::StructType::get(EltTy, EltTy); break; } case Type::LValueReference: @@ -635,7 +645,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } case Type::Pipe: { - ResultType = CGM.getOpenCLRuntime().getPipeType(); + ResultType = CGM.getOpenCLRuntime().getPipeType(cast<PipeType>(Ty)); break; } } diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index f0b97ebde1..d082342bf5 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -164,8 +164,6 @@ class CodeGenTypes { llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; - unsigned ClangCallConvToLLVMCallConv(CallingConv CC); - public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -178,6 +176,10 @@ public: const TargetInfo &getTarget() const { return Target; } CGCXXABI &getCXXABI() const { return TheCXXABI; } llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); } + const CodeGenOptions &getCodeGenOpts() const; + + /// Convert clang calling convention to LLVM callilng convention. + unsigned ClangCallConvToLLVMCallConv(CallingConv CC); /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h new file mode 100644 index 0000000000..90c9fcd8cf --- /dev/null +++ b/lib/CodeGen/ConstantEmitter.h @@ -0,0 +1,178 @@ +//===--- ConstantEmitter.h - IR constant emission ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A helper class for emitting expressions and values as llvm::Constants +// and as initializers for global variables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTEMITTER_H +#define LLVM_CLANG_LIB_CODEGEN_CONSTANTEMITTER_H + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" + +namespace clang { +namespace CodeGen { + +class ConstantEmitter { +public: + CodeGenModule &CGM; + CodeGenFunction *CGF; + +private: + bool Abstract = false; + + /// Whether non-abstract components of the emitter have been initialized. + bool InitializedNonAbstract = false; + + /// Whether the emitter has been finalized. + bool Finalized = false; + + /// Whether the constant-emission failed. + bool Failed = false; + + /// The AST address space where this (non-abstract) initializer is going. + /// Used for generating appropriate placeholders. + LangAS DestAddressSpace; + + llvm::SmallVector<std::pair<llvm::Constant *, llvm::GlobalVariable*>, 4> + PlaceholderAddresses; + +public: + ConstantEmitter(CodeGenModule &CGM, CodeGenFunction *CGF = nullptr) + : CGM(CGM), CGF(CGF) {} + + /// Initialize this emission in the context of the given function. + /// Use this if the expression might contain contextaul references like + /// block addresses or PredefinedExprs. + ConstantEmitter(CodeGenFunction &CGF) + : CGM(CGF.CGM), CGF(&CGF) {} + + ConstantEmitter(const ConstantEmitter &other) = delete; + ConstantEmitter &operator=(const ConstantEmitter &other) = delete; + + ~ConstantEmitter(); + + /// Is the current emission context abstract? + bool isAbstract() const { + return Abstract; + } + + /// Try to emit the initiaizer of the given declaration as an abstract + /// constant. If this succeeds, the emission must be finalized. + llvm::Constant *tryEmitForInitializer(const VarDecl &D); + llvm::Constant *tryEmitForInitializer(const Expr *E, LangAS destAddrSpace, + QualType destType); + llvm::Constant *emitForInitializer(const APValue &value, LangAS destAddrSpace, + QualType destType); + + void finalize(llvm::GlobalVariable *global); + + // All of the "abstract" emission methods below permit the emission to + // be immediately discarded without finalizing anything. Therefore, they + // must also promise not to do anything that will, in the future, require + // finalization: + // + // - using the CGF (if present) for anything other than establishing + // semantic context; for example, an expression with ignored + // side-effects must not be emitted as an abstract expression + // + // - doing anything that would not be safe to duplicate within an + // initializer or to propagate to another context; for example, + // side effects, or emitting an initialization that requires a + // reference to its current location. + + /// Try to emit the initializer of the given declaration as an abstract + /// constant. + llvm::Constant *tryEmitAbstractForInitializer(const VarDecl &D); + + /// Emit the result of the given expression as an abstract constant, + /// asserting that it succeeded. This is only safe to do when the + /// expression is known to be a constant expression with either a fairly + /// simple type or a known simple form. + llvm::Constant *emitAbstract(const Expr *E, QualType T); + llvm::Constant *emitAbstract(SourceLocation loc, const APValue &value, + QualType T); + + /// Try to emit the result of the given expression as an abstract constant. + llvm::Constant *tryEmitAbstract(const Expr *E, QualType T); + llvm::Constant *tryEmitAbstractForMemory(const Expr *E, QualType T); + + llvm::Constant *tryEmitAbstract(const APValue &value, QualType T); + llvm::Constant *tryEmitAbstractForMemory(const APValue &value, QualType T); + + llvm::Constant *emitNullForMemory(QualType T) { + return emitNullForMemory(CGM, T); + } + llvm::Constant *emitForMemory(llvm::Constant *C, QualType T) { + return emitForMemory(CGM, C, T); + } + + static llvm::Constant *emitNullForMemory(CodeGenModule &CGM, QualType T); + static llvm::Constant *emitForMemory(CodeGenModule &CGM, llvm::Constant *C, + QualType T); + + // These are private helper routines of the constant emitter that + // can't actually be private because things are split out into helper + // functions and classes. + + llvm::Constant *tryEmitPrivateForVarInit(const VarDecl &D); + + llvm::Constant *tryEmitPrivate(const Expr *E, QualType T); + llvm::Constant *tryEmitPrivateForMemory(const Expr *E, QualType T); + + llvm::Constant *tryEmitPrivate(const APValue &value, QualType T); + llvm::Constant *tryEmitPrivateForMemory(const APValue &value, QualType T); + + /// Get the address of the current location. This is a constant + /// that will resolve, after finalization, to the address of the + /// 'signal' value that is registered with the emitter later. + llvm::GlobalValue *getCurrentAddrPrivate(); + + /// Register a 'signal' value with the emitter to inform it where to + /// resolve a placeholder. The signal value must be unique in the + /// initializer; it might, for example, be the address of a global that + /// refers to the current-address value in its own initializer. + /// + /// Uses of the placeholder must be properly anchored before finalizing + /// the emitter, e.g. by being installed as the initializer of a global + /// variable. That is, it must be possible to replaceAllUsesWith + /// the placeholder with the proper address of the signal. + void registerCurrentAddrPrivate(llvm::Constant *signal, + llvm::GlobalValue *placeholder); + +private: + void initializeNonAbstract(LangAS destAS) { + assert(!InitializedNonAbstract); + InitializedNonAbstract = true; + DestAddressSpace = destAS; + } + llvm::Constant *markIfFailed(llvm::Constant *init) { + if (!init) + Failed = true; + return init; + } + + struct AbstractState { + bool OldValue; + size_t OldPlaceholdersSize; + }; + AbstractState pushAbstract() { + AbstractState saved = { Abstract, PlaceholderAddresses.size() }; + Abstract = true; + return saved; + } + llvm::Constant *validateAndPopAbstract(llvm::Constant *C, AbstractState save); +}; + +} +} + +#endif diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 5bc9e5011a..08b8c2ec54 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -29,7 +29,7 @@ using namespace clang; using namespace CodeGen; using namespace llvm::coverage; -void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range) { +void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) { SkippedRanges.push_back(Range); } @@ -45,10 +45,19 @@ class SourceMappingRegion { /// \brief The region's ending location. Optional<SourceLocation> LocEnd; + /// Whether this region should be emitted after its parent is emitted. + bool DeferRegion; + + /// Whether this region is a gap region. The count from a gap region is set + /// as the line execution count if there are no other regions on the line. + bool GapRegion; + public: SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart, - Optional<SourceLocation> LocEnd) - : Count(Count), LocStart(LocStart), LocEnd(LocEnd) {} + Optional<SourceLocation> LocEnd, bool DeferRegion = false, + bool GapRegion = false) + : Count(Count), LocStart(LocStart), LocEnd(LocEnd), + DeferRegion(DeferRegion), GapRegion(GapRegion) {} const Counter &getCounter() const { return Count; } @@ -71,6 +80,44 @@ public: assert(LocEnd && "Region has no end location"); return *LocEnd; } + + bool isDeferred() const { return DeferRegion; } + + void setDeferred(bool Deferred) { DeferRegion = Deferred; } + + bool isGap() const { return GapRegion; } + + void setGap(bool Gap) { GapRegion = Gap; } +}; + +/// Spelling locations for the start and end of a source region. +struct SpellingRegion { + /// The line where the region starts. + unsigned LineStart; + + /// The column where the region starts. + unsigned ColumnStart; + + /// The line where the region ends. + unsigned LineEnd; + + /// The column where the region ends. + unsigned ColumnEnd; + + SpellingRegion(SourceManager &SM, SourceLocation LocStart, + SourceLocation LocEnd) { + LineStart = SM.getSpellingLineNumber(LocStart); + ColumnStart = SM.getSpellingColumnNumber(LocStart); + LineEnd = SM.getSpellingLineNumber(LocEnd); + ColumnEnd = SM.getSpellingColumnNumber(LocEnd); + } + + /// Check if the start and end locations appear in source order, i.e + /// top->bottom, left->right. + bool isInSourceOrder() const { + return (LineStart < LineEnd) || + (LineStart == LineEnd && ColumnStart <= ColumnEnd); + } }; /// \brief Provides the common functionality for the different @@ -241,12 +288,9 @@ public: auto CovFileID = getCoverageFileID(LocStart); if (!CovFileID) continue; - unsigned LineStart = SM.getSpellingLineNumber(LocStart); - unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); + SpellingRegion SR{SM, LocStart, LocEnd}; auto Region = CounterMappingRegion::makeSkipped( - *CovFileID, LineStart, ColumnStart, LineEnd, ColumnEnd); + *CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd); // Make sure that we only collect the regions that are inside // the souce code of this function. if (Region.LineStart >= FileLineRanges[*CovFileID].first && @@ -284,16 +328,19 @@ public: if (Filter.count(std::make_pair(LocStart, LocEnd))) continue; - // Find the spilling locations for the mapping region. - unsigned LineStart = SM.getSpellingLineNumber(LocStart); - unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); - - assert(LineStart <= LineEnd && "region start and end out of order"); - MappingRegions.push_back(CounterMappingRegion::makeRegion( - Region.getCounter(), *CovFileID, LineStart, ColumnStart, LineEnd, - ColumnEnd)); + // Find the spelling locations for the mapping region. + SpellingRegion SR{SM, LocStart, LocEnd}; + assert(SR.isInSourceOrder() && "region start and end out of order"); + + if (Region.isGap()) { + MappingRegions.push_back(CounterMappingRegion::makeGapRegion( + Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); + } else { + MappingRegions.push_back(CounterMappingRegion::makeRegion( + Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); + } } } @@ -317,14 +364,11 @@ public: "region spans multiple files"); Filter.insert(std::make_pair(ParentLoc, LocEnd)); - unsigned LineStart = SM.getSpellingLineNumber(ParentLoc); - unsigned ColumnStart = SM.getSpellingColumnNumber(ParentLoc); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); - + SpellingRegion SR{SM, ParentLoc, LocEnd}; + assert(SR.isInSourceOrder() && "region start and end out of order"); MappingRegions.push_back(CounterMappingRegion::makeExpansion( - *ParentFileID, *ExpandedFileID, LineStart, ColumnStart, LineEnd, - ColumnEnd)); + *ParentFileID, *ExpandedFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); } return Filter; } @@ -389,6 +433,10 @@ struct CounterCoverageMappingBuilder /// \brief A stack of currently live regions. std::vector<SourceMappingRegion> RegionStack; + /// The currently deferred region: its end location and count can be set once + /// its parent has been popped from the region stack. + Optional<SourceMappingRegion> DeferredRegion; + CounterExpressionBuilder Builder; /// \brief A location in the most recently visited file or macro. @@ -424,19 +472,61 @@ struct CounterCoverageMappingBuilder /// used with popRegions to exit a "scope", ending the region that was pushed. size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None, Optional<SourceLocation> EndLoc = None) { - if (StartLoc) + if (StartLoc) { MostRecentLocation = *StartLoc; + completeDeferred(Count, MostRecentLocation); + } RegionStack.emplace_back(Count, StartLoc, EndLoc); return RegionStack.size() - 1; } + /// Complete any pending deferred region by setting its end location and + /// count, and then pushing it onto the region stack. + size_t completeDeferred(Counter Count, SourceLocation DeferredEndLoc) { + size_t Index = RegionStack.size(); + if (!DeferredRegion) + return Index; + + // Consume the pending region. + SourceMappingRegion DR = DeferredRegion.getValue(); + DeferredRegion = None; + + // If the region ends in an expansion, find the expansion site. + if (SM.getFileID(DeferredEndLoc) != SM.getMainFileID()) { + FileID StartFile = SM.getFileID(DR.getStartLoc()); + if (isNestedIn(DeferredEndLoc, StartFile)) { + do { + DeferredEndLoc = getIncludeOrExpansionLoc(DeferredEndLoc); + } while (StartFile != SM.getFileID(DeferredEndLoc)); + } + } + + // The parent of this deferred region ends where the containing decl ends, + // so the region isn't useful. + if (DR.getStartLoc() == DeferredEndLoc) + return Index; + + // If we're visiting statements in non-source order (e.g switch cases or + // a loop condition) we can't construct a sensible deferred region. + if (!SpellingRegion(SM, DR.getStartLoc(), DeferredEndLoc).isInSourceOrder()) + return Index; + + DR.setGap(true); + DR.setCounter(Count); + DR.setEndLoc(DeferredEndLoc); + handleFileExit(DeferredEndLoc); + RegionStack.push_back(DR); + return Index; + } + /// \brief Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the /// function's \c SourceRegions. void popRegions(size_t ParentIndex) { assert(RegionStack.size() >= ParentIndex && "parent not in stack"); + bool ParentOfDeferredRegion = false; while (RegionStack.size() > ParentIndex) { SourceMappingRegion &Region = RegionStack.back(); if (Region.hasStartLoc()) { @@ -468,9 +558,26 @@ struct CounterCoverageMappingBuilder assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc)); SourceRegions.push_back(Region); + + if (ParentOfDeferredRegion) { + ParentOfDeferredRegion = false; + + // If there's an existing deferred region, keep the old one, because + // it means there are two consecutive returns (or a similar pattern). + if (!DeferredRegion.hasValue() && + // File IDs aren't gathered within macro expansions, so it isn't + // useful to try and create a deferred region inside of one. + (SM.getFileID(EndLoc) == SM.getMainFileID())) + DeferredRegion = + SourceMappingRegion(Counter::getZero(), EndLoc, None); + } + } else if (Region.isDeferred()) { + assert(!ParentOfDeferredRegion && "Consecutive deferred regions"); + ParentOfDeferredRegion = true; } RegionStack.pop_back(); } + assert(!ParentOfDeferredRegion && "Deferred region with no parent"); } /// \brief Return the currently active region. @@ -481,15 +588,17 @@ struct CounterCoverageMappingBuilder /// \brief Propagate counts through the children of \c S. Counter propagateCounts(Counter TopCount, const Stmt *S) { - size_t Index = pushRegion(TopCount, getStart(S), getEnd(S)); + SourceLocation StartLoc = getStart(S); + SourceLocation EndLoc = getEnd(S); + size_t Index = pushRegion(TopCount, StartLoc, EndLoc); Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); // The statement may be spanned by an expansion. Make sure we handle a file // exit out of this expansion before moving to the next statement. - if (SM.isBeforeInTranslationUnit(getStart(S), S->getLocStart())) - MostRecentLocation = getEnd(S); + if (SM.isBeforeInTranslationUnit(StartLoc, S->getLocStart())) + MostRecentLocation = EndLoc; return ExitCount; } @@ -595,6 +704,8 @@ struct CounterCoverageMappingBuilder handleFileExit(StartLoc); if (!Region.hasStartLoc()) Region.setStartLoc(StartLoc); + + completeDeferred(Region.getCounter(), StartLoc); } /// \brief Mark \c S as a terminator, starting a zero region. @@ -604,6 +715,7 @@ struct CounterCoverageMappingBuilder if (!Region.hasEndLoc()) Region.setEndLoc(getEnd(S)); pushRegion(Counter::getZero()); + getRegion().setDeferred(true); } /// \brief Keep counts of breaks and continues inside loops. @@ -617,13 +729,15 @@ struct CounterCoverageMappingBuilder CoverageMappingModuleGen &CVM, llvm::DenseMap<const Stmt *, unsigned> &CounterMap, SourceManager &SM, const LangOptions &LangOpts) - : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap) {} + : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap), + DeferredRegion(None) {} /// \brief Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { llvm::SmallVector<unsigned, 8> VirtualFileMapping; gatherFileIDs(VirtualFileMapping); SourceRegionFilter Filter = emitExpansionRegions(); + assert(!DeferredRegion && "Deferred region never completed"); emitSourceRegions(Filter); gatherSkippedRegions(); @@ -644,14 +758,42 @@ struct CounterCoverageMappingBuilder handleFileExit(getEnd(S)); } + /// Determine whether the final deferred region emitted in \p Body should be + /// discarded. + static bool discardFinalDeferredRegionInDecl(Stmt *Body) { + if (auto *CS = dyn_cast<CompoundStmt>(Body)) { + Stmt *LastStmt = CS->body_back(); + if (auto *IfElse = dyn_cast<IfStmt>(LastStmt)) { + if (auto *Else = dyn_cast_or_null<CompoundStmt>(IfElse->getElse())) + LastStmt = Else->body_back(); + else + LastStmt = IfElse->getElse(); + } + return dyn_cast_or_null<ReturnStmt>(LastStmt); + } + return false; + } + void VisitDecl(const Decl *D) { + assert(!DeferredRegion && "Deferred region never completed"); + Stmt *Body = D->getBody(); // Do not propagate region counts into system headers. if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) return; - propagateCounts(getRegionCounter(Body), Body); + Counter ExitCount = propagateCounts(getRegionCounter(Body), Body); + assert(RegionStack.empty() && "Regions entered but never exited"); + + if (DeferredRegion) { + // Complete (or discard) any deferred regions introduced by the last + // statement. + if (discardFinalDeferredRegionInDecl(Body)) + DeferredRegion = None; + else + popRegions(completeDeferred(ExitCount, getEnd(Body))); + } } void VisitReturnStmt(const ReturnStmt *S) { @@ -682,6 +824,8 @@ struct CounterCoverageMappingBuilder assert(!BreakContinueStack.empty() && "break not in a loop or switch!"); BreakContinueStack.back().BreakCount = addCounters( BreakContinueStack.back().BreakCount, getRegion().getCounter()); + // FIXME: a break in a switch should terminate regions for all preceding + // case statements, not just the most recent one. terminateRegion(S); } @@ -692,6 +836,16 @@ struct CounterCoverageMappingBuilder terminateRegion(S); } + void VisitCallExpr(const CallExpr *E) { + VisitStmt(E); + + // Terminate the region when we hit a noreturn function. + // (This is helpful dealing with switch statements.) + QualType CalleeType = E->getCallee()->getType(); + if (getFunctionExtInfo(*CalleeType).getNoReturn()) + terminateRegion(E); + } + void VisitWhileStmt(const WhileStmt *S) { extendRegion(S); @@ -823,15 +977,20 @@ struct CounterCoverageMappingBuilder extendRegion(Body); if (const auto *CS = dyn_cast<CompoundStmt>(Body)) { if (!CS->body_empty()) { - // The body of the switch needs a zero region so that fallthrough counts - // behave correctly, but it would be misleading to include the braces of - // the compound statement in the zeroed area, so we need to handle this - // specially. + // Make a region for the body of the switch. If the body starts with + // a case, that case will reuse this region; otherwise, this covers + // the unreachable code at the beginning of the switch body. size_t Index = - pushRegion(Counter::getZero(), getStart(CS->body_front()), - getEnd(CS->body_back())); + pushRegion(Counter::getZero(), getStart(CS->body_front())); for (const auto *Child : CS->children()) Visit(Child); + + // Set the end for the body of the switch, if it isn't already set. + for (size_t i = RegionStack.size(); i != Index; --i) { + if (!RegionStack[i - 1].hasEndLoc()) + RegionStack[i - 1].setEndLoc(getEnd(CS->body_back())); + } + popRegions(Index); } } else @@ -940,16 +1099,18 @@ struct CounterCoverageMappingBuilder } void VisitBinLAnd(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); } void VisitBinLOr(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); @@ -961,12 +1122,10 @@ struct CounterCoverageMappingBuilder } }; -bool isMachO(const CodeGenModule &CGM) { - return CGM.getTarget().getTriple().isOSBinFormatMachO(); -} - -StringRef getCoverageSection(const CodeGenModule &CGM) { - return llvm::getInstrProfCoverageSectionName(isMachO(CGM)); +std::string getCoverageSection(const CodeGenModule &CGM) { + return llvm::getInstrProfSectionName( + llvm::IPSK_covmap, + CGM.getContext().getTargetInfo().getTriple().getObjectFormat()); } std::string normalizeFilename(StringRef Filename) { @@ -994,6 +1153,9 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, case CounterMappingRegion::SkippedRegion: OS << "Skipped,"; break; + case CounterMappingRegion::GapRegion: + OS << "Gap,"; + break; } OS << "File " << R.FileID << ", " << R.LineStart << ":" << R.ColumnStart diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index b6789c2a79..d07ed5ebcf 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -39,7 +39,7 @@ class CoverageSourceInfo : public PPCallbacks { public: ArrayRef<SourceRange> getSkippedRanges() const { return SkippedRanges; } - void SourceRangeSkipped(SourceRange Range) override; + void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override; }; namespace CodeGen { diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index 2435830385..c7bdeac58a 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -202,7 +202,7 @@ public: template <std::size_t... Is> T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) { // It's important that the restores are emitted in order. The braced init - // list guarentees that. + // list guarantees that. return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...}; } diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 3b2183ea36..173bc4d8df 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -62,12 +62,20 @@ public: bool classifyReturnType(CGFunctionInfo &FI) const override; + bool passClassIndirect(const CXXRecordDecl *RD) const { + // Clang <= 4 used the pre-C++11 rule, which ignores move operations. + // The PS4 platform ABI follows the behavior of Clang 3.2. + if (CGM.getCodeGenOpts().getClangABICompat() <= + CodeGenOptions::ClangABI::Ver4 || + CGM.getTriple().getOS() == llvm::Triple::PS4) + return RD->hasNonTrivialDestructor() || + RD->hasNonTrivialCopyConstructor(); + return !canCopyArgument(RD); + } + RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) + // If C++ prohibits us from making a copy, pass by address. + if (passClassIndirect(RD)) return RAA_Indirect; return RAA_Default; } @@ -157,9 +165,17 @@ public: Address Ptr, QualType ElementType, const CXXDestructorDecl *Dtor) override; + /// Itanium says that an _Unwind_Exception has to be "double-word" + /// aligned (and thus the end of it is also so-aligned), meaning 16 + /// bytes. Of course, that was written for the actual Itanium, + /// which is a 64-bit platform. Classically, the ABI doesn't really + /// specify the alignment on other platforms, but in practice + /// libUnwind declares the struct with __attribute__((aligned)), so + /// we assume that alignment here. (It's generally 16 bytes, but + /// some targets overwrite it.) CharUnits getAlignmentOfExnObject() { - unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment(); - return CGM.getContext().toCharUnitsFromBits(Align); + auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned(); + return CGM.getContext().toCharUnitsFromBits(align); } void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override; @@ -284,6 +300,14 @@ public: // linkage together with vtables when needed. if (ForVTable && !Thunk->hasLocalLinkage()) Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); + + // Propagate dllexport storage, to enable the linker to generate import + // thunks as necessary (e.g. when a parent class has a key function and a + // child class doesn't, and the construction vtable for the parent in the + // child needs to reference the parent's thunks). + const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); + if (MD->hasAttr<DLLExportAttr>()) + Thunk->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This, @@ -366,20 +390,30 @@ public: void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; private: - bool hasAnyVirtualInlineFunction(const CXXRecordDecl *RD) const { - const auto &VtableLayout = - CGM.getItaniumVTableContext().getVTableLayout(RD); - - for (const auto &VtableComponent : VtableLayout.vtable_components()) { - // Skip empty slot. - if (!VtableComponent.isUsedFunctionPointerKind()) - continue; - - const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); - if (Method->getCanonicalDecl()->isInlined()) - return true; - } - return false; + bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const { + const auto &VtableLayout = + CGM.getItaniumVTableContext().getVTableLayout(RD); + + for (const auto &VtableComponent : VtableLayout.vtable_components()) { + // Skip empty slot. + if (!VtableComponent.isUsedFunctionPointerKind()) + continue; + + const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); + if (!Method->getCanonicalDecl()->isInlined()) + continue; + + StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl()); + auto *Entry = CGM.GetGlobalValue(Name); + // This checks if virtual inline function has already been emitted. + // Note that it is possible that this inline function would be emitted + // after trying to emit vtable speculatively. Because of this we do + // an extra pass after emitting all deferred vtables to find and emit + // these vtables opportunistically. + if (!Entry || Entry->isDeclaration()) + return true; + } + return false; } bool isVTableHidden(const CXXRecordDecl *RD) const { @@ -499,7 +533,7 @@ llvm::Type * ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) { if (MPT->isMemberDataPointer()) return CGM.PtrDiffTy; - return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, nullptr); + return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy); } /// In the Itanium and ARM ABIs, method pointers have the form: @@ -988,10 +1022,8 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) { + // If C++ prohibits us from making a copy, return by address. + if (passClassIndirect(RD)) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); return true; @@ -1134,8 +1166,8 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { // Mark the function as nounwind readonly. llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, llvm::Attribute::ReadOnly }; - llvm::AttributeSet Attrs = llvm::AttributeSet::get( - CGF.getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs); + llvm::AttributeList Attrs = llvm::AttributeList::get( + CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } @@ -1398,9 +1430,9 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, // FIXME: avoid the fake decl QualType T = Context.getPointerType(Context.VoidPtrTy); - ImplicitParamDecl *VTTDecl - = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(), - &Context.Idents.get("vtt"), T); + auto *VTTDecl = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"), + T, ImplicitParamDecl::CXXVTT); Params.insert(Params.begin() + 1, VTTDecl); getStructorImplicitParamDecl(CGF) = VTTDecl; } @@ -1687,11 +1719,11 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { if (CGM.getLangOpts().AppleKext) return false; - // If we don't have any inline virtual functions, and if vtable is not hidden, - // then we are safe to emit available_externally copy of vtable. + // If we don't have any not emitted inline virtual function, and if vtable is + // not hidden, then we are safe to emit available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. - return !hasAnyVirtualInlineFunction(RD) && !isVTableHidden(RD); + return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD); } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -1910,10 +1942,11 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_acquire", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_acquire", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, @@ -1921,10 +1954,11 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, // void __cxa_guard_release(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_release", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_release", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, @@ -1932,10 +1966,11 @@ static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, // void __cxa_guard_abort(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FTy, "__cxa_guard_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind)); + return CGM.CreateRuntimeFunction( + FTy, "__cxa_guard_abort", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind)); } namespace { @@ -2092,13 +2127,14 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, (UseARMGuardVarABI && !useInt8GuardVariable) ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) : LI; - llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); + llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized"); llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); // Check if the first byte of the guard variable is zero. - Builder.CreateCondBr(isInitialized, InitCheckBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); CGF.EmitBlock(InitCheckBlock); @@ -2573,6 +2609,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { if (!GV) { // Create a new global variable. + // Note for the future: If we would ever like to do deferred emission of + // RTTI, check if emitting vtables opportunistically need any adjustment. + GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, /*Constant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, @@ -2626,6 +2665,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float16: case BuiltinType::Float128: case BuiltinType::Char16: case BuiltinType::Char32: @@ -2716,7 +2756,9 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, // function. bool IsDLLImport = RD->hasAttr<DLLImportAttr>(); if (CGM.getVTables().isVTableExternal(RD)) - return IsDLLImport ? false : true; + return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment() + ? false + : true; if (IsDLLImport) return true; @@ -2941,6 +2983,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: + case ModuleInternalLinkage: + case ModuleLinkage: case ExternalLinkage: // RTTI is not enabled, which means that this type info struct is going // to be used for exception handling. Give it linkonce_odr linkage. @@ -2952,17 +2996,16 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, if (RD->hasAttr<WeakAttr>()) return llvm::GlobalValue::WeakODRLinkage; if (CGM.getTriple().isWindowsItaniumEnvironment()) - if (RD->hasAttr<DLLImportAttr>()) - return llvm::GlobalValue::ExternalLinkage; - if (RD->isDynamicClass()) { - llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD); - // MinGW won't export the RTTI information when there is a key function. - // Make sure we emit our own copy instead of attempting to dllimport it. if (RD->hasAttr<DLLImportAttr>() && - llvm::GlobalValue::isAvailableExternallyLinkage(LT)) - LT = llvm::GlobalValue::LinkOnceODRLinkage; - return LT; - } + ShouldUseExternalRTTIDescriptor(CGM, Ty)) + return llvm::GlobalValue::ExternalLinkage; + // MinGW always uses LinkOnceODRLinkage for type info. + if (RD->isDynamicClass() && + !CGM.getContext() + .getTargetInfo() + .getTriple() + .isWindowsGNUEnvironment()) + return CGM.getVTableLinkage(RD); } return llvm::GlobalValue::LinkOnceODRLinkage; @@ -3165,7 +3208,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) { TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - } else if (CGM.getLangOpts().RTTI && RD && RD->hasAttr<DLLImportAttr>()) { + } else if (RD && RD->hasAttr<DLLImportAttr>() && + ShouldUseExternalRTTIDescriptor(CGM, Ty)) { TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -3620,6 +3664,18 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, !CGM.TryEmitBaseDestructorAsAlias(DD)) return; + // FIXME: The deleting destructor is equivalent to the selected operator + // delete if: + // * either the delete is a destroying operator delete or the destructor + // would be trivial if it weren't virtual, + // * the conversion from the 'this' parameter to the first parameter of the + // destructor is equivalent to a bitcast, + // * the destructor does not have an implicit "this" return, and + // * the operator delete has the same calling convention and IR function type + // as the destructor. + // In such cases we should try to emit the deleting dtor as an alias to the + // selected 'operator delete'. + llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type); if (CGType == StructorCodegen::COMDAT) { @@ -3927,9 +3983,8 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); - llvm::Constant *fnRef = - CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *fnRef = CGM.CreateRuntimeFunction( + fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); llvm::Function *fn = dyn_cast<llvm::Function>(fnRef); if (fn && fn->empty()) { diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index acea5c1143..a6f21d8ddc 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -26,8 +26,8 @@ void MacroPPCallbacks::writeMacroDefinition(const IdentifierInfo &II, if (MI.isFunctionLike()) { Name << '('; - if (!MI.arg_empty()) { - MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end(); + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); for (; AI + 1 != E; ++AI) { Name << (*AI)->getName(); Name << ','; @@ -198,7 +198,8 @@ void MacroPPCallbacks::MacroDefined(const Token &MacroNameTok, } void MacroPPCallbacks::MacroUndefined(const Token &MacroNameTok, - const MacroDefinition &MD) { + const MacroDefinition &MD, + const MacroDirective *Undef) { IdentifierInfo *Id = MacroNameTok.getIdentifierInfo(); SourceLocation location = getCorrectLocation(MacroNameTok.getLocation()); Gen->getCGDebugInfo()->CreateMacro(getCurrentScope(), diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h index 06217f9c58..e117f96f47 100644 --- a/lib/CodeGen/MacroPPCallbacks.h +++ b/lib/CodeGen/MacroPPCallbacks.h @@ -110,8 +110,8 @@ public: /// Hook called whenever a macro \#undef is seen. /// /// MD is released immediately following this callback. - void MacroUndefined(const Token &MacroNameTok, - const MacroDefinition &MD) override; + void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, + const MacroDirective *Undef) override; }; } // end namespace clang diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index ddfdefac43..e81dedf14d 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -819,46 +819,44 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { return RAA_Default; case llvm::Triple::x86_64: - // Win64 passes objects with non-trivial copy ctors indirectly. - if (RD->hasNonTrivialCopyConstructor()) - return RAA_Indirect; - - // If an object has a destructor, we'd really like to pass it indirectly + // If a class has a destructor, we'd really like to pass it indirectly // because it allows us to elide copies. Unfortunately, MSVC makes that // impossible for small types, which it will pass in a single register or // stack slot. Most objects with dtors are large-ish, so handle that early. // We can't call out all large objects as being indirect because there are // multiple x64 calling conventions and the C++ ABI code shouldn't dictate // how we pass large POD types. + // + // Note: This permits small classes with nontrivial destructors to be + // passed in registers, which is non-conforming. if (RD->hasNonTrivialDestructor() && getContext().getTypeSize(RD->getTypeForDecl()) > 64) return RAA_Indirect; - // If this is true, the implicit copy constructor that Sema would have - // created would not be deleted. FIXME: We should provide a more direct way - // for CodeGen to ask whether the constructor was deleted. - if (!RD->hasUserDeclaredCopyConstructor() && - !RD->hasUserDeclaredMoveConstructor() && - !RD->needsOverloadResolutionForMoveConstructor() && - !RD->hasUserDeclaredMoveAssignment() && - !RD->needsOverloadResolutionForMoveAssignment()) - return RAA_Default; - - // Otherwise, Sema should have created an implicit copy constructor if - // needed. - assert(!RD->needsImplicitCopyConstructor()); - - // We have to make sure the trivial copy constructor isn't deleted. - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy ctor. Return directly. - if (!CD->isDeleted()) - return RAA_Default; + // If a class has at least one non-deleted, trivial copy constructor, it + // is passed according to the C ABI. Otherwise, it is passed indirectly. + // + // Note: This permits classes with non-trivial copy or move ctors to be + // passed in registers, so long as they *also* have a trivial copy ctor, + // which is non-conforming. + if (RD->needsImplicitCopyConstructor()) { + // If the copy ctor has not yet been declared, we can read its triviality + // off the AST. + if (!RD->defaultedCopyConstructorIsDeleted() && + RD->hasTrivialCopyConstructor()) + return RAA_Default; + } else { + // Otherwise, we need to find the copy constructor(s) and ask. + for (const CXXConstructorDecl *CD : RD->ctors()) { + if (CD->isCopyConstructor()) { + // We had at least one nondeleted trivial copy ctor. Return directly. + if (!CD->isDeleted() && CD->isTrivial()) + return RAA_Default; + } } } - // The trivial copy constructor was deleted. Return indirectly. + // We have no trivial, non-deleted copy constructor. return RAA_Indirect; } @@ -1413,11 +1411,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)); if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { - ImplicitParamDecl *IsMostDerived - = ImplicitParamDecl::Create(Context, nullptr, - CGF.CurGD.getDecl()->getLocation(), - &Context.Idents.get("is_most_derived"), - Context.IntTy); + auto *IsMostDerived = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("is_most_derived"), Context.IntTy, + ImplicitParamDecl::Other); // The 'most_derived' parameter goes second if the ctor is variadic and last // if it's not. Dtors can't be variadic. const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); @@ -1427,11 +1424,10 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, Params.push_back(IsMostDerived); getStructorImplicitParamDecl(CGF) = IsMostDerived; } else if (isDeletingDtor(CGF.CurGD)) { - ImplicitParamDecl *ShouldDelete - = ImplicitParamDecl::Create(Context, nullptr, - CGF.CurGD.getDecl()->getLocation(), - &Context.Idents.get("should_call_delete"), - Context.IntTy); + auto *ShouldDelete = ImplicitParamDecl::Create( + Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(), + &Context.Idents.get("should_call_delete"), Context.IntTy, + ImplicitParamDecl::Other); Params.push_back(ShouldDelete); getStructorImplicitParamDecl(CGF) = ShouldDelete; } @@ -2210,9 +2206,8 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get( CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false); - llvm::Constant *TLRegDtor = - CGF.CGM.CreateRuntimeFunction(TLRegDtorTy, "__tlregdtor", - llvm::AttributeSet(), /*Local=*/true); + llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction( + TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor)) TLRegDtorFn->setDoesNotThrow(); @@ -2308,9 +2303,9 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_header", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2320,9 +2315,9 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_footer", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2332,9 +2327,9 @@ static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) { CGM.IntTy->getPointerTo(), /*isVarArg=*/false); return CGM.CreateRuntimeFunction( FTy, "_Init_thread_abort", - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - llvm::Attribute::NoUnwind), + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind), /*Local=*/true); } @@ -2466,11 +2461,12 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // Test our bit from the guard variable. llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum); llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr); - llvm::Value *IsInitialized = - Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); + llvm::Value *NeedsInit = + Builder.CreateICmpEQ(Builder.CreateAnd(LI, Bit), Zero); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // Set our bit in the guard variable and emit the initializer and add a global // destructor if appropriate. @@ -2505,7 +2501,8 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch); llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(IsUninitialized, AttemptInitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // This BasicBlock attempts to determine whether or not this thread is // responsible for doing the initialization. @@ -3428,6 +3425,8 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: + case ModuleInternalLinkage: + case ModuleLinkage: case ExternalLinkage: return llvm::GlobalValue::LinkOnceODRLinkage; } @@ -3720,7 +3719,7 @@ CatchTypeInfo MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, QualType CatchHandlerType) { // TypeDescriptors for exceptions never have qualified pointer types, - // qualifiers are stored seperately in order to support qualification + // qualifiers are stored separately in order to support qualification // conversions. bool IsConst, IsVolatile, IsUnaligned; Type = @@ -3757,6 +3756,9 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName)) return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + // Note for the future: If we would ever like to do deferred emission of + // RTTI, check if emitting vtables opportunistically need any adjustment. + // Compute the fields for the TypeDescriptor. SmallString<256> TypeInfoString; { @@ -3803,7 +3805,7 @@ static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor, if (!dtor->getParent()->getNumVBases() && (dtorType == StructorType::Complete || dtorType == StructorType::Base)) { bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias( - GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base), true); + GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base)); if (ProducedAlias) { if (dtorType == StructorType::Complete) return; @@ -3873,18 +3875,21 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, // Following the 'this' pointer is a reference to the source object that we // are copying from. ImplicitParamDecl SrcParam( - getContext(), nullptr, SourceLocation(), &getContext().Idents.get("src"), + getContext(), /*DC=*/nullptr, SourceLocation(), + &getContext().Idents.get("src"), getContext().getLValueReferenceType(RecordTy, - /*SpelledAsLValue=*/true)); + /*SpelledAsLValue=*/true), + ImplicitParamDecl::Other); if (IsCopy) FunctionArgs.push_back(&SrcParam); // Constructors for classes which utilize virtual bases have an additional // parameter which indicates whether or not it is being delegated to by a more // derived constructor. - ImplicitParamDecl IsMostDerived(getContext(), nullptr, SourceLocation(), + ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr, + SourceLocation(), &getContext().Idents.get("is_most_derived"), - getContext().IntTy); + getContext().IntTy, ImplicitParamDecl::Other); // Only add the parameter to the list if thie class has virtual bases. if (RD->getNumVBases() > 0) FunctionArgs.push_back(&IsMostDerived); diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index 89090c8b6a..8aa9bfb421 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -119,6 +119,14 @@ namespace { return Builder->GetAddrOfGlobal(global, ForDefinition_t(isForDefinition)); } + llvm::Module *StartModule(llvm::StringRef ModuleName, + llvm::LLVMContext &C) { + assert(!M && "Replacing existing Module?"); + M.reset(new llvm::Module(ModuleName, C)); + Initialize(*Ctx); + return M.get(); + } + void Initialize(ASTContext &Context) override { Ctx = &Context; @@ -197,7 +205,7 @@ namespace { // Provide some coverage mapping even for methods that aren't emitted. // Don't do this for templated classes though, as they may not be // instantiable. - if (!MD->getParent()->getDescribedClassTemplate()) + if (!MD->getParent()->isDependentContext()) Builder->AddDeferredUnusedCoverageMapping(MD); } @@ -317,6 +325,11 @@ llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, ->GetAddrOfGlobal(global, isForDefinition); } +llvm::Module *CodeGenerator::StartModule(llvm::StringRef ModuleName, + llvm::LLVMContext &C) { + return static_cast<CodeGeneratorImpl*>(this)->StartModule(ModuleName, C); +} + CodeGenerator *clang::CreateLLVMCodeGen( DiagnosticsEngine &Diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HeaderSearchOpts, diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 37ecc05aa1..d0760b9cc2 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -152,6 +152,9 @@ public: CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; + // When building a module MainFileName is the name of the modulemap file. + CodeGenOpts.MainFileName = + LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp index 0bfe30a32c..fc8e36d2c5 100644 --- a/lib/CodeGen/SwiftCallingConv.cpp +++ b/lib/CodeGen/SwiftCallingConv.cpp @@ -57,6 +57,10 @@ static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) { return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type)); } +static CharUnits getTypeAllocSize(CodeGenModule &CGM, llvm::Type *type) { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(type)); +} + void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) { // Deal with various aggregate types as special cases: @@ -542,7 +546,9 @@ SwiftAggLowering::getCoerceAndExpandTypes() const { packed = true; elts.push_back(entry.Type); - lastEnd = entry.End; + + lastEnd = entry.Begin + getTypeAllocSize(CGM, entry.Type); + assert(entry.End <= lastEnd); } // We don't need to adjust 'packed' to deal with possible tail padding diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 82f412f012..b1773b7090 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -14,6 +14,7 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" @@ -22,7 +23,9 @@ #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" @@ -183,7 +186,11 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } -bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } +const CodeGenOptions &ABIInfo::getCodeGenOpts() const { + return CGT.getCodeGenOpts(); +} + +bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; @@ -398,7 +405,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { - return llvm::CallingConv::C; + // OpenCL kernels are called via an explicit runtime API with arguments + // set with clSetKernelArg(), not as normal sub-functions. + // Return SPIR_KERNEL by default as the kernel calling convention to + // ensure the fingerprint is fixed such way that each OpenCL argument + // gets one matching argument in the produced kernel function argument + // list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. + return llvm::CallingConv::SPIR_KERNEL; } llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, @@ -406,13 +423,36 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule & return llvm::ConstantPointerNull::get(T); } +LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return D ? D->getType().getAddressSpace() : LangAS::Default; +} + llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( - CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy, - QualType DestTy) const { + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, - CGF.ConvertType(DestTy)); + if (auto *C = dyn_cast<llvm::Constant>(Src)) + return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy); +} + +llvm::Constant * +TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + return llvm::ConstantExpr::getPointerCast(Src, DestTy); +} + +llvm::SyncScope::ID +TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { + return C.getOrInsertSyncScopeID(""); /* default sync scope */ } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); @@ -836,7 +876,10 @@ bool IsX86_MMXType(llvm::Type *IRType) { static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { - if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { + bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; @@ -853,8 +896,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) + if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { + if (BT->getKind() == BuiltinType::LongDouble) { + if (&Context.getTargetInfo().getLongDoubleFormat() == + &llvm::APFloat::x87DoubleExtended()) + return false; + } return true; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. @@ -942,8 +991,7 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; - ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, - const ABIArgInfo& current) const; + /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -1009,7 +1057,8 @@ public: const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. @@ -1038,14 +1087,14 @@ public: getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "movl\t%ebp, %ebp" - "\t\t## marker for objc_retainAutoreleaseReturnValue"; + "\t\t// marker for objc_retainAutoreleaseReturnValue"; } }; @@ -1527,27 +1576,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } -ABIArgInfo -X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, - const ABIArgInfo ¤t) const { - // Assumes vectorCall calling convention. - const Type *Base = nullptr; - uint64_t NumElts = 0; - - if (!Ty->isBuiltinType() && !Ty->isVectorType() && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - // HVA types get passed directly in registers if there is room. - State.FreeSSERegs -= NumElts; - return getDirectX86Hva(); - } - // If there's no room, the HVA gets passed as normal indirect - // structure. - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - return current; -} - ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1566,35 +1594,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } } - // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets, regcall uses some of the HVA rules. + // Regcall uses the concept of a homogenous vector aggregate, similar + // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if ((State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) && + if (State.CC == llvm::CallingConv::X86_RegCall && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.CC == llvm::CallingConv::X86_RegCall) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { - if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { - // Actual floating-point types get registers first time through if - // there is registers available - State.FreeSSERegs -= NumElts; + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); - } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { - // HVA Types only get registers after everything else has been - // set, so it gets set as indirect for now. - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); - } + return ABIArgInfo::getExpand(); } + return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1675,31 +1688,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, bool &UsedInAlloca) const { - // Vectorcall only allows the first 6 parameters to be passed in registers, - // and homogeneous vector aggregates are only put into registers as a second - // priority. - unsigned Count = 0; - CCState ZeroState = State; - ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; - // HVAs must be done as a second priority for registers, so the deferred - // items are dealt with by going through the pattern a second time. + // Vectorcall x86 works subtly different than in x64, so the format is + // a bit different than the x64 version. First, all vector types (not HVAs) + // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers. + // This differs from the x64 implementation, where the first 6 by INDEX get + // registers. + // After that, integers AND HVAs are assigned Left to Right in the same pass. + // Integers are passed as ECX/EDX if one is available (in order). HVAs will + // first take up the remaining YMM/XMM registers. If insufficient registers + // remain but an integer register (ECX/EDX) is available, it will be passed + // in that, else, on the stack. for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = classifyArgumentType(I.type, State); - else - // Parameters after the 6th cannot be passed in registers, - // so pretend there are no registers left for them. - I.info = classifyArgumentType(I.type, ZeroState); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); - ++Count; + // First pass do all the vector types. + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + if ((Ty->isVectorType() || Ty->isBuiltinType()) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = ABIArgInfo::getDirect(); + } else { + I.info = classifyArgumentType(Ty, State); + } + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } - Count = 0; - // Go through the arguments a second time to get HVAs registers if there - // are still some available. + for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, State, I.info); - ++Count; + // Second pass, do the rest! + const Type *Base = nullptr; + uint64_t NumElts = 0; + const QualType& Ty = I.type; + bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts); + + if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) { + // Assign true HVAs (non vector/native FP types). + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + I.info = getDirectX86Hva(); + } else { + I.info = getIndirectResult(Ty, /*ByVal=*/false, State); + } + } else if (!IsHva) { + // Assign all Non-HVAs, so this will exclude Vector/FP args. + I.info = classifyArgumentType(Ty, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } } @@ -1882,7 +1917,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: - case llvm::Triple::Bitrig: case llvm::Triple::Win32: return true; default: @@ -1890,9 +1924,11 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( } } -void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void X86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { // Get the LLVM function. @@ -1901,10 +1937,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2084,9 +2117,14 @@ class X86_64ABIInfo : public SwiftABIInfo { return !getTarget().getTriple().isOSDarwin(); } - /// GCC classifies <1 x long long> as SSE but compatibility with older clang - // compilers require us to classify it as INTEGER. + /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to + /// classify it as INTEGER (for compatibility with older clang compilers). bool classifyIntegerMMXAsSSE() const { + // Clang <= 3.8 did not do this. + if (getCodeGenOpts().getClangABICompat() <= + CodeGenOptions::ClangABI::Ver3_8) + return false; + const llvm::Triple &Triple = getTarget().getTriple(); if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) return false; @@ -2240,23 +2278,28 @@ public: llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig; - if (getABIInfo().has64BitPointers()) - Sig = (0xeb << 0) | // jmp rel8 - (0x0a << 8) | // .+0x0c - ('F' << 16) | - ('T' << 24); - else - Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + unsigned Sig = (0xeb << 0) | // jmp rel8 + (0x06 << 8) | // .+0x08 + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. + llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -2303,7 +2346,8 @@ public: Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -2331,11 +2375,12 @@ static void addStackProbeSizeTargetAttribute(const Decl *D, } } -void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } @@ -2346,7 +2391,8 @@ public: : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; @@ -2375,12 +2421,22 @@ public: } }; -void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. + llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -3162,8 +3218,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, } } - llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); - + llvm::StructType *Result = llvm::StructType::get(Lo, Hi); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && @@ -3238,8 +3293,7 @@ classifyReturnType(QualType RetTy) const { case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), - llvm::Type::getX86_FP80Ty(getVMContext()), - nullptr); + llvm::Type::getX86_FP80Ty(getVMContext())); break; } @@ -3496,18 +3550,27 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned FreeSSERegs = IsRegCall ? 16 : 8; unsigned NeededInt, NeededSSE; - if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && - !FI.getReturnType()->getTypePtr()->isUnionType()) { - FI.getReturnInfo() = - classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); - if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { - FreeIntRegs -= NeededInt; - FreeSSERegs -= NeededSSE; - } else { - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - } - } else if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + if (!getCXXABI().classifyReturnType(FI)) { + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) { + // Complex Long Double Type is passed in Memory when Regcall + // calling convention is used. + const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>(); + if (getContext().getCanonicalType(CT->getElementType()) == + getContext().LongDoubleTy) + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -3735,7 +3798,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; - llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); + llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); @@ -3897,6 +3960,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, bool IsRegCall) const { unsigned Count = 0; for (auto &I : FI.arguments()) { + // Vectorcall in x64 only permits the first 6 arguments to be passed + // as XMM/YMM registers. if (Count < VectorcallMaxParamNumAsReg) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); else { @@ -3909,11 +3974,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, ++Count; } - Count = 0; for (auto &I : FI.arguments()) { - if (Count < VectorcallMaxParamNumAsReg) - I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); - ++Count; + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); } } @@ -3974,7 +4036,10 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { -bool IsSoftFloatABI; + bool IsSoftFloatABI; + + CharUnits getParamTypeAlignment(QualType Ty) const; + public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} @@ -3996,13 +4061,46 @@ public: bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; +} + +CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 + : 4); + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. + const Type *AlignTy = nullptr; + if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignTy = EltType; + } + if (AlignTy) + return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); + return CharUnits::fromQuantity(4); } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { + if (getTarget().getTriple().isOSDarwin()) { + auto TI = getContext().getTypeInfoInChars(Ty); + TI.second = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(4); + return emitVoidPtrVAArg(CGF, VAList, Ty, + classifyArgumentType(Ty).isIndirect(), TI, SlotSize, + /*AllowHigherAlign=*/true); + } + const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. @@ -4640,7 +4738,7 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); - CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); } else CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); @@ -4777,7 +4875,8 @@ class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, - DarwinPCS + DarwinPCS, + Win64 }; private: @@ -4815,10 +4914,14 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { - return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool shouldPassIndirectlyForSwift(CharUnits totalSize, ArrayRef<llvm::Type*> scalars, bool asReturnValue) const override { @@ -4827,6 +4930,9 @@ private: bool isSwiftErrorInRegister() const override { return true; } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4835,7 +4941,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4844,6 +4950,22 @@ public: bool doesReturnSlotInterfereWithArgs() const override { return false; } }; + +class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { +public: + WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) + : AArch64TargetCodeGenInfo(CGT, K) {} + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { @@ -4893,10 +5015,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. - if (isEmptyRecord(getContext(), Ty, true)) { + uint64_t Size = getContext().getTypeSize(Ty); + bool IsEmpty = isEmptyRecord(getContext(), Ty, true); + if (IsEmpty || Size == 0) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); + // GNU C mode. The only argument that gets ignored is an empty one with size + // 0. + if (IsEmpty && Size == 0) + return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } @@ -4909,7 +5037,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { } // Aggregates <= 16 bytes are passed directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. @@ -4917,7 +5044,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { return coerceToIntArray(Ty, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(Ty); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4949,7 +5076,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { : ABIArgInfo::getDirect()); } - if (isEmptyRecord(getContext(), RetTy, true)) + uint64_t Size = getContext().getTypeSize(RetTy); + if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; @@ -4959,7 +5087,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. - uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. @@ -4967,7 +5094,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return coerceToIntArray(RetTy, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. @@ -4995,6 +5122,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -5305,6 +5443,14 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// @@ -5383,6 +5529,8 @@ private: bool isSwiftErrorInRegister() const override { return true; } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5399,7 +5547,7 @@ public: } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; + return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, @@ -5417,7 +5565,10 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -5449,10 +5600,7 @@ public: // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeSet::FunctionIndex, - llvm::AttributeSet::get(CGM.getLLVMContext(), - llvm::AttributeSet::FunctionIndex, - B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; @@ -5462,7 +5610,8 @@ public: : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -5476,8 +5625,11 @@ public: }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } } @@ -5534,17 +5686,14 @@ void ARMABIInfo::setCCs() { // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. - switch (getABIKind()) { - case APCS: - case AAPCS16_VFP: - if (abiCC != getLLVMDefaultCC()) + + // The Run-time ABI for the ARM Architecture section 4.1.2 requires + // AEABI-complying FP helper functions to use the base AAPCS. + // These AEABI functions are expanded in the ARM llvm backend, all the builtin + // support functions emitted by clang such as the _Complex helpers follow the + // abiCC. + if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; - break; - case AAPCS: - case AAPCS_VFP: - BuiltinCC = llvm::CallingConv::ARM_AAPCS; - break; - } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, @@ -5898,6 +6047,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. @@ -5992,7 +6155,9 @@ public: : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; + private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. @@ -6046,9 +6211,11 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm_unreachable("NVPTX does not support varargs"); } -void NVPTXTargetCodeGenInfo:: -setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const{ +void NVPTXTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6484,14 +6651,17 @@ public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; } -void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { +void MSP430TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { // Handle 'interrupt' attribute: @@ -6550,10 +6720,21 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); + + if (FD->hasAttr<MipsLongCallAttr>()) + Fn->addFnAttr("long-call"); + else if (FD->hasAttr<MipsShortCallAttr>()) + Fn->addFnAttr("short-call"); + + // Other attributes do not have a meaning for declarations. + if (!IsForDefinition) + return; + if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } @@ -6561,6 +6742,11 @@ public: Fn->addFnAttr("nomips16"); } + if (FD->hasAttr<MicroMipsAttr>()) + Fn->addFnAttr("micromips"); + else if (FD->hasAttr<NoMicroMipsAttr>()) + Fn->addFnAttr("nomicromips"); + const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); if (!Attr) return; @@ -6910,7 +7096,10 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const auto *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; auto *Fn = cast<llvm::Function>(GV); @@ -6938,11 +7127,15 @@ public: : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7038,13 +7231,13 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); @@ -7238,38 +7431,138 @@ public: namespace { class AMDGPUABIInfo final : public DefaultABIInfo { +private: + static const unsigned MaxNumRegsForArgsRet = 16; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override; + public: - explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : + DefaultABIInfo(CGT) {} -private: - ABIArgInfo classifyArgumentType(QualType Ty) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; }; +bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + return true; +} + +bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; +} + +/// Estimate number of registers the type will use when passed in registers. +unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. + QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. + if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + llvm::CallingConv::ID CC = FI.getCallingConvention(); + if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - unsigned CC = FI.getCallingConvention(); - for (auto &Arg : FI.arguments()) - if (CC == llvm::CallingConv::AMDGPU_KERNEL) - Arg.info = classifyArgumentType(Arg.type); - else - Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); + unsigned NumRegsLeft = MaxNumRegsForArgsRet; + for (auto &Arg : FI.arguments()) { + if (CC == llvm::CallingConv::AMDGPU_KERNEL) { + Arg.info = classifyKernelArgumentType(Arg.type); + } else { + Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); + } + } } -/// \brief Classify argument of given type \p Ty. -ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { - llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); - if (!StrTy) { - return DefaultABIInfo::classifyArgumentType(Ty); +ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = RetTy->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // Pack aggregates <= 4 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + } } + // Otherwise just do the default thing. + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + // Coerce single element structs to its element. - if (StrTy->getNumElements() == 1) { - return ABIArgInfo::getDirect(); - } + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we @@ -7277,34 +7570,116 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, + unsigned &NumRegsLeft) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Pack aggregates <= 8 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // XXX: Should this be i64 instead, and should the limit increase? + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + } + + // Otherwise just do the default thing. + ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; unsigned getOpenCLKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const override; + llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const override; }; } -static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); - void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *F = cast<llvm::Function>(GV); - if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); + if (ReqdWGS || FlatWGS) { + unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; + unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); @@ -7343,8 +7718,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } - - appendOpenCLVersionMD(M); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -7369,6 +7742,49 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +LangAS +AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + +llvm::SyncScope::ID +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const { + StringRef Name; + switch (S) { + case SyncScope::OpenCLWorkGroup: + Name = "workgroup"; + break; + case SyncScope::OpenCLDevice: + Name = "agent"; + break; + case SyncScope::OpenCLAllSVMDevices: + Name = ""; + break; + case SyncScope::OpenCLSubGroup: + Name = "subgroup"; + } + return C.getOrInsertSyncScopeID(Name); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -8015,45 +8431,18 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} - void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; }; + } // End anonymous namespace. -/// Emit SPIR specific metadata: OpenCL and SPIR version. -void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the - // opencl.spir.version named metadata. - llvm::Metadata *SPIRVerElts[] = { - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))}; - llvm::NamedMDNode *SPIRVerMD = - M.getOrInsertNamedMetadata("opencl.spir.version"); - SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); - appendOpenCLVersionMD(CGM); -} - -static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) { - llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); - llvm::Module &M = CGM.getModule(); - // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the - // opencl.ocl.version named metadata node. - llvm::Metadata *OCLVerElts[] = { - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; - llvm::NamedMDNode *OCLVerMD = - M.getOrInsertNamedMetadata("opencl.ocl.version"); - OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +namespace clang { +namespace CodeGen { +void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + DefaultABIInfo SPIRABI(CGM.getTypes()); + SPIRABI.computeInfo(FI); +} +} } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { @@ -8435,6 +8824,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; + else if (Triple.isOSWindows()) + return SetCGInfo( + new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } @@ -8564,3 +8956,108 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The kernel has the same function type as the block invoke function. Its +/// name is the name of the block invoke function postfixed with "_kernel". +/// It simply calls the block invoke function then returns. +llvm::Function * +TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + for (auto &P : InvokeFT->params()) + ArgTys.push_back(P); + auto &C = CGF.getLLVMContext(); + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + auto &Builder = CGF.Builder; + Builder.SetInsertPoint(BB); + llvm::SmallVector<llvm::Value *, 2> Args; + for (auto &A : F->args()) + Args.push_back(&A); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + return F; +} + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The type of the first argument (the block literal) is the struct type +/// of the block literal instead of a pointer type. The first argument +/// (block literal) is passed directly by value to the kernel. The kernel +/// allocates the same type of struct on stack and stores the block literal +/// to it and passes its pointer to the block invoke function. The kernel +/// has "enqueued-block" function attribute and kernel argument metadata. +llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( + CodeGenFunction &CGF, llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto &Builder = CGF.Builder; + auto &C = CGF.getLLVMContext(); + + auto *BlockTy = BlockLiteral->getType()->getPointerElementType(); + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; + llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgNames; + + ArgTys.push_back(BlockTy); + ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgNames.push_back(llvm::MDString::get(C, "block_literal")); + for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { + ArgTys.push_back(InvokeFT->getParamType(I)); + ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + ArgNames.push_back( + llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); + } + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + F->addFnAttr("enqueued-block"); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + Builder.SetInsertPoint(BB); + unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); + auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); + BlockPtr->setAlignment(BlockAlign); + Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); + auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); + llvm::SmallVector<llvm::Value *, 2> Args; + Args.push_back(Cast); + for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I) + Args.push_back(I); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + + F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); + F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); + F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); + F->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(C, ArgBaseTypeNames)); + F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); + if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) + F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); + + return F; +} diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index 223d6d047a..d745e420c4 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -15,9 +15,11 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H #define LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H +#include "CodeGenModule.h" #include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/SyncScope.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -34,8 +36,8 @@ class Decl; namespace CodeGen { class ABIInfo; class CallArgList; -class CodeGenModule; class CodeGenFunction; +class CGBlockInfo; class CGFunctionInfo; /// TargetCodeGenInfo - This class organizes various target-specific @@ -55,7 +57,8 @@ public: /// setTargetAttributes - Provides a convenient hook to handle extra /// target-specific attributes for the given global. virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const {} + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const {} /// emitTargetMD - Provides a convenient hook to handle extra /// target-specific metadata for the given global. @@ -229,13 +232,71 @@ public: virtual llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const; + /// Get target favored AST address space of a global variable for languages + /// other than OpenCL and CUDA. + /// If \p D is nullptr, returns the default target favored address space + /// for global variable. + virtual LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const; + + /// Get the AST address space for alloca. + virtual LangAS getASTAllocaAddressSpace() const { return LangAS::Default; } + /// Perform address space cast of an expression of pointer type. /// \param V is the LLVM value to be casted to another address space. - /// \param SrcTy is the QualType of \p V. - /// \param DestTy is the destination QualType. + /// \param SrcAddr is the language address space of \p V. + /// \param DestAddr is the targeted language address space. + /// \param DestTy is the destination LLVM pointer type. + /// \param IsNonNull is the flag indicating \p V is known to be non null. virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF, - llvm::Value *V, QualType SrcTy, QualType DestTy) const; + llvm::Value *V, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, + bool IsNonNull = false) const; + + /// Perform address space cast of a constant expression of pointer type. + /// \param V is the LLVM constant to be casted to another address space. + /// \param SrcAddr is the language address space of \p V. + /// \param DestAddr is the targeted language address space. + /// \param DestTy is the destination LLVM pointer type. + virtual llvm::Constant *performAddrSpaceCast(CodeGenModule &CGM, + llvm::Constant *V, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const; + + /// Get the syncscope used in LLVM IR. + virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const; + + /// Inteface class for filling custom fields of a block literal for OpenCL. + class TargetOpenCLBlockHelper { + public: + typedef std::pair<llvm::Value *, StringRef> ValueTy; + TargetOpenCLBlockHelper() {} + virtual ~TargetOpenCLBlockHelper() {} + /// Get the custom field types for OpenCL blocks. + virtual llvm::SmallVector<llvm::Type *, 1> getCustomFieldTypes() = 0; + /// Get the custom field values for OpenCL blocks. + virtual llvm::SmallVector<ValueTy, 1> + getCustomFieldValues(CodeGenFunction &CGF, const CGBlockInfo &Info) = 0; + virtual bool areAllCustomFieldValuesConstant(const CGBlockInfo &Info) = 0; + /// Get the custom field values for OpenCL blocks if all values are LLVM + /// constants. + virtual llvm::SmallVector<llvm::Constant *, 1> + getCustomFieldValues(CodeGenModule &CGM, const CGBlockInfo &Info) = 0; + }; + virtual TargetOpenCLBlockHelper *getTargetOpenCLBlockHelper() const { + return nullptr; + } + /// Create an OpenCL kernel for an enqueued block. The kernel function is + /// a wrapper for the block invoke function with target-specific calling + /// convention and ABI as an OpenCL kernel. The wrapper function accepts + /// block context and block arguments in target-specific way and calls + /// the original block invoke function. + virtual llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const; }; } // namespace CodeGen |