diff options
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 587 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.h | 43 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 30 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 57 | ||||
-rw-r--r-- | lib/CodeGen/CGStmtOpenMP.cpp | 239 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenFunction.h | 7 | ||||
-rw-r--r-- | test/OpenMP/critical_codegen.cpp | 4 | ||||
-rw-r--r-- | test/OpenMP/parallel_copyin_codegen.cpp | 16 | ||||
-rw-r--r-- | test/OpenMP/single_codegen.cpp | 10 | ||||
-rw-r--r-- | test/OpenMP/taskgroup_codegen.cpp | 1 |
10 files changed, 538 insertions, 456 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 810a2db3a5..75103db266 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -252,7 +252,7 @@ private: StringRef HelperName; }; -static void EmptyCodeGen(CodeGenFunction &) { +static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { llvm_unreachable("No codegen for expressions"); } /// \brief API for generation of expressions captured in a innermost OpenMP @@ -564,8 +564,33 @@ enum OpenMPRTLFunction { OMPRTL__tgt_unregister_lib, }; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class CleanupTy final : public EHScopeStack::Cleanup { + PrePostActionTy *Action; + +public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); + } +}; + } // anonymous namespace +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (PrePostAction) { + CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); + Callback(CodeGen, CGF, *PrePostAction); + } else { + PrePostActionTy Action; + Callback(CodeGen, CGF, Action); + } +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -581,10 +606,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. CGF.EHStack.pushTerminate(); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CodeGen(CGF); - } + CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -1644,12 +1666,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (CondConstant) { + if (CondConstant) ThenGen(CGF); - } else { + else ElseGen(CGF); - } return; } @@ -1662,26 +1682,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Emit the 'then' code. CGF.EmitBlock(ThenBlock); - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ThenGen(CGF); - } + ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - } - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ElseGen(CGF); - } - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBranch(ContBlock); - } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + ElseGen(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } @@ -1693,8 +1703,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [this, OutlinedFn, CapturedVars, - RTLoc](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [this, OutlinedFn, CapturedVars, + RTLoc](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); llvm::Value *Args[] = { RTLoc, @@ -1707,8 +1717,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, - Loc](CodeGenFunction &CGF) { + RegionCodeGenTy ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto ThreadID = getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); @@ -1733,12 +1743,10 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenGen(CGF); - } } // If we're inside an (outlined) parallel region, use the region info's @@ -1790,21 +1798,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { } namespace { -template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { - llvm::Value *Callee; - llvm::Value *Args[N]; +/// Common pre(post)-action for different OpenMP constructs. +class CommonActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; public: - CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) - : Callee(Callee) { - assert(CleanupArgs.size() == N); - std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); + CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } } - - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(Callee, Args); + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace @@ -1819,45 +1845,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; - CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; + llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), + std::end(Args)); if (Hint) { - llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), - std::end(Args)); - auto *HintVal = CGF.EmitScalarExpr(Hint); - ArgsWithHint.push_back( - CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), - ArgsWithHint); - } else - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - // Build a call to __kmpc_end_critical - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), - llvm::makeArrayRef(Args)); + EnterArgs.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + } + CommonActionTy Action( + createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint + : OMPRTL__kmpc_critical), + EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } -static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - OpenMPDirectiveKind Kind, SourceLocation Loc, - const RegionCodeGenTy &BodyOpGen) { - llvm::Value *CallBool = CGF.EmitScalarConversion( - IfCond, - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), - CGF.getContext().BoolTy, Loc); - - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto *ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); - // Emit the rest of bblocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); -} - void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { @@ -1869,18 +1872,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsMaster = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - MasterCallEndCleanup; - emitIfStmt( - CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<MasterCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), - llvm::makeArrayRef(Args)); - MasterOpGen(CGF); - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, + createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + /*Conditional=*/true); + MasterOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_master, MasterOpGen); + Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, @@ -1903,16 +1900,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); - // Build a call to __kmpc_end_taskgroup - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), - llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); - } + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, + createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + Args); + TaskgroupOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a @@ -2008,22 +2001,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsSingle = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - SingleCallEndCleanup; - emitIfStmt( - CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<SingleCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); - SingleOpGen(CGF); - if (DidIt.isValid()) { - // did_it = 1; - CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); - } - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, + createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + /*Conditional=*/true); + SingleOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_single, SingleOpGen); + if (DidIt.isValid()) { + // did_it = 1; + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); + } + Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); if (DidIt.isValid()) { @@ -2073,14 +2060,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); - // Build a call to __kmpc_end_ordered - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), - llvm::makeArrayRef(Args)); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, + createRuntimeFunction(OMPRTL__kmpc_end_ordered), + Args); + OrderedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); + return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } @@ -2596,12 +2583,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_unreg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_reg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); @@ -3469,9 +3458,10 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, NumDependencies, - &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { - // TODO: add check for untied tasks. + RegionCodeGenTy ThenCodeGen = [this, NumDependencies, &TaskArgs, + &DepTaskArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + // TODO: add check for untied tasks. if (NumDependencies) { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); @@ -3480,8 +3470,6 @@ void CGOpenMPRuntime::emitTaskCall( TaskArgs); } }; - typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> - IfCallEndCleanup; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { @@ -3493,7 +3481,8 @@ void CGOpenMPRuntime::emitTaskCall( DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { + NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, + PrePostActionTy &) { CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 @@ -3502,28 +3491,29 @@ void CGOpenMPRuntime::emitTaskCall( if (NumDependencies) CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); + // Call proxy_task_entry(gtid, new_task); + RegionCodeGenTy CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + }; + // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), - TaskArgs); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EHStack.pushCleanup<IfCallEndCleanup>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), - llvm::makeArrayRef(TaskArgs)); - - // Call proxy_task_entry(gtid, new_task); - llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + CommonActionTy Action( + createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, + createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); + CodeGen.setAction(Action); + CodeGen(CGF); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenCodeGen(CGF); - } } /// \brief Emit reduction operation for each element of array (required for @@ -3714,6 +3704,25 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return Fn; } +static void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { + if (PrivateRef->getType()->isArrayType()) { + // Emit reduction for array section. + auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + EmitOMPAggregateReduction( + CGF, PrivateRef->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, ReductionOp); + }); + } else + // Emit reduction for array subscript or single variable. + emitReductionCombiner(CGF, ReductionOp); +} + void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, @@ -3765,15 +3774,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { emitReductionCombiner(CGF, E); }); - } else - emitReductionCombiner(CGF, E); + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; @@ -3863,40 +3865,32 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait - : OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); + // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + RegionCodeGenTy CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { emitReductionCombiner(CGF, E); }); - } else - // Emit reduction for array subscript or single variable. - emitReductionCombiner(CGF, E); + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; } - } + }; + CommonActionTy Action( + nullptr, llvm::None, + createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait + : OMPRTL__kmpc_end_reduce), + EndArgs); + CodeGen.setAction(Action); + CodeGen(CGF); CGF.EmitBranch(DefaultBB); @@ -3909,106 +3903,111 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (!WithNowait) { - // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); - } + RegionCodeGenTy AtomicCodeGen = [this, Loc, &Privates, &LHSExprs, &RHSExprs, + &ReductionOps](CodeGenFunction &CGF, + PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { - const Expr *XExpr = nullptr; - const Expr *EExpr = nullptr; - const Expr *UpExpr = nullptr; - BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { - if (BO->getOpcode() == BO_Assign) { - XExpr = BO->getLHS(); - UpExpr = BO->getRHS(); - } + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); } - // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; - if (RHSExpr) { - // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( - RHSExpr->IgnoreParenImpCasts())) { - // If this is a conditional operator, analyze its condition for - // min/max reduction operator. - RHSExpr = ACO->getCond(); - } - if (auto *BORHS = - dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { - EExpr = BORHS->getRHS(); - BO = BORHS->getOpcode(); - } + } + // Try to emit update expression as a simple atomic. + auto *RHSExpr = UpExpr; + if (RHSExpr) { + // Analyze RHS part of the whole expression. + if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + RHSExpr->IgnoreParenImpCasts())) { + // If this is a conditional operator, analyze its condition for + // min/max reduction operator. + RHSExpr = ACO->getCond(); } - if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [this, BO, VD, IPriv, - Loc](CodeGenFunction &CGF, const Expr *XExpr, - const Expr *EExpr, const Expr *UpExpr) { - LValue X = CGF.EmitLValue(XExpr); - RValue E; - if (EExpr) - E = CGF.EmitAnyExpr(EExpr); - CGF.EmitOMPAtomicSimpleUpdateExpr( - X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.emitOMPSimpleStore( - CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, - VD->getType().getNonReferenceType(), Loc); - return LHSTemp; - }); - (void)PrivateScope.Privatize(); - return CGF.EmitAnyExpr(UpExpr); - }); - }; - if ((*IPriv)->getType()->isArrayType()) { - // Emit atomic reduction for array section. - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, - AtomicRedGen, XExpr, EExpr, UpExpr); - } else - // Emit atomic reduction for array subscript or single variable. - AtomicRedGen(CGF, XExpr, EExpr, UpExpr); - } else { - // Emit as a critical region. - auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - emitCriticalRegion( - CGF, ".atomic_reduction", - [=](CodeGenFunction &CGF) { emitReductionCombiner(CGF, E); }, - Loc); - }; - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - CritRedGen); - } else - CritRedGen(CGF, nullptr, nullptr, nullptr); + if (auto *BORHS = + dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); } + } + if (XExpr) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto &&AtomicRedGen = [this, BO, VD, IPriv, + Loc](CodeGenFunction &CGF, const Expr *XExpr, + const Expr *EExpr, const Expr *UpExpr) { + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + }; + if ((*IPriv)->getType()->isArrayType()) { + // Emit atomic reduction for array section. + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, + AtomicRedGen, XExpr, EExpr, UpExpr); + } else + // Emit atomic reduction for array subscript or single variable. + AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } else { + // Emit as a critical region. + auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, + const Expr *, const Expr *) { + emitCriticalRegion( + CGF, ".atomic_reduction", + [=](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitReductionCombiner(CGF, E); + }, + Loc); + }; + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + CritRedGen); + } else + CritRedGen(CGF, nullptr, nullptr, nullptr); + } ++ILHS; ++IRHS; ++IPriv; } - } + }; + if (!WithNowait) { + // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + CommonActionTy Action(nullptr, llvm::None, + createRuntimeFunction(OMPRTL__kmpc_end_reduce), + EndArgs); + AtomicCodeGen.setAction(Action); + AtomicCodeGen(CGF); + } else + AtomicCodeGen(CGF); CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); @@ -4105,8 +4104,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - auto &&ThenGen = [this, Loc, CancelRegion, - OMPRegionInfo](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [this, Loc, CancelRegion, OMPRegionInfo]( + CodeGenFunction &CGF, PrePostActionTy &) { llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; @@ -4131,7 +4130,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); + emitOMPIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); else ThenGen(CGF); } @@ -4167,21 +4167,9 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - - // Emit target region as a standalone region. - auto &&CodeGen = [&CS, &D](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope); - CGF.EmitOMPPrivateClause(D, PrivateScope); - (void)PrivateScope.Privatize(); - - CGF.EmitStmt(CS.getCapturedStmt()); - }; - emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -4471,9 +4459,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType, &D](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, + &MapTypes, hasVLACaptures, Device, OutlinedFnID, + OffloadError, OffloadErrorQType, + &D](CodeGenFunction &CGF, PrePostActionTy &) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4646,8 +4635,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, }; // Notify that the host version must be executed. - auto &&ElseGen = [this, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + RegionCodeGenTy ElseGen = [this, OffloadError, OffloadErrorQType]( + CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), OffloadError); }; @@ -4657,16 +4646,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenGen(CGF); - } - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + } else ElseGen(CGF); - } // Check the error code and execute the host version if required. auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); @@ -4708,8 +4693,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, llvm::Function *Fn; llvm::Constant *Addr; - emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, - /*isOffloadEntry=*/true); + std::tie(Fn, Addr) = + CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CGM, cast<OMPTargetDirective>(*E), ParentName, + /*isOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 1935e93e20..0923d53618 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -46,7 +46,44 @@ class Address; class CodeGenFunction; class CodeGenModule; -typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class PrePostActionTy { +public: + explicit PrePostActionTy() {} + virtual void Enter(CodeGenFunction &CGF) {} + virtual void Exit(CodeGenFunction &CGF) {} + virtual ~PrePostActionTy() {} +}; + +/// Class provides a way to call simple version of codegen for OpenMP region, or +/// an advanced with possible pre|post-actions in codegen. +class RegionCodeGenTy final { + intptr_t CodeGen; + typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &); + CodeGenTy Callback; + mutable PrePostActionTy *PrePostAction; + RegionCodeGenTy() = delete; + RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete; + template <typename Callable> + static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF, + PrePostActionTy &Action) { + return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action); + } + +public: + template <typename Callable> + RegionCodeGenTy( + Callable &&CodeGen, + typename std::enable_if< + !std::is_same<typename std::remove_reference<Callable>::type, + RegionCodeGenTy>::value>::type * = nullptr) + : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)), + Callback(CallbackFn<typename std::remove_reference<Callable>::type>), + PrePostAction(nullptr) {} + void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } + void operator()(CodeGenFunction &CGF) const; +}; class CGOpenMPRuntime { protected: @@ -810,13 +847,15 @@ public: /// \param OutlinedFn Outlined function value to be defined by this call. /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Code generation sequence for the \a D directive. /// An oulined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry); + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); /// \brief Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 3a634ac308..3b2b30cdb7 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -305,28 +305,32 @@ void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { if (!IsOffloadEntry) // Nothing to do. return; assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - EntryFunctionState EST; WorkerFunctionState WST(CGM); // Emit target region as a standalone region. - auto &&CodeGen = [&EST, &WST, &CS, &D, this](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope); - CGF.EmitOMPPrivateClause(D, PrivateScope); - (void)PrivateScope.Privatize(); - - emitEntryHeader(CGF, EST, WST); - CGF.EmitStmt(CS.getCapturedStmt()); - emitEntryFooter(CGF, EST); - }; + class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) + : RT(RT), EST(EST), WST(WST) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitEntryHeader(CGF, EST, WST); + } + void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); } + } Action(*this, EST, WST); + CodeGen.setAction(Action); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index b52bae04e0..6ef3d3c08b 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -24,6 +24,34 @@ namespace clang { namespace CodeGen { class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { +public: + class EntryFunctionState { + public: + llvm::BasicBlock *ExitBB; + + EntryFunctionState() : ExitBB(nullptr){}; + }; + + class WorkerFunctionState { + public: + llvm::Function *WorkerFn; + const CGFunctionInfo *CGFI; + + WorkerFunctionState(CodeGenModule &CGM); + + private: + void createWorkerFunction(CodeGenModule &CGM); + }; + + /// \brief Helper for target entry function. Guide the master and worker + /// threads to their respective locations. + void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + WorkerFunctionState &WST); + + /// \brief Signal termination of OMP execution. + void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + +private: // // NVPTX calls. // @@ -66,24 +94,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { // Outlined function for the workers to execute. llvm::GlobalVariable *WorkID; - class EntryFunctionState { - public: - llvm::BasicBlock *ExitBB; - - EntryFunctionState() : ExitBB(nullptr){}; - }; - - class WorkerFunctionState { - public: - llvm::Function *WorkerFn; - const CGFunctionInfo *CGFI; - - WorkerFunctionState(CodeGenModule &CGM); - - private: - void createWorkerFunction(CodeGenModule &CGM); - }; - /// \brief Initialize master-worker control state. void initializeEnvironment(); @@ -93,14 +103,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { /// \brief Helper for worker function. Emit body of worker loop. void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); - /// \brief Helper for target entry function. Guide the master and worker - /// threads to their respective locations. - void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, - WorkerFunctionState &WST); - - /// \brief Signal termination of OMP execution. - void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); - /// \brief Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. @@ -129,7 +131,8 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) override; + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) override; public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 30385cd8b9..87c9b3b4d1 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,8 +26,7 @@ using namespace CodeGen; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope { - CodeGenFunction::LexicalScope Scope; +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -48,10 +47,11 @@ class OMPLexicalScope { public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : Scope(CGF, S.getSourceRange()) { + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()) { emitPreInitStmt(CGF, S); } }; + } // namespace llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { @@ -1097,8 +1097,6 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). emitParallelOrTeamsOutlinedFunction(S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1110,7 +1108,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, CGF, NumThreads, NumThreadsClause->getLocStart()); } if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { - CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } @@ -1122,14 +1120,17 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, break; } } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - OMPLexicalScope Scope(*this, S); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); @@ -1465,7 +1466,7 @@ void CodeGenFunction::EmitOMPSimdFinal( } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { // if (PreCond) { // for (IV in 0..LastIteration) BODY; // <Final counter/linear vars updates>; @@ -1508,7 +1509,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitAlignedClause(CGF, S); CGF.EmitOMPLinearClauseInit(S); - bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); emitPrivateLoopCounters(CGF, LoopScope, S.counters(), @@ -1516,7 +1516,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitPrivateLinearVars(CGF, S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); - HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1526,9 +1526,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { }, [](CodeGenFunction &) {}); // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) { + if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S); - } CGF.EmitOMPReductionClauseFinal(S); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); @@ -1543,6 +1542,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitBlock(ContBlock, true); } }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -1928,11 +1928,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + }; { OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); - }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); } @@ -1945,11 +1946,12 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + }; { OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); - }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -1972,7 +1974,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast<CompoundStmt>(Stmt); bool HasLastprivates = false; - auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { auto &C = CGF.CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -2112,10 +2115,10 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } @@ -2137,17 +2140,17 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } + // Emit code for 'single' region along with 'copyprivate' clauses + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope SingleScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); + CGF.EmitOMPPrivateClause(S, SingleScope); + (void)SingleScope.Privatize(); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; { OMPLexicalScope Scope(*this, S); - // Emit code for 'single' region along with 'copyprivate' clauses - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope SingleScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); - CGF.EmitOMPPrivateClause(S, SingleScope); - (void)SingleScope.Privatize(); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }; CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); @@ -2162,21 +2165,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; Expr *Hint = nullptr; if (auto *HintClause = S.getSingleClause<OMPHintClause>()) Hint = HintClause->getHint(); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); @@ -2186,8 +2191,7 @@ void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); @@ -2197,8 +2201,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); @@ -2208,14 +2211,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); }; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitSections(S); + }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. - OMPLexicalScope Scope(*this, S); auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); @@ -2265,46 +2268,47 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } } auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF) { + CodeGenFunction &CGF, PrePostActionTy &) { // Set proper addresses for generated private copies. auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); - OMPPrivateScope Scope(CGF); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); - // Map privates. - llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> - PrivatePtrs; - llvm::SmallVector<llvm::Value *, 16> CallArgs; - CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); - } - for (auto *E : FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); + { + OMPPrivateScope Scope(CGF); + if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; + llvm::SmallVector<llvm::Value *, 16> CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : PrivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : FirstprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } } - CGF.EmitRuntimeCall(CopyFn, CallArgs); - for (auto &&Pair : PrivatePtrs) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + (void)Scope.Privatize(); + if (*PartId) { + // TODO: emit code for untied tasks. } + CGF.EmitStmt(CS->getCapturedStmt()); } - (void)Scope.Privatize(); - if (*PartId) { - // TODO: emit code for untied tasks. - } - CGF.EmitStmt(CS->getCapturedStmt()); }; auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, OMPD_task, CodeGen); @@ -2334,6 +2338,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskCall( *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, @@ -2355,10 +2360,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } @@ -2490,10 +2496,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPDistributeLoop(S); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, false); } @@ -2511,9 +2517,9 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (!S.getAssociatedStmt()) return; - OMPLexicalScope Scope(*this, S); auto *C = S.getSingleClause<OMPSIMDClause>(); - auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, + PrePostActionTy &Action) { if (C) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2521,10 +2527,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); } else { + Action.Enter(CGF); CGF.EmitStmt( cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); } }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } @@ -2970,18 +2978,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } } - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + PrePostActionTy &) { CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } +std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> +CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, + bool IsOffloadEntry) { + llvm::Function *OutlinedFn = nullptr; + llvm::Constant *OutlinedFnID = nullptr; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + return std::make_pair(OutlinedFn, OutlinedFnID); +} + void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { - OMPLexicalScope Scope(*this, S); const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -3027,9 +3056,9 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { ParentName = CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry); - + std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( + CGM, S, ParentName, IsOffloadEntry); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } @@ -3039,8 +3068,6 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). emitParallelOrTeamsOutlinedFunction(S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -3063,14 +3090,16 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, ThreadLimitVal, S.getLocStart()); } + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, CapturedVars); } void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); @@ -3112,10 +3141,12 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_data, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPTargetEnterDataDirective( @@ -3140,18 +3171,22 @@ void CodeGenFunction::EmitOMPTargetParallelForDirective( void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop_simd, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_taskloop_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 85d5f7a432..c4e5ecd900 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2366,6 +2366,13 @@ public: void EmitOMPDistributeDirective(const OMPDistributeDirective &S); void EmitOMPDistributeLoop(const OMPDistributeDirective &S); + /// Emit outlined function for the target directive. + static std::pair<llvm::Function * /*OutlinedFn*/, + llvm::Constant * /*OutlinedFnID*/> + EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, + const OMPTargetDirective &S, + StringRef ParentName, + bool IsOffloadEntry); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. diff --git a/test/OpenMP/critical_codegen.cpp b/test/OpenMP/critical_codegen.cpp index e44e2202e9..aad60cfd4f 100644 --- a/test/OpenMP/critical_codegen.cpp +++ b/test/OpenMP/critical_codegen.cpp @@ -39,7 +39,11 @@ int main() { #pragma omp critical(the_name1) hint(23) foo(); // CHECK: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) +// CHECK: br label // CHECK-NOT: call {{.*}}void @__kmpc_end_critical( +// CHECK: br label +// CHECK-NOT: call {{.*}}void @__kmpc_end_critical( +// CHECK: br label if (a) #pragma omp critical(the_name) while (1) diff --git a/test/OpenMP/parallel_copyin_codegen.cpp b/test/OpenMP/parallel_copyin_codegen.cpp index ff76cfe4dd..49e7b3fd61 100644 --- a/test/OpenMP/parallel_copyin_codegen.cpp +++ b/test/OpenMP/parallel_copyin_codegen.cpp @@ -87,10 +87,6 @@ int main() { // TLS-LAMBDA: [[G_CPY_VAL:%.+]] = call{{( cxx_fast_tlscc)?}} i{{[0-9]+}}* [[G_CTOR:@.+]]() // TLS-LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G_CPY_VAL]]) - // TLS-LAMBDA: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]() - // TLS-LAMBDA: ret i{{[0-9]+}}* [[G]] - // TLS-LAMBDA: } - #pragma omp parallel copyin(g) { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) @@ -122,6 +118,11 @@ int main() { g = 1; // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* // TLS-LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* + + // TLS-LAMBDA: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]() + // TLS-LAMBDA: ret i{{[0-9]+}}* [[G]] + // TLS-LAMBDA: } + [&]() { // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], @@ -149,9 +150,6 @@ int main() { // TLS-BLOCKS: [[G_CPY_VAL:%.+]] = call{{( cxx_fast_tlscc)?}} i{{[0-9]+}}* [[G_CTOR:@.+]]() // TLS-BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G_CPY_VAL]]) - // TLS-BLOCKS: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]() - // TLS-BLOCKS: ret i{{[0-9]+}}* [[G]] - // TLS-BLOCKS: } #pragma omp parallel copyin(g) { // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) @@ -189,6 +187,10 @@ int main() { // TLS-BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_CAPTURE_DST]] // TLS-BLOCKS-NOT: [[G]]{{[[^:word:]]}} // TLS-BLOCKS: call {{.*}}void {{%.+}}(i8 + + // TLS-BLOCKS: define {{.*}}i{{[0-9]+}}* [[G_CTOR]]() + // TLS-BLOCKS: ret i{{[0-9]+}}* [[G]] + // TLS-BLOCKS: } ^{ // BLOCKS: define {{.+}} void {{@.+}}(i8* // TLS-BLOCKS: define {{.+}} void {{@.+}}(i8* diff --git a/test/OpenMP/single_codegen.cpp b/test/OpenMP/single_codegen.cpp index 58c5e82c16..ca1f80245d 100644 --- a/test/OpenMP/single_codegen.cpp +++ b/test/OpenMP/single_codegen.cpp @@ -111,8 +111,8 @@ int main() { // CHECK-NEXT: invoke void [[FOO]]() // CHECK: to label {{%?}}[[CONT:.+]] unwind // CHECK: [[CONT]] -// CHECK: store i32 1, i32* [[DID_IT]] // CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: store i32 1, i32* [[DID_IT]] // CHECK-NEXT: br label {{%?}}[[EXIT]] // CHECK: [[EXIT]] // CHECK: [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -255,8 +255,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK-LABEL: invoke void @_ZZN2SSC1ERiENKUlvE_clEv( // CHECK-SAME: [[CAP_TY]]* [[CAP]]) -// CHECK: store i32 1, i32* [[DID_IT]], // CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// CHECK: store i32 1, i32* [[DID_IT]], // CHECK: br label // CHECK: call void @__kmpc_end_single(%{{.+}}* @{{.+}}, i32 %{{.+}}) @@ -334,8 +334,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK-NEXT: load i32, i32* % // CHECK-NEXT: sdiv i32 %{{.+}}, 1 // CHECK-NEXT: store i32 % -// CHECK-NEXT: store i32 1, i32* [[DID_IT]], // CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// CHECK-NEXT: store i32 1, i32* [[DID_IT]], // CHECK-NEXT: br label // CHECK: getelementptr inbounds [3 x i8*], [3 x i8*]* [[LIST:%.+]], i64 0, i64 0 @@ -376,8 +376,8 @@ void array_func(int n, int a[n], St s[2]) { // CHECK-NEXT: store double* % // CHECK-LABEL: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv( -// CHECK: store i32 1, i32* [[DID_IT]], -// CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// CHECK-NEXT: store i32 1, i32* [[DID_IT]], // CHECK-NEXT: br label // CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) diff --git a/test/OpenMP/taskgroup_codegen.cpp b/test/OpenMP/taskgroup_codegen.cpp index d1bc2aafc7..0f6e81b3ba 100644 --- a/test/OpenMP/taskgroup_codegen.cpp +++ b/test/OpenMP/taskgroup_codegen.cpp @@ -32,6 +32,7 @@ int main() { foo(); // CHECK-NOT: call {{.*}}void @__kmpc_taskgroup // CHECK-NOT: call {{.*}}void @__kmpc_end_taskgroup +// CHECK: ret return a; } |