summary refs log tree commit diff stats
path: root/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.cpp 1443
1 files changed, 1123 insertions, 320 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 20eb0b29f4..c3f60d7f60 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1,9 +1,8 @@
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,7 +21,6 @@
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
@@ -432,7 +430,7 @@ public:
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
-/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
/// Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
@@ -461,7 +459,7 @@ enum OpenMPLocationFlags : unsigned {
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
-/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
/// kmp_int32 reserved_1; /**< might be used in Fortran;
@@ -669,6 +667,10 @@ enum OpenMPRTLFunction {
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
+ // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
+ OMPRTL__kmpc_alloc,
+ // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
+ OMPRTL__kmpc_free,
//
// Offloading related calls
@@ -1340,7 +1342,7 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
return UDRMap.lookup(D);
}
-static llvm::Value *emitParallelOrTeamsOutlinedFunction(
+static llvm::Function *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
@@ -1370,7 +1372,7 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction(
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
-llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
@@ -1378,7 +1380,7 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
-llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
+llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
@@ -1386,7 +1388,7 @@ llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
-llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
+llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
@@ -1417,7 +1419,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
InnermostKind,
TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
+ llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
NumberOfParts = Action.getNumberOfParts();
return Res;
@@ -1478,7 +1480,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
// Initialize default location for psource field of ident_t structure of
// all ident_t objects. Format is ";file;function;line;column;;".
// Taken from
- // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
+ // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
DefaultOpenMPPSource =
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
DefaultOpenMPPSource =
@@ -1665,9 +1667,8 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
-llvm::Constant *
-CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
- llvm::Constant *RTLFn = nullptr;
+llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
+ llvm::FunctionCallee RTLFn = nullptr;
switch (static_cast<OpenMPRTLFunction>(Function)) {
case OMPRTL__kmpc_fork_call: {
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
@@ -1677,6 +1678,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
+ if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
+ if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
+ llvm::LLVMContext &Ctx = F->getContext();
+ llvm::MDBuilder MDB(Ctx);
+ // Annotate the callback behavior of the __kmpc_fork_call:
+ // - The callback callee is argument number 2 (microtask).
+ // - The first two arguments of the callback callee are unknown (-1).
+ // - All variadic arguments to the __kmpc_fork_call are passed to the
+ // callback callee.
+ F->addMetadata(
+ llvm::LLVMContext::MD_callback,
+ *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
+ 2, {-1, -1},
+ /* VarArgsArePassed */ true)}));
+ }
+ }
break;
}
case OMPRTL__kmpc_global_thread_num: {
@@ -2084,6 +2101,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
+ if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
+ if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
+ llvm::LLVMContext &Ctx = F->getContext();
+ llvm::MDBuilder MDB(Ctx);
+ // Annotate the callback behavior of the __kmpc_fork_teams:
+ // - The callback callee is argument number 2 (microtask).
+ // - The first two arguments of the callback callee are unknown (-1).
+ // - All variadic arguments to the __kmpc_fork_teams are passed to the
+ // callback callee.
+ F->addMetadata(
+ llvm::LLVMContext::MD_callback,
+ *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
+ 2, {-1, -1},
+ /* VarArgsArePassed */ true)}));
+ }
+ }
break;
}
case OMPRTL__kmpc_taskloop: {
@@ -2166,6 +2199,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_alloc: {
+ // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
+ // al); omp_allocator_handle_t type is void *.
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
+ break;
+ }
+ case OMPRTL__kmpc_free: {
+ // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
+ // al); omp_allocator_handle_t type is void *.
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
+ break;
+ }
case OMPRTL__kmpc_push_target_tripcount: {
// Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
// size);
@@ -2355,8 +2406,8 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
return RTLFn;
}
-llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
- bool IVSigned) {
+llvm::FunctionCallee
+CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
@@ -2381,8 +2432,8 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
return CGM.CreateRuntimeFunction(FnTy, Name);
}
-llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
- bool IVSigned) {
+llvm::FunctionCallee
+CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name =
@@ -2403,8 +2454,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
return CGM.CreateRuntimeFunction(FnTy, Name);
}
-llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
- bool IVSigned) {
+llvm::FunctionCallee
+CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name =
@@ -2420,8 +2471,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
return CGM.CreateRuntimeFunction(FnTy, Name);
}
-llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
- bool IVSigned) {
+llvm::FunctionCallee
+CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name =
@@ -2836,7 +2887,7 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
}
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) {
if (!CGF.HaveInsertPoint())
@@ -2854,7 +2905,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
+ llvm::FunctionCallee RTLFn =
+ RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
@@ -2915,9 +2967,8 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
return ThreadIDTemp;
}
-llvm::Constant *
-CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
- const llvm::Twine &Name) {
+llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
+ llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << Name;
@@ -2932,7 +2983,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
return Elem.second = new llvm::GlobalVariable(
CGM.getModule(), Ty, /*IsConstant*/ false,
llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
- Elem.first());
+ Elem.first(), /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
@@ -2944,17 +2996,18 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
- llvm::Value *EnterCallee;
+ llvm::FunctionCallee EnterCallee;
ArrayRef<llvm::Value *> EnterArgs;
- llvm::Value *ExitCallee;
+ llvm::FunctionCallee ExitCallee;
ArrayRef<llvm::Value *> ExitArgs;
bool Conditional;
llvm::BasicBlock *ContBlock = nullptr;
public:
- CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
- llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
- bool Conditional = false)
+ CommonActionTy(llvm::FunctionCallee EnterCallee,
+ ArrayRef<llvm::Value *> EnterArgs,
+ llvm::FunctionCallee ExitCallee,
+ ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
ExitArgs(ExitArgs), Conditional(Conditional) {}
void Enter(CodeGenFunction &CGF) override {
@@ -3059,8 +3112,7 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
unsigned Index, const VarDecl *Var) {
// Pull out the pointer to the variable.
- Address PtrAddr =
- CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
+ Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
@@ -3176,8 +3228,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
- Address Elem = CGF.Builder.CreateConstArrayGEP(
- CopyprivateList, I, CGF.getPointerSize());
+ Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
@@ -3241,6 +3292,24 @@ unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
return Flags;
}
+void CGOpenMPRuntime::getDefaultScheduleAndChunk(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
+ // S is treated as a doacross loop if any of its ordered clauses returns a
+ // getNumForLoops() value that converts to true. Only in that case are the out-parameters ScheduleKind and ChunkExpr overwritten (static, 1).
+ if (llvm::any_of(
+ S.getClausesOfKind<OMPOrderedClause>(),
+ [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
+ ScheduleKind = OMPC_SCHEDULE_static;
+ // Chunk size is the literal 1, typed as a 32-bit unsigned integer.
+ llvm::APInt ChunkSize(32, 1);
+ ChunkExpr = IntegerLiteral::Create(
+ CGF.getContext(), ChunkSize,
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation()); // default-constructed location: the literal is synthesized here
+ }
+}
+
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind Kind, bool EmitChecks,
bool ForceSimpleCall) {
@@ -3412,7 +3481,7 @@ void CGOpenMPRuntime::emitForDispatchInit(
static void emitForStaticInitCall(
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
- llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
+ llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
const CGOpenMPRuntime::StaticRTInput &Values) {
if (!CGF.HaveInsertPoint())
@@ -3473,7 +3542,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
? OMP_IDENT_WORK_LOOP
: OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
- llvm::Constant *StaticInitFunction =
+ llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -3488,7 +3557,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
llvm::Value *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
- llvm::Constant *StaticInitFunction =
+ llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
@@ -3731,14 +3800,29 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
"Entry not initialized!");
assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
"Resetting with the new address.");
- if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
+ if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
+ if (Entry.getVarSize().isZero()) {
+ Entry.setVarSize(VarSize);
+ Entry.setLinkage(Linkage);
+ }
return;
- Entry.setAddress(Addr);
+ }
Entry.setVarSize(VarSize);
Entry.setLinkage(Linkage);
+ Entry.setAddress(Addr);
} else {
- if (hasDeviceGlobalVarEntryInfo(VarName))
+ if (hasDeviceGlobalVarEntryInfo(VarName)) {
+ auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
+ assert(Entry.isValid() && Entry.getFlags() == Flags &&
+ "Entry not initialized!");
+ assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
+ "Resetting with the new address.");
+ if (Entry.getVarSize().isZero()) {
+ Entry.setVarSize(VarSize);
+ Entry.setLinkage(Linkage);
+ }
return;
+ }
OffloadEntriesDeviceGlobalVar.try_emplace(
VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
++OffloadingEntriesNum;
@@ -4364,12 +4448,12 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// return 0;
/// }
/// \endcode
-static llvm::Value *
+static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
QualType KmpTaskTWithPrivatesPtrQTy,
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
- QualType SharedsPtrTy, llvm::Value *TaskFunction,
+ QualType SharedsPtrTy, llvm::Function *TaskFunction,
llvm::Value *TaskPrivatesMap) {
ASTContext &C = CGM.getContext();
FunctionArgList Args;
@@ -4614,11 +4698,6 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
return TaskPrivatesMap;
}
-static bool stable_sort_comparator(const PrivateDataTy P1,
- const PrivateDataTy P2) {
- return P1.first > P2.first;
-}
-
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
@@ -4661,7 +4740,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Check if the variable is the target-based BasePointersArray,
// PointersArray or SizesArray.
LValue SharedRefLValue;
- QualType Type = OriginalVD->getType();
+ QualType Type = PrivateLValue.getType();
const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
if (IsTargetTask && !SharedField) {
assert(isa<ImplicitParamDecl>(OriginalVD) &&
@@ -4837,7 +4916,7 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
- llvm::Value *TaskFunction, QualType SharedsTy,
+ llvm::Function *TaskFunction, QualType SharedsTy,
Address Shareds, const OMPTaskDataTy &Data) {
ASTContext &C = CGM.getContext();
llvm::SmallVector<PrivateDataTy, 4> Privates;
@@ -4872,7 +4951,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr));
++I;
}
- std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
+ llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
+ return L.first > R.first;
+ });
QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4911,7 +4992,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
llvm::Type *TaskPrivatesMapTy =
- std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
+ std::next(TaskFunction->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
@@ -4925,7 +5006,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
// kmp_task_t *tt);
- llvm::Value *TaskEntry = emitProxyTaskFunction(
+ llvm::Function *TaskEntry = emitProxyTaskFunction(
CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
@@ -4934,7 +5015,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
// Task flags. Format is taken from
- // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
+ // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
// description of kmp_tasking_flags struct.
enum {
TiedFlag = 0x1,
@@ -5037,7 +5118,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
- llvm::Value *TaskFunction,
+ llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
const Expr *IfCond,
const OMPTaskDataTy &Data) {
@@ -5047,7 +5128,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
TaskResultTy Result =
emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
llvm::Value *NewTask = Result.NewTask;
- llvm::Value *TaskEntry = Result.TaskEntry;
+ llvm::Function *TaskEntry = Result.TaskEntry;
llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
LValue TDBase = Result.TDBase;
const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
@@ -5057,7 +5138,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
unsigned NumDependencies = Data.Dependences.size();
if (NumDependencies) {
// Dependence kind for RTL.
- enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
+ enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
RecordDecl *KmpDependInfoRD;
QualType FlagsTy =
@@ -5074,7 +5155,6 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
} else {
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
}
- CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
@@ -5101,7 +5181,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
Size = CGF.getTypeSize(Ty);
}
LValue Base = CGF.MakeAddrLValue(
- CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize),
+ CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
KmpDependInfoTy);
// deps[i].base_addr = &<Dependences[i].second>;
LValue BaseAddrLVal = CGF.EmitLValueForField(
@@ -5124,6 +5204,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
case OMPC_DEPEND_inout:
DepKind = DepInOut;
break;
+ case OMPC_DEPEND_mutexinoutset:
+ DepKind = DepMutexInOutSet;
+ break;
case OMPC_DEPEND_source:
case OMPC_DEPEND_sink:
case OMPC_DEPEND_unknown:
@@ -5135,8 +5218,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
FlagsLVal);
}
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
- CGF.VoidPtrTy);
+ CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
}
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
@@ -5231,7 +5313,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPLoopDirective &D,
- llvm::Value *TaskFunction,
+ llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
const Expr *IfCond,
const OMPTaskDataTy &Data) {
@@ -5411,10 +5493,10 @@ static void emitReductionCombiner(CodeGenFunction &CGF,
CGF.EmitIgnoredExpr(ReductionOp);
}
-llvm::Value *CGOpenMPRuntime::emitReductionFunction(
- CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType,
- ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
- ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
+llvm::Function *CGOpenMPRuntime::emitReductionFunction(
+ SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps) {
ASTContext &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
@@ -5466,8 +5548,7 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction(
if (PrivTy->isVariablyModifiedType()) {
// Get array size and emit VLA type.
++Idx;
- Address Elem =
- CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
+ Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
const VariableArrayType *VLA =
CGF.getContext().getAsVariableArrayType(PrivTy);
@@ -5605,8 +5686,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
auto IPriv = Privates.begin();
unsigned Idx = 0;
for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
- Address Elem =
- CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
+ Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
@@ -5614,8 +5694,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
- Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
- CGF.getPointerSize());
+ Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
llvm::Value *Size = CGF.Builder.CreateIntCast(
CGF.getVLASize(
CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
@@ -5627,9 +5706,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
// 2. Emit reduce_func().
- llvm::Value *ReductionFn = emitReductionFunction(
- CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
- Privates, LHSExprs, RHSExprs, ReductionOps);
+ llvm::Function *ReductionFn = emitReductionFunction(
+ Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
+ LHSExprs, RHSExprs, ReductionOps);
// 3. Create static kmp_critical_name lock = { 0 };
std::string Name = getName({"reduction"});
@@ -6393,12 +6472,59 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
-/// discard all CompoundStmts intervening between two constructs
-static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
- while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
- Body = CS->body_front();
+/// Returns true if \p E is evaluatable or has no non-trivial function calls,
+/// and additionally has no side effects (possible side effects included).
+static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+ // An expression qualifies if it is evaluatable or contains no non-trivial call,
+ // but in either case it is rejected as soon as it may have side effects.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
- return Body;
+const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
+ const Stmt *Body) { // Result: the one non-ignorable statement reached from Body, else nullptr.
+ const Stmt *Child = Body->IgnoreContainers(); // NOTE(review): presumably strips wrapper statements - confirm against Stmt::IgnoreContainers
+ while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
+ Child = nullptr; // reset; set again only when a candidate is found in this compound
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E)) // trivial expressions never count as the child
+ continue;
+ }
+ // Asm, null, flush, barrier and taskyield statements never count as the child.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // A DeclStmt is skipped only if every declaration in it passes the predicate below.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD) // any other non-variable declaration blocks skipping
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // A second non-ignorable statement means there is no single child - give up.
+ if (Child)
+ return nullptr;
+ Child = S;
+ }
+ if (Child)
+ Child = Child->IgnoreContainers(); // if this yields a CompoundStmt, the while loop scans it next
+ }
+ return Child;
}
/// Emit the number of teams for a target directive. Inspect the num_teams
@@ -6410,63 +6536,208 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
///
/// Otherwise, return nullptr.
static llvm::Value *
-emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
+emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
- assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
- "teams directive expected to be "
- "emitted only for the host!");
-
+ assert(!CGF.getLangOpts().OpenMPIsDevice &&
+ "Clauses associated with the teams directive expected to be emitted "
+ "only for the host!");
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
+ "Expected target-based executable directive.");
CGBuilderTy &Bld = CGF.Builder;
-
- // If the target directive is combined with a teams directive:
- // Return the value in the num_teams clause, if any.
- // Otherwise, return 0 to denote the runtime default.
- if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
- if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
+ switch (DirectiveKind) {
+ case OMPD_target: {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt =
+ CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
+ if (const auto *NestedDir =
+ dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
+ if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
+ if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ const Expr *NumTeams =
+ NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
+ llvm::Value *NumTeamsVal =
+ CGF.EmitScalarExpr(NumTeams,
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+ return Bld.getInt32(0);
+ }
+ if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
+ isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
+ return Bld.getInt32(1);
+ return Bld.getInt32(0);
+ }
+ return nullptr;
+ }
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd: {
+ if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
- llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
- /*IgnoreResultAssign*/ true);
- return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ const Expr *NumTeams =
+ D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
+ llvm::Value *NumTeamsVal =
+ CGF.EmitScalarExpr(NumTeams,
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
/*IsSigned=*/true);
}
-
- // The default value is 0.
return Bld.getInt32(0);
}
-
- // If the target directive is combined with a parallel directive but not a
- // teams directive, start one team.
- if (isOpenMPParallelDirective(D.getDirectiveKind()))
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
return Bld.getInt32(1);
-
- // If the current target region has a teams region enclosed, we need to get
- // the number of teams to pass to the runtime function call. This is done
- // by generating the expression in a inlined region. This is required because
- // the expression is captured in the enclosing target environment when the
- // teams directive is not combined with target.
-
- const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
-
- if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
- ignoreCompoundStmts(CS.getCapturedStmt()))) {
- if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
- if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ break;
+ }
+ llvm_unreachable("Unexpected directive kind.");
+}
+
+static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
+ llvm::Value *DefaultThreadLimitVal) {
+ const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
+ CGF.getContext(), CS->getCapturedStmt());
+ if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
+ if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
+ llvm::Value *NumThreads = nullptr;
+ llvm::Value *CondVal = nullptr;
+ // Handle the if clause. If an if clause is present, the number of threads
+ // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
+ if (Dir->hasClausesOfKind<OMPIfClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
+ const OMPIfClause *IfClause = nullptr;
+ for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_parallel) {
+ IfClause = C;
+ break;
+ }
+ }
+ if (IfClause) {
+ const Expr *Cond = IfClause->getCondition();
+ bool Result;
+ if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
+ if (!Result)
+ return CGF.Builder.getInt32(1);
+ } else {
+ CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
+ }
+ }
+ CondVal = CGF.EvaluateExprAsBool(Cond);
+ }
+ }
}
-
- // If we have an enclosed teams directive but no num_teams clause we use
- // the default value 0.
- return Bld.getInt32(0);
+ // Check the value of the num_threads clause iff the if clause was not
+ // specified or does not evaluate to false.
+ if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ const auto *NumThreadsClause =
+ Dir->getSingleClause<OMPNumThreadsClause>();
+ CodeGenFunction::LexicalScope Scope(
+ CGF, NumThreadsClause->getNumThreads()->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
+ }
+ }
+ NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
+ NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
+ /*IsSigned=*/false);
+ if (DefaultThreadLimitVal)
+ NumThreads = CGF.Builder.CreateSelect(
+ CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
+ DefaultThreadLimitVal, NumThreads);
+ } else {
+ NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
+ : CGF.Builder.getInt32(0);
+ }
+ // Process condition of the if clause.
+ if (CondVal) {
+ NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
+ CGF.Builder.getInt32(1));
+ }
+ return NumThreads;
}
+ if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
+ return CGF.Builder.getInt32(1);
+ return DefaultThreadLimitVal;
}
-
- // No teams associated with the directive.
- return nullptr;
+ return DefaultThreadLimitVal ? DefaultThreadLimitVal
+ : CGF.Builder.getInt32(0);
}
/// Emit the number of threads for a target directive. Inspect the
@@ -6478,98 +6749,208 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
///
/// Otherwise, return nullptr.
static llvm::Value *
-emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
+emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D) {
- assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
- "teams directive expected to be "
- "emitted only for the host!");
-
+ assert(!CGF.getLangOpts().OpenMPIsDevice &&
+ "Clauses associated with the teams directive expected to be emitted "
+ "only for the host!");
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
+ "Expected target-based executable directive.");
CGBuilderTy &Bld = CGF.Builder;
-
- //
- // If the target directive is combined with a teams directive:
- // Return the value in the thread_limit clause, if any.
- //
- // If the target directive is combined with a parallel directive:
- // Return the value in the num_threads clause, if any.
- //
- // If both clauses are set, select the minimum of the two.
- //
- // If neither teams or parallel combined directives set the number of threads
- // in a team, return 0 to denote the runtime default.
- //
- // If this is not a teams directive return nullptr.
-
- if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
- isOpenMPParallelDirective(D.getDirectiveKind())) {
- llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
- llvm::Value *NumThreadsVal = nullptr;
- llvm::Value *ThreadLimitVal = nullptr;
-
- if (const auto *ThreadLimitClause =
- D.getSingleClause<OMPThreadLimitClause>()) {
+ llvm::Value *ThreadLimitVal = nullptr;
+ llvm::Value *NumThreadsVal = nullptr;
+ switch (DirectiveKind) {
+ case OMPD_target: {
+ const CapturedStmt *CS = D.getInnermostCapturedStmt();
+ if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
+ return NumThreads;
+ const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
+ CGF.getContext(), CS->getCapturedStmt());
+ if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
+ if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ const auto *ThreadLimitClause =
+ Dir->getSingleClause<OMPThreadLimitClause>();
+ CodeGenFunction::LexicalScope Scope(
+ CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
+ }
+ }
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+ }
+ if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
+ !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
+ CS = Dir->getInnermostCapturedStmt();
+ const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
+ CGF.getContext(), CS->getCapturedStmt());
+ Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
+ }
+ if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
+ !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
+ CS = Dir->getInnermostCapturedStmt();
+ if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
+ return NumThreads;
+ }
+ if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
+ return Bld.getInt32(1);
+ }
+ return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
+ }
+ case OMPD_target_teams: {
+ if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
- llvm::Value *ThreadLimit =
- CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
- /*IgnoreResultAssign*/ true);
- ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
- /*IsSigned=*/true);
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
}
-
- if (const auto *NumThreadsClause =
- D.getSingleClause<OMPNumThreadsClause>()) {
+ const CapturedStmt *CS = D.getInnermostCapturedStmt();
+ if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
+ return NumThreads;
+ const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
+ CGF.getContext(), CS->getCapturedStmt());
+ if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
+ if (Dir->getDirectiveKind() == OMPD_distribute) {
+ CS = Dir->getInnermostCapturedStmt();
+ if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
+ return NumThreads;
+ }
+ }
+ return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
+ }
+ case OMPD_target_teams_distribute:
+ if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+ }
+ return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd: {
+ llvm::Value *CondVal = nullptr;
+ // Handle the if clause. If an if clause is present, the number of threads
+ // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
+ if (D.hasClausesOfKind<OMPIfClause>()) {
+ const OMPIfClause *IfClause = nullptr;
+ for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_parallel) {
+ IfClause = C;
+ break;
+ }
+ }
+ if (IfClause) {
+ const Expr *Cond = IfClause->getCondition();
+ bool Result;
+ if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
+ if (!Result)
+ return Bld.getInt32(1);
+ } else {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ CondVal = CGF.EvaluateExprAsBool(Cond);
+ }
+ }
+ }
+ if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+ }
+ if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
- llvm::Value *NumThreads =
- CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
- /*IgnoreResultAssign*/ true);
+ const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
+ llvm::Value *NumThreads = CGF.EmitScalarExpr(
+ NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
NumThreadsVal =
- Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
- }
-
- // Select the lesser of thread_limit and num_threads.
- if (NumThreadsVal)
+ Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
ThreadLimitVal = ThreadLimitVal
- ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
+ ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
ThreadLimitVal),
NumThreadsVal, ThreadLimitVal)
: NumThreadsVal;
-
- // Set default value passed to the runtime if either teams or a target
- // parallel type directive is found but no clause is specified.
+ }
if (!ThreadLimitVal)
- ThreadLimitVal = DefaultThreadLimitVal;
-
+ ThreadLimitVal = Bld.getInt32(0);
+ if (CondVal)
+ return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
return ThreadLimitVal;
}
-
- // If the current target region has a teams region enclosed, we need to get
- // the thread limit to pass to the runtime function call. This is done
- // by generating the expression in a inlined region. This is required because
- // the expression is captured in the enclosing target environment when the
- // teams directive is not combined with target.
-
- const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
-
- if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
- ignoreCompoundStmts(CS.getCapturedStmt()))) {
- if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
- if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
- return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
- /*IsSigned=*/true);
- }
-
- // If we have an enclosed teams directive but no thread_limit clause we
- // use the default value 0.
- return CGF.Builder.getInt32(0);
- }
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_simd:
+ return Bld.getInt32(1);
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ break;
}
-
- // No teams associated with the directive.
- return nullptr;
+ llvm_unreachable("Unsupported directive kind.");
}
namespace {
@@ -7135,7 +7516,7 @@ private:
Address HB = CGF.Builder.CreateConstGEP(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
CGF.VoidPtrTy),
- TypeSize.getQuantity() - 1, CharUnits::One());
+ TypeSize.getQuantity() - 1);
PartialStruct.HighestElem = {
std::numeric_limits<decltype(
PartialStruct.HighestElem.first)>::max(),
@@ -7169,15 +7550,13 @@ private:
Pointers.push_back(LB.getPointer());
Sizes.push_back(Size);
Types.push_back(Flags);
- LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
- CGF.getPointerSize());
+ LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
BasePointers.push_back(BP.getPointer());
Pointers.push_back(LB.getPointer());
Size = CGF.Builder.CreatePtrDiff(
CGF.EmitCastToVoidPtr(
- CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
- .getPointer()),
+ CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
CGF.EmitCastToVoidPtr(LB.getPointer()));
Sizes.push_back(Size);
Types.push_back(Flags);
@@ -7260,9 +7639,17 @@ private:
// A first private variable captured by reference will use only the
// 'private ptr' and 'map to' flag. Return the right flags if the captured
// declaration is known as first-private in this handler.
- if (FirstPrivateDecls.count(Cap.getCapturedVar()))
+ if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
+ if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
+ Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
+ return MappableExprsHandler::OMP_MAP_ALWAYS |
+ MappableExprsHandler::OMP_MAP_TO;
+ if (Cap.getCapturedVar()->getType()->isAnyPointerType())
+ return MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
return MappableExprsHandler::OMP_MAP_PRIVATE |
MappableExprsHandler::OMP_MAP_TO;
+ }
return MappableExprsHandler::OMP_MAP_TO |
MappableExprsHandler::OMP_MAP_FROM;
}
@@ -7889,9 +8276,6 @@ public:
}
} else {
assert(CI.capturesVariable() && "Expected captured reference.");
- CurBasePointers.push_back(CV);
- CurPointers.push_back(CV);
-
const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
QualType ElementType = PtrTy->getPointeeType();
CurSizes.push_back(CGF.getTypeSize(ElementType));
@@ -7899,6 +8283,30 @@ public:
// default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'.
CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
+ const VarDecl *VD = CI.getCapturedVar();
+ if (FirstPrivateDecls.count(VD) &&
+ VD->getType().isConstant(CGF.getContext())) {
+ llvm::Constant *Addr =
+ CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
+ // Copy the value of the original variable to the new global copy.
+ CGF.Builder.CreateMemCpy(
+ CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
+ Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
+ CurSizes.back(), /*isVolatile=*/false);
+ // Use new global variable as the base pointers.
+ CurBasePointers.push_back(Addr);
+ CurPointers.push_back(Addr);
+ } else {
+ CurBasePointers.push_back(CV);
+ if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
+ Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
+ CV, ElementType, CGF.getContext().getDeclAlign(VD),
+ AlignmentSource::Decl));
+ CurPointers.push_back(PtrAddr.getPointer());
+ } else {
+ CurPointers.push_back(CV);
+ }
+ }
}
// Every default map produces a single argument which is a target parameter.
CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
@@ -8065,70 +8473,17 @@ static void emitOffloadingArraysArgument(
}
}
-/// Checks if the expression is constant or does not have non-trivial function
-/// calls.
-static bool isTrivial(ASTContext &Ctx, const Expr * E) {
- // We can skip constant expressions.
- // We can skip expressions with trivial calls or simple expressions.
- return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
- !E->hasNonTrivialCall(Ctx)) &&
- !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
-}
-
-/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
-/// iff there is only one that is not evaluatable at the compile time.
-static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
- if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
- const Stmt *Child = nullptr;
- for (const Stmt *S : C->body()) {
- if (const auto *E = dyn_cast<Expr>(S)) {
- if (isTrivial(Ctx, E))
- continue;
- }
- // Some of the statements can be ignored.
- if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
- isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
- continue;
- // Analyze declarations.
- if (const auto *DS = dyn_cast<DeclStmt>(S)) {
- if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
- if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
- isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
- isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
- isa<UsingDirectiveDecl>(D) ||
- isa<OMPDeclareReductionDecl>(D) ||
- isa<OMPThreadPrivateDecl>(D))
- return true;
- const auto *VD = dyn_cast<VarDecl>(D);
- if (!VD)
- return false;
- return VD->isConstexpr() ||
- ((VD->getType().isTrivialType(Ctx) ||
- VD->getType()->isReferenceType()) &&
- (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
- }))
- continue;
- }
- // Found multiple children - cannot get the one child only.
- if (Child)
- return Body;
- Child = S;
- }
- if (Child)
- return Child;
- }
- return Body;
-}
-
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
const auto *CS = D.getInnermostCapturedStmt();
const auto *Body =
CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+ const Stmt *ChildStmt =
+ CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ if (const auto *NestedDir =
+ dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
switch (D.getDirectiveKind()) {
case OMPD_target:
@@ -8139,8 +8494,9 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
/*IgnoreCaptured=*/true);
if (!Body)
return nullptr;
- ChildStmt = getSingleCompoundChild(Ctx, Body);
- if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND =
+ dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPDistributeDirective(DKind))
return NND;
@@ -8170,6 +8526,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_cancellation_point:
case OMPD_ordered:
case OMPD_threadprivate:
+ case OMPD_allocate:
case OMPD_task:
case OMPD_simd:
case OMPD_sections:
@@ -8200,6 +8557,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_requires:
@@ -8244,7 +8602,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
llvm::Value *OutlinedFnID,
const Expr *IfCond, const Expr *Device) {
if (!CGF.HaveInsertPoint())
@@ -8295,8 +8653,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Return value of the runtime offloading call.
llvm::Value *Return;
- llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D);
- llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D);
+ llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
+ llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
@@ -8592,6 +8950,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_cancellation_point:
case OMPD_ordered:
case OMPD_threadprivate:
+ case OMPD_allocate:
case OMPD_task:
case OMPD_simd:
case OMPD_sections:
@@ -8622,6 +8981,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_requires:
@@ -8698,6 +9058,40 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
return false;
}
+llvm::Constant *
+CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
+ const VarDecl *VD) { // Creates and registers an internal global for constant VD; returns its address.
+ assert(VD->getType().isConstant(CGM.getContext()) &&
+ "Expected constant variable.");
+ StringRef VarName;
+ llvm::Constant *Addr;
+ llvm::GlobalValue::LinkageTypes Linkage;
+ QualType Ty = VD->getType();
+ SmallString<128> Buffer; // Backing storage for VarName; must outlive its uses below.
+ {
+ unsigned DeviceID;
+ unsigned FileID;
+ unsigned Line;
+ getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
+ FileID, Line); // Fills DeviceID, FileID and Line for VD's location.
+ llvm::raw_svector_ostream OS(Buffer);
+ OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
+ << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
+ VarName = OS.str(); // Name is built from device ID, file ID, variable name and line.
+ }
+ Linkage = llvm::GlobalValue::InternalLinkage;
+ Addr =
+ getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
+ getDefaultFirstprivateAddressSpace());
+ cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
+ CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
+ CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); // NOTE(review): presumably keeps the global from being discarded - confirm.
+ OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
+ VarName, Addr, VarSize,
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); // Registered as a "to" global variable entry.
+ return Addr;
+}
+
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
@@ -8788,6 +9182,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
" Expected target-based directive.");
}
+bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+ LangAS &AS) { // Returns true and sets AS if VD carries an OMPAllocateDeclAttr.
+ if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
+ return false; // No declaration or no allocate attribute: AS is left untouched.
+ const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+ switch(A->getAllocatorType()) {
+ case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+ // Not supported: every predefined allocator falls back to the default space.
+ case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+ case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+ case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+ case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+ case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+ case OMPAllocateDeclAttr::OMPConstMemAlloc:
+ case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+ AS = LangAS::Default;
+ return true;
+ case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+ llvm_unreachable("Expected predefined allocator for the variables with the "
+ "static storage.");
+ }
+ return false; // Reached only if the allocator type matches none of the cases above.
+}
+
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
CodeGenModule &CGM)
: CGM(CGM) {
@@ -8836,7 +9254,7 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars) {
if (!CGF.HaveInsertPoint())
return;
@@ -8853,7 +9271,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
+ llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
@@ -9075,6 +9493,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_cancellation_point:
case OMPD_ordered:
case OMPD_threadprivate:
+ case OMPD_allocate:
case OMPD_task:
case OMPD_simd:
case OMPD_sections:
@@ -9102,6 +9521,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_target:
@@ -9299,6 +9719,307 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
}
}
+// These are the functions that are needed to mangle the name of the
+// vector functions generated by the compiler, according to the rules
+// defined in the "Vector Function ABI specifications for AArch64",
+// available at
+// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
+
+/// Decides whether a parameter or return value Maps To Vector (MTV), as
+/// defined in 3.1.1 of the AAVFABI.
+///
+/// \param QT   Type of the parameter or return value; only its canonical form
+///             is inspected.
+/// \param Kind OpenMP classification of the parameter.
+/// \returns false for void types and for uniform, linear and
+/// linear-with-variable-stride parameters; true in every other case.
+///
+/// TODO: Need to implement the behavior for reference marked with a
+/// var or no linear modifiers (1.b in the section). For this, we
+/// need to extend ParamKindTy to support the linear modifiers.
+static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
+  // A void type never maps to a vector, whatever the classification is.
+  if (QT.getCanonicalType()->isVoidType())
+    return false;
+
+  // TODO: Handle linear references with modifiers
+  const bool IsUniformOrLinear = Kind == ParamKindTy::Uniform ||
+                                 Kind == ParamKindTy::Linear ||
+                                 Kind == ParamKindTy::LinearWithVarStride;
+  return !IsUniformOrLinear;
+}
+
+/// Decides whether a type is Pass By Value (PBV), as defined in 3.1.2 of the
+/// AAVFABI.
+///
+/// \param QT Type to classify; only its canonical form is inspected.
+/// \param C  AST context used to query the size of the type.
+/// \returns true only for floating-point, integer and pointer types whose
+/// size is 8, 16, 32, 64 or 128 bits.
+static bool getAArch64PBV(QualType QT, ASTContext &C) {
+  const QualType CanonTy = QT.getCanonicalType();
+  const unsigned SizeInBits = C.getTypeSize(CanonTy);
+
+  // Only scalars and complex within 16 bytes wide set PBV to true.
+  switch (SizeInBits) {
+  case 8:
+  case 16:
+  case 32:
+  case 64:
+  case 128:
+    break;
+  default:
+    return false;
+  }
+
+  // TODO: Add support for complex types (section 3.1.2, item 2).
+  return CanonTy->isFloatingType() || CanonTy->isIntegerType() ||
+         CanonTy->isPointerType();
+}
+
+/// Computes the lane size (LS) of a return type or of an input parameter,
+/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
+///
+/// The result is, in order of preference:
+///  - the size of the pointee, for a pointer that maps to vector and whose
+///    pointee type is pass-by-value;
+///  - the size of \p QT itself, when \p QT is pass-by-value;
+///  - the size of the unsigned pointer-sized integer type otherwise.
+///
+/// TODO: Add support for references, section 3.2.1, item 1.
+static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
+  const QualType CanonTy = QT.getCanonicalType();
+
+  if (getAArch64MTV(QT, Kind) && CanonTy->isPointerType()) {
+    const QualType PointeeTy = CanonTy->getPointeeType();
+    if (getAArch64PBV(PointeeTy, C))
+      return C.getTypeSize(PointeeTy);
+  }
+
+  if (!getAArch64PBV(QT, C))
+    return C.getTypeSize(C.getUIntPtrType());
+
+  return C.getTypeSize(QT);
+}
+
+/// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
+/// signature of the scalar function, as defined in 3.2.2 of the AAVFABI.
+///
+/// \param FD         Scalar function whose return type and parameters are
+///                   inspected.
+/// \param ParamAttrs Per-parameter attributes, indexed like the parameters of
+///                   \p FD.
+/// \returns a tuple (NDS, WDS, OutputBecomesInput), where the last element is
+/// set when the return type is not pass-by-value but maps to vector.
+static std::tuple<unsigned, unsigned, bool>
+getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
+  ASTContext &Ctx = FD->getASTContext();
+  const QualType RetTy = FD->getReturnType().getCanonicalType();
+
+  // Lane sizes of the return value (if any) followed by every parameter.
+  llvm::SmallVector<unsigned, 8> Sizes;
+  bool OutputBecomesInput = false;
+
+  if (!RetTy->isVoidType()) {
+    Sizes.push_back(getAArch64LS(RetTy, ParamKindTy::Vector, Ctx));
+    // NOTE(review): `{}` value-initializes the ParamKindTy argument, whereas
+    // the lane-size query just above passes ParamKindTy::Vector explicitly.
+    // Confirm which kind is intended for the return value here.
+    OutputBecomesInput =
+        !getAArch64PBV(RetTy, Ctx) && getAArch64MTV(RetTy, {});
+  }
+
+  const unsigned NumParams = FD->getNumParams();
+  for (unsigned Idx = 0; Idx != NumParams; ++Idx) {
+    const QualType ParamTy =
+        FD->getParamDecl(Idx)->getType().getCanonicalType();
+    Sizes.push_back(getAArch64LS(ParamTy, ParamAttrs[Idx].Kind, Ctx));
+  }
+
+  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
+  // The LS of a function parameter / return value can only be a power
+  // of 2, starting from 8 bits, up to 128.
+  assert(std::all_of(Sizes.begin(), Sizes.end(),
+                     [](unsigned Size) {
+                       return Size == 8 || Size == 16 || Size == 32 ||
+                              Size == 64 || Size == 128;
+                     }) &&
+         "Invalid size");
+
+  // One pass yields both the narrowest and the widest lane size.
+  const auto NarrowestAndWidest =
+      std::minmax_element(std::begin(Sizes), std::end(Sizes));
+  return std::make_tuple(*NarrowestAndWidest.first,
+                         *NarrowestAndWidest.second, OutputBecomesInput);
+}
+
+/// Mangle the parameter part of the vector function name according to
+/// their OpenMP classification. The mangling function is defined in
+/// section 3.5 of the AAVFABI.
+///
+/// Each parameter contributes a kind token ("ls<arg>", "l[<step>]", "u" or
+/// "v"), followed by "a<alignment>" when its alignment is non-zero.
+///
+/// \param ParamAttrs Attributes of the parameters, in declaration order.
+/// \returns the concatenation of the tokens of all parameters.
+static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
+  SmallString<256> Mangled;
+  llvm::raw_svector_ostream OS(Mangled);
+
+  for (const ParamAttrTy &Attr : ParamAttrs) {
+    switch (Attr.Kind) {
+    case ParamKindTy::LinearWithVarStride:
+      OS << "ls" << Attr.StrideOrArg;
+      break;
+    case ParamKindTy::Linear: {
+      OS << 'l';
+      // Don't print the step value if it is not present or if it is
+      // equal to 1.
+      const bool HasExplicitStep =
+          !!Attr.StrideOrArg && Attr.StrideOrArg != 1;
+      if (HasExplicitStep)
+        OS << Attr.StrideOrArg;
+      break;
+    }
+    case ParamKindTy::Uniform:
+      OS << 'u';
+      break;
+    case ParamKindTy::Vector:
+      OS << 'v';
+      break;
+    }
+
+    // The alignment token is optional and only emitted when non-zero.
+    if (!!Attr.Alignment)
+      OS << 'a' << Attr.Alignment;
+  }
+
+  return OS.str();
+}
+
+/// Builds one vector function name and attaches it to \p Fn as a function
+/// attribute.
+///
+/// The name has the shape
+///   <Prefix><ISA><LMask><VLEN>[v]<ParSeq>_<MangledName>
+/// where the optional "v" is present when \p OutputBecomesInput is set.
+///
+/// The parameter `VLEN` is templated to allow the use of "x" when targeting
+/// scalable functions for SVE.
+template <typename T>
+static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
+                                 char ISA, StringRef ParSeq,
+                                 StringRef MangledName, bool OutputBecomesInput,
+                                 llvm::Function *Fn) {
+  SmallString<256> Name;
+  llvm::raw_svector_ostream OS(Name);
+
+  OS << Prefix << ISA << LMask << VLEN;
+  // An extra leading vector token is inserted when the output is passed as an
+  // input.
+  OS << (OutputBecomesInput ? "v" : "");
+  OS << ParSeq << "_" << MangledName;
+
+  Fn->addFnAttr(OS.str());
+}
+
+/// Helper function to generate the Advanced SIMD names depending on
+/// the value of the NDS when simdlen is not present.
+///
+/// NDS values of 8, 16 and 32 each produce two names (vector lengths 8/16,
+/// 4/8 and 2/4 respectively); NDS values of 64 and 128 produce a single name
+/// with vector length 2. Any other NDS is treated as unreachable.
+static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
+                                      StringRef Prefix, char ISA,
+                                      StringRef ParSeq, StringRef MangledName,
+                                      bool OutputBecomesInput,
+                                      llvm::Function *Fn) {
+  // Everything but the vector length is shared by the names emitted below.
+  auto AddNameWithVLEN = [&](unsigned VLEN) {
+    addAArch64VectorName(VLEN, Mask, Prefix, ISA, ParSeq, MangledName,
+                         OutputBecomesInput, Fn);
+  };
+
+  switch (NDS) {
+  case 8:
+    AddNameWithVLEN(8);
+    AddNameWithVLEN(16);
+    break;
+  case 16:
+    AddNameWithVLEN(4);
+    AddNameWithVLEN(8);
+    break;
+  case 32:
+    AddNameWithVLEN(2);
+    AddNameWithVLEN(4);
+    break;
+  case 64:
+  case 128:
+    AddNameWithVLEN(2);
+    break;
+  default:
+    llvm_unreachable("Scalar type is too wide.");
+  }
+}
+
+/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
+///
+/// \param CGM         Module used to report diagnostics.
+/// \param FD          Scalar function the vector variants are derived from.
+/// \param UserVLEN    Value of the `simdlen` clause, or 0 when the clause is
+///                    absent.
+/// \param ParamAttrs  Per-parameter OpenMP classification.
+/// \param State       `inbranch` / `notinbranch` state of the directive.
+/// \param MangledName Name of the scalar function, appended to every vector
+///                    name.
+/// \param ISA         's' for SVE or 'n' for Advanced SIMD.
+/// \param VecRegSize  Not read by this function.
+/// \param Fn          Function receiving the vector-name attributes.
+/// \param SLoc        Location used for the diagnostics emitted here.
+///
+/// When \p UserVLEN is rejected, a warning is reported and no attribute is
+/// added.
+static void emitAArch64DeclareSimdFunction(
+    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
+    ArrayRef<ParamAttrTy> ParamAttrs,
+    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
+    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
+
+  // Get basic data for building the vector signature.
+  const auto Data = getNDSWDS(FD, ParamAttrs);
+  const unsigned NDS = std::get<0>(Data);
+  const unsigned WDS = std::get<1>(Data);
+  const bool OutputBecomesInput = std::get<2>(Data);
+
+  // Check the values provided via `simdlen` by the user.
+  // 1. A `simdlen(1)` doesn't produce vector signatures,
+  if (UserVLEN == 1) {
+    unsigned DiagID = CGM.getDiags().getCustomDiagID(
+        DiagnosticsEngine::Warning,
+        "The clause simdlen(1) has no effect when targeting aarch64.");
+    CGM.getDiags().Report(SLoc, DiagID);
+    return;
+  }
+
+  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
+  //    Advanced SIMD output.
+  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
+    unsigned DiagID = CGM.getDiags().getCustomDiagID(
+        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
+                                    "power of 2 when targeting Advanced SIMD.");
+    CGM.getDiags().Report(SLoc, DiagID);
+    return;
+  }
+
+  // 3. Section 3.4.1. SVE fixed length must obey the architectural
+  //    limits.
+  if (ISA == 's' && UserVLEN != 0) {
+    // Widen before multiplying: the product of two `unsigned` values could
+    // wrap around for a very large simdlen and slip through the range check.
+    const unsigned long long VectorBits =
+        static_cast<unsigned long long>(UserVLEN) * WDS;
+    if ((VectorBits > 2048) || (VectorBits % 128 != 0)) {
+      unsigned DiagID = CGM.getDiags().getCustomDiagID(
+          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
+                                      "lanes in the architectural constraints "
+                                      "for SVE (min is 128-bit, max is "
+                                      "2048-bit, by steps of 128-bit)");
+      CGM.getDiags().Report(SLoc, DiagID) << WDS;
+      return;
+    }
+  }
+
+  // Sort out parameter sequence.
+  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
+  StringRef Prefix = "_ZGV";
+  // Generate simdlen from user input (if any).
+  if (UserVLEN) {
+    if (ISA == 's') {
+      // SVE generates only a masked function.
+      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
+                           OutputBecomesInput, Fn);
+    } else {
+      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
+      // Advanced SIMD generates one or two functions, depending on
+      // the `[not]inbranch` clause.
+      switch (State) {
+      case OMPDeclareSimdDeclAttr::BS_Undefined:
+        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
+                             OutputBecomesInput, Fn);
+        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
+                             OutputBecomesInput, Fn);
+        break;
+      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
+        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
+                             OutputBecomesInput, Fn);
+        break;
+      case OMPDeclareSimdDeclAttr::BS_Inbranch:
+        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
+                             OutputBecomesInput, Fn);
+        break;
+      }
+    }
+  } else {
+    // If no user simdlen is provided, follow the AAVFABI rules for
+    // generating the vector length.
+    if (ISA == 's') {
+      // SVE, section 3.4.1, item 1.
+      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
+                           OutputBecomesInput, Fn);
+    } else {
+      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
+      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
+      // two vector names depending on the use of the clause
+      // `[not]inbranch`.
+      switch (State) {
+      case OMPDeclareSimdDeclAttr::BS_Undefined:
+        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
+                                  OutputBecomesInput, Fn);
+        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
+                                  OutputBecomesInput, Fn);
+        break;
+      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
+        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
+                                  OutputBecomesInput, Fn);
+        break;
+      case OMPDeclareSimdDeclAttr::BS_Inbranch:
+        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
+                                  OutputBecomesInput, Fn);
+        break;
+      }
+    }
+  }
+}
+
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
llvm::Function *Fn) {
ASTContext &C = CGM.getContext();
@@ -9385,12 +10106,26 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
++MI;
}
llvm::APSInt VLENVal;
- if (const Expr *VLEN = Attr->getSimdlen())
- VLENVal = VLEN->EvaluateKnownConstInt(C);
+ SourceLocation ExprLoc;
+ const Expr *VLENExpr = Attr->getSimdlen();
+ if (VLENExpr) {
+ VLENVal = VLENExpr->EvaluateKnownConstInt(C);
+ ExprLoc = VLENExpr->getExprLoc();
+ }
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
- CGM.getTriple().getArch() == llvm::Triple::x86_64)
+ CGM.getTriple().getArch() == llvm::Triple::x86_64) {
emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
+ } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
+ unsigned VLEN = VLENVal.getExtValue();
+ StringRef MangledName = Fn->getName();
+ if (CGM.getTarget().hasFeature("sve"))
+ emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
+ MangledName, 's', 128, Fn, ExprLoc);
+ if (CGM.getTarget().hasFeature("neon"))
+ emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
+ MangledName, 'n', 128, Fn, ExprLoc);
+ }
}
FD = FD->getPreviousDecl();
}
@@ -9403,11 +10138,12 @@ public:
static const int DoacrossFinArgs = 2;
private:
- llvm::Value *RTLFn;
+ llvm::FunctionCallee RTLFn;
llvm::Value *Args[DoacrossFinArgs];
public:
- DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
+ DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
+ ArrayRef<llvm::Value *> CallArgs)
: RTLFn(RTLFn) {
assert(CallArgs.size() == DoacrossFinArgs);
std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
@@ -9454,10 +10190,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
enum { LowerFD = 0, UpperFD, StrideFD };
// Fill dims with data.
for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
- LValue DimsLVal =
- CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
- DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
- KmpDimTy);
+ LValue DimsLVal = CGF.MakeAddrLValue(
+ CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
// dims.upper = num_iterations;
LValue UpperLVal = CGF.EmitLValueForField(
DimsLVal, *std::next(RD->field_begin(), UpperFD));
@@ -9480,16 +10214,16 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
getThreadID(CGF, D.getBeginLoc()),
llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder
- .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
- .getPointer(),
+ CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
CGM.VoidPtrTy)};
- llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
+ llvm::FunctionCallee RTLFn =
+ createRuntimeFunction(OMPRTL__kmpc_doacross_init);
CGF.EmitRuntimeCall(RTLFn, Args);
llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
- llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
+ llvm::FunctionCallee FiniRTLFn =
+ createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
}
@@ -9508,20 +10242,14 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
llvm::Value *CntVal = CGF.EmitScalarConversion(
CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
CounterVal->getExprLoc());
- CGF.EmitStoreOfScalar(
- CntVal,
- CGF.Builder.CreateConstArrayGEP(
- CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
- /*Volatile=*/false, Int64Ty);
+ CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
+ /*Volatile=*/false, Int64Ty);
}
llvm::Value *Args[] = {
emitUpdateLocation(CGF, C->getBeginLoc()),
getThreadID(CGF, C->getBeginLoc()),
- CGF.Builder
- .CreateConstArrayGEP(CntAddr, 0,
- CGM.getContext().getTypeSizeInChars(Int64Ty))
- .getPointer()};
- llvm::Value *RTLFn;
+ CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
+ llvm::FunctionCallee RTLFn;
if (C->getDependencyKind() == OMPC_DEPEND_source) {
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
} else {
@@ -9532,12 +10260,12 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
}
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Value *Callee,
+ llvm::FunctionCallee Callee,
ArrayRef<llvm::Value *> Args) const {
assert(Loc.isValid() && "Outlined function call location must be valid.");
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
- if (auto *Fn = dyn_cast<llvm::Function>(Callee)) {
+ if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
if (Fn->doesNotThrow()) {
CGF.EmitNounwindRuntimeCall(Fn, Args);
return;
@@ -9547,7 +10275,7 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
}
void CGOpenMPRuntime::emitOutlinedFunctionCall(
- CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
ArrayRef<llvm::Value *> Args) const {
emitCall(CGF, Loc, OutlinedFn, Args);
}
@@ -9558,24 +10286,99 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
return CGF.GetAddrOfLocalVar(NativeParam);
}
+namespace {
+/// Cleanup action for allocate support.
+///
+/// Stores a runtime function together with a fixed number of call arguments
+/// and emits the call to that function when the cleanup is run.
+class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+public:
+  /// Number of arguments passed to the stored runtime function.
+  static const int CleanupArgs = 3;
+
+private:
+  /// Runtime function called by the cleanup.
+  llvm::FunctionCallee RTLFn;
+  /// Arguments of the call, copied at construction time.
+  llvm::Value *Args[CleanupArgs];
+
+public:
+  /// \param RTLFn    Runtime function to call when the cleanup is emitted.
+  /// \param CallArgs Arguments of the call; must hold exactly CleanupArgs
+  ///                 values.
+  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
+                       ArrayRef<llvm::Value *> CallArgs)
+      : RTLFn(RTLFn) {
+    assert(CallArgs.size() == CleanupArgs &&
+           "Size of arguments does not match.");
+    std::copy_n(CallArgs.begin(), CallArgs.size(), std::begin(Args));
+  }
+
+  /// Emits the stored runtime call, unless there is no insertion point.
+  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+    if (CGF.HaveInsertPoint())
+      CGF.EmitRuntimeCall(RTLFn, Args);
+  }
+};
+} // namespace
+
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
- return Address::invalid();
-}
-
-llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
+ if (!VD)
+ return Address::invalid();
+ const VarDecl *CVD = VD->getCanonicalDecl();
+ if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+ return Address::invalid();
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ // Use the default allocation.
+ if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
+ !AA->getAllocator())
+ return Address::invalid();
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ // Align the size: ((size + align - 1) / align) * align
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+ Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+ Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+ llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+ assert(AA->getAllocator() &&
+ "Expected allocator expression for non-default allocator.");
+ llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+ // According to the standard, the original allocator type is an enum
+ // (integer). Convert to pointer type, if required.
+ if (Allocator->getType()->isIntegerTy())
+ Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
+ else if (Allocator->getType()->isPointerTy())
+ Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
+ CGM.VoidPtrTy);
+ llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+ llvm::Value *Addr =
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+ CVD->getName() + ".void.addr");
+ llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
+ Allocator};
+ llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+ llvm::makeArrayRef(FiniArgs));
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr,
+ CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+ CVD->getName() + ".addr");
+ return Address(Addr, Align);
+}
+
+llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
llvm_unreachable("Not supported in SIMD-only mode");
}
-llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
+llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
llvm_unreachable("Not supported in SIMD-only mode");
}
-llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
+llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
@@ -9585,7 +10388,7 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
SourceLocation Loc,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) {
llvm_unreachable("Not supported in SIMD-only mode");
@@ -9716,7 +10519,7 @@ void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
const OMPExecutableDirective &D,
- llvm::Value *TaskFunction,
+ llvm::Function *TaskFunction,
QualType SharedsTy, Address Shareds,
const Expr *IfCond,
const OMPTaskDataTy &Data) {
@@ -9725,7 +10528,7 @@ void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
- llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
+ llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
const Expr *IfCond, const OMPTaskDataTy &Data) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -9785,9 +10588,10 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) {
+ const Expr *IfCond,
+ const Expr *Device) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -9810,7 +10614,7 @@ llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
- llvm::Value *OutlinedFn,
+ llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -9857,4 +10661,3 @@ CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
const VarDecl *TargetParam) const {
llvm_unreachable("Not supported in SIMD-only mode");
}
-