summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2018-08-16 09:35:01 +0000
committerHans Wennborg <hans@hanshq.net>2018-08-16 09:35:01 +0000
commitca31943738e28c39acff8db9368be70e4fec64ab (patch)
tree584467c757e903df2f630558588b3a5deaf4ce82
parentbdb1c367582dd7ce7cb80c234c9f4e7ea662b14b (diff)
Merging r339603:
------------------------------------------------------------------------ r339603 | abataev | 2018-08-13 21:04:24 +0200 (Mon, 13 Aug 2018) | 4 lines [OPENMP] Fix emission of the loop doacross constructs. The number of loops associated with the OpenMP loop constructs should not be considered as the number loops to collapse. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_70@339851 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/AST/OpenMPClause.h96
-rw-r--r--lib/AST/OpenMPClause.cpp127
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp92
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.h8
-rw-r--r--lib/CodeGen/CGStmtOpenMP.cpp33
-rw-r--r--lib/Sema/SemaOpenMP.cpp269
-rw-r--r--lib/Serialization/ASTReaderStmt.cpp18
-rw-r--r--lib/Serialization/ASTWriterStmt.cpp9
-rw-r--r--test/OpenMP/ordered_doacross_codegen.c28
-rw-r--r--test/OpenMP/ordered_doacross_codegen.cpp107
-rw-r--r--test/OpenMP/parallel_for_simd_ast_print.cpp2
11 files changed, 571 insertions, 218 deletions
diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h
index a28609f8cd..419dbe52fa 100644
--- a/include/clang/AST/OpenMPClause.h
+++ b/include/clang/AST/OpenMPClause.h
@@ -922,8 +922,11 @@ public:
/// \endcode
/// In this example directive '#pragma omp for' has 'ordered' clause with
/// parameter 2.
-class OMPOrderedClause : public OMPClause {
+class OMPOrderedClause final
+ : public OMPClause,
+ private llvm::TrailingObjects<OMPOrderedClause, Expr *> {
friend class OMPClauseReader;
+ friend TrailingObjects;
/// Location of '('.
SourceLocation LParenLoc;
@@ -931,6 +934,26 @@ class OMPOrderedClause : public OMPClause {
/// Number of for-loops.
Stmt *NumForLoops = nullptr;
+ /// Real number of loops.
+ unsigned NumberOfLoops = 0;
+
+ /// Build 'ordered' clause.
+ ///
+ /// \param Num Expression, possibly associated with this clause.
+ /// \param NumLoops Number of loops, associated with this clause.
+ /// \param StartLoc Starting location of the clause.
+ /// \param LParenLoc Location of '('.
+ /// \param EndLoc Ending location of the clause.
+ OMPOrderedClause(Expr *Num, unsigned NumLoops, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc)
+ : OMPClause(OMPC_ordered, StartLoc, EndLoc), LParenLoc(LParenLoc),
+ NumForLoops(Num), NumberOfLoops(NumLoops) {}
+
+ /// Build an empty clause.
+ explicit OMPOrderedClause(unsigned NumLoops)
+ : OMPClause(OMPC_ordered, SourceLocation(), SourceLocation()),
+ NumberOfLoops(NumLoops) {}
+
/// Set the number of associated for-loops.
void setNumForLoops(Expr *Num) { NumForLoops = Num; }
@@ -938,17 +961,17 @@ public:
/// Build 'ordered' clause.
///
/// \param Num Expression, possibly associated with this clause.
+ /// \param NumLoops Number of loops, associated with this clause.
/// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
- OMPOrderedClause(Expr *Num, SourceLocation StartLoc,
- SourceLocation LParenLoc, SourceLocation EndLoc)
- : OMPClause(OMPC_ordered, StartLoc, EndLoc), LParenLoc(LParenLoc),
- NumForLoops(Num) {}
+ static OMPOrderedClause *Create(const ASTContext &C, Expr *Num,
+ unsigned NumLoops, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc);
/// Build an empty clause.
- explicit OMPOrderedClause()
- : OMPClause(OMPC_ordered, SourceLocation(), SourceLocation()) {}
+ static OMPOrderedClause* CreateEmpty(const ASTContext &C, unsigned NumLoops);
/// Sets the location of '('.
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
@@ -959,6 +982,17 @@ public:
/// Return the number of associated for-loops.
Expr *getNumForLoops() const { return cast_or_null<Expr>(NumForLoops); }
+ /// Set number of iterations for the specified loop.
+ void setLoopNumIterations(unsigned NumLoop, Expr *NumIterations);
+ /// Get number of iterations for all the loops.
+ ArrayRef<Expr *> getLoopNumIterations() const;
+
+ /// Set loop counter for the specified loop.
+ void setLoopCounter(unsigned NumLoop, Expr *Counter);
+ /// Get loops counter for the specified loop.
+ Expr *getLoopCunter(unsigned NumLoop);
+ const Expr *getLoopCunter(unsigned NumLoop) const;
+
child_range children() { return child_range(&NumForLoops, &NumForLoops + 1); }
static bool classof(const OMPClause *T) {
@@ -3087,24 +3121,32 @@ class OMPDependClause final
/// Colon location.
SourceLocation ColonLoc;
+ /// Number of loops, associated with the depend clause.
+ unsigned NumLoops = 0;
+
/// Build clause with number of variables \a N.
///
/// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
/// \param N Number of the variables in the clause.
+ /// \param NumLoops Number of loops that is associated with this depend
+ /// clause.
OMPDependClause(SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc, unsigned N)
+ SourceLocation EndLoc, unsigned N, unsigned NumLoops)
: OMPVarListClause<OMPDependClause>(OMPC_depend, StartLoc, LParenLoc,
- EndLoc, N) {}
+ EndLoc, N), NumLoops(NumLoops) {}
/// Build an empty clause.
///
/// \param N Number of variables.
- explicit OMPDependClause(unsigned N)
+ /// \param NumLoops Number of loops that is associated with this depend
+ /// clause.
+ explicit OMPDependClause(unsigned N, unsigned NumLoops)
: OMPVarListClause<OMPDependClause>(OMPC_depend, SourceLocation(),
SourceLocation(), SourceLocation(),
- N) {}
+ N),
+ NumLoops(NumLoops) {}
/// Set dependency kind.
void setDependencyKind(OpenMPDependClauseKind K) { DepKind = K; }
@@ -3126,16 +3168,23 @@ public:
/// \param DepLoc Location of the dependency type.
/// \param ColonLoc Colon location.
/// \param VL List of references to the variables.
- static OMPDependClause *
- Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc, OpenMPDependClauseKind DepKind,
- SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL);
+ /// \param NumLoops Number of loops that is associated with this depend
+ /// clause.
+ static OMPDependClause *Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc,
+ OpenMPDependClauseKind DepKind,
+ SourceLocation DepLoc, SourceLocation ColonLoc,
+ ArrayRef<Expr *> VL, unsigned NumLoops);
/// Creates an empty clause with \a N variables.
///
/// \param C AST context.
/// \param N The number of variables.
- static OMPDependClause *CreateEmpty(const ASTContext &C, unsigned N);
+ /// \param NumLoops Number of loops that is associated with this depend
+ /// clause.
+ static OMPDependClause *CreateEmpty(const ASTContext &C, unsigned N,
+ unsigned NumLoops);
/// Get dependency type.
OpenMPDependClauseKind getDependencyKind() const { return DepKind; }
@@ -3146,15 +3195,16 @@ public:
/// Get colon location.
SourceLocation getColonLoc() const { return ColonLoc; }
- /// Set the loop counter value for the depend clauses with 'sink|source' kind
- /// of dependency. Required for codegen.
- void setCounterValue(Expr *V);
+ /// Get number of loops associated with the clause.
+ unsigned getNumLoops() const { return NumLoops; }
- /// Get the loop counter value.
- Expr *getCounterValue();
+ /// Set the loop data for the depend clauses with 'sink|source' kind of
+ /// dependency.
+ void setLoopData(unsigned NumLoop, Expr *Cnt);
- /// Get the loop counter value.
- const Expr *getCounterValue() const;
+ /// Get the loop data.
+ Expr *getLoopData(unsigned NumLoop);
+ const Expr *getLoopData(unsigned NumLoop) const;
child_range children() {
return child_range(reinterpret_cast<Stmt **>(varlist_begin()),
diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp
index 50729264bf..0b8aa883e1 100644
--- a/lib/AST/OpenMPClause.cpp
+++ b/lib/AST/OpenMPClause.cpp
@@ -181,6 +181,57 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
return nullptr;
}
+OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num,
+ unsigned NumLoops,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * NumLoops));
+ auto *Clause =
+ new (Mem) OMPOrderedClause(Num, NumLoops, StartLoc, LParenLoc, EndLoc);
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ Clause->setLoopNumIterations(I, nullptr);
+ Clause->setLoopCounter(I, nullptr);
+ }
+ return Clause;
+}
+
+OMPOrderedClause *OMPOrderedClause::CreateEmpty(const ASTContext &C,
+ unsigned NumLoops) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(2 * NumLoops));
+ auto *Clause = new (Mem) OMPOrderedClause(NumLoops);
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ Clause->setLoopNumIterations(I, nullptr);
+ Clause->setLoopCounter(I, nullptr);
+ }
+ return Clause;
+}
+
+void OMPOrderedClause::setLoopNumIterations(unsigned NumLoop,
+ Expr *NumIterations) {
+ assert(NumLoop < NumberOfLoops && "out of loops number.");
+ getTrailingObjects<Expr *>()[NumLoop] = NumIterations;
+}
+
+ArrayRef<Expr *> OMPOrderedClause::getLoopNumIterations() const {
+ return llvm::makeArrayRef(getTrailingObjects<Expr *>(), NumberOfLoops);
+}
+
+void OMPOrderedClause::setLoopCounter(unsigned NumLoop, Expr *Counter) {
+ assert(NumLoop < NumberOfLoops && "out of loops number.");
+ getTrailingObjects<Expr *>()[NumberOfLoops + NumLoop] = Counter;
+}
+
+Expr *OMPOrderedClause::getLoopCunter(unsigned NumLoop) {
+ assert(NumLoop < NumberOfLoops && "out of loops number.");
+ return getTrailingObjects<Expr *>()[NumberOfLoops + NumLoop];
+}
+
+const Expr *OMPOrderedClause::getLoopCunter(unsigned NumLoop) const {
+ assert(NumLoop < NumberOfLoops && "out of loops number.");
+ return getTrailingObjects<Expr *>()[NumberOfLoops + NumLoop];
+}
+
void OMPPrivateClause::setPrivateCopies(ArrayRef<Expr *> VL) {
assert(VL.size() == varlist_size() &&
"Number of private copies is not the same as the preallocated buffer");
@@ -653,44 +704,58 @@ OMPFlushClause *OMPFlushClause::CreateEmpty(const ASTContext &C, unsigned N) {
return new (Mem) OMPFlushClause(N);
}
-OMPDependClause *OMPDependClause::Create(
- const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc, OpenMPDependClauseKind DepKind,
- SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size() + 1));
- OMPDependClause *Clause =
- new (Mem) OMPDependClause(StartLoc, LParenLoc, EndLoc, VL.size());
+OMPDependClause *
+OMPDependClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc,
+ OpenMPDependClauseKind DepKind, SourceLocation DepLoc,
+ SourceLocation ColonLoc, ArrayRef<Expr *> VL,
+ unsigned NumLoops) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size() + NumLoops));
+ OMPDependClause *Clause = new (Mem)
+ OMPDependClause(StartLoc, LParenLoc, EndLoc, VL.size(), NumLoops);
Clause->setVarRefs(VL);
Clause->setDependencyKind(DepKind);
Clause->setDependencyLoc(DepLoc);
Clause->setColonLoc(ColonLoc);
- Clause->setCounterValue(nullptr);
+ for (unsigned I = 0 ; I < NumLoops; ++I)
+ Clause->setLoopData(I, nullptr);
return Clause;
}
-OMPDependClause *OMPDependClause::CreateEmpty(const ASTContext &C, unsigned N) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N + 1));
- return new (Mem) OMPDependClause(N);
-}
-
-void OMPDependClause::setCounterValue(Expr *V) {
- assert(getDependencyKind() == OMPC_DEPEND_sink ||
- getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
- *getVarRefs().end() = V;
-}
-
-const Expr *OMPDependClause::getCounterValue() const {
- auto *V = *getVarRefs().end();
- assert(getDependencyKind() == OMPC_DEPEND_sink ||
- getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
- return V;
-}
-
-Expr *OMPDependClause::getCounterValue() {
- auto *V = *getVarRefs().end();
- assert(getDependencyKind() == OMPC_DEPEND_sink ||
- getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
- return V;
+OMPDependClause *OMPDependClause::CreateEmpty(const ASTContext &C, unsigned N,
+ unsigned NumLoops) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N + NumLoops));
+ return new (Mem) OMPDependClause(N, NumLoops);
+}
+
+void OMPDependClause::setLoopData(unsigned NumLoop, Expr *Cnt) {
+ assert((getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source) &&
+ NumLoop < NumLoops &&
+ "Expected sink or source depend + loop index must be less number of "
+ "loops.");
+ auto It = std::next(getVarRefs().end(), NumLoop);
+ *It = Cnt;
+}
+
+Expr *OMPDependClause::getLoopData(unsigned NumLoop) {
+ assert((getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source) &&
+ NumLoop < NumLoops &&
+ "Expected sink or source depend + loop index must be less number of "
+ "loops.");
+ auto It = std::next(getVarRefs().end(), NumLoop);
+ return *It;
+}
+
+const Expr *OMPDependClause::getLoopData(unsigned NumLoop) const {
+ assert((getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source) &&
+ NumLoop < NumLoops &&
+ "Expected sink or source depend + loop index must be less number of "
+ "loops.");
+ auto It = std::next(getVarRefs().end(), NumLoop);
+ return *It;
}
unsigned OMPClauseMappableExprCommon::getComponentsTotalNumber(
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index fa850155df..b68942f2fa 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8782,7 +8782,8 @@ public:
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
if (!CGF.HaveInsertPoint())
return;
@@ -8805,32 +8806,45 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
} else {
RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
}
+ llvm::APInt Size(/*numBits=*/32, NumIterations.size());
+ QualType ArrayTy =
+ C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
- Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
- CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
+ Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
+ CGF.EmitNullInitialization(DimsAddr, ArrayTy);
enum { LowerFD = 0, UpperFD, StrideFD };
// Fill dims with data.
- LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
- // dims.upper = num_iterations;
- LValue UpperLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
- llvm::Value *NumIterVal = CGF.EmitScalarConversion(
- CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
- Int64Ty, D.getNumIterations()->getExprLoc());
- CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
- // dims.stride = 1;
- LValue StrideLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
- StrideLVal);
+ for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
+ LValue DimsLVal =
+ CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
+ DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
+ KmpDimTy);
+ // dims.upper = num_iterations;
+ LValue UpperLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), UpperFD));
+ llvm::Value *NumIterVal =
+ CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
+ D.getNumIterations()->getType(), Int64Ty,
+ D.getNumIterations()->getExprLoc());
+ CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
+ // dims.stride = 1;
+ LValue StrideLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), StrideFD));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
+ StrideLVal);
+ }
// Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
// kmp_int32 num_dims, struct kmp_dim * dims);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
- getThreadID(CGF, D.getLocStart()),
- llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- DimsAddr.getPointer(), CGM.VoidPtrTy)};
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, D.getLocStart()),
+ getThreadID(CGF, D.getLocStart()),
+ llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder
+ .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
+ .getPointer(),
+ CGM.VoidPtrTy)};
llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
CGF.EmitRuntimeCall(RTLFn, Args);
@@ -8845,16 +8859,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
const OMPDependClause *C) {
QualType Int64Ty =
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
- const Expr *CounterVal = C->getCounterValue();
- assert(CounterVal);
- llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
- CounterVal->getType(), Int64Ty,
- CounterVal->getExprLoc());
- Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
- CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
- getThreadID(CGF, C->getLocStart()),
- CntAddr.getPointer()};
+ llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
+ QualType ArrayTy = CGM.getContext().getConstantArrayType(
+ Int64Ty, Size, ArrayType::Normal, 0);
+ Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
+ for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
+ const Expr *CounterVal = C->getLoopData(I);
+ assert(CounterVal);
+ llvm::Value *CntVal = CGF.EmitScalarConversion(
+ CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
+ CounterVal->getExprLoc());
+ CGF.EmitStoreOfScalar(
+ CntVal,
+ CGF.Builder.CreateConstArrayGEP(
+ CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
+ /*Volatile=*/false, Int64Ty);
+ }
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, C->getLocStart()),
+ getThreadID(CGF, C->getLocStart()),
+ CGF.Builder
+ .CreateConstArrayGEP(CntAddr, 0,
+ CGM.getContext().getTypeSizeInChars(Int64Ty))
+ .getPointer()};
llvm::Value *RTLFn;
if (C->getDependencyKind() == OMPC_DEPEND_source) {
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
@@ -9169,7 +9196,8 @@ void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
}
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 01ff0c20fd..94731b64eb 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1465,8 +1465,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- virtual void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D);
+ virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations);
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
@@ -2051,8 +2051,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) override;
+ void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) override;
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 0d343f84c7..03102f04aa 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1509,6 +1509,23 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
}
++I;
}
+ // Privatize extra loop counters used in loops for ordered(n) clauses.
+ for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
+ if (!C->getNumForLoops())
+ continue;
+ for (unsigned I = S.getCollapsedNumber(),
+ E = C->getLoopNumIterations().size();
+ I < E; ++I) {
+ const auto *DRE = cast<DeclRefExpr>(C->getLoopCunter(I));
+ const auto *VD = cast<VarDecl>(DRE->getDecl());
+ // Override only those variables that are really emitted already.
+ if (LocalDeclMap.count(VD)) {
+ (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
+ return CreateMemTemp(DRE->getType(), VD->getName());
+ });
+ }
+ }
+ }
}
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
@@ -2244,7 +2261,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
bool Ordered = false;
if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
if (OrderedClause->getNumForLoops())
- RT.emitDoacrossInit(*this, S);
+ RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
else
Ordered = true;
}
@@ -4943,6 +4960,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
CGF.EmitVarDecl(*VD);
}
}
+ for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
+ if (!C->getNumForLoops())
+ continue;
+ for (unsigned I = LD->getCollapsedNumber(),
+ E = C->getLoopNumIterations().size();
+ I < E; ++I) {
+ if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
+ cast<DeclRefExpr>(C->getLoopCunter(I))->getDecl())) {
+ // Emit only those that were not explicitly referenced in clauses.
+ if (!CGF.LocalDeclMap.count(VD))
+ CGF.EmitVarDecl(*VD);
+ }
+ }
+ }
}
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
}
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp
index e1a4c420d4..56152abee1 100644
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -73,6 +73,8 @@ public:
};
using OperatorOffsetTy =
llvm::SmallVector<std::pair<Expr *, OverloadedOperatorKind>, 4>;
+ using DoacrossDependMapTy =
+ llvm::DenseMap<OMPDependClause *, OperatorOffsetTy>;
private:
struct DSAInfo {
@@ -97,8 +99,6 @@ private:
llvm::DenseMap<const ValueDecl *, MappedExprComponentTy>;
using CriticalsWithHintsTy =
llvm::StringMap<std::pair<const OMPCriticalDirective *, llvm::APSInt>>;
- using DoacrossDependMapTy =
- llvm::DenseMap<OMPDependClause *, OperatorOffsetTy>;
struct ReductionData {
using BOKPtrType = llvm::PointerEmbeddedInt<BinaryOperatorKind, 16>;
SourceRange ReductionRange;
@@ -137,7 +137,7 @@ private:
/// first argument (Expr *) contains optional argument of the
/// 'ordered' clause, the second one is true if the regions has 'ordered'
/// clause, false otherwise.
- llvm::PointerIntPair<const Expr *, 1, bool> OrderedRegion;
+ llvm::Optional<std::pair<const Expr *, OMPOrderedClause *>> OrderedRegion;
bool NowaitRegion = false;
bool CancelRegion = false;
unsigned AssociatedLoops = 1;
@@ -398,23 +398,42 @@ public:
}
/// Marks current region as ordered (it has an 'ordered' clause).
- void setOrderedRegion(bool IsOrdered, const Expr *Param) {
+ void setOrderedRegion(bool IsOrdered, const Expr *Param,
+ OMPOrderedClause *Clause) {
assert(!isStackEmpty());
- Stack.back().first.back().OrderedRegion.setInt(IsOrdered);
- Stack.back().first.back().OrderedRegion.setPointer(Param);
+ if (IsOrdered)
+ Stack.back().first.back().OrderedRegion.emplace(Param, Clause);
+ else
+ Stack.back().first.back().OrderedRegion.reset();
+ }
+ /// Returns true, if region is ordered (has associated 'ordered' clause),
+ /// false - otherwise.
+ bool isOrderedRegion() const {
+ if (isStackEmpty())
+ return false;
+ return Stack.back().first.rbegin()->OrderedRegion.hasValue();
+ }
+ /// Returns optional parameter for the ordered region.
+ std::pair<const Expr *, OMPOrderedClause *> getOrderedRegionParam() const {
+ if (isStackEmpty() ||
+ !Stack.back().first.rbegin()->OrderedRegion.hasValue())
+ return std::make_pair(nullptr, nullptr);
+ return Stack.back().first.rbegin()->OrderedRegion.getValue();
}
/// Returns true, if parent region is ordered (has associated
/// 'ordered' clause), false - otherwise.
bool isParentOrderedRegion() const {
if (isStackEmpty() || Stack.back().first.size() == 1)
return false;
- return std::next(Stack.back().first.rbegin())->OrderedRegion.getInt();
+ return std::next(Stack.back().first.rbegin())->OrderedRegion.hasValue();
}
/// Returns optional parameter for the ordered region.
- const Expr *getParentOrderedRegionParam() const {
- if (isStackEmpty() || Stack.back().first.size() == 1)
- return nullptr;
- return std::next(Stack.back().first.rbegin())->OrderedRegion.getPointer();
+ std::pair<const Expr *, OMPOrderedClause *>
+ getParentOrderedRegionParam() const {
+ if (isStackEmpty() || Stack.back().first.size() == 1 ||
+ !std::next(Stack.back().first.rbegin())->OrderedRegion.hasValue())
+ return std::make_pair(nullptr, nullptr);
+ return std::next(Stack.back().first.rbegin())->OrderedRegion.getValue();
}
/// Marks current region as nowait (it has a 'nowait' clause).
void setNowaitRegion(bool IsNowait = true) {
@@ -3736,6 +3755,13 @@ public:
Expr *buildCounterInit() const;
/// Build step of the counter be used for codegen.
Expr *buildCounterStep() const;
+ /// Build loop data with counter value for depend clauses in ordered
+ /// directives.
+ Expr *
+ buildOrderedLoopData(Scope *S, Expr *Counter,
+ llvm::MapVector<const Expr *, DeclRefExpr *> &Captures,
+ SourceLocation Loc, Expr *Inc = nullptr,
+ OverloadedOperatorKind OOK = OO_Amp);
/// Return true if any expression is dependent.
bool dependent() const;
@@ -3900,7 +3926,12 @@ bool OpenMPIterationSpaceChecker::checkAndSetInit(Stmt *S, bool EmitDiags) {
SemaRef.Diag(S->getLocStart(),
diag::ext_omp_loop_not_canonical_init)
<< S->getSourceRange();
- return setLCDeclAndLB(Var, nullptr, Var->getInit());
+ return setLCDeclAndLB(
+ Var,
+ buildDeclRefExpr(SemaRef, Var,
+ Var->getType().getNonReferenceType(),
+ DS->getLocStart()),
+ Var->getInit());
}
}
}
@@ -4262,7 +4293,8 @@ Expr *OpenMPIterationSpaceChecker::buildPreCond(
/// Build reference expression to the counter be used for codegen.
DeclRefExpr *OpenMPIterationSpaceChecker::buildCounterVar(
- llvm::MapVector<const Expr *, DeclRefExpr *> &Captures, DSAStackTy &DSA) const {
+ llvm::MapVector<const Expr *, DeclRefExpr *> &Captures,
+ DSAStackTy &DSA) const {
auto *VD = dyn_cast<VarDecl>(LCDecl);
if (!VD) {
VD = SemaRef.isOpenMPCapturedDecl(LCDecl);
@@ -4302,6 +4334,63 @@ Expr *OpenMPIterationSpaceChecker::buildCounterInit() const { return LB; }
/// Build step of the counter be used for codegen.
Expr *OpenMPIterationSpaceChecker::buildCounterStep() const { return Step; }
+Expr *OpenMPIterationSpaceChecker::buildOrderedLoopData(
+ Scope *S, Expr *Counter,
+ llvm::MapVector<const Expr *, DeclRefExpr *> &Captures, SourceLocation Loc,
+ Expr *Inc, OverloadedOperatorKind OOK) {
+ Expr *Cnt = SemaRef.DefaultLvalueConversion(Counter).get();
+ if (!Cnt)
+ return nullptr;
+ if (Inc) {
+ assert((OOK == OO_Plus || OOK == OO_Minus) &&
+ "Expected only + or - operations for depend clauses.");
+ BinaryOperatorKind BOK = (OOK == OO_Plus) ? BO_Add : BO_Sub;
+ Cnt = SemaRef.BuildBinOp(S, Loc, BOK, Cnt, Inc).get();
+ if (!Cnt)
+ return nullptr;
+ }
+ ExprResult Diff;
+ QualType VarType = LCDecl->getType().getNonReferenceType();
+ if (VarType->isIntegerType() || VarType->isPointerType() ||
+ SemaRef.getLangOpts().CPlusPlus) {
+ // Upper - Lower
+ Expr *Upper =
+ TestIsLessOp ? Cnt : tryBuildCapture(SemaRef, UB, Captures).get();
+ Expr *Lower =
+ TestIsLessOp ? tryBuildCapture(SemaRef, LB, Captures).get() : Cnt;
+ if (!Upper || !Lower)
+ return nullptr;
+
+ Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
+
+ if (!Diff.isUsable() && VarType->getAsCXXRecordDecl()) {
+ // BuildBinOp already emitted error, this one is to point user to upper
+ // and lower bound, and to tell what is passed to 'operator-'.
+ SemaRef.Diag(Upper->getLocStart(), diag::err_omp_loop_diff_cxx)
+ << Upper->getSourceRange() << Lower->getSourceRange();
+ return nullptr;
+ }
+ }
+
+ if (!Diff.isUsable())
+ return nullptr;
+
+ // Parentheses (for dumping/debugging purposes only).
+ Diff = SemaRef.ActOnParenExpr(DefaultLoc, DefaultLoc, Diff.get());
+ if (!Diff.isUsable())
+ return nullptr;
+
+ ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures);
+ if (!NewStep.isUsable())
+ return nullptr;
+ // (Upper - Lower) / Step
+ Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Div, Diff.get(), NewStep.get());
+ if (!Diff.isUsable())
+ return nullptr;
+
+ return Diff.get();
+}
+
/// Iteration space of a single for loop.
struct LoopIterationSpace final {
/// Condition of the loop.
@@ -4361,7 +4450,8 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) {
static bool checkOpenMPIterationSpace(
OpenMPDirectiveKind DKind, Stmt *S, Sema &SemaRef, DSAStackTy &DSA,
unsigned CurrentNestedLoopCount, unsigned NestedLoopCount,
- Expr *CollapseLoopCountExpr, Expr *OrderedLoopCountExpr,
+ unsigned TotalNestedLoopCount, Expr *CollapseLoopCountExpr,
+ Expr *OrderedLoopCountExpr,
Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA,
LoopIterationSpace &ResultIterSpace,
llvm::MapVector<const Expr *, DeclRefExpr *> &Captures) {
@@ -4371,9 +4461,9 @@ static bool checkOpenMPIterationSpace(
if (!For) {
SemaRef.Diag(S->getLocStart(), diag::err_omp_not_for)
<< (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr)
- << getOpenMPDirectiveName(DKind) << NestedLoopCount
+ << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount
<< (CurrentNestedLoopCount > 0) << CurrentNestedLoopCount;
- if (NestedLoopCount > 1) {
+ if (TotalNestedLoopCount > 1) {
if (CollapseLoopCountExpr && OrderedLoopCountExpr)
SemaRef.Diag(DSA.getConstructLoc(),
diag::note_omp_collapse_ordered_expr)
@@ -4506,6 +4596,41 @@ static bool checkOpenMPIterationSpace(
ResultIterSpace.PrivateCounterVar == nullptr ||
ResultIterSpace.CounterInit == nullptr ||
ResultIterSpace.CounterStep == nullptr);
+ if (!HasErrors && DSA.isOrderedRegion()) {
+ if (DSA.getOrderedRegionParam().second->getNumForLoops()) {
+ if (CurrentNestedLoopCount <
+ DSA.getOrderedRegionParam().second->getLoopNumIterations().size()) {
+ DSA.getOrderedRegionParam().second->setLoopNumIterations(
+ CurrentNestedLoopCount, ResultIterSpace.NumIterations);
+ DSA.getOrderedRegionParam().second->setLoopCounter(
+ CurrentNestedLoopCount, ResultIterSpace.CounterVar);
+ }
+ }
+ for (auto &Pair : DSA.getDoacrossDependClauses()) {
+ if (CurrentNestedLoopCount >= Pair.first->getNumLoops()) {
+ // Erroneous case - clause has some problems.
+ continue;
+ }
+ if (Pair.first->getDependencyKind() == OMPC_DEPEND_sink &&
+ Pair.second.size() <= CurrentNestedLoopCount) {
+ // Erroneous case - clause has some problems.
+ Pair.first->setLoopData(CurrentNestedLoopCount, nullptr);
+ continue;
+ }
+ Expr *CntValue;
+ if (Pair.first->getDependencyKind() == OMPC_DEPEND_source)
+ CntValue = ISC.buildOrderedLoopData(
+ DSA.getCurScope(), ResultIterSpace.CounterVar, Captures,
+ Pair.first->getDependencyLoc());
+ else
+ CntValue = ISC.buildOrderedLoopData(
+ DSA.getCurScope(), ResultIterSpace.CounterVar, Captures,
+ Pair.first->getDependencyLoc(),
+ Pair.second[CurrentNestedLoopCount].first,
+ Pair.second[CurrentNestedLoopCount].second);
+ Pair.first->setLoopData(CurrentNestedLoopCount, CntValue);
+ }
+ }
return HasErrors;
}
@@ -4691,6 +4816,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
if (CollapseLoopCountExpr->EvaluateAsInt(Result, SemaRef.getASTContext()))
NestedLoopCount = Result.getLimitedValue();
}
+ unsigned OrderedLoopCount = 1;
if (OrderedLoopCountExpr) {
// Found 'ordered' clause - calculate collapse number.
llvm::APSInt Result;
@@ -4703,21 +4829,43 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
diag::note_collapse_loop_count)
<< CollapseLoopCountExpr->getSourceRange();
}
- NestedLoopCount = Result.getLimitedValue();
+ OrderedLoopCount = Result.getLimitedValue();
}
}
// This is helper routine for loop directives (e.g., 'for', 'simd',
// 'for simd', etc.).
llvm::MapVector<const Expr *, DeclRefExpr *> Captures;
SmallVector<LoopIterationSpace, 4> IterSpaces;
- IterSpaces.resize(NestedLoopCount);
+ IterSpaces.resize(std::max(OrderedLoopCount, NestedLoopCount));
Stmt *CurStmt = AStmt->IgnoreContainers(/* IgnoreCaptured */ true);
for (unsigned Cnt = 0; Cnt < NestedLoopCount; ++Cnt) {
- if (checkOpenMPIterationSpace(DKind, CurStmt, SemaRef, DSA, Cnt,
- NestedLoopCount, CollapseLoopCountExpr,
- OrderedLoopCountExpr, VarsWithImplicitDSA,
- IterSpaces[Cnt], Captures))
+ if (checkOpenMPIterationSpace(
+ DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount,
+ std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr,
+ OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces[Cnt],
+ Captures))
+ return 0;
+ // Move on to the next nested for loop, or to the loop body.
+ // OpenMP [2.8.1, simd construct, Restrictions]
+ // All loops associated with the construct must be perfectly nested; that
+ // is, there must be no intervening code nor any OpenMP directive between
+ // any two loops.
+ CurStmt = cast<ForStmt>(CurStmt)->getBody()->IgnoreContainers();
+ }
+ for (unsigned Cnt = NestedLoopCount; Cnt < OrderedLoopCount; ++Cnt) {
+ if (checkOpenMPIterationSpace(
+ DKind, CurStmt, SemaRef, DSA, Cnt, NestedLoopCount,
+ std::max(OrderedLoopCount, NestedLoopCount), CollapseLoopCountExpr,
+ OrderedLoopCountExpr, VarsWithImplicitDSA, IterSpaces[Cnt],
+ Captures))
return 0;
+ if (Cnt > 0 && IterSpaces[Cnt].CounterVar) {
+ // Handle initialization of captured loop iterator variables.
+ auto *DRE = cast<DeclRefExpr>(IterSpaces[Cnt].CounterVar);
+ if (isa<OMPCapturedExprDecl>(DRE->getDecl())) {
+ Captures[DRE] = DRE;
+ }
+ }
// Move on to the next nested for loop, or to the loop body.
// OpenMP [2.8.1, simd construct, Restrictions]
// All loops associated with the construct must be perfectly nested; that
@@ -5104,7 +5252,6 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.Inits.resize(NestedLoopCount);
Built.Updates.resize(NestedLoopCount);
Built.Finals.resize(NestedLoopCount);
- SmallVector<Expr *, 4> LoopMultipliers;
{
ExprResult Div;
// Go from inner nested loop to outer.
@@ -5174,7 +5321,6 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
HasErrors = true;
break;
}
- LoopMultipliers.push_back(Div.get());
}
if (!Update.isUsable() || !Final.isUsable()) {
HasErrors = true;
@@ -5222,55 +5368,6 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.DistCombinedFields.NLB = CombNextLB.get();
Built.DistCombinedFields.NUB = CombNextUB.get();
- Expr *CounterVal = SemaRef.DefaultLvalueConversion(IV.get()).get();
- // Fill data for doacross depend clauses.
- for (const auto &Pair : DSA.getDoacrossDependClauses()) {
- if (Pair.first->getDependencyKind() == OMPC_DEPEND_source) {
- Pair.first->setCounterValue(CounterVal);
- } else {
- if (NestedLoopCount != Pair.second.size() ||
- NestedLoopCount != LoopMultipliers.size() + 1) {
- // Erroneous case - clause has some problems.
- Pair.first->setCounterValue(CounterVal);
- continue;
- }
- assert(Pair.first->getDependencyKind() == OMPC_DEPEND_sink);
- auto I = Pair.second.rbegin();
- auto IS = IterSpaces.rbegin();
- auto ILM = LoopMultipliers.rbegin();
- Expr *UpCounterVal = CounterVal;
- Expr *Multiplier = nullptr;
- for (int Cnt = NestedLoopCount - 1; Cnt >= 0; --Cnt) {
- if (I->first) {
- assert(IS->CounterStep);
- Expr *NormalizedOffset =
- SemaRef
- .BuildBinOp(CurScope, I->first->getExprLoc(), BO_Div,
- I->first, IS->CounterStep)
- .get();
- if (Multiplier) {
- NormalizedOffset =
- SemaRef
- .BuildBinOp(CurScope, I->first->getExprLoc(), BO_Mul,
- NormalizedOffset, Multiplier)
- .get();
- }
- assert(I->second == OO_Plus || I->second == OO_Minus);
- BinaryOperatorKind BOK = (I->second == OO_Plus) ? BO_Add : BO_Sub;
- UpCounterVal = SemaRef
- .BuildBinOp(CurScope, I->first->getExprLoc(), BOK,
- UpCounterVal, NormalizedOffset)
- .get();
- }
- Multiplier = *ILM;
- ++I;
- ++IS;
- ++ILM;
- }
- Pair.first->setCounterValue(UpCounterVal);
- }
- }
-
return NestedLoopCount;
}
@@ -5838,12 +5935,12 @@ StmtResult Sema::ActOnOpenMPOrderedDirective(ArrayRef<OMPClause *> Clauses,
Diag(DependFound->getLocStart(), diag::err_omp_depend_clause_thread_simd)
<< getOpenMPClauseName(TC ? TC->getClauseKind() : SC->getClauseKind());
ErrorFound = true;
- } else if (DependFound && !DSAStack->getParentOrderedRegionParam()) {
+ } else if (DependFound && !DSAStack->getParentOrderedRegionParam().first) {
Diag(DependFound->getLocStart(),
diag::err_omp_ordered_directive_without_param);
ErrorFound = true;
} else if (TC || Clauses.empty()) {
- if (const Expr *Param = DSAStack->getParentOrderedRegionParam()) {
+ if (const Expr *Param = DSAStack->getParentOrderedRegionParam().first) {
SourceLocation ErrLoc = TC ? TC->getLocStart() : StartLoc;
Diag(ErrLoc, diag::err_omp_ordered_directive_with_param)
<< (TC != nullptr);
@@ -8619,9 +8716,11 @@ OMPClause *Sema::ActOnOpenMPOrderedClause(SourceLocation StartLoc,
} else {
NumForLoops = nullptr;
}
- DSAStack->setOrderedRegion(/*IsOrdered=*/true, NumForLoops);
- return new (Context)
- OMPOrderedClause(NumForLoops, StartLoc, LParenLoc, EndLoc);
+ auto *Clause = OMPOrderedClause::Create(
+ Context, NumForLoops, NumForLoops ? DSAStack->getAssociatedLoops() : 0,
+ StartLoc, LParenLoc, EndLoc);
+ DSAStack->setOrderedRegion(/*IsOrdered=*/true, NumForLoops, Clause);
+ return Clause;
}
OMPClause *Sema::ActOnOpenMPSimpleClause(
@@ -11477,8 +11576,9 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
DSAStackTy::OperatorOffsetTy OpsOffs;
llvm::APSInt DepCounter(/*BitWidth=*/32);
llvm::APSInt TotalDepCount(/*BitWidth=*/32);
- if (DepKind == OMPC_DEPEND_sink) {
- if (const Expr *OrderedCountExpr = DSAStack->getParentOrderedRegionParam()) {
+ if (DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) {
+ if (const Expr *OrderedCountExpr =
+ DSAStack->getParentOrderedRegionParam().first) {
TotalDepCount = OrderedCountExpr->EvaluateKnownConstInt(Context);
TotalDepCount.setIsUnsigned(/*Val=*/true);
}
@@ -11494,7 +11594,7 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
SourceLocation ELoc = RefExpr->getExprLoc();
Expr *SimpleExpr = RefExpr->IgnoreParenCasts();
if (DepKind == OMPC_DEPEND_sink) {
- if (DSAStack->getParentOrderedRegionParam() &&
+ if (DSAStack->getParentOrderedRegionParam().first &&
DepCounter >= TotalDepCount) {
Diag(ELoc, diag::err_omp_depend_sink_unexpected_expr);
continue;
@@ -11560,7 +11660,7 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
continue;
}
if (!CurContext->isDependentContext() &&
- DSAStack->getParentOrderedRegionParam() &&
+ DSAStack->getParentOrderedRegionParam().first &&
DepCounter != DSAStack->isParentLoopControlVariable(D).first) {
const ValueDecl *VD =
DSAStack->getParentLoopControlVariable(DepCounter.getZExtValue());
@@ -11598,7 +11698,7 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
if (!CurContext->isDependentContext() && DepKind == OMPC_DEPEND_sink &&
TotalDepCount > VarList.size() &&
- DSAStack->getParentOrderedRegionParam() &&
+ DSAStack->getParentOrderedRegionParam().first &&
DSAStack->getParentLoopControlVariable(VarList.size() + 1)) {
Diag(EndLoc, diag::err_omp_depend_sink_expected_loop_iteration)
<< 1 << DSAStack->getParentLoopControlVariable(VarList.size() + 1);
@@ -11608,7 +11708,8 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
return nullptr;
auto *C = OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc,
- DepKind, DepLoc, ColonLoc, Vars);
+ DepKind, DepLoc, ColonLoc, Vars,
+ TotalDepCount.getZExtValue());
if ((DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source) &&
DSAStack->isParentOrderedRegion())
DSAStack->addDoacrossDependClause(C, OpsOffs);
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index d9d780b25b..570b554986 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -1856,7 +1856,7 @@ OMPClause *OMPClauseReader::readClause() {
C = new (Context) OMPScheduleClause();
break;
case OMPC_ordered:
- C = new (Context) OMPOrderedClause();
+ C = OMPOrderedClause::CreateEmpty(Context, Reader->Record.readInt());
break;
case OMPC_nowait:
C = new (Context) OMPNowaitClause();
@@ -1927,9 +1927,12 @@ OMPClause *OMPClauseReader::readClause() {
case OMPC_flush:
C = OMPFlushClause::CreateEmpty(Context, Reader->Record.readInt());
break;
- case OMPC_depend:
- C = OMPDependClause::CreateEmpty(Context, Reader->Record.readInt());
+ case OMPC_depend: {
+ unsigned NumVars = Reader->Record.readInt();
+ unsigned NumLoops = Reader->Record.readInt();
+ C = OMPDependClause::CreateEmpty(Context, NumVars, NumLoops);
break;
+ }
case OMPC_device:
C = new (Context) OMPDeviceClause();
break;
@@ -2087,6 +2090,10 @@ void OMPClauseReader::VisitOMPScheduleClause(OMPScheduleClause *C) {
void OMPClauseReader::VisitOMPOrderedClause(OMPOrderedClause *C) {
C->setNumForLoops(Reader->Record.readSubExpr());
+ for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
+ C->setLoopNumIterations(I, Reader->Record.readSubExpr());
+ for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
+ C->setLoopCounter(I, Reader->Record.readSubExpr());
C->setLParenLoc(Reader->ReadSourceLocation());
}
@@ -2395,10 +2402,11 @@ void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) {
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
- for (unsigned i = 0; i != NumVars; ++i)
+ for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Reader->Record.readSubExpr());
C->setVarRefs(Vars);
- C->setCounterValue(Reader->Record.readSubExpr());
+ for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I)
+ C->setLoopData(I, Reader->Record.readSubExpr());
}
void OMPClauseReader::VisitOMPDeviceClause(OMPDeviceClause *C) {
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index 48c3f79a43..09cb0ddc4d 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -1898,7 +1898,12 @@ void OMPClauseWriter::VisitOMPScheduleClause(OMPScheduleClause *C) {
}
void OMPClauseWriter::VisitOMPOrderedClause(OMPOrderedClause *C) {
+ Record.push_back(C->getLoopNumIterations().size());
Record.AddStmt(C->getNumForLoops());
+ for (Expr *NumIter : C->getLoopNumIterations())
+ Record.AddStmt(NumIter);
+ for (unsigned I = 0, E = C->getLoopNumIterations().size(); I <E; ++I)
+ Record.AddStmt(C->getLoopCunter(I));
Record.AddSourceLocation(C->getLParenLoc());
}
@@ -2102,13 +2107,15 @@ void OMPClauseWriter::VisitOMPFlushClause(OMPFlushClause *C) {
void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) {
Record.push_back(C->varlist_size());
+ Record.push_back(C->getNumLoops());
Record.AddSourceLocation(C->getLParenLoc());
Record.push_back(C->getDependencyKind());
Record.AddSourceLocation(C->getDependencyLoc());
Record.AddSourceLocation(C->getColonLoc());
for (auto *VE : C->varlists())
Record.AddStmt(VE);
- Record.AddStmt(C->getCounterValue());
+ for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I)
+ Record.AddStmt(C->getLoopData(I));
}
void OMPClauseWriter::VisitOMPDeviceClause(OMPDeviceClause *C) {
diff --git a/test/OpenMP/ordered_doacross_codegen.c b/test/OpenMP/ordered_doacross_codegen.c
index c12df2638f..a6cd4fe48b 100644
--- a/test/OpenMP/ordered_doacross_codegen.c
+++ b/test/OpenMP/ordered_doacross_codegen.c
@@ -19,17 +19,19 @@ void foo();
// CHECK-LABEL: @main()
int main() {
int i;
-// CHECK: [[DIMS:%.+]] = alloca [[KMP_DIM]],
+// CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: icmp
// CHECK-NEXT: br i1 %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 1
// CHECK: store i64 %{{.+}}, i64* %
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 2
// CHECK: store i64 1, i64* %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(
#pragma omp for ordered(1)
@@ -37,18 +39,26 @@ int main() {
a[i] = b[i] + 1;
foo();
// CHECK: call void [[FOO:.+]](
-// CHECK: load i32, i32* [[CNT:%.+]],
+// CHECK: load i32, i32* [[I:%.+]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
-// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(source)
c[i] = c[i] + 1;
foo();
// CHECK: call void [[FOO]]
-// CHECK: load i32, i32* [[CNT]],
+// CHECK: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
-// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(sink : i - 2)
d[i] = a[i - 2];
diff --git a/test/OpenMP/ordered_doacross_codegen.cpp b/test/OpenMP/ordered_doacross_codegen.cpp
index 60b0353b71..2f19e9c2d5 100644
--- a/test/OpenMP/ordered_doacross_codegen.cpp
+++ b/test/OpenMP/ordered_doacross_codegen.cpp
@@ -19,17 +19,19 @@ void foo();
// CHECK-LABEL: @main()
int main() {
int i;
-// CHECK: [[DIMS:%.+]] = alloca [[KMP_DIM]],
+// CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: icmp
// CHECK-NEXT: br i1 %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 1
// CHECK: store i64 %{{.+}}, i64* %
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 2
// CHECK: store i64 1, i64* %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(
#pragma omp for ordered(1)
@@ -37,18 +39,26 @@ int main() {
a[i] = b[i] + 1;
foo();
// CHECK: invoke void [[FOO:.+]](
-// CHECK: load i32, i32* [[CNT:%.+]],
+// CHECK: load i32, i32* [[I:%.+]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
-// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(source)
c[i] = c[i] + 1;
foo();
// CHECK: invoke void [[FOO]]
-// CHECK: load i32, i32* [[CNT]],
+// CHECK: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
-// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(sink : i - 2)
d[i] = a[i - 2];
@@ -75,41 +85,84 @@ struct TestStruct {
T bar(T, T, T);
void baz(T, T);
TestStruct() {
-// CHECK: [[CNT:%.+]] = alloca i64,
-// CHECK: [[DIMS:%.+]] = alloca [[KMP_DIM]],
+// CHECK: [[DIMS:%.+]] = alloca [2 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
-// CHECK: icmp
-// CHECK-NEXT: br i1 %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
-// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 24, i1 false)
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 1
+// CHECK: [[CAST:%.+]] = bitcast [2 x [[KMP_DIM]]]* [[DIMS]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 48, i1 false)
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 1
+// CHECK: store i64 10, i64* %
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 2
+// CHECK: store i64 1, i64* %
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 1
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 1
// CHECK: store i64 %{{.+}}, i64* %
-// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIMS]], i32 0, i32 2
+// CHECK: getelementptr inbounds [[KMP_DIM]], [[KMP_DIM]]* [[DIM]], i32 0, i32 2
// CHECK: store i64 1, i64* %
-// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIMS]] to i8*
-// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
-// CHECK: call void @__kmpc_for_static_init_8(
+// CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
+// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
+// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 2, i8* [[CAST]])
+// CHECK: call void @__kmpc_for_static_init_4(
#pragma omp for ordered(2)
for (T j = 0; j < M; j++)
for (i = 0; i < n; i += 2) {
a[i][j] = foo(i, j);
// CHECK: invoke {{.+TestStruct.+foo}}
-// CHECK: load i64, i64* [[CNT]],
-// CHECK-NEXT: sub nsw i64 %{{.+}}, 1
+// CHECK: load i32*, i32** %
+// CHECK: load i32, i32* %
+// CHECK: load i32, i32* %
+// CHECK: load i32, i32* [[J:%.+]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF:%.+]],
+// CHECK-NEXT: load i32, i32* [[I]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 2
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
-// CHECK-NEXT: load i64, i64* [[CNT]],
-// CHECK-NEXT: load i32, i32* %
-// CHECK-NEXT: mul nsw i32 1, %
+// CHECK-NEXT: load i32, i32* [[J:%.+]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 1
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
-// CHECK-NEXT: sub nsw i64 %
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]],
+// CHECK-NEXT: load i32, i32* [[I]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 2
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(sink : j, i - 2) depend(sink : j - 1, i)
b[i][j] = bar(a[i][j], b[i - 1][j], b[i][j - 1]);
// CHECK: invoke {{.+TestStruct.+bar}}
-// CHECK: load i64, i64* [[CNT]],
+// CHECK: load i32*, i32** %
+// CHECK: load i32, i32* %
+// CHECK: load i32, i32* %
+// CHECK: load i32, i32* [[J]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 1
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
+// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]],
+// CHECK-NEXT: load i32, i32* [[I]],
+// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
+// CHECK-NEXT: sdiv i32 %{{.+}}, 2
+// CHECK-NEXT: sext i32 %{{.+}} to i64
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
+// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
+// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
#pragma omp ordered depend(source)
baz(a[i][j], b[i][j]);
diff --git a/test/OpenMP/parallel_for_simd_ast_print.cpp b/test/OpenMP/parallel_for_simd_ast_print.cpp
index fd4153752a..597e66d22e 100644
--- a/test/OpenMP/parallel_for_simd_ast_print.cpp
+++ b/test/OpenMP/parallel_for_simd_ast_print.cpp
@@ -48,7 +48,7 @@ class S8 : public S7<S1> {
public:
S8(int v) : S7<S1>(v){
-#pragma omp parallel for simd private(a) private(this->a) private(S7<S1>::a)
+#pragma omp parallel for simd private(a) private(this->a) private(S7 <S1>::a)
for (int k = 0; k < a.a; ++k)
++this->a.a;
}