summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTobias Grosser <grosser@fim.uni-passau.de>2012-12-20 20:18:57 +0000
committerTobias Grosser <grosser@fim.uni-passau.de>2012-12-20 20:18:57 +0000
commit5e4079507be2de1d920415bfd0f768da8584ec4b (patch)
tree3e70318910b31d87eaa257ec99960db66622678b
parent2c9d10104da9a8ecadf1c0cbb4371a5757d10a79 (diff)
isl: Detect openmp parallelism
Based on code written by Riyadh Baghdadi. Merged from: https://llvm.org/svn/llvm-project/polly/trunk@170102 llvm-svn: 170753
-rw-r--r--polly/lib/CodeGen/IslAst.cpp211
-rw-r--r--polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll46
-rw-r--r--polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll55
-rw-r--r--polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll46
-rw-r--r--polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll46
-rw-r--r--polly/test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll34
-rw-r--r--polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll35
7 files changed, 467 insertions, 6 deletions
diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp
index 99f69bef4fe8..4f79ea89cbd5 100644
--- a/polly/lib/CodeGen/IslAst.cpp
+++ b/polly/lib/CodeGen/IslAst.cpp
@@ -22,6 +22,7 @@
#include "polly/CodeGen/IslAst.h"
#include "polly/LinkAllPasses.h"
+#include "polly/Dependences.h"
#include "polly/ScopInfo.h"
#define DEBUG_TYPE "polly-ast"
@@ -43,10 +44,14 @@ static cl::opt<bool>
UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden,
cl::init(false), cl::ZeroOrMore);
+static cl::opt<bool>
+DetectParallel("polly-ast-detect-parallel", cl::desc("Detect parallelism"),
+ cl::Hidden, cl::init(false), cl::ZeroOrMore);
+
namespace polly {
class IslAst {
public:
- IslAst(Scop *Scop);
+ IslAst(Scop *Scop, Dependences &D);
~IslAst();
@@ -72,8 +77,183 @@ static void IslAstUserFree(void *User)
free(UserStruct);
}
-static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
- __isl_keep isl_ast_build *Context, void *User)
+// Information about an ast node.
+struct AstNodeUserInfo {
+ // The node is the outermost parallel loop.
+ int IsOutermostParallel;
+};
+
+// Temporary information used when building the ast.
+struct AstBuildUserInfo {
+ // The dependence information.
+ Dependences *Deps;
+
+ // We are inside a parallel for node.
+ int InParallelFor;
+};
+
+// Print a loop annotated with OpenMP pragmas.
+static __isl_give isl_printer *
+printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
+ __isl_take isl_ast_print_options *PrintOptions,
+ AstNodeUserInfo *Info) {
+ if (Info && Info->IsOutermostParallel) {
+ Printer = isl_printer_start_line(Printer);
+ if (Info->IsOutermostParallel)
+ Printer = isl_printer_print_str(Printer, "#pragma omp parallel for");
+ Printer = isl_printer_end_line(Printer);
+ }
+ return isl_ast_node_for_print(Node, Printer, PrintOptions);
+}
+
+// Print an isl_ast_for.
+static __isl_give isl_printer *
+printFor(__isl_take isl_printer *Printer,
+ __isl_take isl_ast_print_options *PrintOptions,
+ __isl_keep isl_ast_node *Node, void *User) {
+ isl_id *Id = isl_ast_node_get_annotation(Node);
+ if (!Id)
+ return isl_ast_node_for_print(Node, Printer, PrintOptions);
+
+ struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+ Printer = printParallelFor(Node, Printer, PrintOptions, Info);
+ isl_id_free(Id);
+ return Printer;
+}
+
+// Allocate an AstNodeInfo structure and initialize it with default values.
+static struct AstNodeUserInfo *allocateAstNodeUserInfo() {
+ struct AstNodeUserInfo *NodeInfo;
+ NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo));
+ NodeInfo->IsOutermostParallel = 0;
+ return NodeInfo;
+}
+
+// Free the AstNodeInfo structure.
+static void freeAstNodeUserInfo(void *Ptr) {
+ struct AstNodeUserInfo *Info;
+ Info = (struct AstNodeUserInfo *) Ptr;
+ free(Info);
+}
+
+// Check if the current scheduling dimension is parallel.
+//
+// We check for parallelism by verifying that the loop does not carry any
+// dependences.
+//
+// Parallelism test: if the distance is zero in all outer dimensions, then it
+// has to be zero in the current dimension as well.
+//
+// Implementation: first, translate dependences into time space, then force
+// outer dimensions to be equal. If the distance is zero in the current
+// dimension, then the loop is parallel. The distance is zero in the current
+// dimension if it is a subset of a map with equal values for the current
+// dimension.
+static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
+ Dependences *D) {
+ isl_union_map *Schedule, *Deps;
+ isl_map *ScheduleDeps, *Test;
+ isl_space *ScheduleSpace;
+ unsigned Dimension, IsParallel;
+
+ Schedule = isl_ast_build_get_schedule(Build);
+ ScheduleSpace = isl_ast_build_get_schedule_space(Build);
+
+ Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1;
+
+ Deps = D->getDependences(Dependences::TYPE_ALL);
+ Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule));
+ Deps = isl_union_map_apply_domain(Deps, Schedule);
+
+ if (isl_union_map_is_empty(Deps)) {
+ isl_union_map_free(Deps);
+ isl_space_free(ScheduleSpace);
+ return 1;
+ }
+
+ ScheduleDeps = isl_map_from_union_map(Deps);
+
+ for (unsigned i = 0; i < Dimension; i++)
+ ScheduleDeps = isl_map_equate(ScheduleDeps, isl_dim_out, i, isl_dim_in, i);
+
+ Test = isl_map_universe(isl_map_get_space(ScheduleDeps));
+ Test = isl_map_equate(Test, isl_dim_out, Dimension, isl_dim_in, Dimension);
+ IsParallel = isl_map_is_subset(ScheduleDeps, Test);
+
+ isl_space_free(ScheduleSpace);
+ isl_map_free(Test);
+ isl_map_free(ScheduleDeps);
+
+ return IsParallel;
+}
+
+// Mark a for node openmp parallel, if it is the outermost parallel for node.
+static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
+ struct AstBuildUserInfo *BuildInfo,
+ struct AstNodeUserInfo *NodeInfo) {
+ if (BuildInfo->InParallelFor)
+ return;
+
+ if (astScheduleDimIsParallel(Build, BuildInfo->Deps)) {
+ BuildInfo->InParallelFor = 1;
+ NodeInfo->IsOutermostParallel = 1;
+ }
+}
+
+// This method is executed before the construction of a for node. It creates
+// an isl_id that is used to annotate the subsequently generated ast for nodes.
+//
+// In this function we also run the following analyses:
+//
+// - Detection of openmp parallel loops
+//
+static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
+ void *User) {
+ isl_id *Id;
+ struct AstBuildUserInfo *BuildInfo;
+ struct AstNodeUserInfo *NodeInfo;
+
+ BuildInfo = (struct AstBuildUserInfo *) User;
+ NodeInfo = allocateAstNodeUserInfo();
+ Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
+ Id = isl_id_set_free_user(Id, freeAstNodeUserInfo);
+
+ markOpenmpParallel(Build, BuildInfo, NodeInfo);
+
+ return Id;
+}
+
+// This method is executed after the construction of a for node.
+//
+// It performs the following actions:
+//
+// - Reset the 'InParallelFor' flag, as soon as we leave a for node,
+// that is marked as openmp parallel.
+//
+static __isl_give isl_ast_node *
+astBuildAfterFor(__isl_take isl_ast_node *Node,
+ __isl_keep isl_ast_build *Build, void *User) {
+ isl_id *Id;
+ struct AstBuildUserInfo *BuildInfo;
+ struct AstNodeUserInfo *Info;
+
+ Id = isl_ast_node_get_annotation(Node);
+ if (!Id)
+ return Node;
+ Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+ if (Info && Info->IsOutermostParallel) {
+ BuildInfo = (struct AstBuildUserInfo *) User;
+ BuildInfo->InParallelFor = 0;
+ }
+
+ isl_id_free(Id);
+
+ return Node;
+}
+
+static __isl_give isl_ast_node *
+AtEachDomain(__isl_keep isl_ast_node *Node,
+ __isl_keep isl_ast_build *Context, void *User)
{
isl_map *Map;
struct IslAstUser *UserStruct;
@@ -90,10 +270,11 @@ static __isl_give isl_ast_node *AtEachDomain(__isl_keep isl_ast_node *Node,
return isl_ast_node_set_annotation(Node, Annotation);
}
-IslAst::IslAst(Scop *Scop) : S(Scop) {
+IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) {
isl_ctx *Ctx = S->getIslCtx();
isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
isl_ast_build *Context;
+ struct AstBuildUserInfo BuildInfo;
if (UseContext)
Context = isl_ast_build_from_context(S->getContext());
@@ -112,6 +293,16 @@ IslAst::IslAst(Scop *Scop) : S(Scop) {
isl_union_map_dump(Schedule);
);
+ if (DetectParallel) {
+ BuildInfo.Deps = &D;
+ BuildInfo.InParallelFor = 0;
+
+ Context = isl_ast_build_set_before_each_for(Context, &astBuildBeforeFor,
+ &BuildInfo);
+ Context = isl_ast_build_set_after_each_for(Context, &astBuildAfterFor,
+ &BuildInfo);
+ }
+
Root = isl_ast_build_ast_from_schedule(Context, Schedule);
isl_ast_build_free(Context);
@@ -141,7 +332,11 @@ IslAst::~IslAst() {
/// Print a C like representation of the program.
void IslAst::pprint(llvm::raw_ostream &OS) {
isl_ast_node *Root;
- isl_ast_print_options *Options = isl_ast_print_options_alloc(S->getIslCtx());
+ isl_ast_print_options *Options;
+
+ Options = isl_ast_print_options_alloc(S->getIslCtx());
+ Options = isl_ast_print_options_set_print_for(Options, &printFor, NULL);
+
isl_printer *P = isl_printer_to_str(S->getIslCtx());
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
Root = getAst();
@@ -174,7 +369,9 @@ bool IslAstInfo::runOnScop(Scop &Scop) {
S = &Scop;
- Ast = new IslAst(&Scop);
+ Dependences &D = getAnalysis<Dependences>();
+
+ Ast = new IslAst(&Scop, D);
return false;
}
@@ -195,12 +392,14 @@ void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const {
// Get the Common analysis usage of ScopPasses.
ScopPass::getAnalysisUsage(AU);
AU.addRequired<ScopInfo>();
+ AU.addRequired<Dependences>();
}
char IslAstInfo::ID = 0;
INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast",
"Generate an AST of the SCoP (isl)", false, false)
INITIALIZE_PASS_DEPENDENCY(ScopInfo)
+INITIALIZE_PASS_DEPENDENCY(Dependences)
INITIALIZE_PASS_END(IslAstInfo, "polly-ast",
"Generate an AST from the SCoP (isl)", false, false)
diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll
new file mode 100644
index 000000000000..bba34cc8b32f
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll
@@ -0,0 +1,46 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < 1024; i++)
+; for (j = 0; j < 1024; j++)
+; A[i][j] = 1;
+
+@A = common global [1024 x [1024 x i32]] zeroinitializer
+define void @bar() {
+start:
+ fence seq_cst
+ br label %loop.i
+
+loop.i:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
+ %exitcond.i = icmp ne i64 %i, 1024
+ br i1 %exitcond.i, label %loop.j, label %ret
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
+ %exitcond.j = icmp ne i64 %j, 1024
+ br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
+
+loop.body:
+ %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
+ store i32 1, i32* %scevgep
+ br label %loop.j.backedge
+
+loop.j.backedge:
+ %j.next = add nsw i64 %j, 1
+ br label %loop.j
+
+loop.i.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.i
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; CHECK: #pragma omp parallel for
+; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1)
+; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1)
+; CHECK: Stmt_loop_body(c1, c3);
diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll
new file mode 100644
index 000000000000..b87610a8b261
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll
@@ -0,0 +1,55 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < n; i++)
+; for (j = 0; j < n; j++)
+; A[i][j] = 1;
+
+@A = common global [1024 x [1024 x i32]] zeroinitializer
+define void @bar(i64 %n) {
+start:
+ fence seq_cst
+ br label %loop.i
+
+loop.i:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
+ %exitcond.i = icmp ne i64 %i, %n
+ br i1 %exitcond.i, label %loop.j, label %ret
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
+ %exitcond.j = icmp ne i64 %j, %n
+ br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
+
+loop.body:
+ %scevgep = getelementptr [1024 x [1024 x i32] ]* @A, i64 0, i64 %j, i64 %i
+ store i32 1, i32* %scevgep
+ br label %loop.j.backedge
+
+loop.j.backedge:
+ %j.next = add nsw i64 %j, 1
+ br label %loop.j
+
+loop.i.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.i
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; At the first look both loops seem parallel, however due to the delinearization
+; we get the following dependences:
+; [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]:
+; 0 <= i0 < n - 1024 and 1 <= i1 < n}
+; They cause the outer loop to be non-parallel. We can only prove their
+; absence, if we know that n < 1024. This information is currently not available
+; to polly. However, we should be able to obtain it due to the out of bounds
+; memory accesses, that would happen if n >= 1024.
+;
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: #pragma omp parallel for
+; CHECK: for (int c3 = 0; c3 < n; c3 += 1)
+; CHECK: Stmt_loop_body(c1, c3);
diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll
new file mode 100644
index 000000000000..c530aaf6533c
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll
@@ -0,0 +1,46 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < n; i++)
+; for (j = 0; j < n; j++)
+; A[j] = 1;
+
+@A = common global [1024 x i32] zeroinitializer
+define void @bar(i64 %n) {
+start:
+ fence seq_cst
+ br label %loop.i
+
+loop.i:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
+ %exitcond.i = icmp ne i64 %i, %n
+ br i1 %exitcond.i, label %loop.j, label %ret
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
+ %exitcond.j = icmp ne i64 %j, %n
+ br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
+
+loop.body:
+ %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %j
+ store i32 1, i32* %scevgep
+ br label %loop.j.backedge
+
+loop.j.backedge:
+ %j.next = add nsw i64 %j, 1
+ br label %loop.j
+
+loop.i.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.i
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: #pragma omp parallel for
+; CHECK: for (int c3 = 0; c3 < n; c3 += 1)
+; CHECK: Stmt_loop_body(c1, c3);
diff --git a/polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll b/polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll
new file mode 100644
index 000000000000..8ca4b1ad970c
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll
@@ -0,0 +1,46 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < n; i++)
+; for (j = 0; j < n; j++)
+; A[i] = 1;
+
+@A = common global [1024 x i32] zeroinitializer
+define void @bar(i64 %n) {
+start:
+ fence seq_cst
+ br label %loop.i
+
+loop.i:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.i.backedge ]
+ %exitcond.i = icmp ne i64 %i, %n
+ br i1 %exitcond.i, label %loop.j, label %ret
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i], [ %j.next, %loop.j.backedge ]
+ %exitcond.j = icmp ne i64 %j, %n
+ br i1 %exitcond.j, label %loop.body, label %loop.i.backedge
+
+loop.body:
+ %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i
+ store i32 1, i32* %scevgep
+ br label %loop.j.backedge
+
+loop.j.backedge:
+ %j.next = add nsw i64 %j, 1
+ br label %loop.j
+
+loop.i.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.i
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; CHECK: #pragma omp parallel for
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: for (int c3 = 0; c3 < n; c3 += 1)
+; CHECK: Stmt_loop_body(c1, c3);
diff --git a/polly/test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll b/polly/test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll
new file mode 100644
index 000000000000..9a3e18c523d3
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll
@@ -0,0 +1,34 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < n; i++)
+; A[0] = i;
+
+@A = common global [1024 x i32] zeroinitializer
+define void @bar(i64 %n) {
+start:
+ fence seq_cst
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.backedge ]
+ %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 0
+ %exitcond = icmp ne i64 %i, %n
+ br i1 %exitcond, label %loop.body, label %ret
+
+loop.body:
+ store i32 1, i32* %scevgep
+ br label %loop.backedge
+
+loop.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.header
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: Stmt_loop_body(c1)
diff --git a/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll b/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll
new file mode 100644
index 000000000000..499cfa7d27a4
--- /dev/null
+++ b/polly/test/Isl/Ast/OpenMP/single_loop_param_parallel.ll
@@ -0,0 +1,35 @@
+; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; for (i = 0; i < n; i++)
+; A[i] = 1;
+
+@A = common global [1024 x i32] zeroinitializer
+define void @bar(i64 %n) {
+start:
+ fence seq_cst
+ br label %loop.header
+
+loop.header:
+ %i = phi i64 [ 0, %start ], [ %i.next, %loop.backedge ]
+ %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i
+ %exitcond = icmp ne i64 %i, %n
+ br i1 %exitcond, label %loop.body, label %ret
+
+loop.body:
+ store i32 1, i32* %scevgep
+ br label %loop.backedge
+
+loop.backedge:
+ %i.next = add nsw i64 %i, 1
+ br label %loop.header
+
+ret:
+ fence seq_cst
+ ret void
+}
+
+; CHECK: #pragma omp parallel for
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: Stmt_loop_body(c1)