summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZhang Xiang <xiang.zhang@iluvatar.com>2023-12-05 16:47:03 +0800
committerZhang Xiang <xiang.zhang@iluvatar.com>2023-12-05 17:03:38 +0800
commitcbcc7f3d80519eef04426447356cab3c7aad8cbb (patch)
tree142349c86aa518fa39b01db95427455a14c7edac
parentaf7995f600d3b91a9d7269f2789913e06a61ef38 (diff)
[SimplifyCFG] Not folding branch in constant loops which expected unrollupstream/users/xiangzhangllvm/refine-simplify-CFG-for-loop-unroll
Constant iteration loop with unroll hint usually expected do unroll by consumers, folding branches in such loop header at SimplifyCFG will break unroll optimization.
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp57
-rw-r--r--llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll851
2 files changed, 860 insertions, 48 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c09cf9c2325c..ca5a55d9cd17 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,6 +73,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -3634,6 +3635,59 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}
+bool hasUnrollHint(Instruction *TI) {
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+ if (!MD)
+ return false;
+
+ return GetUnrollMetadata(MD, "llvm.loop.unroll.enable") ||
+ GetUnrollMetadata(MD, "llvm.loop.unroll.full") ||
+ GetUnrollMetadata(MD, "llvm.loop.unroll.count");
+}
+
+// Escape folding "I < ConstNum" with "Cond2" when loops with constant
+// iterations and expected unroll.
+// #pragma unroll
+// for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
+// if (Cond2) {
+// break;
+// }
+// xxx loop body;
+// }
+// Folding these conditional branches may break/affect loop unroll.
+static bool isConstantLoopWithUnrollHint(BranchInst *PBI) {
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(PBI->getCondition());
+ if (!ICmp)
+ return false;
+
+ // Make sure ConstNum > 1
+ bool DoFold = true;
+ for (unsigned I = 0; I < ICmp->getNumOperands(); ++I) {
+ ConstantInt *Op = dyn_cast<ConstantInt>(ICmp->getOperand(I));
+ if (!Op)
+ continue;
+ if (Op->getSExtValue() > 1) {
+ DoFold = false;
+ break;
+ }
+ }
+ if (DoFold)
+ return false;
+
+ // Loop information has not been established yet, so here we easily judge
+ // whether it is a loop by backedge.
+ BasicBlock *PBB = PBI->getParent();
+ for (Function::iterator I = PBB->getIterator(), E = PBB->getParent()->end();
+ I != E; ++I) {
+ BasicBlock *BB = &*I;
+ if (is_contained(predecessors(PBB), BB)) {
+ if (hasUnrollHint(BB->getTerminator()))
+ return true;
+ }
+ }
+ return false;
+}
+
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
@@ -3645,6 +3699,9 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
"PredBB must be a predecessor of BB.");
+ if (isConstantLoopWithUnrollHint(PBI))
+ return std::nullopt;
+
// We have the potential to fold the conditions together, but if the
// predecessor branch is predictable, we may not want to merge them.
uint64_t PTWeight, PFWeight;
diff --git a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
index f442da778213..a3e316ebec0c 100644
--- a/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
+++ b/llvm/test/Transforms/SimplifyCFG/simplify-cfg-unroll.ll
@@ -10,10 +10,11 @@ define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr nou
; CHECK-CFG: for.cond:
; CHECK-CFG-NEXT: [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
; CHECK-CFG-NEXT: [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
-; CHECK-CFG-NEXT: [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT: [[CMP:%.*]] = icmp slt i32 [[DIM_0]], 16
+; CHECK-CFG-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[CLEANUP:%.*]]
+; CHECK-CFG: for.body:
; CHECK-CFG-NEXT: [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
-; CHECK-CFG-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-CFG-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-CFG-NEXT: br i1 [[CMP1]], label [[CLEANUP]], label [[IF_END:%.*]]
; CHECK-CFG: if.end:
; CHECK-CFG-NEXT: [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
; CHECK-CFG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
@@ -50,72 +51,830 @@ define void @func(i32 noundef %Idx, ptr noundef %Arr, i32 noundef %Dims, ptr nou
; CHECK-UNROLL-NEXT: entry:
; CHECK-UNROLL-NEXT: br label [[FOR_COND:%.*]]
; CHECK-UNROLL: for.cond:
-; CHECK-UNROLL-NEXT: [[DIM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC16:%.*]], [[FOR_COND_CLEANUP6:%.*]] ]
-; CHECK-UNROLL-NEXT: [[IDX_ADDR_0:%.*]] = phi i32 [ [[IDX]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_COND_CLEANUP6]] ]
-; CHECK-UNROLL-NEXT: [[CMP:%.*]] = icmp sge i32 [[DIM_0]], 16
-; CHECK-UNROLL-NEXT: [[CMP1:%.*]] = icmp eq i32 [[DIM_0]], [[DIMS]]
-; CHECK-UNROLL-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]]
-; CHECK-UNROLL-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-UNROLL: for.body:
+; CHECK-UNROLL-NEXT: [[CMP1:%.*]] = icmp eq i32 0, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
; CHECK-UNROLL: if.end:
-; CHECK-UNROLL-NEXT: [[IDXPROM:%.*]] = sext i32 [[DIM_0]] to i64
-; CHECK-UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[IDXPROM]]
-; CHECK-UNROLL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT: [[IDXPROM2:%.*]] = sext i32 [[IDX_ADDR_0]] to i64
-; CHECK-UNROLL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM2]]
-; CHECK-UNROLL-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-UNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP1]], 1
; CHECK-UNROLL-NEXT: br label [[FOR_COND4:%.*]]
; CHECK-UNROLL: for.cond4:
; CHECK-UNROLL-NEXT: br label [[FOR_BODY7:%.*]]
; CHECK-UNROLL: for.cond.cleanup6:
-; CHECK-UNROLL-NEXT: [[INC16]] = add nsw i32 [[DIM_0]], 1
-; CHECK-UNROLL-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_1:%.*]]
+; CHECK-UNROLL: for.body.1:
+; CHECK-UNROLL-NEXT: [[CMP1_1:%.*]] = icmp eq i32 1, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+; CHECK-UNROLL: if.end.1:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_1:%.*]]
+; CHECK-UNROLL: for.cond4.1:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_12:%.*]]
+; CHECK-UNROLL: for.body7.12:
+; CHECK-UNROLL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-UNROLL-NEXT: [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_11:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_11]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_1:%.*]]
+; CHECK-UNROLL: for.body7.1.1:
+; CHECK-UNROLL-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_1:%.*]]
+; CHECK-UNROLL: for.body7.2.1:
+; CHECK-UNROLL-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_1:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_1:%.*]]
+; CHECK-UNROLL: for.body7.3.1:
+; CHECK-UNROLL-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_1:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3_1]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6_1:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.1:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_2:%.*]]
+; CHECK-UNROLL: for.body.2:
+; CHECK-UNROLL-NEXT: [[CMP1_2:%.*]] = icmp eq i32 2, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+; CHECK-UNROLL: if.end.2:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_2:%.*]]
+; CHECK-UNROLL: for.cond4.2:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_24:%.*]]
+; CHECK-UNROLL: for.body7.24:
+; CHECK-UNROLL-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-UNROLL-NEXT: [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_23:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_23]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_2:%.*]]
+; CHECK-UNROLL: for.body7.1.2:
+; CHECK-UNROLL-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_2:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_2:%.*]]
+; CHECK-UNROLL: for.body7.2.2:
+; CHECK-UNROLL-NEXT: [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_2:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_2:%.*]]
+; CHECK-UNROLL: for.body7.3.2:
+; CHECK-UNROLL-NEXT: [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_2:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3_2]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_2:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.2:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_3:%.*]]
+; CHECK-UNROLL: for.body.3:
+; CHECK-UNROLL-NEXT: [[CMP1_3:%.*]] = icmp eq i32 3, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+; CHECK-UNROLL: if.end.3:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_3:%.*]]
+; CHECK-UNROLL: for.cond4.3:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_36:%.*]]
+; CHECK-UNROLL: for.body7.36:
+; CHECK-UNROLL-NEXT: [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
+; CHECK-UNROLL-NEXT: [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_35:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_35]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_3:%.*]]
+; CHECK-UNROLL: for.body7.1.3:
+; CHECK-UNROLL-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_3:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_3:%.*]]
+; CHECK-UNROLL: for.body7.2.3:
+; CHECK-UNROLL-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_3:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_3:%.*]]
+; CHECK-UNROLL: for.body7.3.3:
+; CHECK-UNROLL-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_3:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3_3]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_3:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.3:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_4:%.*]]
+; CHECK-UNROLL: for.body.4:
+; CHECK-UNROLL-NEXT: [[CMP1_4:%.*]] = icmp eq i32 4, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+; CHECK-UNROLL: if.end.4:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_4:%.*]]
+; CHECK-UNROLL: for.cond4.4:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_48:%.*]]
+; CHECK-UNROLL: for.body7.48:
+; CHECK-UNROLL-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4
+; CHECK-UNROLL-NEXT: [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_47:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_47]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_4:%.*]]
+; CHECK-UNROLL: for.body7.1.4:
+; CHECK-UNROLL-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_4:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_4:%.*]]
+; CHECK-UNROLL: for.body7.2.4:
+; CHECK-UNROLL-NEXT: [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_4:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_4:%.*]]
+; CHECK-UNROLL: for.body7.3.4:
+; CHECK-UNROLL-NEXT: [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_4:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3_4]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_4:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.4:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_5:%.*]]
+; CHECK-UNROLL: for.body.5:
+; CHECK-UNROLL-NEXT: [[CMP1_5:%.*]] = icmp eq i32 5, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+; CHECK-UNROLL: if.end.5:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_5:%.*]]
+; CHECK-UNROLL: for.cond4.5:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_5:%.*]]
+; CHECK-UNROLL: for.body7.5:
+; CHECK-UNROLL-NEXT: [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
+; CHECK-UNROLL-NEXT: [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_5:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_5]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_5:%.*]]
+; CHECK-UNROLL: for.body7.1.5:
+; CHECK-UNROLL-NEXT: [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_5:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_5:%.*]]
+; CHECK-UNROLL: for.body7.2.5:
+; CHECK-UNROLL-NEXT: [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_5:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_5:%.*]]
+; CHECK-UNROLL: for.body7.3.5:
+; CHECK-UNROLL-NEXT: [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_5:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_5:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3_5]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_5:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.5:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_6:%.*]]
+; CHECK-UNROLL: for.body.6:
+; CHECK-UNROLL-NEXT: [[CMP1_6:%.*]] = icmp eq i32 6, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+; CHECK-UNROLL: if.end.6:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_6:%.*]]
+; CHECK-UNROLL: for.cond4.6:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_6:%.*]]
+; CHECK-UNROLL: for.body7.6:
+; CHECK-UNROLL-NEXT: [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4
+; CHECK-UNROLL-NEXT: [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_6:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_6]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_6:%.*]]
+; CHECK-UNROLL: for.body7.1.6:
+; CHECK-UNROLL-NEXT: [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_6:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_6:%.*]]
+; CHECK-UNROLL: for.body7.2.6:
+; CHECK-UNROLL-NEXT: [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_6:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_6:%.*]]
+; CHECK-UNROLL: for.body7.3.6:
+; CHECK-UNROLL-NEXT: [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_6:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_6:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3_6]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_6:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.6:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_7:%.*]]
+; CHECK-UNROLL: for.body.7:
+; CHECK-UNROLL-NEXT: [[CMP1_7:%.*]] = icmp eq i32 7, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+; CHECK-UNROLL: if.end.7:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_7:%.*]]
+; CHECK-UNROLL: for.cond4.7:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_7:%.*]]
+; CHECK-UNROLL: for.body7.7:
+; CHECK-UNROLL-NEXT: [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4
+; CHECK-UNROLL-NEXT: [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_7:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_7]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_7:%.*]]
+; CHECK-UNROLL: for.body7.1.7:
+; CHECK-UNROLL-NEXT: [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_7:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_7:%.*]]
+; CHECK-UNROLL: for.body7.2.7:
+; CHECK-UNROLL-NEXT: [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_7:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_7:%.*]]
+; CHECK-UNROLL: for.body7.3.7:
+; CHECK-UNROLL-NEXT: [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_7:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_7:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3_7]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_7:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.7:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_8:%.*]]
+; CHECK-UNROLL: for.body.8:
+; CHECK-UNROLL-NEXT: [[CMP1_8:%.*]] = icmp eq i32 8, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+; CHECK-UNROLL: if.end.8:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_8:%.*]]
+; CHECK-UNROLL: for.cond4.8:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_8:%.*]]
+; CHECK-UNROLL: for.body7.8:
+; CHECK-UNROLL-NEXT: [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4
+; CHECK-UNROLL-NEXT: [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_8:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_8]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_8:%.*]]
+; CHECK-UNROLL: for.body7.1.8:
+; CHECK-UNROLL-NEXT: [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_8:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_8:%.*]]
+; CHECK-UNROLL: for.body7.2.8:
+; CHECK-UNROLL-NEXT: [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_8:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_8:%.*]]
+; CHECK-UNROLL: for.body7.3.8:
+; CHECK-UNROLL-NEXT: [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_8:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_8:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3_8]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_8:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.8:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_9:%.*]]
+; CHECK-UNROLL: for.body.9:
+; CHECK-UNROLL-NEXT: [[CMP1_9:%.*]] = icmp eq i32 9, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+; CHECK-UNROLL: if.end.9:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_9:%.*]]
+; CHECK-UNROLL: for.cond4.9:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_9:%.*]]
+; CHECK-UNROLL: for.body7.9:
+; CHECK-UNROLL-NEXT: [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4
+; CHECK-UNROLL-NEXT: [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_9:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_9]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_9:%.*]]
+; CHECK-UNROLL: for.body7.1.9:
+; CHECK-UNROLL-NEXT: [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_9:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_9:%.*]]
+; CHECK-UNROLL: for.body7.2.9:
+; CHECK-UNROLL-NEXT: [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_9:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_9:%.*]]
+; CHECK-UNROLL: for.body7.3.9:
+; CHECK-UNROLL-NEXT: [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_9:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_9:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3_9]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_9:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.9:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_10:%.*]]
+; CHECK-UNROLL: for.body.10:
+; CHECK-UNROLL-NEXT: [[CMP1_10:%.*]] = icmp eq i32 10, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+; CHECK-UNROLL: if.end.10:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_10:%.*]]
+; CHECK-UNROLL: for.cond4.10:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_10:%.*]]
+; CHECK-UNROLL: for.body7.10:
+; CHECK-UNROLL-NEXT: [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4
+; CHECK-UNROLL-NEXT: [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_10:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_10]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_10:%.*]]
+; CHECK-UNROLL: for.body7.1.10:
+; CHECK-UNROLL-NEXT: [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_10:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_10:%.*]]
+; CHECK-UNROLL: for.body7.2.10:
+; CHECK-UNROLL-NEXT: [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_10:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_10:%.*]]
+; CHECK-UNROLL: for.body7.3.10:
+; CHECK-UNROLL-NEXT: [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_10:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3_10]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_10:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.10:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_11:%.*]]
+; CHECK-UNROLL: for.body.11:
+; CHECK-UNROLL-NEXT: [[CMP1_11:%.*]] = icmp eq i32 11, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+; CHECK-UNROLL: if.end.11:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_11:%.*]]
+; CHECK-UNROLL: for.cond4.11:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_11:%.*]]
+; CHECK-UNROLL: for.body7.11:
+; CHECK-UNROLL-NEXT: [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4
+; CHECK-UNROLL-NEXT: [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_119:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_119]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_11:%.*]]
+; CHECK-UNROLL: for.body7.1.11:
+; CHECK-UNROLL-NEXT: [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_11:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_11:%.*]]
+; CHECK-UNROLL: for.body7.2.11:
+; CHECK-UNROLL-NEXT: [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_11:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_11:%.*]]
+; CHECK-UNROLL: for.body7.3.11:
+; CHECK-UNROLL-NEXT: [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_11:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_11:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3_11]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_11:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.11:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_12:%.*]]
+; CHECK-UNROLL: for.body.12:
+; CHECK-UNROLL-NEXT: [[CMP1_12:%.*]] = icmp eq i32 12, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+; CHECK-UNROLL: if.end.12:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_12:%.*]]
+; CHECK-UNROLL: for.cond4.12:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1210:%.*]]
+; CHECK-UNROLL: for.body7.1210:
+; CHECK-UNROLL-NEXT: [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4
+; CHECK-UNROLL-NEXT: [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_12:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_12]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_12:%.*]]
+; CHECK-UNROLL: for.body7.1.12:
+; CHECK-UNROLL-NEXT: [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_12:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_12:%.*]]
+; CHECK-UNROLL: for.body7.2.12:
+; CHECK-UNROLL-NEXT: [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_12:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_12:%.*]]
+; CHECK-UNROLL: for.body7.3.12:
+; CHECK-UNROLL-NEXT: [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_12:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3_12]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_12:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.12:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_13:%.*]]
+; CHECK-UNROLL: for.body.13:
+; CHECK-UNROLL-NEXT: [[CMP1_13:%.*]] = icmp eq i32 13, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+; CHECK-UNROLL: if.end.13:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_13:%.*]]
+; CHECK-UNROLL: for.cond4.13:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_13:%.*]]
+; CHECK-UNROLL: for.body7.13:
+; CHECK-UNROLL-NEXT: [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4
+; CHECK-UNROLL-NEXT: [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_13:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_13]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_13:%.*]]
+; CHECK-UNROLL: for.body7.1.13:
+; CHECK-UNROLL-NEXT: [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_13:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_13:%.*]]
+; CHECK-UNROLL: for.body7.2.13:
+; CHECK-UNROLL-NEXT: [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_13:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_13:%.*]]
+; CHECK-UNROLL: for.body7.3.13:
+; CHECK-UNROLL-NEXT: [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_13:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_13:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3_13]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_13:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.13:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_14:%.*]]
+; CHECK-UNROLL: for.body.14:
+; CHECK-UNROLL-NEXT: [[CMP1_14:%.*]] = icmp eq i32 14, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+; CHECK-UNROLL: if.end.14:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_14:%.*]]
+; CHECK-UNROLL: for.cond4.14:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_14:%.*]]
+; CHECK-UNROLL: for.body7.14:
+; CHECK-UNROLL-NEXT: [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4
+; CHECK-UNROLL-NEXT: [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_14:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_14]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_14:%.*]]
+; CHECK-UNROLL: for.body7.1.14:
+; CHECK-UNROLL-NEXT: [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_14:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_14:%.*]]
+; CHECK-UNROLL: for.body7.2.14:
+; CHECK-UNROLL-NEXT: [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_14:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_14:%.*]]
+; CHECK-UNROLL: for.body7.3.14:
+; CHECK-UNROLL-NEXT: [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_14:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3_14]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_14:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.14:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY_15:%.*]]
+; CHECK-UNROLL: for.body.15:
+; CHECK-UNROLL-NEXT: [[CMP1_15:%.*]] = icmp eq i32 15, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+; CHECK-UNROLL: if.end.15:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_15:%.*]]
+; CHECK-UNROLL: for.cond4.15:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_15:%.*]]
+; CHECK-UNROLL: for.body7.15:
+; CHECK-UNROLL-NEXT: [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4
+; CHECK-UNROLL-NEXT: [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_15:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_15]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_15:%.*]]
+; CHECK-UNROLL: for.body7.1.15:
+; CHECK-UNROLL-NEXT: [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_15:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_15:%.*]]
+; CHECK-UNROLL: for.body7.2.15:
+; CHECK-UNROLL-NEXT: [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_15:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_15:%.*]]
+; CHECK-UNROLL: for.body7.3.15:
+; CHECK-UNROLL-NEXT: [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_15:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_15:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3_15]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_15:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.15:
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY_16:%.*]], label [[CLEANUP]]
+; CHECK-UNROLL: for.body.16:
+; CHECK-UNROLL-NEXT: [[CMP1_16:%.*]] = icmp eq i32 16, [[DIMS]]
+; CHECK-UNROLL-NEXT: br i1 [[CMP1_16]], label [[CLEANUP]], label [[IF_END_16:%.*]]
+; CHECK-UNROLL: if.end.16:
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 16
+; CHECK-UNROLL-NEXT: br label [[FOR_COND4_16:%.*]]
+; CHECK-UNROLL: for.cond4.16:
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_16:%.*]]
+; CHECK-UNROLL: for.body7.16:
+; CHECK-UNROLL-NEXT: [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4
+; CHECK-UNROLL-NEXT: [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_16:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_16]], ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1_16:%.*]]
+; CHECK-UNROLL: for.body7.1.16:
+; CHECK-UNROLL-NEXT: [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1_16:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1_16:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_1_16]], ptr [[ARRAYIDX13_1_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2_16:%.*]]
+; CHECK-UNROLL: for.body7.2.16:
+; CHECK-UNROLL-NEXT: [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2_16:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2_16:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_2_16]], ptr [[ARRAYIDX13_2_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3_16:%.*]]
+; CHECK-UNROLL: for.body7.3.16:
+; CHECK-UNROLL-NEXT: [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_16]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3_16:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_16]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3_16:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3_16:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+; CHECK-UNROLL-NEXT: store i32 [[ADD14_3_16]], ptr [[ARRAYIDX13_3_16]], align 4
+; CHECK-UNROLL-NEXT: call void @_Z3barv()
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6_16:%.*]]
+; CHECK-UNROLL: for.cond.cleanup6.16:
+; CHECK-UNROLL-NEXT: unreachable
; CHECK-UNROLL: for.body7:
-; CHECK-UNROLL-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-UNROLL-NEXT: [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4
-; CHECK-UNROLL-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-UNROLL-NEXT: [[TMP192:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4
+; CHECK-UNROLL-NEXT: [[TMP194:%.*]] = load i32, ptr [[OUT]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP194]], [[TMP193]]
; CHECK-UNROLL-NEXT: store i32 [[ADD14]], ptr [[OUT]], align 4
; CHECK-UNROLL-NEXT: call void @_Z3barv()
; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_1:%.*]]
; CHECK-UNROLL: for.body7.1:
-; CHECK-UNROLL-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-; CHECK-UNROLL-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
+; CHECK-UNROLL-NEXT: [[TMP195:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP195]], i64 1
+; CHECK-UNROLL-NEXT: [[TMP196:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4
; CHECK-UNROLL-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
-; CHECK-UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
-; CHECK-UNROLL-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-UNROLL-NEXT: [[TMP197:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[TMP197]], [[TMP196]]
; CHECK-UNROLL-NEXT: store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4
; CHECK-UNROLL-NEXT: call void @_Z3barv()
; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_2:%.*]]
; CHECK-UNROLL: for.body7.2:
-; CHECK-UNROLL-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
-; CHECK-UNROLL-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
+; CHECK-UNROLL-NEXT: [[TMP198:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP198]], i64 2
+; CHECK-UNROLL-NEXT: [[TMP199:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4
; CHECK-UNROLL-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
-; CHECK-UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
-; CHECK-UNROLL-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-UNROLL-NEXT: [[TMP200:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[TMP200]], [[TMP199]]
; CHECK-UNROLL-NEXT: store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4
; CHECK-UNROLL-NEXT: call void @_Z3barv()
; CHECK-UNROLL-NEXT: br label [[FOR_BODY7_3:%.*]]
; CHECK-UNROLL: for.body7.3:
-; CHECK-UNROLL-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
-; CHECK-UNROLL-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
+; CHECK-UNROLL-NEXT: [[TMP201:%.*]] = load ptr, ptr [[ARR]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP201]], i64 3
+; CHECK-UNROLL-NEXT: [[TMP202:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4
; CHECK-UNROLL-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
-; CHECK-UNROLL-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
-; CHECK-UNROLL-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-UNROLL-NEXT: [[TMP203:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[TMP203]], [[TMP202]]
; CHECK-UNROLL-NEXT: store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4
; CHECK-UNROLL-NEXT: call void @_Z3barv()
-; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4:%.*]], label [[FOR_COND_CLEANUP6]]
+; CHECK-UNROLL-NEXT: br i1 false, label [[FOR_BODY7_4]], label [[FOR_COND_CLEANUP6:%.*]]
; CHECK-UNROLL: for.body7.4:
-; CHECK-UNROLL-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARRAYIDX]], [[FOR_BODY7_3]] ]
-; CHECK-UNROLL-NEXT: [[TMP14:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
-; CHECK-UNROLL-NEXT: [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 4
-; CHECK-UNROLL-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
+; CHECK-UNROLL-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi ptr [ [[ARR]], [[FOR_BODY7_3]] ], [ [[ARRAYIDX_1]], [[FOR_BODY7_3_1]] ], [ [[ARRAYIDX_2]], [[FOR_BODY7_3_2]] ], [ [[ARRAYIDX_3]], [[FOR_BODY7_3_3]] ], [ [[ARRAYIDX_4]], [[FOR_BODY7_3_4]] ], [ [[ARRAYIDX_5]], [[FOR_BODY7_3_5]] ], [ [[ARRAYIDX_6]], [[FOR_BODY7_3_6]] ], [ [[ARRAYIDX_7]], [[FOR_BODY7_3_7]] ], [ [[ARRAYIDX_8]], [[FOR_BODY7_3_8]] ], [ [[ARRAYIDX_9]], [[FOR_BODY7_3_9]] ], [ [[ARRAYIDX_10]], [[FOR_BODY7_3_10]] ], [ [[ARRAYIDX_11]], [[FOR_BODY7_3_11]] ], [ [[ARRAYIDX_12]], [[FOR_BODY7_3_12]] ], [ [[ARRAYIDX_13]], [[FOR_BODY7_3_13]] ], [ [[ARRAYIDX_14]], [[FOR_BODY7_3_14]] ], [ [[ARRAYIDX_15]], [[FOR_BODY7_3_15]] ], [ [[ARRAYIDX_16]], [[FOR_BODY7_3_16]] ]
+; CHECK-UNROLL-NEXT: [[TMP204:%.*]] = load ptr, ptr [[ARRAYIDX_LCSSA]], align 8
+; CHECK-UNROLL-NEXT: [[ARRAYIDX11_4:%.*]] = getelementptr inbounds i32, ptr [[TMP204]], i64 4
+; CHECK-UNROLL-NEXT: [[TMP205:%.*]] = load i32, ptr [[ARRAYIDX11_4]], align 4
; CHECK-UNROLL-NEXT: [[ARRAYIDX13_4:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 4
-; CHECK-UNROLL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
-; CHECK-UNROLL-NEXT: [[ADD14_4:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-UNROLL-NEXT: [[TMP206:%.*]] = load i32, ptr [[ARRAYIDX13_4]], align 4
+; CHECK-UNROLL-NEXT: [[ADD14_4:%.*]] = add nsw i32 [[TMP206]], [[TMP205]]
; CHECK-UNROLL-NEXT: store i32 [[ADD14_4]], ptr [[ARRAYIDX13_4]], align 4
; CHECK-UNROLL-NEXT: call void @_Z3barv()
; CHECK-UNROLL-NEXT: unreachable
@@ -192,7 +951,3 @@ declare void @_Z3barv()
; CHECK-CFG: [[META2]] = !{!"llvm.loop.unroll.enable"}
; CHECK-CFG: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
;.
-; CHECK-UNROLL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-UNROLL: [[META1]] = !{!"llvm.loop.mustprogress"}
-; CHECK-UNROLL: [[META2]] = !{!"llvm.loop.unroll.enable"}
-;.