aboutsummaryrefslogtreecommitdiffstats
path: root/src/qml/compiler/qv4ssa.cpp
diff options
context:
space:
mode:
authorErik Verbruggen <erik.verbruggen@digia.com>2014-07-23 12:22:11 +0200
committerErik Verbruggen <erik.verbruggen@digia.com>2014-08-18 16:05:55 +0200
commit2ae518d3a4ea994dfb6c120f826ef7c801b70807 (patch)
tree60b14fe12a75619cfef8494789dd72ddb610ff8b /src/qml/compiler/qv4ssa.cpp
parent7e5a589b905969a5712b801cec01be257fbc237c (diff)
V4 IR: Add loop peeling.
By peeling the first iteration off of a loop and putting it in front of the loop, type inference can deduce more type information for esp. loop induction variables. To prevent increasing the code size too much, only the inner-most loops are peeled. This gives a 10% speed-up on crypto.js. Change-Id: I57f9611695bc8defc0bff84e440b8a20b2c8a34e Reviewed-by: Fawzi Mohamed <fawzi.mohamed@digia.com>
Diffstat (limited to 'src/qml/compiler/qv4ssa.cpp')
-rw-r--r--src/qml/compiler/qv4ssa.cpp244
1 files changed, 231 insertions, 13 deletions
diff --git a/src/qml/compiler/qv4ssa.cpp b/src/qml/compiler/qv4ssa.cpp
index 4d1735cffd..b2d23569ff 100644
--- a/src/qml/compiler/qv4ssa.cpp
+++ b/src/qml/compiler/qv4ssa.cpp
@@ -4509,6 +4509,199 @@ private:
std::vector<int> tempForLocal;
};
+class CloneBasicBlock: protected IR::StmtVisitor, protected CloneExpr
+{
+public:
+ BasicBlock *operator()(IR::BasicBlock *originalBlock)
+ {
+ block = new BasicBlock(originalBlock->function, 0);
+
+ foreach (Stmt *s, originalBlock->statements()) {
+ s->accept(this);
+ clonedStmt->location = s->location;
+ }
+
+ return block;
+ }
+
+protected:
+ virtual void visitExp(Exp *stmt)
+ { clonedStmt = block->EXP(clone(stmt->expr)); }
+
+ virtual void visitMove(Move *stmt)
+ { clonedStmt = block->MOVE(clone(stmt->target), clone(stmt->source)); }
+
+ virtual void visitJump(Jump *stmt)
+ { clonedStmt = block->JUMP(stmt->target); }
+
+ virtual void visitCJump(CJump *stmt)
+ { clonedStmt = block->CJUMP(clone(stmt->cond), stmt->iftrue, stmt->iffalse); }
+
+ virtual void visitRet(Ret *stmt)
+ { clonedStmt = block->RET(clone(stmt->expr)); }
+
+ virtual void visitPhi(Phi *stmt)
+ {
+ Phi *phi = block->function->NewStmt<Phi>();
+ clonedStmt = phi;
+
+ phi->targetTemp = clone(stmt->targetTemp);
+ phi->d = new Stmt::Data;
+ foreach (Expr *in, stmt->d->incoming)
+ phi->d->incoming.append(clone(in));
+ block->appendStatement(phi);
+ }
+
+private:
+ IR::Stmt *clonedStmt;
+};
+
+// Loop-peeling is done by unfolding the loop once. The "original" loop basic blocks stay where they
+// are, and a copy of the loop is placed after it. Special care is taken while copying the loop body:
+// by having the copies of the basic-blocks point to the same nodes as the "original" basic blocks,
+// updating the immediate dominators is easy: if the edge of a copied basic-block B points to a
+// block C that has also been copied, set the immediate dominator of B to the corresponding
+// immediate dominator of C. Finally, for any node outside the loop that gets a new edge attached,
+// the immediate dominator has to be re-calculated.
+class LoopPeeling
+{
+ DominatorTree &dt;
+
+public:
+ LoopPeeling(DominatorTree &dt)
+ : dt(dt)
+ {}
+
+ void run(const QVector<LoopDetection::LoopInfo *> &loops)
+ {
+ foreach (LoopDetection::LoopInfo *loopInfo, loops)
+ peelLoop(loopInfo);
+ }
+
+private:
+ // All copies have their outgoing edges pointing to the same successor block as the originals.
+ // For each copied block, check where the outgoing edges point to. If it's a block inside the
+ // (original) loop, rewire it to the corresponding copy. Otherwise, which is when it points
+ // out of the loop, leave it alone.
+ // As an extra, collect all edges that point out of the copied loop, because the targets need
+ // to have their immediate dominator rechecked.
+ void rewire(BasicBlock *newLoopBlock, const QVector<BasicBlock *> &from, const QVector<BasicBlock *> &to, QVector<BasicBlock *> &loopExits)
+ {
+ for (int i = 0, ei = newLoopBlock->out.size(); i != ei; ++i) {
+ BasicBlock *&out = newLoopBlock->out[i];
+ const int idx = from.indexOf(out);
+ if (idx == -1) {
+ if (!loopExits.contains(out))
+ loopExits.append(out);
+ } else {
+ out->in.removeOne(newLoopBlock);
+ BasicBlock *newTo = to.at(idx);
+ newTo->in.append(newLoopBlock);
+ out = newTo;
+
+ Stmt *terminator = newLoopBlock->terminator();
+ if (Jump *jump = terminator->asJump()) {
+ Q_ASSERT(i == 0);
+ jump->target = newTo;
+ } else if (CJump *cjump = terminator->asCJump()) {
+ Q_ASSERT(i == 0 || i == 1);
+ if (i == 0)
+ cjump->iftrue = newTo;
+ else
+ cjump->iffalse = newTo;
+ }
+ }
+ }
+ }
+
+ void peelLoop(LoopDetection::LoopInfo *loop)
+ {
+ CloneBasicBlock clone;
+
+ LoopDetection::LoopInfo unpeeled(*loop);
+ unpeeled.loopHeader = clone(unpeeled.loopHeader);
+ unpeeled.loopHeader->setContainingGroup(loop->loopHeader->containingGroup());
+ unpeeled.loopHeader->markAsGroupStart(true);
+ for (int i = 0, ei = unpeeled.loopBody.size(); i != ei; ++i) {
+ BasicBlock *&bodyBlock = unpeeled.loopBody[i];
+ bodyBlock = clone(bodyBlock);
+ bodyBlock->setContainingGroup(unpeeled.loopHeader);
+ Q_ASSERT(bodyBlock->statementCount() == loop->loopBody[i]->statementCount());
+ }
+
+ // The cloned blocks will have no incoming edges, but they do have outgoing ones (copying
+ // the terminators will automatically insert that edge). The blocks where the originals
+ // pointed to will have an extra incoming edge from the copied blocks.
+
+ foreach (BasicBlock *in, loop->loopHeader->in) {
+ if (unpeeled.loopHeader != in // this can happen for really tight loops (where there are no body blocks). This is a back-edge in that case.
+ && !unpeeled.loopBody.contains(in) // if the edge is not coming from within the copied set, leave it alone
+ && !dt.dominates(loop->loopHeader, in)) // an edge coming from within the loop (so a back-edge): this is handled when rewiring all outgoing edges
+ continue;
+
+ unpeeled.loopHeader->in.append(in);
+ loop->loopHeader->in.removeOne(in);
+
+ Stmt *terminator = in->terminator();
+ if (Jump *jump = terminator->asJump()) {
+ jump->target = unpeeled.loopHeader;
+ in->out[0] = unpeeled.loopHeader;
+ } else if (CJump *cjump = terminator->asCJump()) {
+ if (cjump->iftrue == loop->loopHeader) {
+ cjump->iftrue = unpeeled.loopHeader;
+ Q_ASSERT(in->out[0] == loop->loopHeader);
+ in->out[0] = unpeeled.loopHeader;
+ } else if (cjump->iffalse == loop->loopHeader) {
+ cjump->iffalse = unpeeled.loopHeader;
+ Q_ASSERT(in->out[1] == loop->loopHeader);
+ in->out[1] = unpeeled.loopHeader;
+ } else {
+ Q_UNREACHABLE();
+ }
+ }
+ }
+
+ QVector<BasicBlock *> loopExits;
+ loopExits.reserve(8);
+ loopExits.append(unpeeled.loopHeader);
+
+ IR::Function *f = unpeeled.loopHeader->function;
+ rewire(unpeeled.loopHeader, loop->loopBody, unpeeled.loopBody, loopExits);
+ f->addBasicBlock(unpeeled.loopHeader);
+ for (int i = 0, ei = unpeeled.loopBody.size(); i != ei; ++i) {
+ BasicBlock *bodyBlock = unpeeled.loopBody.at(i);
+ rewire(bodyBlock, loop->loopBody, unpeeled.loopBody, loopExits);
+ f->addBasicBlock(bodyBlock);
+ }
+
+ // The original loop is now peeled off, and won't jump back to the loop header. Meaning, it
+ // is not a loop anymore, so unmark it.
+ loop->loopHeader->markAsGroupStart(false);
+ foreach (BasicBlock *bb, loop->loopBody)
+ bb->setContainingGroup(loop->loopHeader->containingGroup());
+
+ // calculate the idoms in a separate loop, because addBasicBlock in the previous loop will
+ // set the block index, which in turn is used by the dominator tree.
+ for (int i = 0, ei = unpeeled.loopBody.size(); i != ei; ++i) {
+ BasicBlock *bodyBlock = unpeeled.loopBody.at(i);
+ BasicBlock *idom = dt.immediateDominator(loop->loopBody.at(i));
+ const int idx = loop->loopBody.indexOf(idom);
+ if (idom == loop->loopHeader)
+ idom = unpeeled.loopHeader;
+ else if (idx != -1)
+ idom = unpeeled.loopBody.at(idx);
+ Q_ASSERT(idom);
+ dt.setImmediateDominator(bodyBlock, idom);
+ }
+
+ BasicBlockSet siblings(f);
+ foreach (BasicBlock *bb, loopExits)
+ dt.collectSiblings(bb, siblings);
+
+ dt.recalculateIDoms(siblings, loop->loopHeader);
+ }
+};
+
static void verifyCFG(IR::Function *function)
{
if (!DoVerification)
@@ -4893,6 +5086,29 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine)
// Calculate the dominator tree:
DominatorTree df(function);
+
+ {
+ // This is in a separate scope, because loop-peeling doesn't (yet) update the LoopInfo
+ // calculated by the LoopDetection. So by putting it in a separate scope, it is not
+ // available after peeling.
+
+ LoopDetection loopDetection(df);
+ loopDetection.run(function);
+ showMeTheCode(function);
+// cfg2dot(function, loopDetection.allLoops());
+
+ QVector<LoopDetection::LoopInfo *> innerLoops = loopDetection.innermostLoops();
+ LoopPeeling(df).run(innerLoops);
+
+// cfg2dot(function, loopDetection.allLoops());
+ showMeTheCode(function);
+ if (!innerLoops.isEmpty())
+ verifyImmediateDominators(df, function);
+ }
+
+ verifyCFG(function);
+ verifyNoPointerSharing(function);
+
df.computeDF();
verifyCFG(function);
@@ -4907,13 +5123,13 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine)
// qout << "Cleaning up phi nodes..." << endl;
cleanupPhis(defUses);
-// showMeTheCode(function);
+ showMeTheCode(function);
StatementWorklist worklist(function);
// qout << "Running type inference..." << endl;
TypeInference(qmlEngine, defUses).run(worklist);
-// showMeTheCode(function);
+ showMeTheCode(function);
// qout << "Doing reverse inference..." << endl;
ReverseInference(defUses).run(function);
@@ -4924,22 +5140,18 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine)
// showMeTheCode(function);
verifyNoPointerSharing(function);
- // Transform the CFG into edge-split SSA.
-// qout << "Starting edge splitting..." << endl;
- splitCriticalEdges(function, df, worklist, defUses);
-// showMeTheCode(function);
-
static bool doOpt = qgetenv("QV4_NO_OPT").isEmpty();
if (doOpt) {
// qout << "Running SSA optimization..." << endl;
worklist.reset();
optimizeSSA(worklist, defUses, df);
-// showMeTheCode(function);
+ showMeTheCode(function);
+
+ verifyImmediateDominators(df, function);
+ verifyCFG(function);
}
-// qout << "Doing block merging..." << endl;
-// mergeBasicBlocks(function);
-// showMeTheCode(function);
+ verifyNoPointerSharing(function);
// Basic-block cycles that are unreachable (i.e. for loops in a then-part where the
// condition is calculated to be always false) are not yet removed. This will choke the
@@ -4948,13 +5160,19 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine)
cleanupBasicBlocks(function);
// showMeTheCode(function);
- LoopDetection(df).run(function);
- showMeTheCode(function);
+ // Transform the CFG into edge-split SSA.
+// qout << "Starting edge splitting..." << endl;
+ splitCriticalEdges(function, df, worklist, defUses);
+// showMeTheCode(function);
+
+ verifyImmediateDominators(df, function);
+ verifyCFG(function);
// qout << "Doing block scheduling..." << endl;
// df.dumpImmediateDominators();
startEndLoops = BlockScheduler(function, df).go();
showMeTheCode(function);
+// cfg2dot(function);
#ifndef QT_NO_DEBUG
checkCriticalEdges(function->basicBlocks());