From 1ac728b7601dd5fee2fee7ea714f0626055384df Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Wed, 15 Jan 2014 14:36:37 +0100 Subject: V4: remove unnecessary spills and order them correctly. When doing edge resolving, too many spills were generated, and the dependency tracking of moves was not complete. Now we only insert spills that are caused by phi-nodes (because any other spill would be generated at the point a variable was defined). However, there can still be multiple dependencies between the moves generated by the edge resolving. Instead of only checking the first dependency, all of them are tracked. The bug report was a case where an unneccesary spill was generated, that got tracked, but "suppressed" the other (valid!) dependent move. The randomness was caused by the hash seeding of QHash. Task-number: QTBUG-35840 Change-Id: Ifbc3c8fc13de53c46a8b5859721b2497189921a3 Reviewed-by: Fawzi Mohamed Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4regalloc.cpp | 25 +++++++++++++++---------- src/qml/compiler/qv4ssa.cpp | 22 +++++++++++++--------- src/qml/compiler/qv4ssa_p.h | 5 +++-- 3 files changed, 31 insertions(+), 21 deletions(-) (limited to 'src/qml/compiler') diff --git a/src/qml/compiler/qv4regalloc.cpp b/src/qml/compiler/qv4regalloc.cpp index 93ecdb2602..5597759ee1 100644 --- a/src/qml/compiler/qv4regalloc.cpp +++ b/src/qml/compiler/qv4regalloc.cpp @@ -817,19 +817,15 @@ private: void resolve() { foreach (BasicBlock *bb, _function->basicBlocks) { - foreach (BasicBlock *bbOut, bb->out) { -#ifdef DEBUG_REGALLOC - Optimizer::showMeTheCode(_function); -#endif // DEBUG_REGALLOC - + foreach (BasicBlock *bbOut, bb->out) resolveEdge(bb, bbOut); - } } } void resolveEdge(BasicBlock *predecessor, BasicBlock *successor) { #ifdef DEBUG_REGALLOC + Optimizer::showMeTheCode(_function); qDebug() << "Resolving edge" << predecessor->index << "->" << successor->index; #endif // DEBUG_REGALLOC @@ -849,16 +845,20 @@ private: Q_ASSERT(successorStart > 0); foreach (const LifeTimeInterval &it, _liveAtStart[successor]) { - bool lifeTimeHole = false; if (it.end() < successorStart) continue; + + bool lifeTimeHole = false; + bool isPhiTarget = false; Expr *moveFrom = 0; + if (it.start() == successorStart) { foreach (Stmt *s, successor->statements) { if (!s || s->id < 1) continue; if (Phi *phi = s->asPhi()) { if (*phi->targetTemp == it.temp()) { + isPhiTarget = true; Expr *opd = phi->d->incoming[successor->in.indexOf(predecessor)]; if (opd->asConst()) { moveFrom = opd; @@ -939,12 +939,17 @@ private: Temp *moveTo; if (it.reg() == LifeTimeInterval::Invalid || !it.covers(successorStart)) { - int spillSlot = _assignedSpillSlots.value(it.temp(), -1); + if (!isPhiTarget) // if it.temp() is a phi target, skip it. + continue; + const int spillSlot = _assignedSpillSlots.value(it.temp(), -1); if (spillSlot == -1) continue; // it has a life-time hole here. moveTo = createTemp(Temp::StackSlot, spillSlot, it.temp().type); } else { moveTo = createTemp(Temp::PhysicalRegister, platformRegister(it), it.temp().type); + const int spillSlot = _assignedSpillSlots.value(it.temp(), -1); + if (isPhiTarget && spillSlot != -1) + mapping.add(moveFrom, createTemp(Temp::StackSlot, spillSlot, it.temp().type)); } // add move to mapping @@ -1078,7 +1083,7 @@ void RegisterAllocator::run(Function *function, const Optimizer &opt) { QTextStream qout(stdout, QIODevice::WriteOnly); qout << "Ranges:" << endl; - QList intervals = _unhandled; + QVector intervals = _unhandled; std::sort(intervals.begin(), intervals.end(), LifeTimeInterval::lessThanForTemp); foreach (const LifeTimeInterval &r, intervals) { r.dump(qout); @@ -1585,7 +1590,7 @@ void RegisterAllocator::dump() const { qout << "Ranges:" << endl; - QList handled = _handled; + QVector handled = _handled; std::sort(handled.begin(), handled.end(), LifeTimeInterval::lessThanForTemp); foreach (const LifeTimeInterval &r, handled) { r.dump(qout); diff --git a/src/qml/compiler/qv4ssa.cpp b/src/qml/compiler/qv4ssa.cpp index 31a1e4cdc4..a668375da0 100644 --- a/src/qml/compiler/qv4ssa.cpp +++ b/src/qml/compiler/qv4ssa.cpp @@ -3837,15 +3837,20 @@ static inline bool overlappingStorage(const Temp &t1, const Temp &t2) || (t1.type != DoubleType && t2.type != DoubleType); } -int MoveMapping::isUsedAsSource(Expr *e) const +MoveMapping::Moves MoveMapping::sourceUsages(Expr *e, const Moves &moves) { - if (Temp *t = e->asTemp()) - for (int i = 0, ei = _moves.size(); i != ei; ++i) - if (Temp *from = _moves[i].from->asTemp()) + Moves usages; + + if (Temp *t = e->asTemp()) { + for (int i = 0, ei = moves.size(); i != ei; ++i) { + const Move &move = moves[i]; + if (Temp *from = move.from->asTemp()) if (overlappingStorage(*from, *t)) - return i; + usages.append(move); + } + } - return -1; + return usages; } void MoveMapping::add(Expr *from, Temp *to, int id) { @@ -3924,9 +3929,8 @@ void MoveMapping::dump() const MoveMapping::Action MoveMapping::schedule(const Move &m, QList &todo, QList &delayed, QList &output, QList &swaps) const { - int useIdx = isUsedAsSource(m.to); - if (useIdx != -1) { - const Move &dependency = _moves[useIdx]; + Moves usages = sourceUsages(m.to, todo) + sourceUsages(m.to, delayed); + foreach (const Move &dependency, usages) { if (!output.contains(dependency)) { if (delayed.contains(dependency)) { // We have a cycle! Break it by swapping instead of assigning. diff --git a/src/qml/compiler/qv4ssa_p.h b/src/qml/compiler/qv4ssa_p.h index dcbc83ae65..2c61a2fe1a 100644 --- a/src/qml/compiler/qv4ssa_p.h +++ b/src/qml/compiler/qv4ssa_p.h @@ -165,10 +165,11 @@ class MoveMapping bool operator==(const Move &other) const { return from == other.from && to == other.to; } }; + typedef QList Moves; - QList _moves; + Moves _moves; - int isUsedAsSource(Expr *e) const; + static Moves sourceUsages(Expr *e, const Moves &moves); public: void add(Expr *from, Temp *to, int id = 0); -- cgit v1.2.3 From 6f0f73e63182afc92cb1b8255b114fb8f8232f22 Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Wed, 15 Jan 2014 10:46:08 +0100 Subject: V4 IR: update immediate dominators when purging unreachable basic-blocks The basic block scheduling uses this information to place loops. When the immediate dominator information is invalid, the scheduling can be sub-optimal, or will sometimes forget to schedule some blocks. Change-Id: Iaeb45f2b757b676310be25a658ceadc07d5722ec Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4ssa.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'src/qml/compiler') diff --git a/src/qml/compiler/qv4ssa.cpp b/src/qml/compiler/qv4ssa.cpp index a668375da0..c2889a2b9a 100644 --- a/src/qml/compiler/qv4ssa.cpp +++ b/src/qml/compiler/qv4ssa.cpp @@ -695,10 +695,6 @@ public: computeDF(); } -// QSet operator[](BasicBlock *n) const { -// return DF[n->index]; -// } - const BasicBlockSet &dominatorFrontier(BasicBlock *n) const { return DF[n->index]; } @@ -707,6 +703,11 @@ public: return nodes[idom[bb->index]]; } + void updateImmediateDominator(BasicBlock *bb, BasicBlock *newDominator) + { + idom[bb->index] = newDominator->index; + } + bool dominates(BasicBlock *dominator, BasicBlock *dominated) const { // The index of the basic blocks might have changed, or the nodes array might have changed, // or the block got deleted, so get the index from our copy of the array. @@ -2895,7 +2896,8 @@ namespace { /// and removes unreachable staements from the worklist, so that optimiseSSA won't consider them /// anymore. /// Important: this assumes that there are no critical edges in the control-flow graph! -void purgeBB(BasicBlock *bb, Function *func, DefUsesCalculator &defUses, QVector &W) +void purgeBB(BasicBlock *bb, Function *func, DefUsesCalculator &defUses, QVector &W, + DominatorTree &df) { // TODO: change this to mark the block as deleted, but leave it alone so that other references // won't be dangling pointers. @@ -2945,11 +2947,15 @@ void purgeBB(BasicBlock *bb, Function *func, DefUsesCalculator &defUses, QVector } } - // if a successor has no incoming edges after unlinking the current basic block, then - // it is unreachable, and can be purged too - if (out->in.isEmpty()) + if (out->in.isEmpty()) { + // if a successor has no incoming edges after unlinking the current basic block, then + // it is unreachable, and can be purged too toPurge.append(out); - + } else if (out->in.size() == 1) { + // if the successor now has only one incoming edge, we that edge is the new + // immediate dominator + df.updateImmediateDominator(out, out->in.first()); + } } // unlink all defs/uses from the statements in the basic block @@ -3032,7 +3038,7 @@ bool tryOptimizingComparison(Expr *&expr) } } // anonymous namespace -void optimizeSSA(Function *function, DefUsesCalculator &defUses) +void optimizeSSA(Function *function, DefUsesCalculator &defUses, DominatorTree &df) { const bool variablesCanEscape = function->variablesCanEscape(); @@ -3298,10 +3304,10 @@ void optimizeSSA(Function *function, DefUsesCalculator &defUses) Jump *jump = function->New(); if (convertToValue(constantCondition).toBoolean()) { jump->target = cjump->iftrue; - purgeBB(cjump->iffalse, function, defUses, W); + purgeBB(cjump->iffalse, function, defUses, W, df); } else { jump->target = cjump->iffalse; - purgeBB(cjump->iftrue, function, defUses, W); + purgeBB(cjump->iftrue, function, defUses, W, df); } *ref[s] = jump; @@ -3681,7 +3687,7 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine) static bool doOpt = qgetenv("QV4_NO_OPT").isEmpty(); if (doOpt) { // qout << "Running SSA optimization..." << endl; - optimizeSSA(function, defUses); + optimizeSSA(function, defUses, df); // showMeTheCode(function); } -- cgit v1.2.3 From a03a6499ab64da2003d2d8a4691ea89606af1f8b Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Thu, 16 Jan 2014 14:55:41 +0100 Subject: V4: lower memory allocator pressure. Changes to datastructures and more re-using of locally used temporary vectors. For the test regress-74474-002.js this lowers the total allocated memory from 1.98GB to 158MB. Thse peak memory usage stays at 75MB. There is no functional change. This should give a modest performance improvement which mainly depends on the speed of malloc()/free(). Change-Id: I1877c1903e59a33ee79ff2b801ef6f2c1cee30a6 Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4regalloc.cpp | 141 +++++++++++++++++++++------------------ src/qml/compiler/qv4ssa.cpp | 30 +++++---- src/qml/compiler/qv4ssa_p.h | 1 + 3 files changed, 95 insertions(+), 77 deletions(-) (limited to 'src/qml/compiler') diff --git a/src/qml/compiler/qv4regalloc.cpp b/src/qml/compiler/qv4regalloc.cpp index 5597759ee1..df1687f267 100644 --- a/src/qml/compiler/qv4regalloc.cpp +++ b/src/qml/compiler/qv4regalloc.cpp @@ -124,7 +124,7 @@ public: return _defs[t].isPhiTarget; } - QList calls() const { return _calls; } + const QList &calls() const { return _calls; } QList hints(const Temp &t) const { return _hints[t]; } void addHint(const Temp &t, int physicalRegister) { @@ -658,13 +658,14 @@ using namespace QT_PREPEND_NAMESPACE(QQmlJS); namespace { class ResolutionPhase: protected StmtVisitor, protected ExprVisitor { - QVector _intervals; + const QVector &_intervals; + QVector _unprocessed; Function *_function; #if !defined(QT_NO_DEBUG) RegAllocInfo *_info; #endif const QHash &_assignedSpillSlots; - QHash _intervalForTemp; + QHash _intervalForTemp; const QVector &_intRegs; const QVector &_fpRegs; @@ -672,8 +673,8 @@ class ResolutionPhase: protected StmtVisitor, protected ExprVisitor { QVector _loads; QVector _stores; - QHash > _liveAtStart; - QHash > _liveAtEnd; + QHash > _liveAtStart; + QHash > _liveAtEnd; public: ResolutionPhase(const QVector &intervals, Function *function, RegAllocInfo *info, @@ -691,6 +692,13 @@ public: #if defined(QT_NO_DEBUG) Q_UNUSED(info) #endif + + _unprocessed.resize(_intervals.size()); + for (int i = 0, ei = _intervals.size(); i != ei; ++i) + _unprocessed[i] = &_intervals[i]; + + _liveAtStart.reserve(function->basicBlocks.size()); + _liveAtEnd.reserve(function->basicBlocks.size()); } void run() { @@ -704,6 +712,7 @@ private: { foreach (BasicBlock *bb, _function->basicBlocks) { QVector newStatements; + newStatements.reserve(bb->statements.size() + 7); bool seenFirstNonPhiStmt = false; for (int i = 0, ei = bb->statements.size(); i != ei; ++i) { @@ -754,22 +763,22 @@ private: } - void activate(const LifeTimeInterval &i) + void activate(const LifeTimeInterval *i) { - Q_ASSERT(!i.isFixedInterval()); - _intervalForTemp[i.temp()] = i; + Q_ASSERT(!i->isFixedInterval()); + _intervalForTemp[i->temp()] = i; - if (i.reg() != LifeTimeInterval::Invalid) { + if (i->reg() != LifeTimeInterval::Invalid) { // check if we need to generate spill/unspill instructions - if (i.start() == _currentStmt->id) { - if (i.isSplitFromInterval()) { - int pReg = platformRegister(i); - _loads.append(generateUnspill(i.temp(), pReg)); + if (i->start() == _currentStmt->id) { + if (i->isSplitFromInterval()) { + int pReg = platformRegister(*i); + _loads.append(generateUnspill(i->temp(), pReg)); } else { - int pReg = platformRegister(i); - int spillSlot = _assignedSpillSlots.value(i.temp(), -1); + int pReg = platformRegister(*i); + int spillSlot = _assignedSpillSlots.value(i->temp(), -1); if (spillSlot != -1) - _stores.append(generateSpill(spillSlot, i.temp().type, pReg)); + _stores.append(generateSpill(spillSlot, i->temp().type, pReg)); } } } @@ -779,37 +788,37 @@ private: { if (Phi *phi = _currentStmt->asPhi()) { // for phi nodes, only activate the range belonging to that node - for (int it = 0, eit = _intervals.size(); it != eit; ++it) { - const LifeTimeInterval &i = _intervals.at(it); - if (i.start() > _currentStmt->id) + for (int it = 0, eit = _unprocessed.size(); it != eit; ++it) { + const LifeTimeInterval *i = _unprocessed.at(it); + if (i->start() > _currentStmt->id) break; - if (i.temp() == *phi->targetTemp) { + if (i->temp() == *phi->targetTemp) { activate(i); - _intervals.remove(it); + _unprocessed.remove(it); break; } } return; } - while (!_intervals.isEmpty()) { - const LifeTimeInterval &i = _intervals.first(); - if (i.start() > _currentStmt->id) + while (!_unprocessed.isEmpty()) { + const LifeTimeInterval *i = _unprocessed.first(); + if (i->start() > _currentStmt->id) break; activate(i); - _intervals.removeFirst(); + _unprocessed.removeFirst(); } } void cleanOldIntervals() { const int id = _currentStmt->id; - QMutableHashIterator it(_intervalForTemp); + QMutableHashIterator it(_intervalForTemp); while (it.hasNext()) { - const LifeTimeInterval &i = it.next().value(); - if (i.end() < id || i.isFixedInterval()) + const LifeTimeInterval *i = it.next().value(); + if (i->end() < id || i->isFixedInterval()) it.remove(); } } @@ -844,20 +853,20 @@ private: Q_ASSERT(successorStart > 0); - foreach (const LifeTimeInterval &it, _liveAtStart[successor]) { - if (it.end() < successorStart) + foreach (const LifeTimeInterval *it, _liveAtStart[successor]) { + if (it->end() < successorStart) continue; bool lifeTimeHole = false; bool isPhiTarget = false; Expr *moveFrom = 0; - if (it.start() == successorStart) { + if (it->start() == successorStart) { foreach (Stmt *s, successor->statements) { if (!s || s->id < 1) continue; if (Phi *phi = s->asPhi()) { - if (*phi->targetTemp == it.temp()) { + if (*phi->targetTemp == it->temp()) { isPhiTarget = true; Expr *opd = phi->d->incoming[successor->in.indexOf(predecessor)]; if (opd->asConst()) { @@ -866,12 +875,12 @@ private: Temp *t = opd->asTemp(); Q_ASSERT(t); - foreach (const LifeTimeInterval &it2, _liveAtEnd[predecessor]) { - if (it2.temp() == *t - && it2.reg() != LifeTimeInterval::Invalid - && it2.covers(predecessorEnd)) { + foreach (const LifeTimeInterval *it2, _liveAtEnd[predecessor]) { + if (it2->temp() == *t + && it2->reg() != LifeTimeInterval::Invalid + && it2->covers(predecessorEnd)) { moveFrom = createTemp(Temp::PhysicalRegister, - platformRegister(it2), t->type); + platformRegister(*it2), t->type); break; } } @@ -886,18 +895,18 @@ private: } } } else { - foreach (const LifeTimeInterval &predIt, _liveAtEnd[predecessor]) { - if (predIt.temp() == it.temp()) { - if (predIt.reg() != LifeTimeInterval::Invalid - && predIt.covers(predecessorEnd)) { - moveFrom = createTemp(Temp::PhysicalRegister, platformRegister(predIt), - predIt.temp().type); + foreach (const LifeTimeInterval *predIt, _liveAtEnd[predecessor]) { + if (predIt->temp() == it->temp()) { + if (predIt->reg() != LifeTimeInterval::Invalid + && predIt->covers(predecessorEnd)) { + moveFrom = createTemp(Temp::PhysicalRegister, platformRegister(*predIt), + predIt->temp().type); } else { - int spillSlot = _assignedSpillSlots.value(predIt.temp(), -1); + int spillSlot = _assignedSpillSlots.value(predIt->temp(), -1); if (spillSlot == -1) lifeTimeHole = true; else - moveFrom = createTemp(Temp::StackSlot, spillSlot, predIt.temp().type); + moveFrom = createTemp(Temp::StackSlot, spillSlot, predIt->temp().type); } break; } @@ -906,20 +915,20 @@ private: if (!moveFrom) { Q_UNUSED(lifeTimeHole); #if !defined(QT_NO_DEBUG) - Q_ASSERT(!_info->isPhiTarget(it.temp()) || it.isSplitFromInterval() || lifeTimeHole); - if (_info->def(it.temp()) != successorStart && !it.isSplitFromInterval()) { + Q_ASSERT(!_info->isPhiTarget(it->temp()) || it->isSplitFromInterval() || lifeTimeHole); + if (_info->def(it->temp()) != successorStart && !it->isSplitFromInterval()) { const int successorEnd = successor->statements.last()->id; const int idx = successor->in.indexOf(predecessor); - foreach (const Use &use, _info->uses(it.temp())) { + foreach (const Use &use, _info->uses(it->temp())) { if (use.pos == static_cast(successorStart)) { // only check the current edge, not all other possible ones. This is // important for phi nodes: they have uses that are only valid when // coming in over a specific edge. foreach (Stmt *s, successor->statements) { if (Phi *phi = s->asPhi()) { - Q_ASSERT(it.temp().index != phi->targetTemp->index); + Q_ASSERT(it->temp().index != phi->targetTemp->index); Q_ASSERT(phi->d->incoming[idx]->asTemp() == 0 - || it.temp().index != phi->d->incoming[idx]->asTemp()->index); + || it->temp().index != phi->d->incoming[idx]->asTemp()->index); } else { // TODO: check that the first non-phi statement does not use // the temp. @@ -938,18 +947,18 @@ private: } Temp *moveTo; - if (it.reg() == LifeTimeInterval::Invalid || !it.covers(successorStart)) { - if (!isPhiTarget) // if it.temp() is a phi target, skip it. + if (it->reg() == LifeTimeInterval::Invalid || !it->covers(successorStart)) { + if (!isPhiTarget) // if it->temp() is a phi target, skip it. continue; - const int spillSlot = _assignedSpillSlots.value(it.temp(), -1); + const int spillSlot = _assignedSpillSlots.value(it->temp(), -1); if (spillSlot == -1) continue; // it has a life-time hole here. - moveTo = createTemp(Temp::StackSlot, spillSlot, it.temp().type); + moveTo = createTemp(Temp::StackSlot, spillSlot, it->temp().type); } else { - moveTo = createTemp(Temp::PhysicalRegister, platformRegister(it), it.temp().type); - const int spillSlot = _assignedSpillSlots.value(it.temp(), -1); + moveTo = createTemp(Temp::PhysicalRegister, platformRegister(*it), it->temp().type); + const int spillSlot = _assignedSpillSlots.value(it->temp(), -1); if (isPhiTarget && spillSlot != -1) - mapping.add(moveFrom, createTemp(Temp::StackSlot, spillSlot, it.temp().type)); + mapping.add(moveFrom, createTemp(Temp::StackSlot, spillSlot, it->temp().type)); } // add move to mapping @@ -1010,10 +1019,10 @@ protected: if (t->kind != Temp::VirtualRegister) return; - const LifeTimeInterval &i = _intervalForTemp[*t]; - Q_ASSERT(i.isValid()); - if (i.reg() != LifeTimeInterval::Invalid && i.covers(_currentStmt->id)) { - int pReg = platformRegister(i); + const LifeTimeInterval *i = _intervalForTemp[*t]; + Q_ASSERT(i->isValid()); + if (i->reg() != LifeTimeInterval::Invalid && i->covers(_currentStmt->id)) { + int pReg = platformRegister(*i); t->kind = Temp::PhysicalRegister; t->index = pReg; } else { @@ -1119,7 +1128,7 @@ void RegisterAllocator::run(Function *function, const Optimizer &opt) #endif // DEBUG_REGALLOC } -static inline LifeTimeInterval createFixedInterval(int reg, bool isFP) +static inline LifeTimeInterval createFixedInterval(int reg, bool isFP, int rangeCount) { Temp t; t.init(Temp::PhysicalRegister, reg, 0); @@ -1128,6 +1137,7 @@ static inline LifeTimeInterval createFixedInterval(int reg, bool isFP) i.setTemp(t); i.setReg(reg); i.setFixedInterval(true); + i.reserveRanges(rangeCount); return i; } @@ -1136,12 +1146,13 @@ void RegisterAllocator::prepareRanges() const int regCount = _normalRegisters.size(); _fixedRegisterRanges.resize(regCount); for (int reg = 0; reg < regCount; ++reg) - _fixedRegisterRanges[reg] = createFixedInterval(reg, false); + _fixedRegisterRanges[reg] = createFixedInterval(reg, false, _info->calls().size()); const int fpRegCount = _fpRegisters.size(); _fixedFPRegisterRanges.resize(fpRegCount); - for (int fpReg = 0; fpReg < fpRegCount; ++fpReg) - _fixedFPRegisterRanges[fpReg] = createFixedInterval(fpReg, true); + for (int fpReg = 0; fpReg < fpRegCount; ++fpReg) { + _fixedFPRegisterRanges[fpReg] = createFixedInterval(fpReg, true, _info->calls().size()); + } foreach (int callPosition, _info->calls()) { for (int reg = 0; reg < regCount; ++reg) diff --git a/src/qml/compiler/qv4ssa.cpp b/src/qml/compiler/qv4ssa.cpp index c2889a2b9a..f14205519d 100644 --- a/src/qml/compiler/qv4ssa.cpp +++ b/src/qml/compiler/qv4ssa.cpp @@ -308,9 +308,9 @@ public: BasicBlock *operator*() const { if (set.blockNumbers) - return set.allBlocks[*numberIt]; + return set.allBlocks.at(*numberIt); else - return set.allBlocks[flagIt]; + return set.allBlocks.at(flagIt); } bool operator==(const const_iterator &other) const @@ -472,9 +472,8 @@ class DominatorTree { #endif // SHOW_SSA } - BasicBlockIndex ancestorWithLowestSemi(BasicBlockIndex v) { - std::vector worklist; - worklist.reserve(vertex.capacity() / 2); + BasicBlockIndex ancestorWithLowestSemi(BasicBlockIndex v, std::vector &worklist) { + worklist.clear(); for (BasicBlockIndex it = v; it != InvalidBasicBlockIndex; it = ancestor[it]) worklist.push_back(it); @@ -517,6 +516,9 @@ class DominatorTree { DFS(nodes.first()->index); Q_ASSERT(N == nodes.size()); // fails with unreachable nodes, but those should have been removed before. + std::vector worklist; + worklist.reserve(vertex.capacity() / 2); + for (int i = N - 1; i > 0; --i) { BasicBlockIndex n = vertex[i]; BasicBlockIndex p = parent[n]; @@ -527,7 +529,7 @@ class DominatorTree { if (dfnum[v->index] <= dfnum[n]) ss = v->index; else - ss = semi[ancestorWithLowestSemi(v->index)]; + ss = semi[ancestorWithLowestSemi(v->index, worklist)]; if (dfnum[ss] < dfnum[s]) s = ss; } @@ -536,7 +538,7 @@ class DominatorTree { link(p, n); if (bucket.contains(p)) { foreach (BasicBlockIndex v, bucket[p]) { - BasicBlockIndex y = ancestorWithLowestSemi(v); + BasicBlockIndex y = ancestorWithLowestSemi(v, worklist); BasicBlockIndex semi_v = semi[v]; if (semi[y] == semi_v) idom[v] = semi_v; @@ -602,7 +604,7 @@ class DominatorTree { std::vector worklist; worklist.reserve(nodes.size() * 2); for (int i = 0, ei = nodes.size(); i != ei; ++i) { - BasicBlockIndex nodeIndex = nodes[i]->index; + BasicBlockIndex nodeIndex = nodes.at(i)->index; worklist.push_back(nodeIndex); NodeProgress &np = nodeStatus[nodeIndex]; np.children = children[nodeIndex]; @@ -633,7 +635,7 @@ class DominatorTree { if (np.todo.empty()) { BasicBlockSet &S = DF[node]; S.init(nodes); - foreach (BasicBlock *y, nodes[node]->out) + foreach (BasicBlock *y, nodes.at(node)->out) if (idom[y->index] != node) S.insert(y); foreach (BasicBlockIndex child, np.children) { @@ -755,6 +757,8 @@ public: VariableCollector(Function *function) : variablesCanEscape(function->variablesCanEscape()) { + _defsites.reserve(function->tempCount); + #if defined(SHOW_SSA) qout << "Variables collected:" << endl; #endif // SHOW_SSA @@ -1019,6 +1023,9 @@ public: , tempCount(0) , processed(f->basicBlocks) { + localMapping.reserve(f->tempCount); + vregMapping.reserve(f->tempCount); + todo.reserve(f->basicBlocks.size()); } void run() { @@ -2466,9 +2473,8 @@ protected: void splitCriticalEdges(Function *f) { - const QVector oldBBs = f->basicBlocks; - - foreach (BasicBlock *bb, oldBBs) { + for (int i = 0, ei = f->basicBlocks.size(); i != ei; ++i) { + BasicBlock *bb = f->basicBlocks[i]; if (bb->in.size() > 1) { for (int inIdx = 0, eInIdx = bb->in.size(); inIdx != eInIdx; ++inIdx) { BasicBlock *inBB = bb->in[inIdx]; diff --git a/src/qml/compiler/qv4ssa_p.h b/src/qml/compiler/qv4ssa_p.h index 2c61a2fe1a..f90fc5b05b 100644 --- a/src/qml/compiler/qv4ssa_p.h +++ b/src/qml/compiler/qv4ssa_p.h @@ -93,6 +93,7 @@ public: void setFrom(Stmt *from); void addRange(int from, int to); Ranges ranges() const { return _ranges; } + void reserveRanges(int capacity) { _ranges.reserve(capacity); } int start() const { return _ranges.first().start; } int end() const { return _end; } -- cgit v1.2.3 From 73f4fdbef816ff623142bee6c235f06c4bdf58d3 Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Wed, 15 Jan 2014 10:45:28 +0100 Subject: V4 IR: do edge splitting after SSA transformation This reduces the work for the dominator tree/frontier calculations, because there are less blocks to consider. All blocks inserted by splitting the critical edges, have (by definition) no effect on the dominator calculations. However, the immediate dominators for all new blocks needs to be added, because this information is used by the block scheduling. This change reduces memory/time usage during optimization passes, especially when processing excessively big switch statements. Change-Id: Ia69882e9dabdddffa1c98b1079012d8d988e1e8f Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4ssa.cpp | 66 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'src/qml/compiler') diff --git a/src/qml/compiler/qv4ssa.cpp b/src/qml/compiler/qv4ssa.cpp index f14205519d..7113dc7c26 100644 --- a/src/qml/compiler/qv4ssa.cpp +++ b/src/qml/compiler/qv4ssa.cpp @@ -409,7 +409,7 @@ class DominatorTree { typedef int BasicBlockIndex; enum { InvalidBasicBlockIndex = -1 }; - const QVector nodes; + QVector nodes; int N; std::vector dfnum; // BasicBlock index -> dfnum std::vector vertex; @@ -705,19 +705,57 @@ public: return nodes[idom[bb->index]]; } + void dumpImmediateDominators() const + { + qDebug() << "Immediate dominators for" << idom.size() << "nodes:"; + for (size_t i = 0, ei = idom.size(); i != ei; ++i) + if (idom[i] == InvalidBasicBlockIndex) + qDebug("\tnone -> L%d", int(i)); + else + qDebug("\tL%d -> L%d", idom[i], int(i)); + } + void updateImmediateDominator(BasicBlock *bb, BasicBlock *newDominator) { - idom[bb->index] = newDominator->index; + Q_ASSERT(bb->index >= 0); + + int blockIndex; + if (static_cast::size_type>(bb->index) >= idom.size()) { + // This is a new block, probably introduced by edge splitting. So, we'll have to grow + // the array before inserting the immediate dominator. + nodes.append(bb); + idom.resize(nodes.size(), InvalidBasicBlockIndex); + blockIndex = nodes.size() - 1; + } else { + blockIndex = getBlockIndex(bb); + } + + idom[blockIndex] = getBlockIndex(newDominator); } bool dominates(BasicBlock *dominator, BasicBlock *dominated) const { // The index of the basic blocks might have changed, or the nodes array might have changed, - // or the block got deleted, so get the index from our copy of the array. - return dominates(nodes.indexOf(dominator), nodes.indexOf(dominated)); + // so get the index from our copy of the array. + return dominates(getBlockIndex(dominator), getBlockIndex(dominated)); } private: + int getBlockIndex(BasicBlock *bb) const { + if (!bb) + return InvalidBasicBlockIndex; + + if (bb->index >= 0 && bb->index < nodes.size()) { + if (nodes.at(bb->index) == bb) + return bb->index; + } + + return nodes.indexOf(bb); + } + bool dominates(BasicBlockIndex dominator, BasicBlockIndex dominated) const { + // dominator can be Invalid when the dominated block has no dominator (i.e. the start node) + Q_ASSERT(dominated != InvalidBasicBlockIndex); + for (BasicBlockIndex it = dominated; it != InvalidBasicBlockIndex; it = idom[it]) { if (it == dominator) return true; @@ -2471,7 +2509,7 @@ protected: } }; -void splitCriticalEdges(Function *f) +void splitCriticalEdges(Function *f, DominatorTree &df) { for (int i = 0, ei = f->basicBlocks.size(); i != ei; ++i) { BasicBlock *bb = f->basicBlocks[i]; @@ -2518,6 +2556,9 @@ void splitCriticalEdges(Function *f) } else { Q_ASSERT(!"Unknown terminator!"); } + + // Set the immediate dominator of the new block to inBB + df.updateImmediateDominator(newBB, inBB); } } } @@ -2667,6 +2708,12 @@ public: showMeTheCode(function); schedule(function->basicBlocks.first()); +#if defined(SHOW_SSA) + qDebug() << "Block sequence:"; + foreach (BasicBlock *bb, sequence) + qDebug("\tL%d", bb->index); +#endif // SHOW_SSA + Q_ASSERT(function->basicBlocks.size() == sequence.size()); function->basicBlocks = sequence; return loopsStartEnd; @@ -3660,9 +3707,6 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine) if (!function->hasTry && !function->hasWith && !function->module->debugMode && doSSA) { // qout << "SSA for " << (function->name ? qPrintable(*function->name) : "") << endl; -// qout << "Starting edge splitting..." << endl; - splitCriticalEdges(function); -// showMeTheCode(function); // Calculate the dominator tree: DominatorTree df(function->basicBlocks); @@ -3690,6 +3734,11 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine) TypePropagation(defUses).run(function); // showMeTheCode(function); + // Transform the CFG into edge-split SSA. +// qout << "Starting edge splitting..." << endl; + splitCriticalEdges(function, df); +// showMeTheCode(function); + static bool doOpt = qgetenv("QV4_NO_OPT").isEmpty(); if (doOpt) { // qout << "Running SSA optimization..." << endl; @@ -3709,6 +3758,7 @@ void Optimizer::run(QQmlEnginePrivate *qmlEngine) // showMeTheCode(function); // qout << "Doing block scheduling..." << endl; +// df.dumpImmediateDominators(); startEndLoops = BlockScheduler(function, df).go(); // showMeTheCode(function); -- cgit v1.2.3 From 6ccb9f8f04ea257520e518b25999907c6a8421e1 Mon Sep 17 00:00:00 2001 From: Erik Verbruggen Date: Thu, 16 Jan 2014 15:39:00 +0100 Subject: V4: relieve more memory allocator pressure. For _ZN13BenchmarkDemo11initPhysicsEv from the Octane testsuite, the total allocated memory drops from 1.5GB to 51MB. Peak memory usage stays at 29MB. Again, slow implementations of malloc()/free() will see a performance improvement. Change-Id: I21bc2f0d3735de0980fc9b3745906016e2e48a61 Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4regalloc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/qml/compiler') diff --git a/src/qml/compiler/qv4regalloc.cpp b/src/qml/compiler/qv4regalloc.cpp index df1687f267..3521d0c27a 100644 --- a/src/qml/compiler/qv4regalloc.cpp +++ b/src/qml/compiler/qv4regalloc.cpp @@ -1464,9 +1464,9 @@ int RegisterAllocator::nextIntersection(const LifeTimeInterval ¤t, return -1; for (int currentEnd = currentRanges.size(); currentIt < currentEnd; ++currentIt) { - const LifeTimeInterval::Range ¤tRange = currentRanges[currentIt]; + const LifeTimeInterval::Range currentRange = currentRanges.at(currentIt); for (int anotherIt = anotherItStart, anotherEnd = anotherRanges.size(); anotherIt < anotherEnd; ++anotherIt) { - const LifeTimeInterval::Range &anotherRange = anotherRanges[anotherIt]; + const LifeTimeInterval::Range anotherRange = anotherRanges.at(anotherIt); if (anotherRange.start > currentRange.end) break; int intersectPos = intersectionPosition(currentRange, anotherRange); -- cgit v1.2.3