From 62c4e288a11769bde45c9c74d731ed8628303f19 Mon Sep 17 00:00:00 2001 From: Mitch Curtis Date: Thu, 9 Jan 2014 10:48:41 +0100 Subject: Mitigate performance regression in isExpandedEntityValueTooLarge(). 46a8885ae486e238a39efa5119c2714f328b08e4 fixed a security issue [1], but also caused a large performance regression. This patch improves the performance from ~98 seconds to ~.1 second for 1000 entities, using the benchmark in the bug report: "0": 0 msecs per iteration (total: 0, iterations: 1) "250": 1,547 msecs per iteration (total: 1,547, iterations: 1) "500": 12,254 msecs per iteration (total: 12,254, iterations: 1) "750": 45,506 msecs per iteration (total: 45,506, iterations: 1) "1000": 98,112 msecs per iteration (total: 98,112, iterations: 1) "0": 0 msecs per iteration (total: 0, iterations: 1) "250": 6 msecs per iteration (total: 6, iterations: 1) "500": 25 msecs per iteration (total: 25, iterations: 1) "750": 58 msecs per iteration (total: 58, iterations: 1) "1000": 102 msecs per iteration (total: 102, iterations: 1) [1] http://lists.qt-project.org/pipermail/announce/2013-December/000036.html Task-number: QTBUG-35919 Change-Id: I6a6a1a6b606a8033a8f66e294cb55bbd8bdb8a03 Reviewed-by: Richard J. Moore --- src/xml/sax/qxml.cpp | 85 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index f3a1e479f2..1b05e049f1 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -44,6 +44,7 @@ #include "qbuffer.h" #include "qregexp.h" #include "qmap.h" +#include "qhash.h" #include "qstack.h" #include @@ -424,6 +425,10 @@ private: int stringValueLen; QString emptyStr; + QHash literalEntitySizes; + // The entity at (QMap) times. + QHash > referencesToOtherEntities; + QHash expandedSizes; // The limit to the amount of times the DTD parsing functions can be called // for the DTD currently being parsed. static const int dtdRecursionLimit = 2; @@ -3444,6 +3449,10 @@ bool QXmlSimpleReader::parse(const QXmlInputSource *input, bool incremental) { Q_D(QXmlSimpleReader); + d->literalEntitySizes.clear(); + d->referencesToOtherEntities.clear(); + d->expandedSizes.clear(); + if (incremental) { d->initIncrementalParsing(); } else { @@ -6659,43 +6668,63 @@ bool QXmlSimpleReaderPrivate::parseChoiceSeq() bool QXmlSimpleReaderPrivate::isExpandedEntityValueTooLarge(QString *errorMessage) { - QMap literalEntitySizes; - // The entity at (QMap) times. - QMap > referencesToOtherEntities; - QMap expandedSizes; + QString entityNameBuffer; // For every entity, check how many times all entity names were referenced in its value. - foreach (QString toSearch, entities.keys()) { - // The amount of characters that weren't entity names, but literals, like 'X'. - QString leftOvers = entities.value(toSearch); - // How many times was entityName referenced by toSearch? - foreach (QString entityName, entities.keys()) { - for (int i = 0; i < leftOvers.size() && i != -1; ) { - i = leftOvers.indexOf(QString::fromLatin1("&%1;").arg(entityName), i); - if (i != -1) { - leftOvers.remove(i, entityName.size() + 2); - // The entityName we're currently trying to find was matched in this string; increase our count. - ++referencesToOtherEntities[toSearch][entityName]; + for (QMap::const_iterator toSearchIt = entities.constBegin(); + toSearchIt != entities.constEnd(); + ++toSearchIt) { + const QString &toSearch = toSearchIt.key(); + + // Don't check the same entities twice. + if (!literalEntitySizes.contains(toSearch)) { + // The amount of characters that weren't entity names, but literals, like 'X'. + QString leftOvers = entities.value(toSearch); + // How many times was entityName referenced by toSearch? + for (QMap::const_iterator referencedIt = entities.constBegin(); + referencedIt != entities.constEnd(); + ++referencedIt) { + const QString &entityName = referencedIt.key(); + + for (int i = 0; i < leftOvers.size() && i != -1; ) { + entityNameBuffer = QLatin1Char('&') + entityName + QLatin1Char(';'); + + i = leftOvers.indexOf(entityNameBuffer, i); + if (i != -1) { + leftOvers.remove(i, entityName.size() + 2); + // The entityName we're currently trying to find was matched in this string; increase our count. + ++referencesToOtherEntities[toSearch][entityName]; + } } } + literalEntitySizes[toSearch] = leftOvers.size(); } - literalEntitySizes[toSearch] = leftOvers.size(); } - foreach (QString entity, referencesToOtherEntities.keys()) { - expandedSizes[entity] = literalEntitySizes[entity]; - foreach (QString referenceTo, referencesToOtherEntities.value(entity).keys()) { - const int references = referencesToOtherEntities.value(entity).value(referenceTo); - // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size. - expandedSizes[entity] += expandedSizes[referenceTo] * references + literalEntitySizes[referenceTo] * references; - } + for (QHash >::const_iterator entityIt = referencesToOtherEntities.constBegin(); + entityIt != referencesToOtherEntities.constEnd(); + ++entityIt) { + const QString &entity = entityIt.key(); + + QHash::iterator expandedIt = expandedSizes.find(entity); + if (expandedIt == expandedSizes.end()) { + expandedIt = expandedSizes.insert(entity, literalEntitySizes.value(entity)); + for (QHash::const_iterator referenceIt = entityIt->constBegin(); + referenceIt != entityIt->constEnd(); + ++referenceIt) { + const QString &referenceTo = referenceIt.key(); + const int references = referencesToOtherEntities.value(entity).value(referenceTo); + // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size. + *expandedIt += expandedSizes.value(referenceTo) * references + literalEntitySizes.value(referenceTo) * references; + } - if (expandedSizes[entity] > entityCharacterLimit) { - if (errorMessage) { - *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands too a string that is too large to process (%2 characters > %3)."); - *errorMessage = (*errorMessage).arg(entity).arg(expandedSizes[entity]).arg(entityCharacterLimit); + if (*expandedIt > entityCharacterLimit) { + if (errorMessage) { + *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands to a string that is too large to process (%2 characters > %3).") + .arg(entity, *expandedIt, entityCharacterLimit); + } + return true; } - return true; } } return false; -- cgit v1.2.3