summaryrefslogtreecommitdiffstats
path: root/src/xml
diff options
context:
space:
mode:
authorMitch Curtis <mitch.curtis@digia.com>2014-01-10 11:19:08 +0100
committerThe Qt Project <gerrit-noreply@qt-project.org>2014-01-11 00:03:01 +0100
commit718fae50a4509c2cfc07552e6b1187ed2502f9b9 (patch)
tree4fe5fa8cda37355bd0027998200f791af3637ef7 /src/xml
parentf6002697ece87a99c52b32872194fc8217579e17 (diff)
Mitigate performance regression in isExpandedEntityValueTooLarge().
512a1ce0698d370c313bb561bbf078935fa0342e fixed a security issue [1], but also caused a large performance regression. This patch improves the performance from ~196 seconds to ~.23 seconds for 1000 entities, using the benchmark in the bug report: "0": 0 msecs per iteration (total: 0, iterations: 1) "250": 2,952 msecs per iteration (total: 2,952, iterations: 1) "500": 23,418 msecs per iteration (total: 23,418, iterations: 1) "750": 79,913 msecs per iteration (total: 79,913, iterations: 1) "1000": 196,335 msecs per iteration (total: 196,335, iterations: 1) "0": 0 msecs per iteration (total: 0, iterations: 1) "250": 16 msecs per iteration (total: 16, iterations: 1) "500": 59 msecs per iteration (total: 59, iterations: 1) "750": 132 msecs per iteration (total: 132, iterations: 1) "1000": 232 msecs per iteration (total: 232, iterations: 1) This is a cherry-pick of 62c4e288a11769bde45c9c74d731ed8628303f19. [1] http://lists.qt-project.org/pipermail/announce/2013-December/000036.html Task-number: QTBUG-35919 Change-Id: Ibc2fc94dc0d88b227b9510e15bac9c07f4600591 Reviewed-by: Richard J. Moore <rich@kde.org>
Diffstat (limited to 'src/xml')
-rw-r--r--src/xml/sax/qxml.cpp85
1 files changed, 57 insertions, 28 deletions
diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp
index befa801dd2..ffc07b55d0 100644
--- a/src/xml/sax/qxml.cpp
+++ b/src/xml/sax/qxml.cpp
@@ -44,6 +44,7 @@
#include "qbuffer.h"
#include "qregexp.h"
#include "qmap.h"
+#include "qhash.h"
#include "qstack.h"
#include <qdebug.h>
@@ -424,6 +425,10 @@ private:
int stringValueLen;
QString emptyStr;
+ QHash<QString, int> literalEntitySizes;
+ // The entity at (QMap<QString,) referenced the entities at (QMap<QString,) (int>) times.
+ QHash<QString, QHash<QString, int> > referencesToOtherEntities;
+ QHash<QString, int> expandedSizes;
// The limit to the amount of times the DTD parsing functions can be called
// for the DTD currently being parsed.
static const int dtdRecursionLimit = 2;
@@ -3426,6 +3431,10 @@ bool QXmlSimpleReader::parse(const QXmlInputSource *input, bool incremental)
{
Q_D(QXmlSimpleReader);
+ d->literalEntitySizes.clear();
+ d->referencesToOtherEntities.clear();
+ d->expandedSizes.clear();
+
if (incremental) {
d->initIncrementalParsing();
} else {
@@ -6641,43 +6650,63 @@ bool QXmlSimpleReaderPrivate::parseChoiceSeq()
bool QXmlSimpleReaderPrivate::isExpandedEntityValueTooLarge(QString *errorMessage)
{
- QMap<QString, int> literalEntitySizes;
- // The entity at (QMap<QString,) referenced the entities at (QMap<QString,) (int>) times.
- QMap<QString, QMap<QString, int> > referencesToOtherEntities;
- QMap<QString, int> expandedSizes;
+ QString entityNameBuffer;
// For every entity, check how many times all entity names were referenced in its value.
- foreach (QString toSearch, entities.keys()) {
- // The amount of characters that weren't entity names, but literals, like 'X'.
- QString leftOvers = entities.value(toSearch);
- // How many times was entityName referenced by toSearch?
- foreach (QString entityName, entities.keys()) {
- for (int i = 0; i < leftOvers.size() && i != -1; ) {
- i = leftOvers.indexOf(QString::fromLatin1("&%1;").arg(entityName), i);
- if (i != -1) {
- leftOvers.remove(i, entityName.size() + 2);
- // The entityName we're currently trying to find was matched in this string; increase our count.
- ++referencesToOtherEntities[toSearch][entityName];
+ for (QMap<QString,QString>::const_iterator toSearchIt = entities.constBegin();
+ toSearchIt != entities.constEnd();
+ ++toSearchIt) {
+ const QString &toSearch = toSearchIt.key();
+
+ // Don't check the same entities twice.
+ if (!literalEntitySizes.contains(toSearch)) {
+ // The amount of characters that weren't entity names, but literals, like 'X'.
+ QString leftOvers = entities.value(toSearch);
+ // How many times was entityName referenced by toSearch?
+ for (QMap<QString,QString>::const_iterator referencedIt = entities.constBegin();
+ referencedIt != entities.constEnd();
+ ++referencedIt) {
+ const QString &entityName = referencedIt.key();
+
+ for (int i = 0; i < leftOvers.size() && i != -1; ) {
+ entityNameBuffer = QLatin1Char('&') + entityName + QLatin1Char(';');
+
+ i = leftOvers.indexOf(entityNameBuffer, i);
+ if (i != -1) {
+ leftOvers.remove(i, entityName.size() + 2);
+ // The entityName we're currently trying to find was matched in this string; increase our count.
+ ++referencesToOtherEntities[toSearch][entityName];
+ }
}
}
+ literalEntitySizes[toSearch] = leftOvers.size();
}
- literalEntitySizes[toSearch] = leftOvers.size();
}
- foreach (QString entity, referencesToOtherEntities.keys()) {
- expandedSizes[entity] = literalEntitySizes[entity];
- foreach (QString referenceTo, referencesToOtherEntities.value(entity).keys()) {
- const int references = referencesToOtherEntities.value(entity).value(referenceTo);
- // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size.
- expandedSizes[entity] += expandedSizes[referenceTo] * references + literalEntitySizes[referenceTo] * references;
- }
+ for (QHash<QString, QHash<QString, int> >::const_iterator entityIt = referencesToOtherEntities.constBegin();
+ entityIt != referencesToOtherEntities.constEnd();
+ ++entityIt) {
+ const QString &entity = entityIt.key();
+
+ QHash<QString, int>::iterator expandedIt = expandedSizes.find(entity);
+ if (expandedIt == expandedSizes.end()) {
+ expandedIt = expandedSizes.insert(entity, literalEntitySizes.value(entity));
+ for (QHash<QString, int>::const_iterator referenceIt = entityIt->constBegin();
+ referenceIt != entityIt->constEnd();
+ ++referenceIt) {
+ const QString &referenceTo = referenceIt.key();
+ const int references = referencesToOtherEntities.value(entity).value(referenceTo);
+ // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size.
+ *expandedIt += expandedSizes.value(referenceTo) * references + literalEntitySizes.value(referenceTo) * references;
+ }
- if (expandedSizes[entity] > entityCharacterLimit) {
- if (errorMessage) {
- *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands too a string that is too large to process (%2 characters > %3).");
- *errorMessage = (*errorMessage).arg(entity).arg(expandedSizes[entity]).arg(entityCharacterLimit);
+ if (*expandedIt > entityCharacterLimit) {
+ if (errorMessage) {
+ *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands to a string that is too large to process (%2 characters > %3).")
+ .arg(entity, *expandedIt, entityCharacterLimit);
+ }
+ return true;
}
- return true;
}
}
return false;