summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2011-12-28 15:59:47 -0200
committerQt by Nokia <qt-info@nokia.com>2012-03-30 01:19:59 +0200
commit329ee8cedc78de5304a84e0b205b08f39e439411 (patch)
tree146167cd8f1f6781d0d5800274c085b9383b4c03
parentcff38329aa8635ac8a0696b0aa78782fe5605d16 (diff)
Reimplement the StrictMode URL parsing
The strict mode check is now implemented after the tolerant parser has finished, and only if the tolerant parser has not found any errors. We catch the use of disallowed characters (control characters plus a few not permitted anywhere) and broken percent encodings. We do not catch the use of Unicode characters, as they are permitted in IRIs. In the tests, remove the old errorString test since it makes little sense. Change-Id: I8261a2ccad031ad68fc6377a206e59c9db89fb38 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
-rw-r--r--src/corelib/io/qurl.cpp121
-rw-r--r--src/corelib/io/qurl_p.h22
-rw-r--r--tests/auto/corelib/io/qurl/tst_qurl.cpp75
3 files changed, 176 insertions, 42 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp
index 806edb8a13..d9413ef40c 100644
--- a/src/corelib/io/qurl.cpp
+++ b/src/corelib/io/qurl.cpp
@@ -629,8 +629,12 @@ bool QUrlPrivate::setAuthority(const QString &auth, int from, int end)
break;
}
}
- if (x == ushort(x))
+ if (x == ushort(x)) {
port = ushort(x);
+ } else {
+ sectionHasError |= Port;
+ errorCode = InvalidPortError;
+ }
} else {
port = -1;
}
@@ -869,7 +873,8 @@ bool QUrlPrivate::setHost(const QString &value, int from, int iend, bool maybePe
return true;
sectionHasError |= Host;
- errorCode = InvalidIPv6AddressError;
+ errorCode = begin[1].unicode() == 'v' ?
+ InvalidIPvFutureError : InvalidIPv6AddressError;
return false;
}
@@ -926,7 +931,7 @@ bool QUrlPrivate::setHost(const QString &value, int from, int iend, bool maybePe
return true;
}
-void QUrlPrivate::parse(const QString &url)
+void QUrlPrivate::parse(const QString &url, QUrl::ParsingMode parsingMode)
{
// URI-reference = URI / relative-ref
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
@@ -948,7 +953,8 @@ void QUrlPrivate::parse(const QString &url)
const ushort *const data = reinterpret_cast<const ushort *>(begin);
for (int i = 0; i < len; ++i) {
- if (data[i] == '#' && hash == -1) {
+ register uint uc = data[i];
+ if (uc == '#' && hash == -1) {
hash = i;
// nothing more to be found
@@ -956,9 +962,9 @@ void QUrlPrivate::parse(const QString &url)
}
if (question == -1) {
- if (data[i] == ':' && colon == -1)
+ if (uc == ':' && colon == -1)
colon = i;
- else if (data[i] == '?')
+ else if (uc == '?')
question = i;
}
}
@@ -975,6 +981,7 @@ void QUrlPrivate::parse(const QString &url)
hierStart = 0;
}
+ int pathStart;
int hierEnd = qMin<uint>(qMin<uint>(question, hash), len);
if (hierEnd - hierStart >= 2 && data[hierStart] == '/' && data[hierStart + 1] == '/') {
// we have an authority, it ends at the first slash after these
@@ -989,12 +996,14 @@ void QUrlPrivate::parse(const QString &url)
setAuthority(url, hierStart + 2, authorityEnd);
// even if we failed to set the authority properly, let's try to recover
- setPath(url, authorityEnd, hierEnd);
+ pathStart = authorityEnd;
+ setPath(url, pathStart, hierEnd);
} else {
userName.clear();
password.clear();
host.clear();
port = -1;
+ pathStart = hierStart;
if (hierStart < hierEnd)
setPath(url, hierStart, hierEnd);
@@ -1007,6 +1016,67 @@ void QUrlPrivate::parse(const QString &url)
if (hash != -1)
setFragment(url, hash + 1, len);
+
+ if (sectionHasError || parsingMode == QUrl::TolerantMode)
+ return;
+
+ // The parsing so far was tolerant of errors, so the StrictMode
+ // parsing is actually implemented here, as an extra post-check.
+ // We only execute it if we haven't found any errors so far.
+
+ // What we need to look out for, that the regular parser tolerates:
+ // - percent signs not followed by two hex digits
+ // - forbidden characters, which should always appear encoded
+ // '"' / '<' / '>' / '\' / '^' / '`' / '{' / '|' / '}' / BKSP
+ // control characters
+ // - delimiters not allowed in certain positions
+ // . scheme: parser is already strict
+ // . user info: gen-delims (except for ':') disallowed
+ // . host: parser is stricter than the standard
+ // . port: parser is stricter than the standard
+ // . path: all delimiters allowed
+ // . fragment: all delimiters allowed
+ // . query: all delimiters allowed
+ // We would only need to check the user-info. However, the presence
+ // of the disallowed gen-delims changes the parsing, so we don't
+ // actually need to do anything
+ static const char forbidden[] = "\"<>\\^`{|}\x7F";
+ for (uint i = 0; i < uint(len); ++i) {
+ register uint uc = data[i];
+ if (uc >= 0x80)
+ continue;
+
+ if ((uc == '%' && (uint(len) < i + 2 || !isHex(data[i + 1]) || !isHex(data[i + 2])))
+ || uc < 0x20 || strchr(forbidden, uc)) {
+ // found an error
+ errorSupplement = uc;
+
+ // where are we?
+ if (i > uint(hash)) {
+ errorCode = InvalidFragmentError;
+ sectionHasError |= Fragment;
+ } else if (i > uint(question)) {
+ errorCode = InvalidQueryError;
+ sectionHasError |= Query;
+ } else if (i > uint(pathStart)) {
+ // pathStart is never -1
+ errorCode = InvalidPathError;
+ sectionHasError |= Path;
+ } else {
+ // It must be in the authority, since the scheme is strict.
+ // Since the port and hostname parsers are also strict,
+ // the error can only have happened in the user info.
+ int pos = url.indexOf(QLatin1Char(':'), hierStart);
+ if (i > uint(pos)) {
+ errorCode = InvalidPasswordError;
+ sectionHasError |= Password;
+ } else {
+ errorCode = InvalidUserNameError;
+ sectionHasError |= UserName;
+ }
+ }
+ }
+ }
}
/*
@@ -1360,18 +1430,14 @@ void QUrl::clear()
\a url is assumed to be in unicode format, with no percent
encoding.
- Calling isValid() will tell whether or not a valid URL was
- constructed.
+ Calling isValid() will tell whether or not a valid URL was constructed.
\sa setEncodedUrl()
*/
void QUrl::setUrl(const QString &url, ParsingMode parsingMode)
{
detach();
- if (parsingMode == StrictMode) {
- // ### strict check here!
- }
- d->parse(url);
+ d->parse(url, parsingMode);
}
@@ -2206,7 +2272,7 @@ QUrl &QUrl::operator =(const QString &url)
clear();
} else {
detach();
- d->parse(url);
+ d->parse(url, TolerantMode);
}
return *this;
}
@@ -2423,8 +2489,20 @@ QString QUrl::errorString() const
case QUrlPrivate::SchemeEmptyError:
return QStringLiteral("Empty scheme");
+ case QUrlPrivate::InvalidUserNameError:
+ return QString(QStringLiteral("Invalid user name (character '%1' not permitted)"))
+ .arg(c);
+
+ case QUrlPrivate::InvalidPasswordError:
+ return QString(QStringLiteral("Invalid password (character '%1' not permitted)"))
+ .arg(c);
+
case QUrlPrivate::InvalidRegNameError:
- return QStringLiteral("Hostname contains invalid characters");
+ if (d->errorSupplement)
+ return QString(QStringLiteral("Invalid hostname (character '%1' not permitted)"))
+ .arg(c);
+ else
+ return QStringLiteral("Hostname contains invalid characters");
case QUrlPrivate::InvalidIPv4AddressError:
return QString(); // doesn't happen yet
case QUrlPrivate::InvalidIPv6AddressError:
@@ -2432,14 +2510,25 @@ QString QUrl::errorString() const
case QUrlPrivate::InvalidIPvFutureError:
return QStringLiteral("Invalid IPvFuture address");
case QUrlPrivate::HostMissingEndBracket:
- return QStringLiteral("Expected '[' to match ']' in hostname");
+ return QStringLiteral("Expected ']' to match '[' in hostname");
case QUrlPrivate::InvalidPortError:
case QUrlPrivate::PortEmptyError:
return QStringLiteral("Invalid port or port number out of range");
+ case QUrlPrivate::InvalidPathError:
+ return QString(QStringLiteral("Invalid path (character '%1' not permitted)"))
+ .arg(c);
case QUrlPrivate::PathContainsColonBeforeSlash:
return QStringLiteral("Path component contains ':' before any '/'");
+
+ case QUrlPrivate::InvalidQueryError:
+ return QString(QStringLiteral("Invalid query (character '%1' not permitted)"))
+ .arg(c);
+
+ case QUrlPrivate::InvalidFragmentError:
+ return QString(QStringLiteral("Invalid fragment (character '%1' not permitted)"))
+ .arg(c);
}
return QStringLiteral("<unknown error>");
}
diff --git a/src/corelib/io/qurl_p.h b/src/corelib/io/qurl_p.h
index d9207bd809..fb54d74260 100644
--- a/src/corelib/io/qurl_p.h
+++ b/src/corelib/io/qurl_p.h
@@ -75,27 +75,37 @@ public:
};
enum ErrorCode {
- InvalidSchemeError = 0x000,
+ // the high byte of the error code matches the Section
+ InvalidSchemeError = Scheme << 8,
SchemeEmptyError,
- InvalidRegNameError = 0x800,
+ InvalidUserNameError = UserName << 8,
+
+ InvalidPasswordError = Password << 8,
+
+ InvalidRegNameError = Host << 8,
InvalidIPv4AddressError,
InvalidIPv6AddressError,
InvalidIPvFutureError,
HostMissingEndBracket,
- InvalidPortError = 0x1000,
+ InvalidPortError = Port << 8,
PortEmptyError,
- PathContainsColonBeforeSlash = 0x2000,
+ InvalidPathError = Path << 8,
+ PathContainsColonBeforeSlash,
+
+ InvalidQueryError = Query << 8,
+
+ InvalidFragmentError = Fragment << 8,
- NoError = 0xffff
+ NoError = 0
};
QUrlPrivate();
QUrlPrivate(const QUrlPrivate &copy);
- void parse(const QString &url);
+ void parse(const QString &url, QUrl::ParsingMode parsingMode);
void clear();
// no QString scheme() const;
diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp
index 259e7570fa..7e06dd4e88 100644
--- a/tests/auto/corelib/io/qurl/tst_qurl.cpp
+++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp
@@ -122,6 +122,8 @@ private slots:
void schemeValidator_data();
void schemeValidator();
void invalidSchemeValidator();
+ void strictParser_data();
+ void strictParser();
void tolerantParser();
void correctEncodedMistakes_data();
void correctEncodedMistakes();
@@ -143,7 +145,6 @@ private slots:
void toEncoded();
void setAuthority_data();
void setAuthority();
- void errorString();
void clear();
void resolvedWithAbsoluteSchemes() const;
void resolvedWithAbsoluteSchemes_data() const;
@@ -1594,7 +1595,6 @@ void tst_QUrl::isValid()
}
{
QUrl url = QUrl::fromEncoded("http://strange<username>@ok-hostname/", QUrl::StrictMode);
- QEXPECT_FAIL("", "StrictMode not implemented yet", Continue);
QVERIFY(!url.isValid());
// < and > are not allowed in userinfo in strict mode
url.setUserName("normal_username");
@@ -1604,6 +1604,7 @@ void tst_QUrl::isValid()
QUrl url = QUrl::fromEncoded("http://strange<username>@ok-hostname/");
QVERIFY(url.isValid());
// < and > are allowed in tolerant mode
+ QCOMPARE(url.toEncoded(), QByteArray("http://strange%3Cusername%3E@ok-hostname/"));
}
{
QUrl url = QUrl::fromEncoded("http://strange;hostname/here");
@@ -1710,6 +1711,55 @@ void tst_QUrl::invalidSchemeValidator()
}
}
+void tst_QUrl::strictParser_data()
+{
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("needle");
+
+ // cannot test bad schemes here, as they are parsed as paths instead
+ //QTest::newRow("invalid-scheme") << "ht%://example.com" << "Invalid scheme";
+ //QTest::newRow("empty-scheme") << ":/" << "Empty scheme";
+
+ QTest::newRow("invalid-user1") << "http://bad<user_name>@ok-hostname" << "Invalid user name";
+ QTest::newRow("invalid-user2") << "http://bad%@ok-hostname" << "Invalid user name";
+
+ QTest::newRow("invalid-password") << "http://user:pass\x7F@ok-hostname" << "Invalid password";
+
+ QTest::newRow("invalid-regname") << "http://bad<hostname>" << "Hostname contains invalid characters";
+ QTest::newRow("invalid-ipv6") << "http://[:::]" << "Invalid IPv6 address";
+ QTest::newRow("invalid-ipvfuture-1") << "http://[v7]" << "Invalid IPvFuture address";
+ QTest::newRow("invalid-ipvfuture-2") << "http://[v7.]" << "Invalid IPvFuture address";
+ QTest::newRow("invalid-ipvfuture-3") << "http://[v789]" << "Invalid IPvFuture address";
+ QTest::newRow("unbalanced-brackets") << "http://[ff02::1" << "Expected ']'";
+
+ QTest::newRow("empty-port") << "http://example.com:" << "Invalid port";
+ QTest::newRow("invalid-port-1") << "http://example.com:-1" << "Invalid port";
+ QTest::newRow("invalid-port-2") << "http://example.com:abc" << "Invalid port";
+ QTest::newRow("invalid-port-3") << "http://example.com:9a" << "Invalid port";
+ QTest::newRow("port-range") << "http://example.com:65536" << "out of range";
+
+ QTest::newRow("invalid-path") << "foo:/path%\x1F" << "Invalid path";
+ // not yet checked:
+ //QTest::newRow("path-colon-before-slash") << "foo::/" << "':' before any '/'";
+
+ QTest::newRow("invalid-query") << "foo:?\\#" << "Invalid query";
+
+ QTest::newRow("invalid-fragment") << "#{}" << "Invalid fragment";
+}
+
+void tst_QUrl::strictParser()
+{
+ QFETCH(QString, input);
+ QFETCH(QString, needle);
+
+ QUrl url(input, QUrl::StrictMode);
+ QVERIFY(!url.isValid());
+ QVERIFY(!url.errorString().isEmpty());
+ if (!url.errorString().contains(needle))
+ qWarning("Error string changed and does not contain \"%s\" anymore: %s",
+ qPrintable(needle), qPrintable(url.errorString()));
+}
+
void tst_QUrl::tolerantParser()
{
{
@@ -1718,18 +1768,16 @@ void tst_QUrl::tolerantParser()
QCOMPARE(url.path(), QString("/path with spaces.html"));
QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://www.example.com/path%20with%20spaces.html"));
url.setUrl("http://www.example.com/path%20with spaces.html", QUrl::StrictMode);
- QEXPECT_FAIL("", "StrictMode not implemented yet", Continue);
QVERIFY(!url.isValid());
- QEXPECT_FAIL("", "StrictMode not implemented yet", Continue);
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://www.example.com/path%2520with%20spaces.html"));
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://www.example.com/path%20with%20spaces.html"));
}
{
QUrl url = QUrl::fromEncoded("http://www.example.com/path%20with spaces.html");
QVERIFY(url.isValid());
QCOMPARE(url.path(), QString("/path with spaces.html"));
url.setEncodedUrl("http://www.example.com/path%20with spaces.html", QUrl::StrictMode);
- QEXPECT_FAIL("", "StrictMode not implemented yet", Continue);
- QVERIFY(!url.isValid());
+ QVERIFY(url.isValid());
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://www.example.com/path%20with%20spaces.html"));
}
{
@@ -2209,19 +2257,6 @@ void tst_QUrl::setAuthority()
QCOMPARE(u.toString(), url);
}
-void tst_QUrl::errorString()
-{
- QUrl v;
- QCOMPARE(v.errorString(), QString());
-
- QUrl u = QUrl::fromEncoded("http://strange<username>@bad_hostname/", QUrl::StrictMode);
- QEXPECT_FAIL("", "StrictMode not implemented yet", Abort);
- QVERIFY(!u.isValid());
- QString errorString = "Invalid URL \"http://strange<username>@bad_hostname/\": "
- "error at position 14: expected end of URL, but found '<'";
- QCOMPARE(u.errorString(), errorString);
-}
-
void tst_QUrl::clear()
{
QUrl url("a");