From e1038794b1d7fe298535960eb90d5c823aa3f2c4 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 31 Jul 2012 14:28:48 +0200 Subject: Make QUrl::setScheme only parse in strict mode (no decoding) The URI RFC defines schemes as containing only a very restricted set of characters, none of which require encoding, so don't even try. Testing this behaviour in some web browsers indicate that they do not accept percent-encoded schemes either. Change-Id: I692dd20e1aac7e8a1bcb276cb5113b5802393d38 Reviewed-by: Lars Knoll --- src/corelib/io/qurl.cpp | 17 ++--------------- src/corelib/io/qurl_p.h | 2 +- tests/auto/corelib/io/qurl/tst_qurl.cpp | 14 +++++++++++--- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index ff1f7ba78c..bc92e943fc 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -656,7 +656,7 @@ inline void QUrlPrivate::appendQuery(QString &appendTo, QUrl::FormattingOptions // setXXX functions -bool QUrlPrivate::setScheme(const QString &value, int len, bool decoded) +bool QUrlPrivate::setScheme(const QString &value, int len) { // schemes are strictly RFC-compliant: // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) @@ -687,19 +687,6 @@ bool QUrlPrivate::setScheme(const QString &value, int len, bool decoded) if (p[i] == '+' || p[i] == '-' || p[i] == '.') continue; - if (p[i] == '%') { - // found a percent-encoded sign - // if we haven't decoded yet, decode and try again - errorSupplement = '%'; - if (decoded) - return false; - - QString decodedScheme; - if (qt_urlRecode(decodedScheme, value.constData(), value.constData() + len, 0, 0) == 0) - return false; - return setScheme(decodedScheme, decodedScheme.length(), true); - } - // found something else errorSupplement = p[i]; return false; @@ -1581,7 +1568,7 @@ void QUrl::setUrl(const QString &url, ParsingMode parsingMode) /*! Sets the scheme of the URL to \a scheme. As a scheme can only - contain ASCII characters, no conversion or encoding is done on the + contain ASCII characters, no conversion or decoding is done on the input. It must also start with an ASCII letter. The scheme describes the type (or protocol) of the URL. It's diff --git a/src/corelib/io/qurl_p.h b/src/corelib/io/qurl_p.h index 3a0d80d7ef..95ccd221a2 100644 --- a/src/corelib/io/qurl_p.h +++ b/src/corelib/io/qurl_p.h @@ -122,7 +122,7 @@ public: void appendFragment(QString &appendTo, QUrl::FormattingOptions options) const; // the "end" parameters are like STL iterators: they point to one past the last valid element - bool setScheme(const QString &value, int len, bool decoded = false); + bool setScheme(const QString &value, int len); bool setAuthority(const QString &auth, int from, int end); void setUserInfo(const QString &userInfo, int from, int end); void setUserName(const QString &value, int from, int end); diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index 8b4908f977..acde1f8871 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -1833,9 +1833,8 @@ void tst_QUrl::strictParser_data() QTest::addColumn("input"); QTest::addColumn("needle"); - // cannot test bad schemes here, as they are parsed as paths instead - //QTest::newRow("invalid-scheme") << "ht%://example.com" << "Invalid scheme"; - //QTest::newRow("empty-scheme") << ":/" << "Empty scheme"; + QTest::newRow("invalid-scheme") << "ht%://example.com" << "Invalid scheme"; + QTest::newRow("empty-scheme") << ":/" << "Empty scheme"; QTest::newRow("invalid-user1") << "http://bad@ok-hostname" << "Invalid user name"; QTest::newRow("invalid-user2") << "http://bad%@ok-hostname" << "Invalid user name"; @@ -1870,6 +1869,7 @@ void tst_QUrl::strictParser() QFETCH(QString, needle); QUrl url(input, QUrl::StrictMode); + QEXPECT_FAIL("empty-scheme", "QUrl does not forbid paths with a colon before the first slash yet", Abort); QVERIFY(!url.isValid()); QVERIFY(!url.errorString().isEmpty()); if (!url.errorString().contains(needle)) @@ -3038,6 +3038,9 @@ void tst_QUrl::setComponents_data() QTest::newRow("invalid-scheme-2") << QUrl("http://example.com") << int(Scheme) << "http%40" << Tolerant << false << PrettyDecoded << "" << ""; + QTest::newRow("invalid-scheme-3") << QUrl("http://example.com") + << int(Scheme) << "http%61" << Strict << false + << PrettyDecoded << "" << ""; QTest::newRow("invalid-host-1") << QUrl("http://example.com") << int(Host) << "-not-valid-" << Tolerant << false @@ -3047,6 +3050,10 @@ void tst_QUrl::setComponents_data() << PrettyDecoded << "" << ""; // -- test decoded behaviour -- + // '%' characters are not permitted in the scheme, this tests that it fails to set anything + QTest::newRow("invalid-scheme-encode") << QUrl("http://example.com") + << int(Scheme) << "http%61" << Decoded << false + << PrettyDecoded << "" << ""; QTest::newRow("userinfo-encode") << QUrl("http://example.com") << int(UserInfo) << "h%61llo:world@" << Decoded << true << PrettyDecoded << "h%2561llo:world@" << "http://h%2561llo:world%40@example.com"; @@ -3104,6 +3111,7 @@ void tst_QUrl::setComponents() switch (component) { case Scheme: + // scheme is only parsed in strict mode copy.setScheme(newValue); QCOMPARE(copy.scheme(), output); break; -- cgit v1.2.3