diff options
-rw-r--r-- | src/corelib/io/qurl.cpp | 179 | ||||
-rw-r--r-- | tests/auto/corelib/io/qurl/tst_qurl.cpp | 78 |
2 files changed, 214 insertions, 43 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index cc907daa3a..a4e1c1e3e3 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -363,6 +363,7 @@ public: enum ErrorCode { // the high byte of the error code matches the Section + // the first item in each value must be the generic "Invalid xxx Error" InvalidSchemeError = Scheme << 8, InvalidUserNameError = UserName << 8, @@ -410,6 +411,9 @@ public: void clearError(); void setError(ErrorCode errorCode, const QString &source, int supplement = -1); ErrorCode validityError(QString *source = 0, int *position = 0) const; + bool validateComponent(Section section, const QString &input, int begin, int end); + bool validateComponent(Section section, const QString &input) + { return validateComponent(section, input, 0, uint(input.length())); } // no QString scheme() const; void appendAuthority(QString &appendTo, QUrl::FormattingOptions options, Section appendingTo) const; @@ -895,58 +899,72 @@ inline void QUrlPrivate::setAuthority(const QString &auth, int from, int end, QU { sectionIsPresent &= ~Authority; sectionIsPresent |= Host; - if (from == end) { - userName.clear(); - password.clear(); - host.clear(); - port = -1; - return; - } - int userInfoIndex = auth.indexOf(QLatin1Char('@'), from); - if (uint(userInfoIndex) < uint(end)) { - setUserInfo(auth, from, userInfoIndex); - from = userInfoIndex + 1; - } + // we never actually _loop_ + while (from != end) { + int userInfoIndex = auth.indexOf(QLatin1Char('@'), from); + if (uint(userInfoIndex) < uint(end)) { + setUserInfo(auth, from, userInfoIndex); + if (mode == QUrl::StrictMode && !validateComponent(UserInfo, auth, from, userInfoIndex)) + break; + from = userInfoIndex + 1; + } - int colonIndex = auth.lastIndexOf(QLatin1Char(':'), end - 1); - if (colonIndex < from) - colonIndex = -1; + int colonIndex = auth.lastIndexOf(QLatin1Char(':'), end - 1); + if (colonIndex < from) + colonIndex = -1; - if (uint(colonIndex) < uint(end)) { - if 
(auth.at(from).unicode() == '[') { - // check if colonIndex isn't inside the "[...]" part - int closingBracket = auth.indexOf(QLatin1Char(']'), from); - if (uint(closingBracket) > uint(colonIndex)) - colonIndex = -1; + if (uint(colonIndex) < uint(end)) { + if (auth.at(from).unicode() == '[') { + // check if colonIndex isn't inside the "[...]" part + int closingBracket = auth.indexOf(QLatin1Char(']'), from); + if (uint(closingBracket) > uint(colonIndex)) + colonIndex = -1; + } } - } - if (colonIndex == end - 1) { - // found a colon but no digits after it - setError(PortEmptyError, auth, colonIndex + 1); - } else if (uint(colonIndex) < uint(end)) { - unsigned long x = 0; - for (int i = colonIndex + 1; i < end; ++i) { - ushort c = auth.at(i).unicode(); - if (c >= '0' && c <= '9') { - x *= 10; - x += c - '0'; + if (colonIndex == end - 1) { + // found a colon but no digits after it + setError(PortEmptyError, auth, colonIndex + 1); + } else if (uint(colonIndex) < uint(end)) { + unsigned long x = 0; + for (int i = colonIndex + 1; i < end; ++i) { + ushort c = auth.at(i).unicode(); + if (c >= '0' && c <= '9') { + x *= 10; + x += c - '0'; + } else { + x = ulong(-1); // x != ushort(x) + break; + } + } + if (x == ushort(x)) { + port = ushort(x); } else { - x = ulong(-1); // x != ushort(x) - break; + setError(InvalidPortError, auth, colonIndex + 1); + if (mode == QUrl::StrictMode) + break; } - } - if (x == ushort(x)) { - port = ushort(x); } else { - setError(InvalidPortError, auth, colonIndex + 1); + port = -1; + } + + setHost(auth, from, qMin<uint>(end, colonIndex), mode); + if (mode == QUrl::StrictMode && !validateComponent(Host, auth, from, qMin<uint>(end, colonIndex))) { + // clear host too + sectionIsPresent &= ~Authority; + break; } - } else { - port = -1; - } - setHost(auth, from, qMin<uint>(end, colonIndex), mode); + // success + return; + } + // clear all sections but host + sectionIsPresent &= ~Authority | Host; + userName.clear(); + password.clear(); + host.clear(); 
+ port = -1; } inline void QUrlPrivate::setUserInfo(const QString &userInfo, int from, int end) @@ -1519,6 +1537,67 @@ inline QUrlPrivate::ErrorCode QUrlPrivate::validityError(QString *source, int *p return NoError; } +bool QUrlPrivate::validateComponent(QUrlPrivate::Section section, const QString &input, + int begin, int end) +{ + // Validates input[begin, end) as the given section: the first offending character is reported through setError() and false is returned; true means no error was found. What we need to look out for, that the regular parser tolerates: + // - percent signs not followed by two hex digits + // - forbidden characters, which should always appear encoded + // '"' / '<' / '>' / '\' / '^' / '`' / '{' / '|' / '}' / DEL (0x7F) + // control characters and space (anything <= 0x20); code units >= 0x80 are skipped unchecked + // - delimiters not allowed in certain positions + // . scheme: parser is already strict + // . user info: gen-delims except ":" disallowed ("/" / "?" / "#" / "[" / "]" / "@"); sections that share bits with UserInfo without being UserInfo itself (presumably UserName and Password) reject ":" as well + // . host: parser is stricter than the standard + // . port: parser is stricter than the standard + // . path: all delimiters allowed + // . fragment: all delimiters allowed + // . query: all delimiters allowed + static const char forbidden[] = "\"<>\\^`{|}\x7F"; + static const char forbiddenUserInfo[] = ":/?#[]@"; + + Q_ASSERT(section != Authority && section != Hierarchy && section != FullUrl); + + const ushort *const data = reinterpret_cast<const ushort *>(input.constData()); + for (uint i = uint(begin); i < uint(end); ++i) { + register uint uc = data[i]; + if (uc >= 0x80) + continue; + + bool error = false; + if ((uc == '%' && (uint(end) < i + 2 || !isHex(data[i + 1]) || !isHex(data[i + 2]))) + || uc <= 0x20 || strchr(forbidden, uc)) { + // found an error; NOTE(review): the bound `uint(end) < i + 2` still allows i + 2 == end, so data[end] (one past the validated range) is inspected as the second hex digit -- confirm whether `<=` was intended + error = true; + } else if (section & UserInfo) { + if (section == UserInfo && strchr(forbiddenUserInfo + 1, uc)) + error = true; + else if (section != UserInfo && strchr(forbiddenUserInfo, uc)) + error = true; + } + + if (!error) + continue; + + ErrorCode errorCode = ErrorCode(int(section) << 8); + if (section == UserInfo) { + // is it the user name or the password? (password if a ':' occurs before the offending character)
+ errorCode = InvalidUserNameError; + for (uint j = uint(begin); j < i; ++j) + if (data[j] == ':') { + errorCode = InvalidPasswordError; + break; + } + } + + setError(errorCode, input, i); + return false; + } + + // no errors + return true; +} + #if 0 inline void QUrlPrivate::validate() const { @@ -1954,6 +2033,10 @@ void QUrl::setUserInfo(const QString &userInfo, ParsingMode mode) // QUrlPrivate::setUserInfo cleared almost everything // but it leaves the UserName bit set d->sectionIsPresent &= ~QUrlPrivate::UserInfo; + } else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserInfo, userInfo)) { + d->sectionIsPresent &= ~QUrlPrivate::UserInfo; + d->userName.clear(); + d->password.clear(); } } @@ -2010,10 +2093,11 @@ void QUrl::setUserName(const QString &userName, ParsingMode mode) mode = TolerantMode; } - d->setUserName(data, 0, data.length()); if (userName.isNull()) d->sectionIsPresent &= ~QUrlPrivate::UserName; + else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::UserName, userName)) + d->userName.clear(); } /*! @@ -2105,6 +2189,8 @@ void QUrl::setPassword(const QString &password, ParsingMode mode) d->setPassword(data, 0, data.length()); if (password.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Password; + else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Password, password)) + d->password.clear(); } /*! @@ -2354,6 +2440,9 @@ void QUrl::setPath(const QString &path, ParsingMode mode) // optimized out, since there is no path delimiter // if (path.isNull()) // d->sectionIsPresent &= ~QUrlPrivate::Path; +// else + if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Path, path)) + d->path.clear(); } /*! @@ -2474,6 +2563,8 @@ void QUrl::setQuery(const QString &query, ParsingMode mode) d->setQuery(data, 0, data.length()); if (query.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Query; + else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Query, query)) + d->query.clear(); } /*!
@@ -2835,6 +2926,8 @@ void QUrl::setFragment(const QString &fragment, ParsingMode mode) d->setFragment(data, 0, data.length()); if (fragment.isNull()) d->sectionIsPresent &= ~QUrlPrivate::Fragment; + else if (mode == StrictMode && !d->validateComponent(QUrlPrivate::Fragment, fragment)) + d->fragment.clear(); } /*! diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index 971fe27f30..a45efce0db 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -3165,6 +3165,31 @@ void tst_QUrl::setComponents_data() << int(Scheme) << "http%61" << Strict << false << PrettyDecoded << "" << ""; + QTest::newRow("invalid-username-1") << QUrl("http://example.com") + << int(UserName) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-username-2") << QUrl("http://example.com") + << int(UserName) << "foo/bar" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-username-3") << QUrl("http://example.com") + << int(UserName) << "foo:bar" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-password-1") << QUrl("http://example.com") + << int(Password) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-password-2") << QUrl("http://example.com") + << int(Password) << "foo/bar" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-password-3") << QUrl("http://example.com") + << int(Password) << "foo:bar" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-userinfo-1") << QUrl("http://example.com") + << int(UserInfo) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-userinfo-2") << QUrl("http://example.com") + << int(UserInfo) << "foo/bar" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-host-1") << QUrl("http://example.com") << int(Host) << "-not-valid-" << Tolerant << false << PrettyDecoded 
<< "" << ""; @@ -3178,6 +3203,16 @@ void tst_QUrl::setComponents_data() << int(Authority) << "%31%30.%30.%30.%31" << Strict << false << PrettyDecoded << "" << ""; + QTest::newRow("invalid-path-0") << QUrl("http://example.com") + << int(Path) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-query-1") << QUrl("http://example.com") + << int(Query) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("invalid-fragment-1") << QUrl("http://example.com") + << int(Fragment) << "{}" << Strict << false + << PrettyDecoded << "" << ""; + // these test cases are "compound invalid": // they produces isValid == false, but the original is still available QTest::newRow("invalid-path-1") << QUrl("/relative") @@ -3187,6 +3222,49 @@ void tst_QUrl::setComponents_data() << int(Path) << "relative" << Strict << false << PrettyDecoded << "relative" << ""; + // -- test bad percent encoding -- + // unnecessary to test the scheme, since percent-decoding is not performed in it; + // see tests above + QTest::newRow("bad-percent-username") << QUrl("http://example.com") + << int(UserName) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-password") << QUrl("http://user@example.com") + << int(Password) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-userinfo-1") << QUrl("http://example.com") + << int(UserInfo) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-userinfo-2") << QUrl("http://example.com") + << int(UserInfo) << "bar%:foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-userinfo-3") << QUrl("http://example.com") + << int(UserInfo) << "bar:%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-authority-1") << QUrl("http://example.com") + << int(Authority) << "bar%foo@example.org" << Strict << false + << PrettyDecoded << "" << ""; + 
QTest::newRow("bad-percent-authority-2") << QUrl("http://example.com") + << int(Authority) << "bar%:foo@example.org" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-authority-3") << QUrl("http://example.com") + << int(Authority) << "bar:%foo@example.org" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-authority-4") << QUrl("http://example.com") + << int(Authority) << "bar:foo@bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-host") << QUrl("http://example.com") + << int(Host) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-path") << QUrl("http://example.com") + << int(Path) << "/bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-query") << QUrl("http://example.com") + << int(Query) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + QTest::newRow("bad-percent-fragment") << QUrl("http://example.com") + << int(Fragment) << "bar%foo" << Strict << false + << PrettyDecoded << "" << ""; + // -- test decoded behaviour -- // '%' characters are not permitted in the scheme, this tests that it fails to set anything QTest::newRow("invalid-scheme-encode") << QUrl("http://example.com") |