diff options
author | Joni Poikelin <joni.poikelin@qt.io> | 2018-12-11 11:42:34 +0200 |
---|---|---|
committer | Joni Poikelin <joni.poikelin@qt.io> | 2018-12-13 05:23:12 +0000 |
commit | eaf4438b3511c8380b9b691b656a87a60e342e29 (patch) | |
tree | 7b853fef8105d7250a3cc349dc3615cbd798cec2 | |
parent | bc997b856aa7b0b2137b568cd3fc6f190cd89f84 (diff) |
Make url normalization closer to common browser behavior
Firefox, Chrome and various http libraries normalize /./ and /../ from
urls, but retain multiple adjacent slashes as is. Qt removes
duplicated slashes which makes it impossible to access some web
resources that rely on those.
Fixes: QTBUG-71973
Change-Id: Ie18ae6ad3264acb252fcd87a754726a8c546e5ec
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r-- | src/corelib/io/qdir.cpp | 64 | ||||
-rw-r--r-- | src/corelib/io/qdir_p.h | 12 | ||||
-rw-r--r-- | src/corelib/io/qurl.cpp | 8 | ||||
-rw-r--r-- | tests/auto/corelib/io/qdir/tst_qdir.cpp | 9 | ||||
-rw-r--r-- | tests/auto/corelib/io/qurl/tst_qurl.cpp | 36 |
5 files changed, 106 insertions, 23 deletions
diff --git a/src/corelib/io/qdir.cpp b/src/corelib/io/qdir.cpp index 7df461ddce..405718aba8 100644 --- a/src/corelib/io/qdir.cpp +++ b/src/corelib/io/qdir.cpp @@ -2161,9 +2161,10 @@ bool QDir::match(const QString &filter, const QString &fileName) This method is shared with QUrl, so it doesn't deal with QDir::separator(), nor does it remove the trailing slash, if any. */ -Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool allowUncPaths, - bool *ok = nullptr) +QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok) { + const bool allowUncPaths = QDirPrivate::AllowUncPaths & flags; + const bool isRemote = QDirPrivate::RemotePath & flags; const int len = name.length(); if (ok) @@ -2185,14 +2186,30 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all i -= prefixLength; // replicate trailing slash (i > 0 checks for emptiness of input string p) - if (i > 0 && p[i] == '/') { + // except for remote paths because there can be /../ or /./ ending + if (i > 0 && p[i] == '/' && !isRemote) { out[--used] = '/'; --i; } + auto isDot = [](const ushort *p, int i) { + return i > 1 && p[i - 1] == '.' && p[i - 2] == '/'; + }; + auto isDotDot = [](const ushort *p, int i) { + return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/'; + }; + while (i >= 0) { - // remove trailing slashes + // copy trailing slashes for remote urls if (p[i] == '/') { + if (isRemote && !up) { + if (isDot(p, i)) { + i -= 2; + continue; + } + out[--used] = p[i]; + } + --i; continue; } @@ -2204,10 +2221,17 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all } // detect up dir - if (i >= 1 && p[i] == '.' && p[i-1] == '.' - && (i == 1 || (i >= 2 && p[i-2] == '/'))) { + if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) { ++up; - i -= 2; + i -= i >= 2 ? 
3 : 2; + + if (isRemote) { + // moving up should consider empty path segments too (/path//../ -> /path/) + while (i > 0 && up && p[i] == '/') { + --up; + --i; + } + } continue; } @@ -2217,7 +2241,27 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all // skip or copy while (i >= 0) { - if (p[i] == '/') { // do not copy slashes + if (p[i] == '/') { + // copy all slashes as is for remote urls if they are not part of /./ or /../ + if (isRemote && !up) { + while (i > 0 && p[i] == '/' && !isDotDot(p, i)) { + + if (isDot(p, i)) { + i -= 2; + continue; + } + + out[--used] = p[i]; + --i; + } + + // in case of /./, jump over + if (isDot(p, i)) + i -= 2; + + break; + } + --i; break; } @@ -2238,7 +2282,7 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all *ok = prefixLength == 0 || up == 0; // add remaining '..' - while (up) { + while (up && !isRemote) { if (used != len && out[used] != '/') // is not empty and there isn't already a '/' out[--used] = '/'; out[--used] = '.'; @@ -2284,7 +2328,7 @@ static QString qt_cleanPath(const QString &path, bool *ok) if (dir_separator != QLatin1Char('/')) name.replace(dir_separator, QLatin1Char('/')); - QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths, ok); + QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths ? 
QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization, ok); // Strip away last slash except for root directories if (ret.length() > 1 && ret.endsWith(QLatin1Char('/'))) { diff --git a/src/corelib/io/qdir_p.h b/src/corelib/io/qdir_p.h index 85d915223c..0f3ab7f899 100644 --- a/src/corelib/io/qdir_p.h +++ b/src/corelib/io/qdir_p.h @@ -59,6 +59,14 @@ QT_BEGIN_NAMESPACE class QDirPrivate : public QSharedData { public: + enum PathNormalization { + DefaultNormalization = 0x00, + AllowUncPaths = 0x01, + RemotePath = 0x02 + }; + Q_DECLARE_FLAGS(PathNormalizations, PathNormalization) + Q_FLAGS(PathNormalizations) + explicit QDirPrivate(const QString &path, const QStringList &nameFilters_ = QStringList(), QDir::SortFlags sort_ = QDir::SortFlags(QDir::Name | QDir::IgnoreCase), QDir::Filters filters_ = QDir::AllEntries); @@ -97,6 +105,10 @@ public: mutable QFileSystemMetaData metaData; }; +Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations) + +Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr); + QT_END_NAMESPACE #endif diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index b324df53b2..6d82981fd6 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -253,7 +253,8 @@ and contains no query or fragment, a local file path is returned. \value StripTrailingSlash The trailing slash is removed from the path, if one is present. \value NormalizePathSegments Modifies the path to remove redundant directory separators, - and to resolve "."s and ".."s (as far as possible). + and to resolve "."s and ".."s (as far as possible). For non-local paths, adjacent + slashes are preserved. 
Note that the case folding rules in \l{RFC 3491}{Nameprep}, which QUrl conforms to, require host names to always be converted to lower case, @@ -419,10 +420,9 @@ #endif #include "private/qipaddress_p.h" #include "qurlquery.h" +#include "private/qdir_p.h" QT_BEGIN_NAMESPACE -extern QString qt_normalizePathSegments(const QString &name, bool allowUncPaths, - bool *ok = nullptr); // qdir.cpp inline static bool isHex(char c) { @@ -930,7 +930,7 @@ inline void QUrlPrivate::appendPath(QString &appendTo, QUrl::FormattingOptions o { QString thePath = path; if (options & QUrl::NormalizePathSegments) { - thePath = qt_normalizePathSegments(path, false); + thePath = qt_normalizePathSegments(path, isLocalFile() ? QDirPrivate::DefaultNormalization : QDirPrivate::RemotePath); } QStringRef thePathRef(&thePath); diff --git a/tests/auto/corelib/io/qdir/tst_qdir.cpp b/tests/auto/corelib/io/qdir/tst_qdir.cpp index 30f0e447ad..af9c6be432 100644 --- a/tests/auto/corelib/io/qdir/tst_qdir.cpp +++ b/tests/auto/corelib/io/qdir/tst_qdir.cpp @@ -62,12 +62,7 @@ #endif #ifdef QT_BUILD_INTERNAL - -QT_BEGIN_NAMESPACE -extern Q_AUTOTEST_EXPORT QString - qt_normalizePathSegments(const QString &path, bool allowUncPaths, bool *ok = nullptr); -QT_END_NAMESPACE - +#include "private/qdir_p.h" #endif static QByteArray msgDoesNotExist(const QString &name) @@ -1376,7 +1371,7 @@ void tst_QDir::normalizePathSegments() QFETCH(QString, path); QFETCH(UncHandling, uncHandling); QFETCH(QString, expected); - QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc); + QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc ? 
QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization); QCOMPARE(cleaned, expected); if (path == expected) QVERIFY2(path.isSharedWith(cleaned), "Strings are same but data is not shared"); diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index 84af1c255a..4f173d2dfd 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -182,6 +182,8 @@ private slots: void matches(); void ipv6_zoneId_data(); void ipv6_zoneId(); + void normalizeRemotePaths_data(); + void normalizeRemotePaths(); private: void testThreadingHelper(); @@ -323,7 +325,7 @@ void tst_QUrl::comparison() QUrl url3bis = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D/"); QUrl url3bisNoSlash = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D"); - QUrl url4bis = QUrl::fromEncoded("example://a/.//b/../b/c//%7Bfoo%7D/"); + QUrl url4bis = QUrl::fromEncoded("example://a/./b/../b/c/%7Bfoo%7D/"); QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments), url3bis); QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments | QUrl::StripTrailingSlash), url3bisNoSlash); QVERIFY(url3bis.matches(url4bis, QUrl::NormalizePathSegments)); @@ -335,7 +337,7 @@ void tst_QUrl::comparison() QCOMPARE(url4EncodedDots.path(QUrl::FullyDecoded), QString("/.//b/..//b/c/")); QCOMPARE(QString::fromLatin1(url4EncodedDots.toEncoded()), QString::fromLatin1("example://a/.//b/..%2F/b/c/")); QCOMPARE(url4EncodedDots.toString(), QString("example://a/.//b/..%2F/b/c/")); - QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a/b/..%2F/b/c/")); + QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a//b/..%2F/b/c/")); // 6.2.2.1 Make sure hexdecimal characters in percent encoding are // treated case-insensitively @@ -4201,6 +4203,36 @@ void tst_QUrl::ipv6_zoneId() QCOMPARE(url.toString(QUrl::FullyEncoded), "x://[" + encodedHost + "]"); } +void tst_QUrl::normalizeRemotePaths_data() +{ 
+ QTest::addColumn<QUrl>("url"); + QTest::addColumn<QString>("expected"); + + QTest::newRow("dotdot-slashslash") << QUrl("http://qt-project.org/some/long/..//path") << "http://qt-project.org/some//path"; + QTest::newRow("slashslash-dotdot") << QUrl("http://qt-project.org/some//../path") << "http://qt-project.org/some/path"; + QTest::newRow("slashslash-dotdot2") << QUrl("http://qt-project.org/some//path/../") << "http://qt-project.org/some//"; + QTest::newRow("dot-slash") << QUrl("http://qt-project.org/some/./path") << "http://qt-project.org/some/path"; + QTest::newRow("slashslash-dot-slashslash") << QUrl("http://qt-project.org/some//.//path") << "http://qt-project.org/some///path"; + QTest::newRow("dot-slashslash") << QUrl("http://qt-project.org/some/.//path") << "http://qt-project.org/some//path"; + QTest::newRow("multiple-slashes") << QUrl("http://qt-project.org/some//path") << "http://qt-project.org/some//path"; + QTest::newRow("multiple-slashes4") << QUrl("http://qt-project.org/some////path") << "http://qt-project.org/some////path"; + QTest::newRow("slashes-at-end") << QUrl("http://qt-project.org/some//") << "http://qt-project.org/some//"; + QTest::newRow("dot-dotdot") << QUrl("http://qt-project.org/path/./../") << "http://qt-project.org/"; + QTest::newRow("slash-dot-slash-dot-slash") << QUrl("http://qt-project.org/path//.//.//") << "http://qt-project.org/path////"; + QTest::newRow("dotdot") << QUrl("http://qt-project.org/../") << "http://qt-project.org/"; + QTest::newRow("dotdot-dotdot") << QUrl("http://qt-project.org/path/../../") << "http://qt-project.org/"; + QTest::newRow("dot-dotdot-tail") << QUrl("http://qt-project.org/stem/path/./../tail") << "http://qt-project.org/stem/tail"; + QTest::newRow("slash-dotdot-slash-tail") << QUrl("http://qt-project.org/stem/path//..//tail") << "http://qt-project.org/stem/path//tail"; +} + +void tst_QUrl::normalizeRemotePaths() +{ + QFETCH(QUrl, url); + QFETCH(QString, expected); + + 
QCOMPARE(url.adjusted(QUrl::NormalizePathSegments).toString(), expected); +} + QTEST_MAIN(tst_QUrl) #include "tst_qurl.moc" |