summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoni Poikelin <joni.poikelin@qt.io>2018-12-11 11:42:34 +0200
committerJoni Poikelin <joni.poikelin@qt.io>2018-12-13 05:23:12 +0000
commiteaf4438b3511c8380b9b691b656a87a60e342e29 (patch)
tree7b853fef8105d7250a3cc349dc3615cbd798cec2
parentbc997b856aa7b0b2137b568cd3fc6f190cd89f84 (diff)
Make url normalization closer to common browser behavior
Firefox, Chrome and various http libraries normalize /./ and /../ from urls, but retain multiple adjacent slashes as is. Qt removes duplicated slashes which makes it impossible to access some web resources that rely on those. Fixes: QTBUG-71973 Change-Id: Ie18ae6ad3264acb252fcd87a754726a8c546e5ec Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/io/qdir.cpp64
-rw-r--r--src/corelib/io/qdir_p.h12
-rw-r--r--src/corelib/io/qurl.cpp8
-rw-r--r--tests/auto/corelib/io/qdir/tst_qdir.cpp9
-rw-r--r--tests/auto/corelib/io/qurl/tst_qurl.cpp36
5 files changed, 106 insertions, 23 deletions
diff --git a/src/corelib/io/qdir.cpp b/src/corelib/io/qdir.cpp
index 7df461ddce..405718aba8 100644
--- a/src/corelib/io/qdir.cpp
+++ b/src/corelib/io/qdir.cpp
@@ -2161,9 +2161,10 @@ bool QDir::match(const QString &filter, const QString &fileName)
This method is shared with QUrl, so it doesn't deal with QDir::separator(),
nor does it remove the trailing slash, if any.
*/
-Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool allowUncPaths,
- bool *ok = nullptr)
+QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
{
+ const bool allowUncPaths = QDirPrivate::AllowUncPaths & flags;
+ const bool isRemote = QDirPrivate::RemotePath & flags;
const int len = name.length();
if (ok)
@@ -2185,14 +2186,30 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
i -= prefixLength;
// replicate trailing slash (i > 0 checks for emptiness of input string p)
- if (i > 0 && p[i] == '/') {
+ // except for remote paths because there can be /../ or /./ ending
+ if (i > 0 && p[i] == '/' && !isRemote) {
out[--used] = '/';
--i;
}
+ auto isDot = [](const ushort *p, int i) {
+ return i > 1 && p[i - 1] == '.' && p[i - 2] == '/';
+ };
+ auto isDotDot = [](const ushort *p, int i) {
+ return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/';
+ };
+
while (i >= 0) {
- // remove trailing slashes
+ // copy trailing slashes for remote urls
if (p[i] == '/') {
+ if (isRemote && !up) {
+ if (isDot(p, i)) {
+ i -= 2;
+ continue;
+ }
+ out[--used] = p[i];
+ }
+
--i;
continue;
}
@@ -2204,10 +2221,17 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
}
// detect up dir
- if (i >= 1 && p[i] == '.' && p[i-1] == '.'
- && (i == 1 || (i >= 2 && p[i-2] == '/'))) {
+ if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) {
++up;
- i -= 2;
+ i -= i >= 2 ? 3 : 2;
+
+ if (isRemote) {
+ // moving up should consider empty path segments too (/path//../ -> /path/)
+ while (i > 0 && up && p[i] == '/') {
+ --up;
+ --i;
+ }
+ }
continue;
}
@@ -2217,7 +2241,27 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
// skip or copy
while (i >= 0) {
- if (p[i] == '/') { // do not copy slashes
+ if (p[i] == '/') {
+ // copy all slashes as is for remote urls if they are not part of /./ or /../
+ if (isRemote && !up) {
+ while (i > 0 && p[i] == '/' && !isDotDot(p, i)) {
+
+ if (isDot(p, i)) {
+ i -= 2;
+ continue;
+ }
+
+ out[--used] = p[i];
+ --i;
+ }
+
+ // in case of /./, jump over
+ if (isDot(p, i))
+ i -= 2;
+
+ break;
+ }
+
--i;
break;
}
@@ -2238,7 +2282,7 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
*ok = prefixLength == 0 || up == 0;
// add remaining '..'
- while (up) {
+ while (up && !isRemote) {
if (used != len && out[used] != '/') // is not empty and there isn't already a '/'
out[--used] = '/';
out[--used] = '.';
@@ -2284,7 +2328,7 @@ static QString qt_cleanPath(const QString &path, bool *ok)
if (dir_separator != QLatin1Char('/'))
name.replace(dir_separator, QLatin1Char('/'));
- QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths, ok);
+ QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization, ok);
// Strip away last slash except for root directories
if (ret.length() > 1 && ret.endsWith(QLatin1Char('/'))) {
diff --git a/src/corelib/io/qdir_p.h b/src/corelib/io/qdir_p.h
index 85d915223c..0f3ab7f899 100644
--- a/src/corelib/io/qdir_p.h
+++ b/src/corelib/io/qdir_p.h
@@ -59,6 +59,14 @@ QT_BEGIN_NAMESPACE
class QDirPrivate : public QSharedData
{
public:
+ enum PathNormalization {
+ DefaultNormalization = 0x00,
+ AllowUncPaths = 0x01,
+ RemotePath = 0x02
+ };
+ Q_DECLARE_FLAGS(PathNormalizations, PathNormalization)
+ Q_FLAGS(PathNormalizations)
+
explicit QDirPrivate(const QString &path, const QStringList &nameFilters_ = QStringList(),
QDir::SortFlags sort_ = QDir::SortFlags(QDir::Name | QDir::IgnoreCase),
QDir::Filters filters_ = QDir::AllEntries);
@@ -97,6 +105,10 @@ public:
mutable QFileSystemMetaData metaData;
};
+Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations)
+
+Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr);
+
QT_END_NAMESPACE
#endif
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp
index b324df53b2..6d82981fd6 100644
--- a/src/corelib/io/qurl.cpp
+++ b/src/corelib/io/qurl.cpp
@@ -253,7 +253,8 @@
and contains no query or fragment, a local file path is returned.
\value StripTrailingSlash The trailing slash is removed from the path, if one is present.
\value NormalizePathSegments Modifies the path to remove redundant directory separators,
- and to resolve "."s and ".."s (as far as possible).
+ and to resolve "."s and ".."s (as far as possible). For non-local paths, adjacent
+ slashes are preserved.
Note that the case folding rules in \l{RFC 3491}{Nameprep}, which QUrl
conforms to, require host names to always be converted to lower case,
@@ -419,10 +420,9 @@
#endif
#include "private/qipaddress_p.h"
#include "qurlquery.h"
+#include "private/qdir_p.h"
QT_BEGIN_NAMESPACE
-extern QString qt_normalizePathSegments(const QString &name, bool allowUncPaths,
- bool *ok = nullptr); // qdir.cpp
inline static bool isHex(char c)
{
@@ -930,7 +930,7 @@ inline void QUrlPrivate::appendPath(QString &appendTo, QUrl::FormattingOptions o
{
QString thePath = path;
if (options & QUrl::NormalizePathSegments) {
- thePath = qt_normalizePathSegments(path, false);
+ thePath = qt_normalizePathSegments(path, isLocalFile() ? QDirPrivate::DefaultNormalization : QDirPrivate::RemotePath);
}
QStringRef thePathRef(&thePath);
diff --git a/tests/auto/corelib/io/qdir/tst_qdir.cpp b/tests/auto/corelib/io/qdir/tst_qdir.cpp
index 30f0e447ad..af9c6be432 100644
--- a/tests/auto/corelib/io/qdir/tst_qdir.cpp
+++ b/tests/auto/corelib/io/qdir/tst_qdir.cpp
@@ -62,12 +62,7 @@
#endif
#ifdef QT_BUILD_INTERNAL
-
-QT_BEGIN_NAMESPACE
-extern Q_AUTOTEST_EXPORT QString
- qt_normalizePathSegments(const QString &path, bool allowUncPaths, bool *ok = nullptr);
-QT_END_NAMESPACE
-
+#include "private/qdir_p.h"
#endif
static QByteArray msgDoesNotExist(const QString &name)
@@ -1376,7 +1371,7 @@ void tst_QDir::normalizePathSegments()
QFETCH(QString, path);
QFETCH(UncHandling, uncHandling);
QFETCH(QString, expected);
- QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc);
+ QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization);
QCOMPARE(cleaned, expected);
if (path == expected)
QVERIFY2(path.isSharedWith(cleaned), "Strings are same but data is not shared");
diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp
index 84af1c255a..4f173d2dfd 100644
--- a/tests/auto/corelib/io/qurl/tst_qurl.cpp
+++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp
@@ -182,6 +182,8 @@ private slots:
void matches();
void ipv6_zoneId_data();
void ipv6_zoneId();
+ void normalizeRemotePaths_data();
+ void normalizeRemotePaths();
private:
void testThreadingHelper();
@@ -323,7 +325,7 @@ void tst_QUrl::comparison()
QUrl url3bis = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D/");
QUrl url3bisNoSlash = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D");
- QUrl url4bis = QUrl::fromEncoded("example://a/.//b/../b/c//%7Bfoo%7D/");
+ QUrl url4bis = QUrl::fromEncoded("example://a/./b/../b/c/%7Bfoo%7D/");
QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments), url3bis);
QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments | QUrl::StripTrailingSlash), url3bisNoSlash);
QVERIFY(url3bis.matches(url4bis, QUrl::NormalizePathSegments));
@@ -335,7 +337,7 @@ void tst_QUrl::comparison()
QCOMPARE(url4EncodedDots.path(QUrl::FullyDecoded), QString("/.//b/..//b/c/"));
QCOMPARE(QString::fromLatin1(url4EncodedDots.toEncoded()), QString::fromLatin1("example://a/.//b/..%2F/b/c/"));
QCOMPARE(url4EncodedDots.toString(), QString("example://a/.//b/..%2F/b/c/"));
- QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a/b/..%2F/b/c/"));
+ QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a//b/..%2F/b/c/"));
// 6.2.2.1 Make sure hexdecimal characters in percent encoding are
// treated case-insensitively
@@ -4201,6 +4203,36 @@ void tst_QUrl::ipv6_zoneId()
QCOMPARE(url.toString(QUrl::FullyEncoded), "x://[" + encodedHost + "]");
}
+void tst_QUrl::normalizeRemotePaths_data()
+{
+ QTest::addColumn<QUrl>("url");
+ QTest::addColumn<QString>("expected");
+
+ QTest::newRow("dotdot-slashslash") << QUrl("http://qt-project.org/some/long/..//path") << "http://qt-project.org/some//path";
+ QTest::newRow("slashslash-dotdot") << QUrl("http://qt-project.org/some//../path") << "http://qt-project.org/some/path";
+ QTest::newRow("slashslash-dotdot2") << QUrl("http://qt-project.org/some//path/../") << "http://qt-project.org/some//";
+ QTest::newRow("dot-slash") << QUrl("http://qt-project.org/some/./path") << "http://qt-project.org/some/path";
+ QTest::newRow("slashslash-dot-slashslash") << QUrl("http://qt-project.org/some//.//path") << "http://qt-project.org/some///path";
+ QTest::newRow("dot-slashslash") << QUrl("http://qt-project.org/some/.//path") << "http://qt-project.org/some//path";
+ QTest::newRow("multiple-slashes") << QUrl("http://qt-project.org/some//path") << "http://qt-project.org/some//path";
+ QTest::newRow("multiple-slashes4") << QUrl("http://qt-project.org/some////path") << "http://qt-project.org/some////path";
+ QTest::newRow("slashes-at-end") << QUrl("http://qt-project.org/some//") << "http://qt-project.org/some//";
+ QTest::newRow("dot-dotdot") << QUrl("http://qt-project.org/path/./../") << "http://qt-project.org/";
+ QTest::newRow("slash-dot-slash-dot-slash") << QUrl("http://qt-project.org/path//.//.//") << "http://qt-project.org/path////";
+ QTest::newRow("dotdot") << QUrl("http://qt-project.org/../") << "http://qt-project.org/";
+ QTest::newRow("dotdot-dotdot") << QUrl("http://qt-project.org/path/../../") << "http://qt-project.org/";
+ QTest::newRow("dot-dotdot-tail") << QUrl("http://qt-project.org/stem/path/./../tail") << "http://qt-project.org/stem/tail";
+ QTest::newRow("slash-dotdot-slash-tail") << QUrl("http://qt-project.org/stem/path//..//tail") << "http://qt-project.org/stem/path//tail";
+}
+
+void tst_QUrl::normalizeRemotePaths()
+{
+ QFETCH(QUrl, url);
+ QFETCH(QString, expected);
+
+ QCOMPARE(url.adjusted(QUrl::NormalizePathSegments).toString(), expected);
+}
+
QTEST_MAIN(tst_QUrl)
#include "tst_qurl.moc"