summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2013-07-01 17:16:54 -0700
committerThe Qt Project <gerrit-noreply@qt-project.org>2013-08-04 04:47:33 +0200
commit993bbb4d4be524321575668740ea46c6665d6064 (patch)
treead65a84d32bad801935a3866da4b9f4d87c9a219 /tests
parent0a796cfddf6c5753cb77eea32016e1645fce0c16 (diff)
QUrl: update our understanding of the encoding of delimiters
The longer explanation can be found in the comment in qurl.cpp. The short version is as follows: Up to now, we considered that every character could be replaced with its percent-encoding equivalent and vice-versa, so long as the parsing of the URL did not change. For example, x:/path+path and x:/path%2Bpath were the same. However, to do this and yet be compliant with most URL uses in the real world, we had to add exceptions: - "/" and "%2F" were not the same in the path, despite the delimiter being behind (rationale was the complex definition of path) - "+" and "%2B" were not the same in the query, so we ended up not transforming any sub-delim in the query at all Now, we change our understanding based on the following line from RFC 3986 section 2.2: URIs that differ in the replacement of a reserved character with its corresponding percent-encoded octet are not equivalent. From now on, QUrl will not replace any sub-delim or gen-delim ("reserved character"), except where such a character could not exist in the first place. This simplifies the code and removes all exceptions. As a side-effect, this has also changed the behaviour of the "{" and "}" characters, which we previously allowed to remain decoded. [ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no longer considers all delimiter characters equivalent to their percent-encoded forms. Now, both classes always keep all delimiters exactly as they were in the original URL text. [ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no longer decodes %7B and %7D to "{" and "}" in the output of toString() Task-number: QTBUG-31660 Change-Id: Iba0b5b31b269635ac2d0adb2bb0dfb74c139e08c Reviewed-by: David Faure (KDE) <faure@kde.org>
Diffstat (limited to 'tests')
-rw-r--r--tests/auto/corelib/io/qurl/tst_qurl.cpp166
1 files changed, 63 insertions, 103 deletions
diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp
index c66140dfae..a55f87b20f 100644
--- a/tests/auto/corelib/io/qurl/tst_qurl.cpp
+++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp
@@ -758,8 +758,8 @@ void tst_QUrl::setUrl()
QVERIFY(url.isValid());
QCOMPARE(url.scheme(), QString("data"));
QCOMPARE(url.host(), QString());
- QCOMPARE(url.path(), QString("text/javascript,d5 = 'five\\u0027s';"));
- QCOMPARE(url.encodedPath().constData(), "text/javascript,d5%20=%20'five%5Cu0027s';");
+ QCOMPARE(url.path(), QString("text/javascript,d5 %3D 'five\\u0027s'%3B"));
+ QCOMPARE(url.encodedPath().constData(), "text/javascript,d5%20%3D%20'five%5Cu0027s'%3B");
}
{
@@ -1575,17 +1575,17 @@ void tst_QUrl::relative()
void tst_QUrl::percentEncoding_data()
{
+ // This test is limited. It's superseded by componentEncodings below
QTest::addColumn<QString>("original");
QTest::addColumn<QByteArray>("encoded");
QTest::newRow("test_01") << QString::fromLatin1("sdfsdf") << QByteArray("sdfsdf");
QTest::newRow("test_02") << QString::fromUtf8("æss") << QByteArray("%C3%A6ss");
- // not unreserved or reserved
- QTest::newRow("test_03") << QString::fromLatin1("{}") << QByteArray("%7B%7D");
}
void tst_QUrl::percentEncoding()
{
+ // This test is limited. It's superseded by componentEncodings below
QFETCH(QString, original);
QFETCH(QByteArray, encoded);
@@ -1660,21 +1660,23 @@ void tst_QUrl::symmetry()
{
QString urlString = QString::fromLatin1("http://desktop:33326/upnp/{32f525a6-6f31-426e-91ca-01c2e6c2c57e}");
+ QString encodedUrlString = QString("http://desktop:33326/upnp/%7B32f525a6-6f31-426e-91ca-01c2e6c2c57e%7D");
QUrl urlPreviewList(urlString);
- QCOMPARE(urlPreviewList.toString(), urlString);
+ QCOMPARE(urlPreviewList.toString(), encodedUrlString);
QByteArray b = urlPreviewList.toEncoded();
- QCOMPARE(b.constData(), "http://desktop:33326/upnp/%7B32f525a6-6f31-426e-91ca-01c2e6c2c57e%7D");
- QCOMPARE(QUrl::fromEncoded(b).toString(), urlString);
- QCOMPARE(QUrl(b).toString(), urlString);
+ QCOMPARE(b.constData(), encodedUrlString.toLatin1().constData());
+ QCOMPARE(QUrl::fromEncoded(b).toString(), encodedUrlString);
+ QCOMPARE(QUrl(b).toString(), encodedUrlString);
}
{
QString urlString = QString::fromLatin1("http://desktop:53423/deviceDescription?uuid={7977c17b-00bf-4af9-894e-fed28573c3a9}");
+ QString encodedUrlString = QString("http://desktop:53423/deviceDescription?uuid=%7B7977c17b-00bf-4af9-894e-fed28573c3a9%7D");
QUrl urlPreviewList(urlString);
- QCOMPARE(urlPreviewList.toString(), urlString);
+ QCOMPARE(urlPreviewList.toString(), encodedUrlString);
QByteArray b = urlPreviewList.toEncoded();
- QCOMPARE(b.constData(), "http://desktop:53423/deviceDescription?uuid=%7B7977c17b-00bf-4af9-894e-fed28573c3a9%7D");
- QCOMPARE(QUrl::fromEncoded(b).toString(), urlString);
- QCOMPARE(QUrl(b).toString(), urlString);
+ QCOMPARE(b.constData(), encodedUrlString.toLatin1().constData());
+ QCOMPARE(QUrl::fromEncoded(b).toString(), encodedUrlString);
+ QCOMPARE(QUrl(b).toString(), encodedUrlString);
}
}
@@ -2180,35 +2182,22 @@ void tst_QUrl::tolerantParser()
url.setUrl("http://foo.bar/[image][1].jpg");
QVERIFY(url.isValid());
QVERIFY(!url.toString().isEmpty());
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
- QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/[image][1].jpg"));
+ QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/[image][1].jpg"));
QCOMPARE(url.toString(), QString("http://foo.bar/[image][1].jpg"));
- url.setUrl("[].jpg");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("%5B%5D.jpg"));
- QCOMPARE(url.toEncoded(), QByteArray("%5B%5D.jpg"));
- QCOMPARE(url.toString(), QString("[].jpg"));
-
- url.setUrl("/some/[path]/[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("/some/%5Bpath%5D/%5B%5D"));
- QCOMPARE(url.toEncoded(), QByteArray("/some/%5Bpath%5D/%5B%5D"));
- QCOMPARE(url.toString(), QString("/some/[path]/[]"));
+ url.setUrl("http://foo.bar/%5Bimage%5D%5B1%5D.jpg");
+ QVERIFY(url.isValid());
+ QVERIFY(!url.toString().isEmpty());
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toString(), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
url.setUrl("//[::56:56:56:56:56:56:56]");
QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]"));
QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]"));
QCOMPARE(url.toString(), QString("//[0:56:56:56:56:56:56:56]"));
- url.setUrl("//[::56:56:56:56:56:56:56]#[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]#%5B%5D"));
- QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]#%5B%5D"));
- QCOMPARE(url.toString(), QString("//[0:56:56:56:56:56:56:56]#[]"));
-
- url.setUrl("//[::56:56:56:56:56:56:56]?[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?[]"));
- QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]?[]"));
- QCOMPARE(url.toString(), QString("//[0:56:56:56:56:56:56:56]?[]"));
-
// invoke the tolerant parser's error correction
url.setUrl("%hello.com/f%");
QCOMPARE(url.toString(QUrl::FullyEncoded), QString("%25hello.com/f%25"));
@@ -2221,38 +2210,24 @@ void tst_QUrl::tolerantParser()
url.setEncodedUrl("http://foo.bar/[image][1].jpg");
QVERIFY(url.isValid());
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
- QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/[image][1].jpg"));
+ QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/[image][1].jpg"));
QCOMPARE(url.toString(), QString("http://foo.bar/[image][1].jpg"));
- url.setEncodedUrl("[].jpg");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("%5B%5D.jpg"));
- QCOMPARE(url.toEncoded(), QByteArray("%5B%5D.jpg"));
- QCOMPARE(url.toString(), QString("[].jpg"));
-
- url.setEncodedUrl("/some/[path]/[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("/some/%5Bpath%5D/%5B%5D"));
- QCOMPARE(url.toEncoded(), QByteArray("/some/%5Bpath%5D/%5B%5D"));
- QCOMPARE(url.toString(), QString("/some/[path]/[]"));
+ url.setEncodedUrl("http://foo.bar/%5Bimage%5D%5B1%5D.jpg");
+ QVERIFY(url.isValid());
+ QCOMPARE(url.toString(QUrl::FullyEncoded), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toEncoded(), QByteArray("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
+ QCOMPARE(url.toString(), QString("http://foo.bar/%5Bimage%5D%5B1%5D.jpg"));
url.setEncodedUrl("//[::56:56:56:56:56:56:56]");
QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]"));
QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]"));
- url.setEncodedUrl("//[::56:56:56:56:56:56:56]#[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]#%5B%5D"));
- QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]#%5B%5D"));
- QCOMPARE(url.toString(), QString("//[0:56:56:56:56:56:56:56]#[]"));
-
- url.setEncodedUrl("//[::56:56:56:56:56:56:56]?[]");
- QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?[]"));
- QCOMPARE(url.toEncoded(), QByteArray("//[0:56:56:56:56:56:56:56]?[]"));
- QCOMPARE(url.toString(), QString("//[0:56:56:56:56:56:56:56]?[]"));
-
url.setEncodedUrl("data:text/css,div%20{%20border-right:%20solid;%20}");
QCOMPARE(url.toString(QUrl::FullyEncoded), QString("data:text/css,div%20%7B%20border-right:%20solid;%20%7D"));
QCOMPARE(url.toEncoded(), QByteArray("data:text/css,div%20%7B%20border-right:%20solid;%20%7D"));
- QCOMPARE(url.toString(), QString("data:text/css,div { border-right: solid; }"));
+ QCOMPARE(url.toString(), QString("data:text/css,div %7B border-right: solid; %7D"));
}
{
@@ -3147,19 +3122,25 @@ void tst_QUrl::componentEncodings_data()
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
- // like the unreserved, these are decoded everywhere
- // don't test in query because they might remain encoded
- QTest::newRow("decoded-subdelims") << QUrl("x://%21%24%26:%27%28%29@host/%2a%2b%2c#%3b%3d")
+ // these are always left alone
+ QTest::newRow("decoded-subdelims") << QUrl("x://!$&:'()@host/*+,?$=(+)#;=")
<< int(QUrl::FullyEncoded)
<< "!$&" << "'()" << "!$&:'()"
<< "host" << "!$&:'()@host"
- << "/*+," << "" << ";="
- << "x://!$&:'()@host/*+,#;=";
+ << "/*+," << "$=(+)" << ";="
+ << "x://!$&:'()@host/*+,?$=(+)#;=";
+ QTest::newRow("encoded-subdelims") << QUrl("x://%21%24%26:%27%28%29@host/%2a%2b%2c?%26=%26&%3d=%3d#%3b%3d")
+ << MostDecoded
+ << "%21%24%26" << "%27%28%29" << "%21%24%26:%27%28%29"
+ << "host" << "%21%24%26:%27%28%29@host"
+ << "/%2A%2B%2C" << "%26=%26&%3D=%3D" << "%3B%3D"
+ << "x://%21%24%26:%27%28%29@host/%2A%2B%2C?%26=%26&%3D=%3D#%3B%3D";
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
// these are the separators between fields
- // they must appear encoded in certain positions, no exceptions
- // in other positions, they can appear decoded, so they always do
+ // they must appear encoded in certain positions in the full URL, no exceptions
+ // when in those positions, they appear decoded in the isolated parts
+ // in other positions and the other delimiters are always left untransformed
// 1) test the delimiters that must appear encoded
// (if they were decoded, they'd would change the URL parsing)
QTest::newRow("encoded-gendelims-changing") << QUrl("x://%5b%3a%2f%3f%23%40%5d:%5b%2f%3f%23%40%5d@host/%2f%3f%23?%23")
@@ -3169,32 +3150,21 @@ void tst_QUrl::componentEncodings_data()
<< "/%2F?#" << "#" << ""
<< "x://%5B%3A%2F%3F%23%40%5D:%5B%2F%3F%23%40%5D@host/%2F%3F%23?%23";
- // 2) test the delimiters that may appear decoded and would not change the meaning
- // and test that %2f is *not* decoded to a slash in the path
- // don't test the query because in this mode it doesn't transform anything
- QTest::newRow("decoded-gendelims-unchanging") << QUrl("x://:%3a@host/%2f%3a%40#%23%3a%2f%3f%40")
+ // 2) test that the other delimiters remain decoded
+ QTest::newRow("decoded-gendelims-unchanging") << QUrl("x://::@host/:@/[]?:/?@[]?##:/?@[]")
<< int(QUrl::FullyEncoded)
<< "" << ":" << "::"
<< "host" << "::@host"
- << "/%2F:@" << "" << "#:/?@"
- << "x://::@host/%2F:@##:/?@";
-
- // 3) test "[" and "]". Even though they are not ambiguous in the path, query or fragment
- // the RFC does not allow them to appear there decoded. QUrl adheres strictly in FullyEncoded mode
- QTest::newRow("encoded-square-brackets") << QUrl("x:/[]#[]")
- << int(QUrl::FullyEncoded)
- << "" << "" << ""
- << "" << ""
- << "/%5B%5D" << "" << "%5B%5D"
- << "x:/%5B%5D#%5B%5D";
-
- // 4) like above, but now decode them, which is allowed
- QTest::newRow("decoded-square-brackets") << QUrl("x:/%5B%5D#%5B%5D")
- << MostDecoded
- << "" << "" << ""
- << "" << ""
- << "/[]" << "" << "[]"
- << "x:/[]#[]";
+ << "/:@/[]" << ":/?@[]?" << "#:/?@[]"
+ << "x://::@host/:@/[]?:/?@[]?##:/?@[]";
+
+ // 3) and test that the same encoded sequences remain encoded
+ QTest::newRow("encoded-gendelims-unchanging") << QUrl("x://:%3A@host/%3A%40%5B%5D?%3A%2F%3F%40%5B%5D#%23%3A%2F%3F%40%5B%5D")
+ << MostDecoded
+ << "" << "%3A" << ":%3A"
+ << "host" << ":%3A@host"
+ << "/%3A%40%5B%5D" << "%3A%2F%3F%40%5B%5D" << "%23%3A%2F%3F%40%5B%5D"
+ << "x://:%3A@host/%3A%40%5B%5D?%3A%2F%3F%40%5B%5D#%23%3A%2F%3F%40%5B%5D";
// test the query
// since QUrl doesn't know what chars the user wants to use for the pair and value delimiters,
@@ -3248,23 +3218,13 @@ void tst_QUrl::componentEncodings_data()
<< QString::fromUtf8("é ")
<< QString::fromUtf8("x:// é:é @smørbrød.example.no/é ? é#é ");
- // the pretty form re-encodes the subdelims (except in the query, where they are left alone)
- QTest::newRow("pretty-subdelims") << QUrl("x://%21%24%26:%27%28%29@host/%2a%2b%2c?%26=%26&%3d=%3d#%3b%3d")
- << int(QUrl::PrettyDecoded)
- << "!$&" << "'()" << "!$&:'()"
- << "host" << "!$&:'()@host"
- << "/*+," << "%26=%26&%3D=%3D" << ";="
- << "x://!$&:'()@host/*+,?%26=%26&%3D=%3D#;=";
-
- // the pretty form decodes all unambiguous gen-delims
- // (except in query, where they are left alone)
- QTest::newRow("pretty-gendelims") << QUrl("x://%5b%3a%40%2f%5d:%5b%3a%40%2f%5d@host"
- "/%3a%40%5b%3f%23%5d?[?%3f%23]%5b:%3a@%40%5d#%23")
+ // the pretty form decodes all unambiguous gen-delims in the individual parts
+ QTest::newRow("pretty-gendelims") << QUrl("x://%5b%3a%40%2f%3f%23%5d:%5b%40%2f%3f%23%5d@host/%3f%23?%23")
<< int(QUrl::PrettyDecoded)
- << "[:@/]" << "[:@/]" << "[%3A@/]:[:@/]"
- << "host" << "%5B%3A%40/%5D:%5B:%40/%5D@host"
- << "/:@[?#]" << "[?%3F#]%5B:%3A@%40%5D" << "#"
- << "x://%5B%3A%40%2F%5D:%5B:%40%2F%5D@host/:@[%3F%23]?[?%3F%23]%5B:%3A@%40%5D##";
+ << "[:@/?#]" << "[@/?#]" << "[%3A@/?#]:[@/?#]"
+ << "host" << "%5B%3A%40/?#%5D:%5B%40/?#%5D@host"
+ << "/?#" << "#" << ""
+ << "x://%5B%3A%40%2F%3F%23%5D:%5B%40%2F%3F%23%5D@host/%3F%23?%23";
// the pretty form keeps the other characters decoded everywhere
// except when rebuilding the full URL, when we only allow "{}" to remain decoded
@@ -3273,8 +3233,8 @@ void tst_QUrl::componentEncodings_data()
<< "\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}:\"<>^\\{|}"
<< "host" << "\"<>^\\{|}:\"<>^\\{|}@host"
<< "/\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}"
- << "x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host/%22%3C%3E%5E%5C{%7C}"
- "?%22%3C%3E%5E%5C{%7C}#%22%3C%3E%5E%5C%7B%7C%7D";
+ << "x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host/%22%3C%3E%5E%5C%7B%7C%7D"
+ "?%22%3C%3E%5E%5C%7B%7C%7D#%22%3C%3E%5E%5C%7B%7C%7D";
}
void tst_QUrl::componentEncodings()