From ef288340da61a72ac6d7ecb48e5bce5fef8cf11b Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Thu, 29 Mar 2012 22:59:56 -0300 Subject: Fix the handling of ambiguous delimiters in the query part of a URL This is the same fix as the previous commit did for the other components of the URL. But we're also changing how we handle the "[]" characters in a query: previously the handling was like for other sub-delims; now, they're always decoded, assuming that the RFC had a mistake and they were meant to be decoded. Change-Id: If4b1c3df8f341cb114f2cc4860de22f8bf0be743 Reviewed-by: Shane Kearns --- src/corelib/io/qurl.cpp | 72 ++++++++++++++++++++++----------- src/corelib/io/qurl_p.h | 2 +- src/corelib/io/qurlquery.cpp | 29 +++++-------- tests/auto/corelib/io/qurl/tst_qurl.cpp | 4 +- 4 files changed, 62 insertions(+), 45 deletions(-) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index d5932056be..f3c05af413 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -330,8 +330,11 @@ void QUrlPrivate::clear() // - password: none, since it's the last // - username: ":" is ambiguous -// list the recoding table modifications to be used with the recodeFromUser -// function, according to the rules above +// list the recoding table modifications to be used with the recodeFromUser and +// appendToUser functions, according to the rules above. +// the encodedXXX tables are run with the delimiters set to "leave" by default; +// the decodedXXX tables are run with the delimiters set to "decode" by default +// (except for the query, which doesn't use these functions) #define decode(x) ushort(x) #define leave(x) ushort(0x100 | (x)) @@ -399,8 +402,41 @@ static const ushort encodedFragmentActions[] = { static const ushort * const decodedFragmentInUrlActions = 0; static const ushort * const decodedFragmentInIsolationActions = 0; -// the query is handled specially, since we prefer not to transform the delims -static const ushort * const encodedQueryActions = encodedFragmentActions + 4; // encode "#" / "[" / "]" +// the query is handled specially: the decodedQueryXXX tables are run with +// the delimiters set to "leave" by default and the others set to "encode" +static const ushort encodedQueryActions[] = { + // query = *( pchar / "/" / "?" ) + // gen-delims permitted: ":" / "@" / "/" / "?" + // HOWEVER: we leave alone them alone, plus "[" and "]" + // -> must encode: "#" + encode('#'), // 0 + 0 +}; +static const ushort decodedQueryInIsolationActions[] = { + decode('"'), // 0 + decode('<'), // 1 + decode('>'), // 2 + decode('^'), // 3 + decode('\\'),// 4 + decode('|'), // 5 + decode('{'), // 6 + decode('}'), // 7 + decode('#'), // 8 + 0 +}; +static const ushort decodedQueryInUrlActions[] = { + decode('"'), // 0 + decode('<'), // 1 + decode('>'), // 2 + decode('^'), // 3 + decode('\\'),// 4 + decode('|'), // 5 + decode('{'), // 6 + decode('}'), // 7 + encode('#'), // 8 + 0 +}; + static inline QString recodeFromUser(const QString &input, const ushort *actions, int from, int to) @@ -518,19 +554,20 @@ inline void QUrlPrivate::appendFragment(QString &appendTo, QUrl::FormattingOptio appendToUser(appendTo, fragment, options, encodedFragmentActions, decodedFragmentInIsolationActions); } -inline void QUrlPrivate::appendQuery(QString &appendTo, QUrl::FormattingOptions options) const +inline void QUrlPrivate::appendQuery(QString &appendTo, QUrl::FormattingOptions options, Section appendingTo) const { // almost the same code as the previous functions // except we prefer not to touch the delimiters - if (options == QUrl::PrettyDecoded) { + if (options == QUrl::PrettyDecoded && appendingTo == Query) { appendTo += query; return; } const ushort *actions = 0; - if ((options & QUrl::DecodeAllDelimiters) == QUrl::DecodeUnambiguousDelimiters) { + if (options & QUrl::DecodeAllDelimiters) { // reset to default qt_urlRecode behaviour (leave delimiters alone) options &= ~QUrl::DecodeAllDelimiters; + actions = appendingTo == Query ? decodedQueryInIsolationActions : decodedQueryInUrlActions; } else if ((options & QUrl::DecodeAllDelimiters) == 0) { actions = encodedQueryActions; } @@ -722,25 +759,12 @@ inline void QUrlPrivate::setQuery(const QString &value, int from, int iend) sectionIsPresent |= Query; sectionHasError &= ~Query; - // use the default actions for the query - static const ushort decodeActions[] = { - decode('"'), - decode('<'), - decode('>'), - decode('\\'), - decode('^'), - decode('`'), - decode('{'), - decode('|'), - decode('}'), - encode('#'), - 0 - }; + // use the default actions for the query (don't set QUrl::DecodeAllDelimiters) QString output; const QChar *begin = value.constData() + from; const QChar *end = value.constData() + iend; if (qt_urlRecode(output, begin, end, QUrl::DecodeUnicode | QUrl::DecodeSpaces, - decodeActions)) + decodedQueryInIsolationActions)) query = output; else query = value.mid(from, iend - from); @@ -1818,7 +1842,7 @@ QString QUrl::query(ComponentFormattingOptions options) const if (!d) return QString(); QString result; - d->appendQuery(result, options); + d->appendQuery(result, options, QUrlPrivate::Query); if (d->hasQuery() && result.isNull()) result.detach(); return result; @@ -2055,7 +2079,7 @@ QString QUrl::toString(FormattingOptions options) const if (!(options & QUrl::RemoveQuery) && d->hasQuery()) { url += QLatin1Char('?'); - d->appendQuery(url, options); + d->appendQuery(url, options, QUrlPrivate::FullUrl); } if (!(options & QUrl::RemoveFragment) && d->hasFragment()) { url += QLatin1Char('#'); diff --git a/src/corelib/io/qurl_p.h b/src/corelib/io/qurl_p.h index 13abeddf9a..201ee66238 100644 --- a/src/corelib/io/qurl_p.h +++ b/src/corelib/io/qurl_p.h @@ -119,7 +119,7 @@ public: void appendPassword(QString &appendTo, QUrl::FormattingOptions options) const; void appendHost(QString &appendTo, QUrl::FormattingOptions options) const; void appendPath(QString &appendTo, QUrl::FormattingOptions options, Section appendingTo) const; - void appendQuery(QString &appendTo, QUrl::FormattingOptions options) const; + void appendQuery(QString &appendTo, QUrl::FormattingOptions options, Section appendingTo) const; void appendFragment(QString &appendTo, QUrl::FormattingOptions options) const; // the "end" parameters are like STL iterators: they point to one past the last valid element diff --git a/src/corelib/io/qurlquery.cpp b/src/corelib/io/qurlquery.cpp index a24e73332c..85180a2d72 100644 --- a/src/corelib/io/qurlquery.cpp +++ b/src/corelib/io/qurlquery.cpp @@ -180,7 +180,8 @@ template<> void QSharedDataPointer::detach() // The strict definition of query says that it can have unencoded any // unreserved, sub-delim, ":", "@", "/" and "?". Or, by exclusion, excluded // delimiters are "#", "[" and "]" -- if those are present, they must be -// percent-encoded. +// percent-encoded. The fact that "[" and "]" should be encoded is probably a +// mistake in the spec, so we ignore it and leave the decoded. // // The internal storage in the Map is equivalent to PrettyDecoded. That means // the getter methods, when called with the default encoding value, will not @@ -194,8 +195,8 @@ template<> void QSharedDataPointer::detach() // themselves. // // But when recreating the query string, in toString(), we must take care of -// the special delimiters: the pair and value delimiters and those that the -// definition says must be encoded ("#" / "[" / "]") +// the special delimiters: the pair and value delimiters, as well as the "#" +// character if unambiguous decoding is requested. #define decode(x) ushort(x) #define leave(x) ushort(0x100 | (x)) @@ -236,10 +237,9 @@ inline QString QUrlQueryPrivate::recodeToUser(const QString &input, QUrl::Compon return input; } - // re-encode the gen-delims that the RFC says must be encoded the - // query delimiter pair; the non-delimiters will get encoded too + // re-encode the "#" character and the query delimiter pair ushort actions[] = { encode(pairDelimiter.unicode()), encode(valueDelimiter.unicode()), - encode('#'), encode('['), encode(']'), 0 }; + encode('#'), 0 }; QString output; if (qt_urlRecode(output, input.constData(), input.constData() + input.length(), encoding, actions)) return output; @@ -455,33 +455,26 @@ QString QUrlQuery::query(QUrl::ComponentFormattingOptions encoding) const // unlike the component encoding, for the whole query we need to modify a little: // - the "#" character is ambiguous, so we decode it only in DecodeAllDelimiters mode // - the query delimiter pair must always be encoded - // - the "[" and "]" sub-delimiters and the non-delimiters very on DecodeUnambiguousDelimiters + // - the non-delimiters vary on DecodeUnambiguousDelimiters // so: // - full encoding: encode the non-delimiters, the pair, "#", "[" and "]" // - pretty decode: decode the non-delimiters, "[" and "]"; encode the pair and "#" // - decode all: decode the non-delimiters, "[", "]", "#"; encode the pair // start with what's always encoded - ushort tableActions[7] = { + ushort tableActions[] = { leave('+'), // 0 encode(d->pairDelimiter.unicode()), // 1 encode(d->valueDelimiter.unicode()), // 2 + decode('#'), // 3 + 0 }; - if ((encoding & QUrl::DecodeAllDelimiters) == QUrl::DecodeAllDelimiters) { + if (encoding & QUrl::DecodeAllDelimiters) { // full decoding: we only encode the characters above tableActions[3] = 0; encoding |= QUrl::DecodeAllDelimiters; } else { tableActions[3] = encode('#'); - if (encoding & QUrl::DecodeUnambiguousDelimiters) { - tableActions[4] = 0; - encoding |= QUrl::DecodeAllDelimiters; - } else { - tableActions[4] = encode('['); - tableActions[5] = encode(']'); - tableActions[6] = 0; - encoding &= ~QUrl::DecodeAllDelimiters; - } } QString result; diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index c9f977e11f..8bd3e94166 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -1866,7 +1866,7 @@ void tst_QUrl::tolerantParser() QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]#%5B%5D")); url.setUrl("//[::56:56:56:56:56:56:56]?[]"); - QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?%5B%5D")); + QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?[]")); url.setUrl("%hello.com/f%"); QCOMPARE(url.toString(QUrl::FullyEncoded), QString("%25hello.com/f%25")); @@ -1891,7 +1891,7 @@ void tst_QUrl::tolerantParser() QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]#%5B%5D")); url.setEncodedUrl("//[::56:56:56:56:56:56:56]?[]"); - QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?%5B%5D")); + QCOMPARE(url.toString(QUrl::FullyEncoded), QString("//[0:56:56:56:56:56:56:56]?[]")); url.setEncodedUrl("data:text/css,div%20{%20border-right:%20solid;%20}"); QCOMPARE(url.toString(QUrl::FullyEncoded), QString("data:text/css,div%20%7B%20border-right:%20solid;%20%7D")); -- cgit v1.2.3