diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-07-01 17:16:54 -0700 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-08-04 04:47:33 +0200 |
commit | 993bbb4d4be524321575668740ea46c6665d6064 (patch) | |
tree | ad65a84d32bad801935a3866da4b9f4d87c9a219 /src/corelib/io/qurlrecode.cpp | |
parent | 0a796cfddf6c5753cb77eea32016e1645fce0c16 (diff) |
QUrl: update our understanding of the encoding of delimiters
The longer explanation can be found in the comment in qurl.cpp. The
short version is as follows:
Up to now, we considered that every character could be replaced with
its percent-encoding equivalent and vice-versa, so long as the parsing
of the URL did not change. For example, x:/path+path and
x:/path%2Bpath were the same. However, to do this and yet be compliant
with most URL uses in the real world, we had to add exceptions:
- "/" and "%2F" were not the same in the path, even though that delimiter
  was already behind us at that point (the rationale was the complex
  definition of the path)
- "+" and "%2B" were not the same in the query, so we ended up not
transforming any sub-delim in the query at all
Now, we change our understanding based on the following line from
RFC 3986 section 2.2:
    "URIs that differ in the replacement of a reserved character with
    its corresponding percent-encoded octet are not equivalent."
From now on, QUrl will not replace any sub-delim or gen-delim
("reserved character"), except where such a character could not exist
in the first place. This simplifies the code and removes all
exceptions.
As a side-effect, this has also changed the behaviour of the "{" and
"}" characters, which we previously allowed to remain decoded.
[ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no
longer considers all delimiter characters equivalent to their
percent-encoded forms. Now, both classes always keep all delimiters
exactly as they were in the original URL text.
[ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no
longer decodes %7B and %7D to "{" and "}" in the output of toString().
Task-number: QTBUG-31660
Change-Id: Iba0b5b31b269635ac2d0adb2bb0dfb74c139e08c
Reviewed-by: David Faure (KDE) <faure@kde.org>
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 57 |
1 files changed, 0 insertions, 57 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 509a92d2b0..9189cd294f 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -113,59 +113,6 @@ static const uchar defaultActionTable[96] = { // 0x00 if it belongs to this category // 0xff if it doesn't -static const uchar delimsMask[96] = { - 0xff, // space - 0x00, // '!' (sub-delim) - 0xff, // '"' - 0x00, // '#' (gen-delim) - 0x00, // '$' (gen-delim) - 0xff, // '%' (percent) - 0x00, // '&' (gen-delim) - 0x00, // "'" (sub-delim) - 0x00, // '(' (sub-delim) - 0x00, // ')' (sub-delim) - 0x00, // '*' (sub-delim) - 0x00, // '+' (sub-delim) - 0x00, // ',' (sub-delim) - 0xff, // '-' (unreserved) - 0xff, // '.' (unreserved) - 0x00, // '/' (gen-delim) - - 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved) - 0x00, // ':' (gen-delim) - 0x00, // ';' (sub-delim) - 0xff, // '<' - 0x00, // '=' (sub-delim) - 0xff, // '>' - 0x00, // '?' (gen-delim) - - 0x00, // '@' (gen-delim) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved) - 0x00, // '[' (gen-delim) - 0xff, // '\' - 0x00, // ']' (gen-delim) - 0xff, // '^' - 0xff, // '_' (unreserved) - - 0xff, // '`' - 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved) - 0xff, // '{' - 0xff, // '|' - 0xff, // '}' - 0xff, // '~' (unreserved) - - 0xff // BSKP -}; - static const uchar reservedMask[96] = { 0xff, // space 0xff, // '!' 
(sub-delim) @@ -617,8 +564,6 @@ static void maskTable(uchar (&table)[N], const uchar (&mask)[N]) The \a encoding option modifies the default behaviour: \list - \li QUrl::EncodeDelimiters: if set, delimiters will be left untransformed (note: not encoded!); - if unset, delimiters will be decoded \li QUrl::DecodeReserved: if set, reserved characters will be decoded; if unset, reserved characters will be encoded \li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " " @@ -664,8 +609,6 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, actionTable[0x7F - ' '] = EncodeCharacter; } else { memcpy(actionTable, defaultActionTable, sizeof actionTable); - if (!(encoding & QUrl::EncodeDelimiters)) - maskTable(actionTable, delimsMask); if (encoding & QUrl::DecodeReserved) maskTable(actionTable, reservedMask); if (!(encoding & QUrl::EncodeSpaces)) |