summary refs log tree commit diff stats
path: root/src/corelib/io/qurlrecode.cpp
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2013-07-01 17:16:54 -0700
committer The Qt Project <gerrit-noreply@qt-project.org> 2013-08-04 04:47:33 +0200
commit 993bbb4d4be524321575668740ea46c6665d6064 (patch)
tree ad65a84d32bad801935a3866da4b9f4d87c9a219 /src/corelib/io/qurlrecode.cpp
parent 0a796cfddf6c5753cb77eea32016e1645fce0c16 (diff)
QUrl: update our understanding of the encoding of delimiters
The longer explanation can be found in the comment in qurl.cpp. The short version is as follows:

Up to now, we considered that every character could be replaced with its percent-encoding equivalent and vice-versa, so long as the parsing of the URL did not change. For example, x:/path+path and x:/path%2Bpath were the same. However, to do this and yet be compliant with most URL uses in the real world, we had to add exceptions:
 - "/" and "%2F" were not the same in the path, despite the delimiter being behind (rationale was the complex definition of path)
 - "+" and "%2B" were not the same in the query, so we ended up not transforming any sub-delim in the query at all

Now, we change our understanding based on the following line from RFC 3986 section 2.2:
    URIs that differ in the replacement of a reserved character with its corresponding percent-encoded octet are not equivalent.

From now on, QUrl will not replace any sub-delim or gen-delim ("reserved character"), except where such a character could not exist in the first place. This simplifies the code and removes all exceptions.

As a side-effect, this has also changed the behaviour of the "{" and "}" characters, which we previously allowed to remain decoded.

[ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no longer considers all delimiter characters equivalent to their percent-encoded forms. Now, both classes always keep all delimiters exactly as they were in the original URL text.

[ChangeLog][Important Behavior Changes][QUrl and QUrlQuery] QUrl no longer decodes %7B and %7D to "{" and "}" in the output of toString()

Task-number: QTBUG-31660
Change-Id: Iba0b5b31b269635ac2d0adb2bb0dfb74c139e08c
Reviewed-by: David Faure (KDE) <faure@kde.org>
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r-- src/corelib/io/qurlrecode.cpp 57
1 files changed, 0 insertions, 57 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 509a92d2b0..9189cd294f 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -113,59 +113,6 @@ static const uchar defaultActionTable[96] = {
// 0x00 if it belongs to this category
// 0xff if it doesn't
-static const uchar delimsMask[96] = {
- 0xff, // space
- 0x00, // '!' (sub-delim)
- 0xff, // '"'
- 0x00, // '#' (gen-delim)
- 0x00, // '$' (gen-delim)
- 0xff, // '%' (percent)
- 0x00, // '&' (gen-delim)
- 0x00, // "'" (sub-delim)
- 0x00, // '(' (sub-delim)
- 0x00, // ')' (sub-delim)
- 0x00, // '*' (sub-delim)
- 0x00, // '+' (sub-delim)
- 0x00, // ',' (sub-delim)
- 0xff, // '-' (unreserved)
- 0xff, // '.' (unreserved)
- 0x00, // '/' (gen-delim)
-
- 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved)
- 0x00, // ':' (gen-delim)
- 0x00, // ';' (sub-delim)
- 0xff, // '<'
- 0x00, // '=' (sub-delim)
- 0xff, // '>'
- 0x00, // '?' (gen-delim)
-
- 0x00, // '@' (gen-delim)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved)
- 0x00, // '[' (gen-delim)
- 0xff, // '\'
- 0x00, // ']' (gen-delim)
- 0xff, // '^'
- 0xff, // '_' (unreserved)
-
- 0xff, // '`'
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved)
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved)
- 0xff, // '{'
- 0xff, // '|'
- 0xff, // '}'
- 0xff, // '~' (unreserved)
-
- 0xff // BSKP
-};
-
static const uchar reservedMask[96] = {
0xff, // space
0xff, // '!' (sub-delim)
@@ -617,8 +564,6 @@ static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
The \a encoding option modifies the default behaviour:
\list
- \li QUrl::EncodeDelimiters: if set, delimiters will be left untransformed (note: not encoded!);
- if unset, delimiters will be decoded
\li QUrl::DecodeReserved: if set, reserved characters will be decoded;
if unset, reserved characters will be encoded
\li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
@@ -664,8 +609,6 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
actionTable[0x7F - ' '] = EncodeCharacter;
} else {
memcpy(actionTable, defaultActionTable, sizeof actionTable);
- if (!(encoding & QUrl::EncodeDelimiters))
- maskTable(actionTable, delimsMask);
if (encoding & QUrl::DecodeReserved)
maskTable(actionTable, reservedMask);
if (!(encoding & QUrl::EncodeSpaces))