diff options
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 114 |
1 files changed, 38 insertions, 76 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 5ff0c40a4f..7e77b9c251 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -113,59 +113,6 @@ static const uchar defaultActionTable[96] = { // 0x00 if it belongs to this category // 0xff if it doesn't -static const uchar delimsMask[96] = { - 0xff, // space - 0x00, // '!' (sub-delim) - 0xff, // '"' - 0x00, // '#' (gen-delim) - 0x00, // '$' (gen-delim) - 0xff, // '%' (percent) - 0x00, // '&' (gen-delim) - 0x00, // "'" (sub-delim) - 0x00, // '(' (sub-delim) - 0x00, // ')' (sub-delim) - 0x00, // '*' (sub-delim) - 0x00, // '+' (sub-delim) - 0x00, // ',' (sub-delim) - 0xff, // '-' (unreserved) - 0xff, // '.' (unreserved) - 0x00, // '/' (gen-delim) - - 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved) - 0x00, // ':' (gen-delim) - 0x00, // ';' (sub-delim) - 0xff, // '<' - 0x00, // '=' (sub-delim) - 0xff, // '>' - 0x00, // '?' (gen-delim) - - 0x00, // '@' (gen-delim) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved) - 0x00, // '[' (gen-delim) - 0xff, // '\' - 0x00, // ']' (gen-delim) - 0xff, // '^' - 0xff, // '_' (unreserved) - - 0xff, // '`' - 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved) - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved) - 0xff, // '{' - 0xff, // '|' - 0xff, // '}' - 0xff, // '~' (unreserved) - - 0xff // BSKP -}; - static const uchar reservedMask[96] = { 0xff, // space 0xff, // '!' (sub-delim) @@ -560,6 +507,27 @@ non_trivial: return 0; } +/*! + \since 5.0 + \internal + + This function decodes a percent-encoded string located from \a begin to \a + end, by appending each character to \a appendTo. It returns the number of + characters appended. Each percent-encoded sequence is decoded as follows: + + \list + \li from %00 to %7F: the exact decoded value is appended; + \li from %80 to %FF: QChar::ReplacementCharacter is appended; + \li bad encoding: original input is copied to the output, undecoded. + \endlist + + Given the above, it's important for the input to already have all UTF-8 + percent sequences decoded by qt_urlRecode (that is, the input should not + have been processed with QUrl::EncodeUnicode). + + The input should also be a valid percent-encoded sequence (the output of + qt_urlRecode is always valid). +*/ static int decode(QString &appendTo, const ushort *begin, const ushort *end) { const int origSize = appendTo.size(); @@ -573,6 +541,13 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end) continue; } + if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) { + // badly-encoded data + appendTo.resize(origSize + (end - begin)); + memcpy(appendTo.begin() + origSize, begin, (end - begin) * sizeof(ushort)); + return end - begin; + } + if (Q_UNLIKELY(!output)) { // detach appendTo.resize(origSize + (end - begin)); @@ -582,10 +557,9 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end) } ++input; - Q_ASSERT(input <= end - 2); // we need two characters - Q_ASSERT(isHex(input[0])); - Q_ASSERT(isHex(input[1])); *output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]); + if (output[-1] >= 0x80) + output[-1] = QChar::ReplacementCharacter; input += 2; } @@ -613,8 +587,6 @@ static void maskTable(uchar (&table)[N], const uchar (&mask)[N]) The \a encoding option modifies the default behaviour: \list - \li QUrl::EncodeDelimiters: if set, delimiters will be left untransformed (note: not encoded!); - if unset, delimiters will be decoded \li QUrl::DecodeReserved: if set, reserved characters will be decoded; if unset, reserved characters will be encoded \li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " " @@ -635,6 +607,9 @@ static void maskTable(uchar (&table)[N], const uchar (&mask)[N]) handled. It consists of a sequence of 16-bit values, where the low 8 bits indicate the character in question and the high 8 bits are either \c EncodeCharacter, \c LeaveCharacter or \c DecodeCharacter. + + This function corrects percent-encoded errors by interpreting every '%' as + meaning "%25" (all percents in the same content). */ Q_AUTOTEST_EXPORT int @@ -646,24 +621,11 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, return decode(appendTo, reinterpret_cast<const ushort *>(begin), reinterpret_cast<const ushort *>(end)); } - if (!(encoding & QUrl::EncodeDelimiters) && encoding & QUrl::DecodeReserved) { - // reset the table - memset(actionTable, DecodeCharacter, sizeof actionTable); - if (encoding & QUrl::EncodeSpaces) - actionTable[0] = EncodeCharacter; - - // these are always encoded - actionTable['%' - ' '] = EncodeCharacter; - actionTable[0x7F - ' '] = EncodeCharacter; - } else { - memcpy(actionTable, defaultActionTable, sizeof actionTable); - if (!(encoding & QUrl::EncodeDelimiters)) - maskTable(actionTable, delimsMask); - if (encoding & QUrl::DecodeReserved) - maskTable(actionTable, reservedMask); - if (!(encoding & QUrl::EncodeSpaces)) - actionTable[0] = DecodeCharacter; // decode - } + memcpy(actionTable, defaultActionTable, sizeof actionTable); + if (encoding & QUrl::DecodeReserved) + maskTable(actionTable, reservedMask); + if (!(encoding & QUrl::EncodeSpaces)) + actionTable[0] = DecodeCharacter; // decode if (tableModifications) { for (const ushort *p = tableModifications; *p; ++p) |