From a01c662d3737fd9f26a7e4455d7bcb03628155d7 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 30 Mar 2012 13:32:19 -0300 Subject: Introduce QUrl::DecodeReserved and reorder the enums DecodeReserved applies to all characters between 0x21 and 0x7E that aren't unreserved, a delimiter, or the percent sign itself. Change-Id: Ie64bddb6b814dfa3bb8380e3aa24de1bb3645a65 Reviewed-by: Shane Kearns --- src/corelib/io/qurl.h | 7 +- src/corelib/io/qurlrecode.cpp | 123 +++++++++++++++++++++++++++++++- tests/auto/corelib/io/qurl/tst_qurl.cpp | 8 +-- 3 files changed, 130 insertions(+), 8 deletions(-) diff --git a/src/corelib/io/qurl.h b/src/corelib/io/qurl.h index 4eaf652ff5..27b81dfc0f 100644 --- a/src/corelib/io/qurl.h +++ b/src/corelib/io/qurl.h @@ -140,10 +140,11 @@ public: enum ComponentFormattingOption { FullyEncoded = 0x000000, DecodeSpaces = 0x100000, - DecodeDelimiters = 0x200000 | 0x400000, - DecodeUnicode = 0x800000, + DecodeUnicode = 0x200000, + DecodeDelimiters = 0x400000 | 0x800000, + DecodeReserved = 0x1000000, - PrettyDecoded = DecodeSpaces | DecodeDelimiters | DecodeUnicode, + PrettyDecoded = DecodeSpaces | DecodeDelimiters | DecodeReserved | DecodeUnicode, MostDecoded = PrettyDecoded }; Q_DECLARE_FLAGS(ComponentFormattingOptions, ComponentFormattingOption) diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 50adfa0dfd..02fced5b7b 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -109,6 +109,116 @@ static const uchar defaultActionTable[96] = { 2 // BSKP }; +// mask tables, in negative polarity +// 0x00 if it belongs to this category +// 0xff if it doesn't + +static const uchar delimsMask[96] = { + 0xff, // space + 0x00, // '!' (sub-delim) + 0xff, // '"' + 0x00, // '#' (gen-delim) + 0x00, // '$' (gen-delim) + 0xff, // '%' (percent) + 0x00, // '&' (gen-delim) + 0x00, // "'" (sub-delim) + 0x00, // '(' (sub-delim) + 0x00, // ')' (sub-delim) + 0x00, // '*' (sub-delim) + 0x00, // '+' (sub-delim) + 0x00, // ',' (sub-delim) + 0xff, // '-' (unreserved) + 0xff, // '.' (unreserved) + 0x00, // '/' (gen-delim) + + 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved) + 0x00, // ':' (gen-delim) + 0x00, // ';' (sub-delim) + 0xff, // '<' + 0x00, // '=' (sub-delim) + 0xff, // '>' + 0x00, // '?' (gen-delim) + + 0x00, // '@' (gen-delim) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved) + 0x00, // '[' (gen-delim) + 0xff, // '\' + 0x00, // ']' (gen-delim) + 0xff, // '^' + 0xff, // '_' (unreserved) + + 0xff, // '`' + 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved) + 0xff, // '{' + 0xff, // '|' + 0xff, // '}' + 0xff, // '~' (unreserved) + + 0xff // BSKP +}; + +static const uchar reservedMask[96] = { + 0xff, // space + 0xff, // '!' (sub-delim) + 0x00, // '"' + 0xff, // '#' (gen-delim) + 0xff, // '$' (gen-delim) + 0xff, // '%' (percent) + 0xff, // '&' (gen-delim) + 0xff, // "'" (sub-delim) + 0xff, // '(' (sub-delim) + 0xff, // ')' (sub-delim) + 0xff, // '*' (sub-delim) + 0xff, // '+' (sub-delim) + 0xff, // ',' (sub-delim) + 0xff, // '-' (unreserved) + 0xff, // '.' (unreserved) + 0xff, // '/' (gen-delim) + + 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved) + 0xff, // ':' (gen-delim) + 0xff, // ';' (sub-delim) + 0x00, // '<' + 0xff, // '=' (sub-delim) + 0x00, // '>' + 0xff, // '?' (gen-delim) + + 0xff, // '@' (gen-delim) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved) + 0xff, // '[' (gen-delim) + 0x00, // '\' + 0xff, // ']' (gen-delim) + 0x00, // '^' + 0xff, // '_' (unreserved) + + 0x00, // '`' + 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved) + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved) + 0x00, // '{' + 0x00, // '|' + 0x00, // '}' + 0xff, // '~' (unreserved) + + 0xff // BSKP +}; + static inline bool isHex(ushort c) { return (c >= 'a' && c <= 'f') || @@ -464,12 +574,19 @@ non_trivial: return 0; } +template +static void maskTable(uchar (&table)[N], const uchar (&mask)[N]) +{ + for (size_t i = 0; i < N; ++i) + table[i] &= mask[i]; +} + Q_AUTOTEST_EXPORT int qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications) { uchar actionTable[sizeof defaultActionTable]; - if (encoding & QUrl::DecodeDelimiters) { + if (encoding & QUrl::DecodeDelimiters && encoding & QUrl::DecodeReserved) { // reset the table memset(actionTable, DecodeCharacter, sizeof actionTable); if (!(encoding & QUrl::DecodeSpaces)) @@ -480,6 +597,10 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, actionTable[0x7F - ' '] = EncodeCharacter; } else { memcpy(actionTable, defaultActionTable, sizeof actionTable); + if (encoding & QUrl::DecodeDelimiters) + maskTable(actionTable, delimsMask); + if (encoding & QUrl::DecodeReserved) + maskTable(actionTable, reservedMask); if (encoding & QUrl::DecodeSpaces) actionTable[0] = DecodeCharacter; // decode } diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index aae970eac6..7deb0fc381 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -2713,9 +2713,9 @@ void tst_QUrl::componentEncodings_data() << QString() << "%21%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D" << QString() << "?%21%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D"; - // other characters: '"' / "<" / ">" / "^" / "\" / "{" / "|" "}" + // reserved characters: '"' / "<" / ">" / "^" / "\" / "{" / "|" "}" // the RFC does not allow them undecoded anywhere, but we do - QTest::newRow("encoded-others") << QUrl("x://\"<>^\\{|}:\"<>^\\{|}@host/\"<>^\\{|}?\"<>^\\{|}#\"<>^\\{|}") + QTest::newRow("encoded-reserved") << QUrl("x://\"<>^\\{|}:\"<>^\\{|}@host/\"<>^\\{|}?\"<>^\\{|}#\"<>^\\{|}") << int(QUrl::FullyEncoded) << "%22%3C%3E%5E%5C%7B%7C%7D" << "%22%3C%3E%5E%5C%7B%7C%7D" << "%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D" @@ -2724,9 +2724,9 @@ void tst_QUrl::componentEncodings_data() << "%22%3C%3E%5E%5C%7B%7C%7D" << "x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host/%22%3C%3E%5E%5C%7B%7C%7D" "?%22%3C%3E%5E%5C%7B%7C%7D#%22%3C%3E%5E%5C%7B%7C%7D"; - QTest::newRow("decoded-others") << QUrl("x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host" + QTest::newRow("decoded-reserved") << QUrl("x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host" "/%22%3C%3E%5E%5C%7B%7C%7D?%22%3C%3E%5E%5C%7B%7C%7D#%22%3C%3E%5E%5C%7B%7C%7D") - << int(QUrl::DecodeDelimiters) + << int(QUrl::DecodeReserved) << "\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}:\"<>^\\{|}" << "host" << "\"<>^\\{|}:\"<>^\\{|}@host" << "/\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}" -- cgit v1.2.3