summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/corelib/io/io.pri1
-rw-r--r--src/corelib/io/qurl.h13
-rw-r--r--src/corelib/io/qurlrecode.cpp504
-rw-r--r--tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp215
4 files changed, 733 insertions, 0 deletions
diff --git a/src/corelib/io/io.pri b/src/corelib/io/io.pri
index a3bc3afbcf..a70b3c9012 100644
--- a/src/corelib/io/io.pri
+++ b/src/corelib/io/io.pri
@@ -65,6 +65,7 @@ SOURCES += \
io/qresource_iterator.cpp \
io/qstandardpaths.cpp \
io/qurl.cpp \
+ io/qurlrecode.cpp \
io/qsettings.cpp \
io/qfsfileengine.cpp \
io/qfsfileengine_iterator.cpp \
diff --git a/src/corelib/io/qurl.h b/src/corelib/io/qurl.h
index 7c6cc29618..1600cb8fa3 100644
--- a/src/corelib/io/qurl.h
+++ b/src/corelib/io/qurl.h
@@ -82,6 +82,18 @@ public:
};
Q_DECLARE_FLAGS(FormattingOptions, FormattingOption)
+ enum ComponentFormattingOption {
+ FullyEncoded = 0x000000,
+ DecodeSpaces = 0x100000,
+ DecodeUnambiguousDelimiters = 0x200000,
+ DecodeAllDelimiters = DecodeUnambiguousDelimiters | 0x400000,
+ DecodeUnicode = 0x800000,
+
+ PrettyDecoded = DecodeSpaces | DecodeUnambiguousDelimiters | DecodeUnicode,
+ MostDecoded = PrettyDecoded | DecodeAllDelimiters
+ };
+ Q_DECLARE_FLAGS(ComponentFormattingOptions, ComponentFormattingOption)
+
QUrl();
#ifdef QT_NO_URL_CAST_FROM_STRING
explicit
@@ -236,6 +248,7 @@ inline uint qHash(const QUrl &url)
Q_DECLARE_TYPEINFO(QUrl, Q_MOVABLE_TYPE);
Q_DECLARE_SHARED(QUrl)
+Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::ComponentFormattingOptions)
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::FormattingOptions)
#ifndef QT_NO_DATASTREAM
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
new file mode 100644
index 0000000000..6a0517a7e5
--- /dev/null
+++ b/src/corelib/io/qurlrecode.cpp
@@ -0,0 +1,504 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Intel Corporation
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qurl.h"
+
+QT_BEGIN_NAMESPACE
+
+// ### move to qurl_p.h
+enum EncodingAction {
+ DecodeCharacter = 0,
+ LeaveCharacter = 1,
+ EncodeCharacter = 2
+};
+
+// From RFC 3896, Appendix A Collected ABNF for URI
+// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+// reserved = gen-delims / sub-delims
+// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+// / "*" / "+" / "," / ";" / "="
+static const uchar defaultActionTable[96] = {
+ 2, // space
+ 1, // '!' (sub-delim)
+ 2, // '"'
+ 1, // '#' (gen-delim)
+ 1, // '$' (gen-delim)
+ 2, // '%' (percent)
+ 1, // '&' (gen-delim)
+ 1, // "'" (sub-delim)
+ 1, // '(' (sub-delim)
+ 1, // ')' (sub-delim)
+ 1, // '*' (sub-delim)
+ 1, // '+' (sub-delim)
+ 1, // ',' (sub-delim)
+ 0, // '-' (unreserved)
+ 0, // '.' (unreserved)
+ 1, // '/' (gen-delim)
+
+ 0, 0, 0, 0, 0, // '0' to '4' (unreserved)
+ 0, 0, 0, 0, 0, // '5' to '9' (unreserved)
+ 1, // ':' (gen-delim)
+ 1, // ';' (sub-delim)
+ 2, // '<'
+ 1, // '=' (sub-delim)
+ 2, // '>'
+ 1, // '?' (gen-delim)
+
+ 1, // '@' (gen-delim)
+ 0, 0, 0, 0, 0, // 'A' to 'E' (unreserved)
+ 0, 0, 0, 0, 0, // 'F' to 'J' (unreserved)
+ 0, 0, 0, 0, 0, // 'K' to 'O' (unreserved)
+ 0, 0, 0, 0, 0, // 'P' to 'T' (unreserved)
+ 0, 0, 0, 0, 0, 0, // 'U' to 'Z' (unreserved)
+ 1, // '[' (gen-delim)
+ 2, // '\'
+ 1, // ']' (gen-delim)
+ 2, // '^'
+ 0, // '_' (unreserved)
+
+ 2, // '`'
+ 0, 0, 0, 0, 0, // 'a' to 'e' (unreserved)
+ 0, 0, 0, 0, 0, // 'f' to 'j' (unreserved)
+ 0, 0, 0, 0, 0, // 'k' to 'o' (unreserved)
+ 0, 0, 0, 0, 0, // 'p' to 't' (unreserved)
+ 0, 0, 0, 0, 0, 0, // 'u' to 'z' (unreserved)
+ 2, // '{'
+ 2, // '|'
+ 2, // '}'
+ 0, // '~' (unreserved)
+
+ 2 // BSKP
+};
+
+static inline bool isHex(ushort c)
+{
+ return (c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= '0' && c <= '9');
+}
+
+static inline bool isUpperHex(ushort c)
+{
+ // undefined behaviour if c isn't an hex char!
+ return c < 0x60;
+}
+
+static inline ushort toUpperHex(ushort c)
+{
+ return isUpperHex(c) ? c : c - 0x20;
+}
+
+static inline ushort decodeNibble(ushort c)
+{
+ return c >= 'a' ? c - 'a' + 0xA :
+ c >= 'A' ? c - 'A' + 0xA : c - '0';
+}
+
+static inline ushort encodeNibble(ushort c)
+{
+ static const uchar hexnumbers[] = "0123456789ABCDEF";
+ return hexnumbers[c & 0xf];
+}
+
+static void ensureDetached(QString &result, ushort *&output, const ushort *input, const ushort *end)
+{
+ if (!output) {
+ // now detach
+ // create enough space if the rest of the string needed to be percent-encoded
+ int charsProcessed = input - reinterpret_cast<const ushort *>(result.constData()) - 1;
+ int charsRemaining = end - input + 1;
+ int newSize = result.size() + 2 * charsRemaining;
+ result.resize(newSize);
+
+ // set the output variable
+ output = reinterpret_cast<ushort *>(result.data()) + charsProcessed;
+ }
+}
+
+static inline bool isUnicodeNonCharacter(uint ucs4)
+{
+ // Unicode has a couple of "non-characters" that one can use internally,
+ // but are not allowed to be used for text interchange.
+ //
+ // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
+ // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
+ // U+FDEF (inclusive)
+
+ return (ucs4 & 0xfffe) == 0xfffe
+ || (ucs4 - 0xfdd0U) < 16;
+}
+
+// returns true if we performed an UTF-8 decoding
+static uint encodedUtf8ToUcs4(QString &result, ushort *&output, const ushort *&input, const ushort *end, ushort decoded)
+{
+ if (decoded <= 0xC1) {
+ // an UTF-8 first character must be at least 0xC0
+ // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
+ return false;
+ }
+
+ int charsNeeded;
+ uint min_uc;
+ uint uc;
+ if (decoded < 0xe0) {
+ charsNeeded = 1;
+ min_uc = 0x80;
+ uc = decoded & 0x1f;
+ } else if (decoded < 0xf0) {
+ charsNeeded = 2;
+ min_uc = 0x800;
+ uc = decoded & 0x0f;
+ } else if (decoded < 0xf5) {
+ charsNeeded = 3;
+ min_uc = 0x10000;
+ uc = decoded & 0x07;
+ } else {
+ // the last Unicode character is U+10FFFF
+ // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
+ // therefore, a byte outside the range 0xC0..0xF4 is not the UTF-8 first byte
+ return false;
+ }
+
+ // are there enough remaining?
+ if (end - input < 3*charsNeeded + 2)
+ return false;
+
+ if (input[2] != '%')
+ return false;
+
+ // first continuation character
+ decoded = (decodeNibble(input[3]) << 4) | decodeNibble(input[4]);
+ if ((decoded & 0xc0) != 0x80)
+ return false;
+ uc <<= 6;
+ uc |= decoded & 0x3f;
+
+ if (charsNeeded > 1) {
+ if (input[5] != '%')
+ return false;
+
+ // second continuation character
+ decoded = (decodeNibble(input[6]) << 4) | decodeNibble(input[7]);
+ if ((decoded & 0xc0) != 0x80)
+ return false;
+ uc <<= 6;
+ uc |= decoded & 0x3f;
+
+ if (charsNeeded > 2) {
+ if (input[8] != '%')
+ return false;
+
+ // third continuation character
+ decoded = (decodeNibble(input[9]) << 4) | decodeNibble(input[10]);
+ if ((decoded & 0xc0) != 0x80)
+ return false;
+ uc <<= 6;
+ uc |= decoded & 0x3f;
+ }
+ }
+
+ // we've decoded something; safety-check it
+ if (uc < min_uc)
+ return false;
+ if (isUnicodeNonCharacter(uc) || (uc >= 0xD800 && uc <= 0xDFFF) || uc >= 0x110000)
+ return false;
+
+ // detach if necessary
+ if (!output) {
+ // create enough space if the rest of the string needed to be percent-encoded
+ int charsProcessed = input - reinterpret_cast<const ushort *>(result.constData()) - 1;
+ int charsRemaining = end - input - 2 - 3*charsNeeded;
+ int newSize = result.size() + 2 * charsRemaining;
+ result.resize(newSize);
+
+ // set the output variable
+ output = reinterpret_cast<ushort *>(result.data()) + charsProcessed;
+ }
+
+ if (!QChar::requiresSurrogates(uc)) {
+ // UTF-8 decoded and no surrogates are required
+ *output++ = uc;
+ } else {
+ // UTF-8 decoded to something that requires a surrogate pair
+ *output++ = QChar::highSurrogate(uc);
+ *output++ = QChar::lowSurrogate(uc);
+ }
+ input += charsNeeded * 3 + 2;
+ return true;
+}
+
+static void unicodeToEncodedUtf8(QString &result, ushort *&output, const ushort *&input, const ushort *end, ushort decoded)
+{
+ uint uc = decoded;
+ if (QChar::isHighSurrogate(uc)) {
+ if (QChar::isLowSurrogate(*input))
+ uc = QChar::surrogateToUcs4(uc, *input);
+ }
+
+ // note: we will encode bad UTF-16 to UTF-8
+ // but they don't get decoded back
+
+ // calculate the utf8 length
+ int utf8len = uc >= 0x10000 ? 4 : uc >= 0x800 ? 3 : 2;
+
+ // detach
+ if (!output) {
+ // create enough space if the rest of the string needed to be percent-encoded
+ int charsProcessed = input - reinterpret_cast<const ushort *>(result.constData()) - 1;
+ int charsRemaining = end - input;
+ int newSize = result.size() + 2 * charsRemaining - 1 + 3*utf8len;
+ result.resize(newSize);
+
+ // set the output variable
+ output = reinterpret_cast<ushort *>(result.data()) + charsProcessed;
+ } else {
+ // verify that there's enough space or expand
+ int charsRemaining = end - input;
+ int pos = output - reinterpret_cast<const ushort *>(result.constData());
+ int spaceRemaining = result.size() - pos;
+ if (spaceRemaining < 3*charsRemaining + 3*utf8len) {
+ // must resize
+ result.resize(result.size() + 3*utf8len);
+ output = reinterpret_cast<ushort *>(result.data()) + pos;
+ }
+ }
+
+ if (QChar::requiresSurrogates(uc))
+ ++input;
+
+ // write the sequence
+ if (uc < 0x800) {
+ // first of two bytes
+ uchar c = 0xc0 | uchar(uc >> 6);
+ *output++ = '%';
+ *output++ = encodeNibble(c >> 4);
+ *output++ = encodeNibble(c & 0xf);
+ } else {
+ uchar c;
+ if (uc > 0xFFFF) {
+ // first two of four bytes
+ c = 0xf0 | uchar(uc >> 18);
+ *output++ = '%';
+ *output++ = 'F';
+ *output++ = encodeNibble(c & 0xf);
+
+ // continuation byte
+ c = 0x80 | (uchar(uc >> 12) & 0x3f);
+ *output++ = '%';
+ *output++ = encodeNibble(c >> 4);
+ *output++ = encodeNibble(c & 0xf);
+ } else {
+ // first of three bytes
+ c = 0xe0 | uchar(uc >> 12);
+ *output++ = '%';
+ *output++ = 'E';
+ *output++ = encodeNibble(c & 0xf);
+ }
+
+ // continuation byte
+ c = 0x80 | (uchar(uc >> 6) & 0x3f);
+ *output++ = '%';
+ *output++ = encodeNibble(c >> 4);
+ *output++ = encodeNibble(c & 0xf);
+ }
+
+ // continuation byte
+ uchar c = 0x80 | (uc & 0x3f);
+ *output++ = '%';
+ *output++ = encodeNibble(c >> 4);
+ *output++ = encodeNibble(c & 0xf);
+}
+
+Q_AUTOTEST_EXPORT QString
+qt_urlRecode(const QString &component, QUrl::ComponentFormattingOptions encoding,
+ const uchar *tableModifications)
+{
+ uchar actionTable[sizeof defaultActionTable];
+ memcpy(actionTable, defaultActionTable, sizeof actionTable);
+ if (encoding & QUrl::DecodeSpaces)
+ actionTable[0] = DecodeCharacter; // decode
+
+ if (tableModifications) {
+ for (const ushort *p = tableModifications; *p; ++p)
+ actionTable[uchar(*p) - ' '] = *p >> 8;
+ }
+
+ QString result = component;
+ const ushort *input = reinterpret_cast<const ushort *>(component.constData());
+ const ushort * const end = input + component.length();
+ ushort *output = 0;
+
+ while (input != end) {
+ register ushort c = *input++;
+ register ushort decoded;
+ if (c == '%') {
+ // our input is always valid, so there are two hex characters for us to read here
+ decoded = (decodeNibble(input[0]) << 4) | decodeNibble(input[1]);
+ } else {
+ decoded = c;
+ }
+
+ EncodingAction action;
+ if (decoded < 0x20) {
+ // always encode control characters
+ action = EncodeCharacter;
+ } else if (decoded < 0x80) {
+ // use the table
+ action = EncodingAction(actionTable[decoded - ' ']);
+ } else {
+ // non-ASCII
+ bool decodeUnicode = encoding & QUrl::DecodeUnicode;
+
+ // should we leave it like this?
+ if ((c != '%' && decodeUnicode) || (c == '%' && !decodeUnicode)) {
+ action = LeaveCharacter;
+ } else if (decodeUnicode) {
+ // c == '%': decode the UTF-8 sequence
+ if (encodedUtf8ToUcs4(result, output, input, end, decoded))
+ continue;
+ action = LeaveCharacter;
+ } else {
+ // c != '%': encode the UTF-8 sequence
+ unicodeToEncodedUtf8(result, output, input, end, decoded);
+ continue;
+ }
+ }
+
+ // there are six possibilities:
+ // current \ action | DecodeCharacter | LeaveCharacter | EncodeCharacter
+ // decoded | 1:leave | 2:leave | 3:encode
+ // encoded | 4:decode | 5:leave | 6:leave
+
+ if (c != '%' && (action == LeaveCharacter || action == DecodeCharacter)) {
+ // cases 1 and 2: it's decoded and we're leaving it as is
+ // there's always enough memory allocated for a single character
+ if (output)
+ *output++ = c;
+ } else if (c == '%' && (action == LeaveCharacter || action == EncodeCharacter)) {
+ // cases 5 and 6: it's encoded and we're leaving it as it is
+ // except we're pedantic and we'll uppercase the hex
+ if (output || !isUpperHex(input[0]) || !isUpperHex(input[1])) {
+ ensureDetached(result, output, input, end);
+ *output++ = '%';
+ *output++ = toUpperHex(*input++);
+ *output++ = toUpperHex(*input++);
+ }
+ } else if (c == '%' && action == DecodeCharacter) {
+ // case 4: we need to decode
+ ensureDetached(result, output, input, end);
+ *output++ = decoded;
+ input += 2;
+ } else {
+ // must be case 3: we need to encode
+ ensureDetached(result, output, input, end);
+ *output++ = '%';
+ *output++ = encodeNibble(c >> 4);
+ *output++ = encodeNibble(c & 0xf);
+ }
+ }
+
+ if (output)
+ result.truncate(output - reinterpret_cast<const ushort *>(result.constData()));
+ return result;
+}
+
+Q_AUTOTEST_EXPORT QString
+qt_tolerantParsePercentEncoding(const QString &url)
+{
+ // are there any '%'
+ int firstPercent = url.indexOf(QLatin1Char('%'));
+ if (firstPercent == -1) {
+ // none found, the string is fine
+ return url;
+ }
+
+ // are there any invalid percents?
+ int nextPercent = firstPercent;
+ int percentCount = 0;
+
+ {
+ int len = url.length();
+ bool ok = true;
+ do {
+ ++percentCount;
+ if (nextPercent + 2 >= len ||
+ !isHex(url.at(nextPercent + 1).unicode()) ||
+ !isHex(url.at(nextPercent + 2).unicode())) {
+ ok = false;
+ }
+
+ nextPercent = url.indexOf(QLatin1Char('%'), nextPercent + 1);
+ } while (nextPercent != -1);
+
+ if (ok)
+ return url;
+ }
+
+ // we've found at least one invalid percent
+ // that means all of them are invalid
+ QString corrected(url.size() + percentCount * 2, Qt::Uninitialized);
+ ushort *output = reinterpret_cast<ushort *>(corrected.data());
+ const ushort *input = reinterpret_cast<const ushort *>(url.constData());
+ for (int i = 0; i <= firstPercent; ++i)
+ output[i] = input[i];
+
+ const ushort *const end = input + url.length();
+ output += firstPercent + 1;
+ input += firstPercent + 1;
+
+ // we've copied up to the first percent
+ // correct this one and all others
+ *output++ = '2';
+ *output++ = '5';
+ while (input != end) {
+ // copy verbatim until the next percent, inclusive
+ *output++ = *input;
+ if (*input == '%') {
+ *output++ = '2';
+ *output++ = '5';
+ }
+ ++input;
+ }
+ return corrected;
+}
+
+QT_END_NAMESPACE
diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
index 10c4736f68..c71acef148 100644
--- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
+++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/
**
** This file is part of the test suite of the Qt Toolkit.
@@ -49,6 +50,9 @@ Q_CORE_EXPORT extern void qt_nameprep(QString *source, int from);
Q_CORE_EXPORT extern bool qt_check_std3rules(const QChar *, int);
Q_CORE_EXPORT void qt_punycodeEncoder(const QChar *s, int ucLength, QString *output);
Q_CORE_EXPORT QString qt_punycodeDecoder(const QString &pc);
+Q_CORE_EXPORT QString qt_tolerantParsePercentEncoding(const QString &url);
+Q_CORE_EXPORT QString qt_urlRecode(const QString &component, QUrl::ComponentFormattingOptions encoding,
+ const ushort *tableModifications = 0);
QT_END_NAMESPACE
// For testsuites
@@ -72,6 +76,7 @@ struct ushortarray {
Q_DECLARE_METATYPE(ushortarray)
Q_DECLARE_METATYPE(QUrl::FormattingOptions)
+Q_DECLARE_METATYPE(QUrl::ComponentFormattingOptions)
class tst_QUrlInternal : public QObject
{
@@ -91,6 +96,14 @@ private Q_SLOTS:
void std3violations();
void std3deviations_data();
void std3deviations();
+
+ // percent-encoding internals
+ void correctEncodedMistakes_data();
+ void correctEncodedMistakes();
+ void encodingRecode_data();
+ void encodingRecode();
+ void encodingRecodeInvalidUtf8_data();
+ void encodingRecodeInvalidUtf8();
};
void tst_QUrlInternal::idna_testsuite_data()
@@ -745,6 +758,208 @@ void tst_QUrlInternal::std3deviations()
QVERIFY(!url.host().isEmpty());
}
+void tst_QUrlInternal::correctEncodedMistakes_data()
+{
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("expected");
+
+ QTest::newRow("null") << QString() << QString();
+ QTest::newRow("empty") << "" << "";
+
+ // these contain one invalid percent
+ QTest::newRow("%") << QString("%") << QString("%25");
+ QTest::newRow("3%") << QString("3%") << QString("3%25");
+ QTest::newRow("13%") << QString("13%") << QString("13%25");
+ QTest::newRow("13%!") << QString("13%!") << QString("13%25!");
+ QTest::newRow("13%!!") << QString("13%!!") << QString("13%25!!");
+ QTest::newRow("13%a") << QString("13%a") << QString("13%25a");
+ QTest::newRow("13%az") << QString("13%az") << QString("13%25az");
+
+ // two invalid percents
+ QTest::newRow("13%%") << "13%%" << "13%25%25";
+ QTest::newRow("13%a%a") << "13%a%a" << "13%25a%25a";
+ QTest::newRow("13%az%az") << "13%az%az" << "13%25az%25az";
+
+ // these are correct (idempotent)
+ QTest::newRow("13%25") << QString("13%25") << QString("13%25");
+ QTest::newRow("13%25%25") << QString("13%25%25") << QString("13%25%25");
+
+ // these contain one invalid and one valid
+ // the code assumes they are all invalid
+ QTest::newRow("13%13..%") << "13%13..%" << "13%2513..%25";
+ QTest::newRow("13%..%13") << "13%..%13" << "13%25..%2513";
+
+ // three percents, one invalid
+ QTest::newRow("%01%02%3") << "%01%02%3" << "%2501%2502%253";
+}
+
+void tst_QUrlInternal::correctEncodedMistakes()
+{
+ QFETCH(QString, input);
+ QFETCH(QString, expected);
+
+ QString output = qt_tolerantParsePercentEncoding(input);
+ QCOMPARE(output, expected);
+ QCOMPARE(output.isNull(), expected.isNull());
+}
+
+static void addUtf8Data(const char *name, const char *data)
+{
+ QString encoded = QByteArray(data).toPercentEncoding();
+ QString decoded = QString::fromUtf8(data);
+
+ QTest::newRow(QByteArray("decode-") + name) << encoded << QUrl::ComponentFormattingOptions(QUrl::DecodeUnicode) << decoded;
+ QTest::newRow(QByteArray("encode-") + name) << decoded << QUrl::ComponentFormattingOptions(QUrl::FullyEncoded) << encoded;
+}
+
+void tst_QUrlInternal::encodingRecode_data()
+{
+ typedef QUrl::ComponentFormattingOptions F;
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<F>("encodingMode");
+ QTest::addColumn<QString>("expected");
+
+ // -- idempotent tests --
+ for (int i = 0; i < 0x10; ++i) {
+ QByteArray code = QByteArray::number(i, 16);
+ F mode = QUrl::ComponentFormattingOption(i << 12);
+
+ QTest::newRow("null-0x" + code) << QString() << mode << QString();
+ QTest::newRow("empty-0x" + code) << "" << mode << "";
+
+ // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ // Unreserved characters are never encoded
+ QTest::newRow("alpha-0x" + code) << "abcABCZZzz" << mode << "abcABCZZzz";
+ QTest::newRow("digits-0x" + code) << "01234567890" << mode << "01234567890";
+ QTest::newRow("otherunreserved-0x" + code) << "-._~" << mode << "-._~";
+
+ // Control characters are always encoded
+ // Use uppercase because the output is also uppercased
+ QTest::newRow("control-nul-0x" + code) << "%00" << mode << "%00";
+ QTest::newRow("control-0x" + code) << "%0D%0A%1F%1A%7F" << mode << "%0D%0A%1F%1A%7F";
+
+ // The percent is always encoded
+ QTest::newRow("percent-0x" + code) << "25%2525" << mode << "25%2525";
+
+ // mixed control and unreserved
+ QTest::newRow("control-unreserved-0x" + code) << "Foo%00Bar%0D%0Abksp%7F" << mode << "Foo%00Bar%0D%0Abksp%7F";
+ }
+
+ // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ // / "*" / "+" / "," / ";" / "="
+ // in the default operation, delimiters don't get encoded or decoded
+ static const char delimiters[] = ":/?#[]@" "!$&'()*+,;=";
+ for (const char *c = delimiters; *c; ++c) {
+ QByteArray code = QByteArray::number(*c, 16);
+ QString encoded = QString("abc%") + code.toUpper() + "def" ;
+ QString decoded = QString("abc") + *c + "def" ;
+ QTest::newRow("delimiter-encoded-" + code) << encoded << F(QUrl::FullyEncoded) << encoded;
+ QTest::newRow("delimiter-decoded-" + code) << decoded << F(QUrl::FullyEncoded) << decoded;
+ }
+
+ // encode control characters
+ QTest::newRow("encode-control") << "\1abc\2\033esc" << F(QUrl::MostDecoded) << "%01abc%02%1Besc";
+ QTest::newRow("encode-nul") << QString::fromLatin1("abc\0def", 7) << F(QUrl::MostDecoded) << "abc%00def";
+
+ // space
+ QTest::newRow("space-leave-decoded") << "Hello World " << F(QUrl::DecodeSpaces) << "Hello World ";
+ QTest::newRow("space-leave-encoded") << "Hello%20World%20" << F(QUrl::FullyEncoded) << "Hello%20World%20";
+ QTest::newRow("space-encode") << "Hello World " << F(QUrl::FullyEncoded) << "Hello%20World%20";
+ QTest::newRow("space-decode") << "Hello%20World%20" << F(QUrl::DecodeSpaces) << "Hello World ";
+
+ // decode unreserved
+ QTest::newRow("unreserved-decode") << "%66%6f%6f%42a%72" << F(QUrl::FullyEncoded) << "fooBar";
+
+ // mix encoding with decoding
+ QTest::newRow("encode-control-decode-space") << "\1\2%200" << F(QUrl::DecodeSpaces) << "%01%02 0";
+ QTest::newRow("decode-space-encode-control") << "%20\1\2" << F(QUrl::DecodeSpaces) << " %01%02";
+
+ // decode and encode valid UTF-8 data
+ // invalid is tested in encodingRecodeInvalidUtf8
+ addUtf8Data("utf8-2char-1", "\xC2\x80"); // U+0080
+ addUtf8Data("utf8-2char-2", "\xDF\xBF"); // U+07FF
+ addUtf8Data("utf8-3char-1", "\xE0\xA0\x80"); // U+0800
+ addUtf8Data("utf8-3char-2", "\xED\x9F\xBF"); // U+D7FF
+ addUtf8Data("utf8-3char-3", "\xEE\x80\x80"); // U+E000
+ addUtf8Data("utf8-3char-4", "\xEF\xBF\xBD"); // U+FFFD
+ addUtf8Data("utf8-2char-1", "\xF0\x90\x80\x80"); // U+10000
+ addUtf8Data("utf8-4char-2", "\xF4\x8F\xBF\xBD"); // U+10FFFD
+
+ // longer UTF-8 sequences, mixed with unreserved
+ addUtf8Data("utf8-string-1", "R\xc3\xa9sum\xc3\xa9");
+ addUtf8Data("utf8-string-2", "\xDF\xBF\xE0\xA0\x80""A");
+ addUtf8Data("utf8-string-3", "\xE0\xA0\x80\xDF\xBF...");
+
+ // special cases: stuff we can encode, but not decode
+ QTest::newRow("unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
+ QTest::newRow("unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
+ QTest::newRow("unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
+
+ // a couple of Unicode strings with leading spaces
+ QTest::newRow("space-unicode") << QString::fromUtf8(" \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%C2%A0";
+ QTest::newRow("space-space-unicode") << QString::fromUtf8(" \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%C2%A0";
+ QTest::newRow("space-space-space-unicode") << QString::fromUtf8(" \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%20%C2%A0";
+
+ // hex case testing
+ QTest::newRow("FF") << "%FF" << F(QUrl::FullyEncoded) << "%FF";
+ QTest::newRow("Ff") << "%Ff" << F(QUrl::FullyEncoded) << "%FF";
+ QTest::newRow("fF") << "%fF" << F(QUrl::FullyEncoded) << "%FF";
+ QTest::newRow("ff") << "%ff" << F(QUrl::FullyEncoded) << "%FF";
+
+ // decode UTF-8 mixed with non-UTF-8 and unreserved
+ QTest::newRow("utf8-mix-1") << "%80%C2%80" << F(QUrl::DecodeUnicode) << QString::fromUtf8("%80\xC2\x80");
+ QTest::newRow("utf8-mix-2") << "%C2%C2%80" << F(QUrl::DecodeUnicode) << QString::fromUtf8("%C2\xC2\x80");
+ QTest::newRow("utf8-mix-3") << "%E0%C2%80" << F(QUrl::DecodeUnicode) << QString::fromUtf8("%E0\xC2\x80");
+ QTest::newRow("utf8-mix-3") << "A%C2%80" << F(QUrl::DecodeUnicode) << QString::fromUtf8("A\xC2\x80");
+ QTest::newRow("utf8-mix-3") << "%C2%80A" << F(QUrl::DecodeUnicode) << QString::fromUtf8("\xC2\x80""A");
+}
+
+void tst_QUrlInternal::encodingRecode()
+{
+ QFETCH(QString, input);
+ QFETCH(QString, expected);
+ QFETCH(QUrl::ComponentFormattingOptions, encodingMode);
+
+ // ensure the string is properly percent-encoded
+ QVERIFY2(input == qt_tolerantParsePercentEncoding(input), "Test data is not properly encoded");
+ QVERIFY2(expected == qt_tolerantParsePercentEncoding(expected), "Test data is not properly encoded");
+
+ QString output = qt_urlRecode(input, encodingMode);
+ QCOMPARE(output, expected);
+ QCOMPARE(output.isNull(), expected.isNull());
+}
+
+void tst_QUrlInternal::encodingRecodeInvalidUtf8_data()
+{
+ QTest::addColumn<QByteArray>("utf8");
+ QTest::addColumn<QString>("utf16");
+
+ extern void loadInvalidUtf8Rows();
+ loadInvalidUtf8Rows();
+
+ extern void loadNonCharactersRows();
+ loadNonCharactersRows();
+
+ QTest::newRow("utf8-mix-4") << QByteArray("\xE0!A2\x80");
+ QTest::newRow("utf8-mix-5") << QByteArray("\xE0\xA2!80");
+ QTest::newRow("utf8-mix-5") << QByteArray("\xE0\xA2\x33");
+}
+
+void tst_QUrlInternal::encodingRecodeInvalidUtf8()
+{
+ QFETCH(QByteArray, utf8);
+ QString input = utf8.toPercentEncoding();
+
+ QString output;
+ output = qt_urlRecode(input, QUrl::DecodeUnicode);
+ QCOMPARE(output, input);
+
+ // this is just control
+ output = qt_urlRecode(input, QUrl::FullyEncoded);
+ QCOMPARE(output, input);
+}
+
QTEST_APPLESS_MAIN(tst_QUrlInternal)
#include "tst_qurlinternal.moc"